1. 简单例子
步骤
1.1 计算已知点和被求点的距离
1.2 按距离递增排序
1.3 取距离最近的前 k 个点,将其中出现次数最多的类别作为目标分类
from numpy import *
import operator


def createDateSet():
    """Build the toy training set used by the kNN example.

    Returns:
        A ``(group, labels)`` pair. ``group`` is a 4x2 array holding one
        sample point per row; ``labels`` is a list with the class label
        of the corresponding row.
    """
    group = array([[1.0, 1.1], [1.0, 1.0], [0, 0], [0, 0.1]])
    labels = ['A', 'A', 'B', 'B']
    return group, labels


def classify0(inX, dataSet, labels, k):
    """Classify ``inX`` by majority vote among its k nearest samples.

    Args:
        inX: The point to classify (one coordinate per column of
            ``dataSet``).
        dataSet: 2-D array of known sample points, one per row.
        labels: Class label of each row of ``dataSet``.
        k: Number of nearest neighbours that take part in the vote.

    Returns:
        The label that occurs most often among the ``k`` samples closest
        to ``inX`` (Euclidean distance).

    Raises:
        IndexError: If ``k`` is less than 1 or greater than the number of
            rows in ``dataSet``.
    """
    dataSetSize = dataSet.shape[0]

    # Repeat inX once per sample row so it can be subtracted row-wise.
    diffMat = tile(inX, (dataSetSize, 1)) - dataSet
    sqDistances = (diffMat ** 2).sum(axis=1)
    distance = sqDistances ** 0.5

    # argsort() yields the row indices ordered from nearest to farthest.
    sortDistIndices = distance.argsort()

    classCount = {}
    for i in range(k):
        voteLabel = labels[sortDistIndices[i]]
        classCount[voteLabel] = classCount.get(voteLabel, 0) + 1

    # items() (rather than the Python-2-only iteritems()) runs on both
    # Python 2 and Python 3; sort the (label, votes) pairs by votes,
    # highest first.
    sortedClassCount = sorted(classCount.items(),
                              key=operator.itemgetter(1),
                              reverse=True)
    return sortedClassCount[0][0]


if __name__ == "__main__":
    group, labels = createDateSet()
    inX = [1.1, 0.2]
    k = 3
    aimClass = classify0(inX, group, labels, k)
    print(aimClass)
语法解析
a. shape 是数组的属性(不是方法,不需要加括号),返回矩阵各个维度的长度;例如 dataSet.shape[0] 为行数
b. tile,举例
>>> a
[1, 2]
>>> tile(a, 2)
array([1, 2, 1, 2])
>>> tile(a, (2,2))
array([[1, 2, 1, 2],
       [1, 2, 1, 2]])
>>> tile(a, (3, 2,2))
array([[[1, 2, 1, 2],
        [1, 2, 1, 2]],
       [[1, 2, 1, 2],
        [1, 2, 1, 2]],
       [[1, 2, 1, 2],
        [1, 2, 1, 2]]])
c. sortDistIndices = distance.argsort() 返回将数组按升序排序后,各元素在原数组中的下标(索引),而不是名次;例如 sortDistIndices[0] 是距离最小的点的下标
d. sortedClassCount = sorted(classCount.iteritems(), key = operator.itemgetter(1), reverse=True) 按字典的值(即各类别的票数)对键值对进行降序排序,得到由 (类别, 票数) 元组组成的列表,第一个元素即票数最多的类别;注意 iteritems() 仅适用于 Python 2,Python 3 中应改用 items()