The kNN Algorithm

Posted by 大雄

This post walks through the kNN (k-nearest neighbors) algorithm: a query point is classified by a majority vote among the labels of its k closest training samples. The code below covers a basic classifier, a dating-preference example, and a handwritten-digit recognition example.

import numpy as np
import operator
import os
def createDataset():
    # a tiny toy dataset: four 2-D points and their class labels
    group = np.array([[1.0, 1.1], [1.0, 1.0], [0, 0], [0, 0.1]])
    labels = ['A', 'A', 'B', 'B']
    return group, labels

def classify0(inX, dataSet, labels, k):
    # Euclidean distance from inX to every row of dataSet
    dataSetSize = dataSet.shape[0]
    diffMat = np.tile(inX, (dataSetSize, 1)) - dataSet
    sqDiffMat = diffMat ** 2
    sqDistances = sqDiffMat.sum(axis=1)
    distances = sqDistances ** 0.5
    sortDistancesIndex = distances.argsort()
    # count the votes of the k nearest neighbors
    classCount = {}
    for i in range(k):
        voteIlabel = labels[sortDistancesIndex[i]]
        classCount[voteIlabel] = classCount.get(voteIlabel, 0) + 1
    # return the label with the most votes
    sortedClassCount = sorted(classCount.items(), key=operator.itemgetter(1), reverse=True)
    return sortedClassCount[0][0]
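
As a quick sanity check, here is a small usage sketch (my own addition, not from the original post) using the toy data from createDataset. With k=3, the query point [0, 0.2] has the two 'B' samples as its nearest neighbors, so it should come back as 'B':

group, labels = createDataset()
print(classify0([0, 0.2], group, labels, 3))   # expected output: B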
def filematrix(filename):
    # parse a text file whose lines hold three tab-separated numeric features and an integer class label
    fr = open(filename)
    arrayOfLines = fr.readlines()
    numberOfLines = len(arrayOfLines)
    returnMat = np.zeros((numberOfLines, 3))
    classLabelVector = []
    index = 0
    for line in arrayOfLines:
        line = line.strip()
        listFromLine = line.split('\t')
        returnMat[index, :] = listFromLine[0:3]
        classLabelVector.append(int(listFromLine[-1]))
        index += 1
    return returnMat, classLabelVector
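
To illustrate the expected file layout, here is a minimal sketch (the file name and the two rows are made up purely for illustration): each line carries three tab-separated features followed by an integer label.

sample = "40920\t8.3\t0.95\t3\n14488\t7.2\t1.67\t2\n"   # two fictitious rows
with open('dts_sample.txt', 'w') as f:                   # hypothetical file name
    f.write(sample)
mat, labels = filematrix('dts_sample.txt')
print(mat.shape, labels)                                 # (2, 3) [3, 2]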

def autoNorm(dataSet):
    # scale every feature column into [0, 1]: (x - min) / (max - min)
    minVals = dataSet.min(0)
    maxVals = dataSet.max(0)
    ranges = maxVals - minVals
    dtRow = dataSet.shape[0]
    normDataset = dataSet - np.tile(minVals, (dtRow, 1))
    resultDataset = normDataset / np.tile(ranges, (dtRow, 1))
    return resultDataset, ranges, minVals
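
A short check of autoNorm on the toy data (again my own example): the per-column minimum maps to 0 and each column is divided by its range, so every value lands in [0, 1].

group, labels = createDataset()
normGroup, ranges, minVals = autoNorm(group)
print(minVals)     # [0. 0.]
print(ranges)      # [1.  1.1]
print(normGroup)   # first row becomes [1. 1.], last row roughly [0. 0.0909]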
def datingClassTest():
    hoRatio = 0.10                      # hold out 10% of the data for testing
    errorCount = 0.0
    datingMat, datingLabels = filematrix('dts.txt')
    normMat, normRanges, normMin = autoNorm(datingMat)
    dataRows = normMat.shape[0]
    testDataRows = int(dataRows * hoRatio)
    for i in range(testDataRows):
        classifierResult = classify0(normMat[i, :], normMat[testDataRows:dataRows, :],
                                     datingLabels[testDataRows:dataRows], 3)
        print("the classifier came back with: %d, the real answer is: %d" % (classifierResult, datingLabels[i]))
        if classifierResult != datingLabels[i]:
            errorCount += 1.0
    print("the total error rate is: %f" % (errorCount / float(testDataRows)))
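
The test above fixes k=3. A hypothetical helper (not in the original post) could reuse the same hold-out split to compare a few values of k, assuming 'dts.txt' is available:

def datingKSweep(ks=(1, 3, 5, 7)):
    # hypothetical helper: repeat the hold-out test of datingClassTest for several k values
    datingMat, datingLabels = filematrix('dts.txt')
    normMat, ranges, minVals = autoNorm(datingMat)
    dataRows = normMat.shape[0]
    testDataRows = int(dataRows * 0.10)
    for k in ks:
        errors = 0
        for i in range(testDataRows):
            result = classify0(normMat[i, :], normMat[testDataRows:dataRows, :],
                               datingLabels[testDataRows:dataRows], k)
            if result != datingLabels[i]:
                errors += 1
        print("k=%d, error rate=%.3f" % (k, errors / float(testDataRows)))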


def classifyPerson():
    # class labels in the data file are assumed to be 1, 2 and 3, mapped to these descriptions
    resultList = ['not attractive', 'somewhat attractive', 'very attractive']
    percentTats = float(input("percentage of time spent playing video games? "))
    ffMiles = float(input("frequent flier miles earned per year? "))
    iceCream = float(input("liters of ice cream consumed per week? "))
    datingDataMat, datingLabels = filematrix('dts.txt')
    normMat, ranges, minVals = autoNorm(datingDataMat)
    inArr = np.array([ffMiles, percentTats, iceCream])
    # normalize the query point with the same min/range as the training data
    classifierResult = classify0((inArr - minVals) / ranges, normMat, datingLabels, 3)
    print("You will probably find this person:", resultList[classifierResult - 1])
    
# Part 2: handwriting recognition system
# Convert a 32x32 binary image matrix into a 1x1024 vector

def img2vector(filename):
    returnVect = np.zeros((1,1024))
    fr = open(filename)
    for i in range(32):
        lineStr = fr.readline()
        for j in range(32):
            returnVect[0, 32*i+j] = int(lineStr[j])
    return returnVect
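
Each digit file is assumed to contain 32 lines of 32 characters, each '0' or '1'. A small sketch (the file name is made up) that writes an all-zero image and converts it:

with open('sample_digit.txt', 'w') as f:          # hypothetical file name
    for _ in range(32):
        f.write('0' * 32 + '\n')
vect = img2vector('sample_digit.txt')
print(vect.shape)        # (1, 1024)
print(int(vect.sum()))   # 0, since every pixel is '0'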


# Test code for the handwriting recognition system
def handwritingClassTest():
    hwLabels = []
    trainingFileList = os.listdir('trainingDigits')      # list the training files
    m = len(trainingFileList)
    trainingMat = np.zeros((m, 1024))
    for i in range(m):
        fileNameStr = trainingFileList[i]
        fileStr = fileNameStr.split('.')[0]              # the digit is encoded in the file name, before the underscore
        classStr = int(fileStr.split('_')[0])
        hwLabels.append(classStr)                        # training label
        trainingMat[i, :] = img2vector('trainingDigits/%s' % fileNameStr)
    testFileList = os.listdir('testDigits')
    errorCount = 0.0
    mTest = len(testFileList)
    for i in range(mTest):
        fileNameStr = testFileList[i]
        fileStr = fileNameStr.split('.')[0]
        classStr = int(fileStr.split('_')[0])
        vectorUnderTest = img2vector('testDigits/%s' % fileNameStr)
        classifierResult = classify0(vectorUnderTest, trainingMat, hwLabels, 3)
        print("the classifier came back with: %d, the real answer is: %d" % (classifierResult, classStr))
        if classifierResult != classStr:
            errorCount += 1.0
    print("\nthe total number of errors is: %d" % errorCount)
    print("\nthe total error rate is: %f" % (errorCount / float(mTest)))
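
To try the examples, one option is a small entry point like the following (my own addition); which call to uncomment depends on whether 'dts.txt', 'trainingDigits/' and 'testDigits/' exist next to the script:

if __name__ == '__main__':
    datingClassTest()
    # classifyPerson()
    # handwritingClassTest()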

 
