python学习笔记 python实现k-means聚类
Posted
tags:
篇首语：本文由小常识网（cha138.com）小编为大家整理，主要介绍了“Python 学习笔记：Python 实现 k-means 聚类”相关的知识，希望对你有一定的参考价值。
# -*- coding: utf-8 -*-
"""
K-means clustering of the rows of a CSV file.

Created on Thu Mar 16 14:52:58 2017

@author: Jarvis
"""
import math
import random

import numpy as np
import pandas as pd
from pandas import Series, DataFrame

try:
    # Nothing in this script calls TensorFlow, so it must not be a hard
    # requirement; the name is kept for code that still expects ``tf``.
    import tensorflow as tf
except ImportError:
    tf = None

# Feature table used by ``get_vecs`` when no frame is passed explicitly.
# It is filled in by ``main``.
data = None


def cal_dis(a, b):
    """Return the Euclidean distance between the vectors *a* and *b*.

    Components are paired with ``zip``, so surplus components of the longer
    vector are ignored.
    """
    return math.sqrt(sum((x - y) * (x - y) for x, y in zip(a, b)))


def is_same_series(a, b):
    """Return True when every paired component of *a* and *b* is equal."""
    return all(x == y for x, y in zip(a, b))


def is_constant_vec(a, b):
    """Return True when two centroid lists hold pairwise identical vectors.

    An empty list on either side counts as "not equal", which lets a caller
    start with an empty list of previous centroids.
    """
    if len(a) == 0 or len(b) == 0:
        return False
    return all(is_same_series(x, y) for x, y in zip(a, b))


def init_typeSet(init_set, type_num):
    """Append *type_num* empty member lists to *init_set* and return it."""
    init_set.extend([] for _ in range(type_num))
    return init_set


def Mindis_type_no(x, vecs, typ_num=4):
    """Return the index of the centroid in *vecs* that is closest to *x*.

    Only the first *typ_num* centroids are considered (all of them when
    *vecs* is shorter).  Ties go to the lowest index.

    Raises:
        ValueError: if there is no centroid to compare against.
    """
    distances = [cal_dis(x, centre) for centre in vecs[:typ_num]]
    if not distances:
        raise ValueError("Mindis_type_no needs at least one centroid")
    # min() keeps the first minimal element, i.e. the lowest index on ties.
    return min(range(len(distances)), key=distances.__getitem__)


def reduce_mean(vecs):
    """Return the component-wise mean of the vectors in *vecs*.

    The vectors must support ``copy()``, element-wise ``+`` and division by
    an integer (pandas Series and numpy arrays do).

    Raises:
        ValueError: if *vecs* is empty.
    """
    if len(vecs) == 0:
        raise ValueError("reduce_mean needs at least one vector")
    total = vecs[0].copy()
    for vec in vecs[1:]:
        total = total + vec
    return total / len(vecs)


def get_vecs(sets, frame=None):
    """Return the rows at the positions listed in *sets*.

    Rows are taken from *frame*, or from the module-level ``data`` table
    when *frame* is omitted.
    """
    source = data if frame is None else frame
    return [source.iloc[i] for i in sets]


def load_data(path="NDVI_NDWI_all.csv"):
    """Read the header-less, GBK-encoded CSV at *path* and drop columns 0-2."""
    frame = pd.read_csv(path, header=None, encoding="gbk")
    return frame.drop(columns=[0, 1, 2])


def k_means(frame, type_num=4, seed=None, max_iter=300, verbose=False):
    """Cluster the rows of *frame* into *type_num* groups.

    Args:
        frame: table whose rows are the vectors to cluster.
        type_num: number of clusters.
        seed: seed for choosing the initial centroids; ``None`` gives a
            different choice on every run.
        max_iter: upper bound on the number of assign/update rounds.
        verbose: print the member lists of every cluster after each round.

    Returns:
        ``(type_set, centroids)`` where ``type_set[i]`` lists the row
        positions assigned to cluster ``i`` and ``centroids[i]`` is its mean.

    Raises:
        ValueError: if *type_num* is not between 1 and the number of rows.
    """
    size = len(frame)
    if not 0 < type_num <= size:
        raise ValueError(
            "type_num must be between 1 and the number of rows (%d)" % size
        )

    # Row extraction does not depend on the round, so do it once.
    rows = get_vecs(range(size), frame)

    # Cluster centroids, started from distinct randomly chosen rows.
    rng = random.Random(seed)
    centroids = [rows[i] for i in rng.sample(range(size), type_num)]

    type_set = init_typeSet([], type_num)
    for _ in range(max_iter):
        type_set = init_typeSet([], type_num)
        for j, row in enumerate(rows):
            type_set[Mindis_type_no(row, centroids, type_num)].append(j)

        if verbose:
            for members in type_set:
                print('--------------------------------')
                print(members)

        previous = centroids
        # A cluster that lost all of its members keeps its old centroid.
        centroids = [
            reduce_mean([rows[j] for j in members]) if members else old
            for members, old in zip(type_set, previous)
        ]
        # Compare only after the update: unchanged centroids mean the
        # assignment above is final.
        if is_constant_vec(previous, centroids):
            break

    return type_set, centroids


def write_clusters(type_set, path="output.txt"):
    """Write every cluster's header line and member row positions to *path*."""
    with open(path, "w", encoding="utf-8") as out:
        for i, members in enumerate(type_set):
            print("type %d" % i, file=out)
            for j in members:
                print(j, file=out)


def main():
    """Cluster ``NDVI_NDWI_all.csv`` into four groups and save the result."""
    global data  # get_vecs() falls back to this table
    data = load_data("NDVI_NDWI_all.csv")
    type_set, _ = k_means(data, type_num=4, verbose=True)
    write_clusters(type_set, "output.txt")


if __name__ == "__main__":
    main()
以上是关于python学习笔记 python实现k-means聚类的主要内容,如果未能解决你的问题,请参考以下文章
python数据分析与挖掘学习笔记-公司客户价值判断分析与聚类算法