python实现决策树分类
Posted by BabyGo000
tags:
篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了python实现决策树分类相关的知识,希望对你有一定的参考价值。
原始数据集:
变换后的数据集直接体现在下面的程序代码中,这里就不再截图了。
构建决策树的代码如下:
# -*- coding: utf-8 -*-
'''
2017.6.25 author: Erin
function: "decision tree" ID3
'''
import numpy as np
import pandas as pd
from math import log
import operator
def load_data():
    """Return the hard-coded training set and its feature names.

    Returns:
        A ``(data, features)`` tuple. ``data`` is a list of 14 records, each
        a list of four attribute values followed by the class label
        (``'yes'`` / ``'no'``) in the last position. ``features`` lists the
        names of the four attribute columns, in column order.

    A fresh list of lists is built on every call, so callers may mutate the
    result freely.
    """
    data = [
        ['teenager', 'high', 'no', 'same', 'no'],
        ['teenager', 'high', 'no', 'good', 'no'],
        ['middle_aged', 'high', 'no', 'same', 'yes'],
        ['old_aged', 'middle', 'no', 'same', 'yes'],
        ['old_aged', 'low', 'yes', 'same', 'yes'],
        ['old_aged', 'low', 'yes', 'good', 'no'],
        ['middle_aged', 'low', 'yes', 'good', 'yes'],
        ['teenager', 'middle', 'no', 'same', 'no'],
        ['teenager', 'low', 'yes', 'same', 'yes'],
        ['old_aged', 'middle', 'yes', 'same', 'yes'],
        ['teenager', 'middle', 'yes', 'good', 'yes'],
        ['middle_aged', 'middle', 'no', 'good', 'yes'],
        ['middle_aged', 'high', 'yes', 'same', 'yes'],
        ['old_aged', 'middle', 'no', 'good', 'no'],
    ]
    features = ['age', 'input', 'student', 'level']
    return data, features
def cal_entropy(dataSet):
    """Return the Shannon entropy (base 2) of the class labels in ``dataSet``.

    Args:
        dataSet: Sequence of records; the last element of each record is
            taken as its class label.

    Returns:
        The entropy in bits as a float. An empty data set yields ``0.0``.

    Example:
        For 9 records labelled ``'yes'`` and 5 labelled ``'no'`` the result
        is approximately ``0.9402859586706309``.
    """
    numEntries = len(dataSet)

    # Tally how many records carry each class label.
    labelCounts = {}
    for featVec in dataSet:
        label = featVec[-1]
        labelCounts[label] = labelCounts.get(label, 0) + 1

    entropy = 0.0
    for count in labelCounts.values():
        # Convert to float *before* dividing so the ratio stays a true
        # fraction even where ``/`` performs integer division.
        p_i = float(count) / numEntries
        entropy -= p_i * log(p_i, 2)  # second argument of log() is the base
    return entropy
def split_data(data,feature_index,value):
\'\'\'
划分数据集
feature_index:用于划分特征的列数,例如“年龄”
value:划分后的属性值:例如“青少年”
\'\'\'
data_split=[]#划分后的数据集
for feature in data:
if feature[feature_index]==value:
reFeature=feature[:feature_index]
reFeature.extend(feature[feature_index