import pandas as pd
# --- Data exploration: per-column missing-value count, maximum and minimum ---
# NOTE(review): the original lines carried backslash-escaped quotes (\') and
# doubled path separators, which is not valid Python; they are restored to
# plain literals here. Confirm the paths match the local machine.
datafile = r'D:\python学习\data\data.csv'      # raw input data
resultfile = r'D:\python学习\data\explore.csv'  # exploration summary output

data = pd.read_csv(datafile, encoding='utf-8')

# describe(include='all') summarises every column; percentiles=[] keeps only
# the default median row. Transposing gives one row per source column.
explore = data.describe(percentiles=[], include='all').T

# 'count' is the number of non-null entries, so the difference from the total
# row count is the number of missing values in that column.
explore['null'] = len(data) - explore['count']

# Keep only the three statistics of interest and give them the report headers.
explore = explore[['null', 'max', 'min']]
explore.columns = [u'空值数', u'最大值', u'最小值']
explore.to_csv(resultfile)
from datetime import datetime
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt;
# --- Histogram of the number of members enrolled per year ---
# Parse the enrollment date column in one vectorised call instead of a per-row
# strptime lambda; the expected format is unchanged ('%Y/%m/%d').
ffp = pd.to_datetime(data['FFP_DATE'], format='%Y/%m/%d')
# Missing dates parse to NaT and would break the automatic binning, so drop them.
ffp_year = ffp.dt.year.dropna()

# Use a font that can render the Chinese labels, and keep the minus sign
# displayable with that font.
plt.rcParams['font.sans-serif'] = 'SimHei'
plt.rcParams['axes.unicode_minus'] = False

# Draw the histogram of enrollment counts per year.
fig = plt.figure(figsize=(8, 5))
plt.hist(ffp_year, bins='auto', color='#0504aa')
plt.xlabel('年份')
plt.ylabel('入会人数')
plt.title('各年份会员入会人数3127')
plt.show()
# The original referenced plt.close without calling it, so the figure was
# never released; call it explicitly.
plt.close()
# --- Box plot of the member age distribution ---
# Extract member ages, discarding missing values before the integer cast
# (NaN cannot be converted to int64).
age = data['AGE'].dropna().astype('int64')

# Draw the box plot of member ages.
fig = plt.figure(figsize=(5, 10))
plt.boxplot(
    age,
    patch_artist=True,  # draw the box as a filled patch so facecolor applies
    boxprops={'facecolor': 'lightblue'},  # must be a dict; braces were missing
)
# Label the single box (drawn at x position 1) through xticks rather than the
# boxplot `labels=` keyword, which newer Matplotlib releases deprecate.
plt.xticks([1], ['会员年龄'])
plt.title('会员年龄分布箱型图3127')
plt.grid(axis='y')
plt.show()
# The original fused "plt.show()plt.close" into one invalid statement and
# never invoked close; show and close are now two separate calls.
plt.close()
# --- Bar chart of the number of members in each membership tier ---
# Count the tiers once (the original recomputed the counts three times with
# the deprecated top-level pd.value_counts). reindex() returns 0 for a tier
# that does not occur in the data instead of raising KeyError.
tier_levels = [4, 5, 6]
tier_counts = data['FFP_TIER'].value_counts().reindex(tier_levels, fill_value=0)
lv_four, lv_five, lv_six = (tier_counts[level] for level in tier_levels)

fig = plt.figure(figsize=(8, 5))
bar_positions = range(len(tier_levels))
plt.bar(
    x=bar_positions,
    height=[lv_four, lv_five, lv_six],
    width=0.4,
    alpha=0.8,
    color='blue',
)
# Replace the 0..2 bar positions with the actual tier numbers.
plt.xticks(list(bar_positions), [str(level) for level in tier_levels])
plt.xlabel('会员等级')
plt.ylabel('会员人数')
plt.title('3127会员各级别人数')
plt.show()
plt.close()
# --- Pie chart of the member gender ratio ---
# Count genders once with Series.value_counts (the top-level pd.value_counts
# is deprecated). reindex() yields 0 for a gender missing from the data
# instead of raising KeyError.
gender_labels = ['男', '女']
gender_counts = data['GENDER'].value_counts().reindex(gender_labels, fill_value=0)
male, female = (gender_counts[label] for label in gender_labels)

fig = plt.figure(figsize=(7, 4))
plt.pie(
    [male, female],
    labels=gender_labels,
    colors=['gold', 'purple'],
    autopct='%1.1f%%',  # print each share with one decimal place
)
plt.title('3127会员性别比例')
plt.show()
# Release the figure, as the preceding chart does.
plt.close()