[星期-时间]维度,解析日期绘制散点图,python
Posted zhangphil
tags:
篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了[星期-时间]维度,解析日期绘制散点图,python相关的知识,希望对你有一定的参考价值。
[星期-时间]维度,解析日期绘制散点图,python
有一批数据,其中每一行代表一条事件,每条事件基本上都包含年月日时分。这样一来,时间就可以代表一次事件:把事件按时间(时分)分解到x坐标轴,按星期几分解到y坐标轴,形成统计图表。事件发生次数最多的[星期-时间]点,用红色标记。
import re
import matplotlib.pyplot as plt
import dateutil.parser as ps
from collections import Counter
from pprint import pp
import numpy as np
from fuzzywuzzy import fuzz
# Path of the text file to scan (placeholder; point it at the real data file).
FILE_PATH = "数据文件路径"

# Keywords that mark a line as an event of interest (placeholders).
KEYS = [
    "关键词1",
    "关键词2",
    "关键词3",
]

# Minimum fuzzy-match score a line must reach against a keyword to be counted.
threshold = 90
# Single-pass mapping from the Chinese date/time unit characters to separators:
# year/month -> '-', day -> ' ', hour -> ':', minute -> removed.
_DATE_UNIT_TABLE = str.maketrans({
    '年': '-',
    '月': '-',
    '日': ' ',
    '时': ':',
    '分': None,
})


def change_str(date_s):
    """Rewrite a Chinese-formatted timestamp into a separator-based form.

    Example: '2023年5月6日7时8分' becomes '2023-5-6 7:8'.

    Args:
        date_s: Text containing the unit characters 年, 月, 日, 时, 分.

    Returns:
        The text with every unit character replaced by its separator;
        characters other than those five are left untouched.
    """
    return date_s.translate(_DATE_UNIT_TABLE)
def read_file():
    """Collect the timestamps of all lines in FILE_PATH that match a keyword.

    Each line is compared against every entry of KEYS with
    fuzz.partial_ratio; the first keyword whose score reaches `threshold`
    makes the line count as one event. The event's timestamp, written as
    xxxx年xx月xx日xx时xx分, is extracted from the line, normalised with
    change_str and parsed with dateutil. A progress report is printed for
    every matched line, whether or not its timestamp could be parsed.

    Returns:
        A list of datetime objects, one per matched line whose timestamp
        was found and parsed successfully.
    """
    # Matches xxxx年xx月xx日xx时xx分, e.g. 2023年5月6日7时8分.
    date_pattern = re.compile(r'\d{4}年\d{1,2}月\d{1,2}日\d{1,2}时\d{1,2}分')
    report = '第{number}行,相似度{ratio},时间:{case_time} - {content}'

    all_case_time = []
    case_count = 0
    # The context manager closes the file even if scanning raises.
    with open(FILE_PATH, 'r', encoding='UTF-8') as file:
        for cnt, line in enumerate(file, start=1):  # cnt is the 1-based line number
            for k in KEYS:
                pr = fuzz.partial_ratio(line, k)
                if pr < threshold:
                    continue

                print('-----')
                print(f'第{case_count}件')
                case_count = case_count + 1

                mat = date_pattern.search(line)
                if mat is None:
                    # The line matched a keyword but carries no timestamp.
                    t_str = '-解析异常-'
                else:
                    t_str = change_str(mat.group())
                    try:
                        # Keep the parsed datetime for later charting.
                        all_case_time.append(ps.parse(t_str))
                    except (ValueError, OverflowError):
                        # dateutil signals unparseable input with these.
                        print('解析日期失败')

                pp(report.format(number=cnt, ratio=pr, case_time=t_str, content=line))
                # One keyword hit is enough; do not count the line twice.
                break

    return all_case_time
# Convert the numbers 0..6 to Chinese weekday names.
# 0 is Monday (星期一), 6 is Sunday (星期日), and so on.
def number_to_weekday(number):
    """Return the Chinese weekday name for a weekday index.

    Args:
        number: Weekday index, 0 for Monday through 6 for Sunday (the same
            numbering that datetime.weekday() uses).

    Returns:
        A string such as '星期一' or '星期日'.

    Raises:
        IndexError: If `number` is greater than 6.
    """
    zh = ['一', '二', '三', '四', '五', '六', '日']
    return f'星期{zh[number]}'
def date_to_points():
    """Turn every event timestamp into a chart point and plot the result.

    Reads the timestamps with read_file(), builds one tuple per timestamp
    of the form (hour, weekday, (hour, hour + 1), datetime) and passes the
    list to to_chart(). `weekday` follows datetime.weekday(): 0 is Monday,
    6 is Sunday.
    """
    points = []
    for dt in read_file():
        # The one-hour slot containing dt always starts at dt.hour, so no
        # search over the 24 slots is needed.
        hour_slot = (dt.hour, dt.hour + 1)
        points.append((dt.hour, dt.weekday(), hour_slot, dt))
    to_chart(points)
def to_chart(points, top_n=50, red_count=5):
    """Draw a weekday-by-hour scatter chart of event frequencies.

    Points are grouped by their (hour, weekday) pair. The `top_n` most
    frequent pairs are drawn, each with a marker area that grows with its
    count; the `red_count` most frequent ones are red, the rest gray.

    Args:
        points: Sequence of tuples whose first two items are the hour
            (x axis) and the weekday index (y axis).
        top_n: How many of the most frequent (hour, weekday) pairs to draw.
        red_count: How many of the leading pairs to highlight in red.
    """
    print('points', points)

    xy = [(point[0], point[1]) for point in points]
    print('xy', xy)

    # Count how often each (hour, weekday) pair occurs.
    c_xy = Counter(xy)
    print('c_xy', c_xy)

    # Keep only the top_n most frequent pairs, most frequent first.
    max_count = c_xy.most_common(top_n)
    print('max_count', max_count)

    ax_x = [cell[0] for cell, _ in max_count]
    ax_y = [cell[1] for cell, _ in max_count]
    sizes = [count * 10 + 10 for _, count in max_count]
    colors = ['red' if rank < red_count else 'gray' for rank in range(len(max_count))]

    plt.style.use('_mpl-gallery')
    plt.rcParams['font.sans-serif'] = ['SimHei']  # font able to render the Chinese labels
    fig, ax = plt.subplots(figsize=(7, 5))
    plt.subplots_adjust(left=0.1, bottom=0.1, right=0.9)  # adjust the margins

    # Colours are given explicitly, so no vmin/vmax colour-mapping range is
    # passed (matplotlib ignores it, with a warning, in that case).
    ax.scatter(ax_x, ax_y, s=sizes, c=colors)

    x_labels = [str((i, i + 1)) for i in range(24)]  # one-hour slots, e.g. '(0, 1)'
    y_labels = [number_to_weekday(i) for i in range(7)]
    plt.xticks(ticks=np.arange(24), labels=x_labels, rotation=45, size=8, color='red')
    plt.yticks(ticks=np.arange(7), labels=y_labels, rotation=10)
    plt.show()
# Script entry point: build the points from the data file and show the chart.
if __name__ == '__main__':
    date_to_points()
输出:
以上是关于[星期-时间]维度,解析日期绘制散点图,python的主要内容,如果未能解决你的问题,请参考以下文章
[星期维度]日志数据提取事件关键词,解析对应日期的星期计数,matplotlib绘制统计图,python