MATLAB | 可视化 | 罗密欧与朱丽叶的数据统计
Posted slandarer
tags:
篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了MATLAB | 可视化 | 罗密欧与朱丽叶的数据统计相关的知识,希望对你有一定的参考价值。
前两天看到了一个非常好看的可视化案例(https://www.reddit.com/r/dataisbeautiful/comments/8ivizh/visualisation_of_words_spoken_between_romeo_and/):
继续手痒想绘制一下试试,但是原可视化并没有提供数据,我也懒得真的去统计各个人物之间交流单词数量,因此我统计了各个人物说话出现顺序,认为只要相邻就是互相之间有过交流,弄了个笼统的统计绘制了下图:
绘制效果
基础教程
数据处理
罗密欧与朱丽叶全文出自网站:
http://shakespeare.mit.edu/romeo_juliet/full.html
我将全文复制到 romeo_juliet.txt 文件,并将其导入 MATLAB 进行了粗略统计,得到的结果是一个 20x20 的矩阵:
% Basic data: read the play text and keep only the first column of cells
strData=readcell('romeo_juliet.txt');
strData=strData(:,1);
% Character names as a cell array (the braces are required: without them
% the comma-separated strings are not a valid assignment)
nameStr={'Romeo','Juliet','Friar Laurence','Friar John','Abraham','Balthasar',...
    'Montague','Lady Montague','Benvolio','Apothecary','Mercutio','Prince','Paris','Tybalt',...
    'Sampson','Gregory','Lady Capulet','Capulet','Nurse','Peter'};
% =========================================================================
% Count how often each pair of characters appears consecutively
% orderList(k) holds the index into nameStr of the name found in row k of
% strData, or 0 when the row matches no name (case-insensitive comparison)
orderList=zeros(size(strData));
for i=1:length(nameStr)
    orderList(strcmpi(nameStr{i},strData))=i;
end
% Drop rows that are not a character name, leaving the order of appearance
orderList(orderList==0)=[];
% corrMat(a,b) counts the times name b directly follows name a
corrMat=zeros(length(nameStr));
for i=1:length(orderList)-1
    corrMat(orderList(i),orderList(i+1))=corrMat(orderList(i),orderList(i+1))+1;
end
% Symmetrise: adjacency is treated as undirected
corrMat=corrMat+corrMat.';
配色
配色数据提取的方式很多,可以qq取色或者ppt取色,我公众号上也有一些取色器工具,可以自行查找下载取用,这是图片中用到的配色:
% Palette: one RGB row per class, given as 0-255 values and scaled to [0,1]
colorList=[ 48,115,100
           177, 58, 71
           252,193, 13
           108, 60,143]/255;
点位置
写了一个很短的程序,运行后生成图窗,在想要取点的位置点击就能获得放缩到[-1,1]范围内的数据点(红蓝十字所示)。这个程序只是为了自己方便,大家可自行改进:
function getlinesl(fileName)
% GETLINESL  Pick points interactively on top of a background image.
%   getlinesl(fileName) reads the image fileName, shows it stretched over
%   the square [-1,1]x[-1,1] in the current axes and installs a mouse
%   callback on the current figure. A click with SelectionType 'normal'
%   appends the clicked position to pntSet; any other click type removes
%   the most recent point. After every click pntSet is printed to the
%   command window and written to data.mat.
img=imread(fileName);
ax=gca;
hold on
set(ax,'XLim',[-1,1]);
set(ax,'YLim',[-1,1]);
% The image rows are flipped before display (hold is already on above)
image([-1,1],[-1,1],flipud(img))
[nRows,nCols,~]=size(img);
ax.DataAspectRatio=[nRows,nCols,1];
% Two overlaid markers (blue '+' and red 'x') show every picked point
baHdl=plot(0,0,'b+','MarkerSize',12,'LineWidth',1.5);
rxHdl=plot(0,0,'rx','MarkerSize',12,'LineWidth',1.5);
% Picked points, one [x,y] row each; shared with the nested callback
pntSet=zeros(0,2);
set(gcf,'WindowButtonDownFcn',@buttondown)

    function buttondown(~,~)
        % Mouse-press callback: add or remove a point, then refresh/save.
        cp=get(gca,'CurrentPoint');
        clickXY=cp(1,1:2);
        isNormalClick=strcmp(get(gcf,'SelectionType'),'normal');
        if isNormalClick
            pntSet(end+1,:)=clickXY;
        elseif ~isempty(pntSet)
            pntSet(end,:)=[];
        end
        set(baHdl,'XData',pntSet(:,1),'YData',pntSet(:,2));
        set(rxHdl,'XData',pntSet(:,1),'YData',pntSet(:,2));
        % Intentionally unsuppressed so the current list is echoed
        pntSet
        save data.mat pntSet
    end
end
使用方法就是在命令行运行 getlinesl('test.png'),其中括号内是图片地址。
% Position of each character's node: one [x,y] row per character,
% rows in the same order as the entries of nameStr
posXY=[-0.1598,-0.0682; ... Romeo
        0.3117,-0.0060; ... Juliet
       -0.0389, 0.4154; ... Friar Laurence
       -0.1874, 0.5155; ... Friar John
       -0.3877, 0.5691; ... Abraham
       -0.6693, 0.5397; ... Balthasar
       -0.8092, 0.2599; ... Montague
       -0.7522,-0.0959; ... Lady Montague
       -0.5915,-0.3515; ... Benvolio
       -0.4465,-0.5294; ... Apothecary
       -0.2668,-0.6503; ... Mercutio
       -0.0060,-0.7867; ... Prince
        0.2202,-0.6503; ... Paris
        0.4275,-0.5173; ... Tybalt
        0.5933,-0.3515; ... Sampson
        0.7228,-0.1563; ... Gregory
        0.8040, 0.1045; ... Lady Capulet
        0.7642, 0.3653; ... Capulet
        0.5259, 0.5622; ... Nurse
        0.2323, 0.5121];  % Peter
绘制图像
% Figure and axes setup: black square canvas covering [-1,1] on both
% axes, no ticks, equal data aspect ratio
figure('Position',[400,100,850,850],'Name','slandarer')
ax=gca;
hold on
set(ax,'XLim',[-1,1],'YLim',[-1,1]);
set(ax,'Color',[0,0,0]);
set(ax,'XTick',[],'YTick',[]);
set(ax,'DataAspectRatio',[1,1,1]);
% Largest and smallest non-zero counts, used to normalise line widths
maxWidth=max(corrMat(corrMat>0));
minWidth=min(corrMat(corrMat>0));
% Interpolation parameter at the two ends and the midpoint of each line
ttList=linspace(0,1,3)';
% Main drawing loop over every unordered pair (i,j) with i<j
for i=1:size(corrMat,1)
    for j=i+1:size(corrMat,2)
        if ~(corrMat(i,j)>0)
            continue
        end
        % Count rescaled to [0,1] across the observed range
        tW=(corrMat(i,j)-minWidth)./(maxWidth-minWidth);
        % Per-vertex colours blended from class colour of i to that of j
        colorData=(1-ttList).*colorList(classNum(i),:)+ttList.*colorList(classNum(j),:);
        for ch=1:3
            CData(:,:,ch)=colorData(:,ch);
        end
        % Connection line: a 3-vertex patch along the segment whose edge
        % colour is interpolated from CData; width grows with the count
        xs=linspace(posXY(i,1),posXY(j,1),3);
        ys=linspace(posXY(i,2),posXY(j,2),3);
        fill(xs,ys,[0,0,0],'LineWidth',tW.*6+2.5,...
            'CData',CData,'EdgeColor','interp','EdgeAlpha',.5,'FaceAlpha',.5)
    end
    % Node marker for character i, drawn after that character's lines
    scatter(posXY(i,1),posXY(i,2),30,'filled','LineWidth',1.2,...
        'MarkerFaceColor',colorList(classNum(i),:),'MarkerEdgeColor',[.7,.7,.7]);
end
图例
% Legend: one zero-area patch per class serves as its colour swatch.
% The handle array is preallocated with gobjects so the Patch objects are
% stored as graphics objects instead of being converted to numeric handles.
lgdSet=gobjects(1,length(classStr));
for i=1:length(classStr)
    lgdSet(i)=fill([0,0],[0,0],colorList(i,:));
end
lgdHdl=legend(lgdSet,classStr,'Box','off','Location','South','Orientation','horizontal',...
    'TextColor',[1,1,1],'FontName','Cambria','FontSize',12);
% Shrink the colour swatches shown next to each legend entry
lgdHdl.ItemTokenSize=[8,8];
标题
% Title: large white heading centred at the top of the canvas
text(0,0.8,'ROMEO & JULIET',...
    'FontName','Cambria','FontSize',30,'FontWeight','bold',...
    'Color',[1,1,1],'HorizontalAlignment','center')
% Subtitle: smaller, light grey line just below the heading
text(0,0.68,'Number of conversations between characters',...
    'FontName','Cambria','FontSize',15,'FontWeight','bold',...
    'Color',[1,1,1].*.7,'HorizontalAlignment','center')
绘制人名
% Character name labels, centred horizontally on each node position
% (nameStr is a cell array, so the i-th name is read with brace indexing)
for i=1:size(corrMat,1)
    text(posXY(i,1),posXY(i,2)-.001,nameStr{i},'FontName','Cambria','FontSize',13,...
        'HorizontalAlignment','center','Color',[1,1,1]);
end
完整代码
clc;clear
% Basic data: read the play text and keep only the first column of cells
strData=readcell('romeo_juliet.txt');
strData=strData(:,1);
% Character names as a cell array (braces are required for a valid literal)
nameStr={'Romeo','Juliet','Friar Laurence','Friar John','Abraham','Balthasar',...
    'Montague','Lady Montague','Benvolio','Apothecary','Mercutio','Prince','Paris','Tybalt',...
    'Sampson','Gregory','Lady Capulet','Capulet','Nurse','Peter'};
% Family/class index of each character (index into classStr and colorList)
classNum=[1,2,4,4,1,1,1,1,1,4,3,3,3,2,2,2,2,2,2,2];
classStr={'House Montague','House Capulet','House Escalus','Other personages'};
% Position of each character's node, one [x,y] row per entry of nameStr
posXY=[-0.1598 -0.0682; 0.3117 -0.0060;-0.0389 0.4154;-0.1874 0.5155;
-0.3877 0.5691;-0.6693 0.5397;-0.8092 0.2599;-0.7522 -0.0959;
-0.5915 -0.3515;-0.4465 -0.5294;-0.2668 -0.6503;-0.0060 -0.7867;
0.2202 -0.6503; 0.4275 -0.5173; 0.5933 -0.3515; 0.7228 -0.1563;
0.8040 0.1045; 0.7642 0.3653; 0.5259 0.5622; 0.2323 0.5121];
% Palette: one RGB row per class, scaled from 0-255 to [0,1]
colorList=[48,115,100;177,58,71;252,193,13;108,60,143]./255;
% =========================================================================
% Count how often each pair of characters appears consecutively
% orderList(k) is the index into nameStr of the name found in row k of
% strData, or 0 when the row matches no name (case-insensitive comparison)
orderList=zeros(size(strData));
for i=1:length(nameStr)
    orderList(strcmpi(nameStr{i},strData))=i;
end
% Drop rows that are not a character name, leaving the order of appearance
orderList(orderList==0)=[];
% corrMat(a,b) counts the times name b directly follows name a
corrMat=zeros(length(nameStr));
for i=1:length(orderList)-1
    corrMat(orderList(i),orderList(i+1))=corrMat(orderList(i),orderList(i+1))+1;
end
% Symmetrise: adjacency is treated as undirected
corrMat=corrMat+corrMat.';
% ============================================================
理性的经济人,成就不了罗密欧与朱丽叶!(深刻)