词云代码
Posted 学习随笔
tags:
篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了词云代码相关的知识,希望对你有一定的参考价值。
"""Render a word cloud from a Chinese text file, shaped by a mask image.

Inputs, all looked up next to this script:
    stopwords.txt  one stop word per line, UTF-8
    words.txt      the text to visualise, UTF-8
    china.jpeg     mask image that gives the cloud its shape
    msyh.ttf       font with CJK glyphs used to draw the words

Output: example.png is written next to this script, then the cloud and
the mask are shown in two matplotlib figures.
"""
import io
from collections import Counter
from os import path

import jieba
import matplotlib.pyplot as plt
import numpy as np
from PIL import Image
from wordcloud import WordCloud

# Resolve data files relative to this script, not the current working dir.
d = path.dirname(path.abspath(__file__))

# Text is decoded once, when the files are read, so the Python 2
# ``reload(sys); sys.setdefaultencoding('utf-8')`` hack is not needed and
# the script runs on both Python 2.7 and Python 3.  'utf-8-sig' also
# swallows a leading BOM, which would otherwise stick to the first token.
stopWordFile = u'stopwords.txt'
with io.open(path.join(d, stopWordFile), encoding='utf-8-sig') as stop_file:
    stopWordList = {line.strip() for line in stop_file}

new = 'words.txt'
with io.open(path.join(d, new), encoding='utf-8-sig') as text_file:
    text = text_file.read().strip()

# jieba also yields whitespace and newline tokens; skip them so blank
# "words" do not end up in the frequency table.
wordDict = Counter(
    w for w in jieba.cut(text)
    if w.strip() and w not in stopWordList
)

maskImg = 'china.jpeg'
# Materialise the pixels inside the ``with`` so the image file is closed.
with Image.open(path.join(d, maskImg)) as mask_image:
    alice_mask = np.array(mask_image)

wc = WordCloud(
    background_color='white',
    max_words=2000,
    mask=alice_mask,
    font_path=path.join(d, 'msyh.ttf'),
)
wc.generate_from_frequencies(wordDict)

wc.to_file(path.join(d, "example.png"))

# show
plt.imshow(wc, interpolation='bilinear')
plt.axis("off")
plt.figure()
plt.imshow(alice_mask, cmap=plt.cm.gray, interpolation='bilinear')
plt.axis("off")
plt.show()
以上是关于词云代码的主要内容,如果未能解决你的问题,请参考以下文章