python TextMining

Posted by kingboy100


01.Crawling

url_request

 

# -*- coding: utf-8 -*-
"""
Created on Sun Feb 17 11:08:44 2019

@author: 502-03
1. Open Anaconda Prompt
2. python -m pip install --upgrade pip
3. pip install beautifulsoup4
"""

import urllib.request          # URL request
from bs4 import BeautifulSoup  # HTML parsing

url = "http://www.naver.com/index.html"

# 1. Request the page source
rst = urllib.request.urlopen(url)
print(rst)  # <http.client.HTTPResponse object at 0x000000000E21F940>

data_src = rst.read()  # read the raw source (bytes)
print(data_src)
"""
b'<!doctype html>
<html lang="ko">
<head>
....
"""

# 2. Parse the HTML: bytes -> str -> parse tree
html = data_src.decode("utf-8")
soup = BeautifulSoup(html, "html.parser")
print(soup)
"""
</script>
<title>NAVER</title>
</meta></meta></meta></meta></meta></head>
<style>
"""

# 3. Tag lookup
link = soup.find("a")  # first <a> tag
print(link)
"""
<a href="#news_cast" onclick="document.getElementById('news_cast2').tabIndex = -1;
document.getElementById('news_cast2').focus();return false;">
<span>???? ????</span></a>
"""
print("a tag content", link.string)  # text inside the first <a> tag

links = soup.find_all("a")  # every <a> tag
print("a tag size", len(links))  # a tag size 335

links_data = []
for a in links:
    print(a.string)
    links_data.append(a.string)

print("links_data", len(links_data))  # links_data 335
print(links_data)
"""
['???? ????', '?????? ????', '????? ????', '????? ????', '??? ????',
....
'??? ??', '????', 'NAVER Corp.']
"""

 

selector

 

# -*- coding: utf-8 -*-
"""
- Element lookup with CSS selectors
  -> id(#), class(.)
  -> select_one('selector') : select a single element
  -> select('selector')     : select every matching element
"""

from bs4 import BeautifulSoup  # HTML parsing

# 1. read the HTML file
file = open("../data/selector.html", mode="r", encoding="utf-8")
data_src = file.read()
print(data_src)
"""
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
<title>id/class ???, ? ???</title>
<style type="text/css">
"""

# 2. parse the HTML
html = BeautifulSoup(data_src, "html.parser")
print(html)
"""
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">

<html>
<head>
<meta content="text/html; charset=utf-8" http-equiv="Content-Type"/>
<title>id/class ???, ? ???</title>
<style type="text/css">
"""

# 3. selector-based lookup

# 1) id selector: select_one("tag#id")
table = html.select_one("table#tab")
print(table)
"""
<table border="1" id="tab">
<tr> <!-- row 1 -->
<!-- header cell: th -->
<th id="id"> ?? </th>
...
<td> [email protected] </td>
</tr>
</table>
"""

# 2) id selector > child tag
th = html.select_one("table#tab > tr > th")
print(th)  # <th id="id"> ?? </th>

ths = html.select("table#tab > tr > th")
print(ths)
"""
[<th id="id"> ?? </th>, <th id="name"> ?? </th>, <th id="major"> ?? </th>, <th id="email"> ??? </th>]
"""

print(len(ths))  # 4

for th in ths:  # e.g. <th id="id"> ?? </th>
    print(th.string)

# 3) class selector: select('tag.classname')
#    rows 3 and 5 carry class="odd"

# (1) direct class match
trs = html.select("table#tab > tr.odd")
print(trs)
"""
[<tr class="odd"> <!-- row 3 -->
<td> 201602 </td>
<td> ??? </td>
<td> ???? </td>
<td> [email protected] </td>
</tr>, <tr class="odd"> <!-- row 5 -->
<td> 201604 </td>
<td> ??? </td>
<td> ???? </td>
<td> [email protected] </td>
</tr>]
"""
tds_data = []
for tr in trs:  # 2 rows
    tds = tr.find_all("td")
    for td in tds:
        print(td.string)
        tds_data.append(td.string)
print(len(tds_data))  # 8

# (2) attribute selector: tag[attr=value]
trs = html.select("tr[class=odd]")
print(trs)  # same two <tr class="odd"> rows as above

 

newsCrawling

 

# -*- coding: utf-8 -*-
"""
news crawling
    url='http://media.daum.net'
"""

import requests                # URL request
from bs4 import BeautifulSoup  # HTML parsing

url = "http://media.daum.net"

# 1. request the URL
src = requests.get(url)
print(src)  # <Response [200]>

data_src = src.text  # source text

# 2. parse the HTML
html = BeautifulSoup(data_src, "html.parser")
print(html)

# 3. select("tag[attr=value]")
#    e.g. <strong class="tit_g"><a href="http://v.media.daum.net/v/20190217083008573" class="link_txt">美, ??? ? ? ??? ?? ?? ???..韓 ?? ???</a><span class="txt_view">???</span></strong>
links = html.select("a[class=link_txt]")
print(len(links))  # 102

crawling_data = []  # collected headlines
cnt = 0
for link in links:
    cnt += 1
    cont = str(link.string)        # headline text
    print(cnt, "-", cont.strip())  # strip leading/trailing whitespace
    crawling_data.append(cont.split())

"""
1 - ??? "???? ???..中 ????? ??? ?"
2 - "?? ????..????? ??? ? ??"
3 - ???? ???? ??? ??..??? ? ????
4 - ?·? ???? '??' ?? ???..???? ???...
5 - 中??? ???? ???.."?? ??? ?? ????...
"""

print(len(crawling_data))  # 102

# save -> text file
file = open("../data/crawling_data.txt",
            mode="w", encoding="utf-8")

# list -> str conversion
file.write(str(crawling_data))
file.close()
print("file save commit")

 

02.NLP

jpype_test

 

# -*- coding: utf-8 -*-
"""
Java virtual machine test (konlpy needs a JVM)
"""
import jpype

path = jpype.getDefaultJVMPath()
print(path)  # C:\Program Files\Java\jdk1.8.0_181\jre\bin\server\jvm.dll
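
konlpy starts the JVM on its own, but when the path above looks wrong it can help to bring the JVM up manually first. A minimal sketch using jpype's isJVMStarted/startJVM (behavior varies slightly across jpype versions, so treat this as a diagnostic only):

import jpype

# Start the JVM only once; calling startJVM twice raises an error.
if not jpype.isJVMStarted():
    jpype.startJVM(jpype.getDefaultJVMPath())

print(jpype.isJVMStarted())  # True -> konlpy's Java-based taggers can run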

 

konlpy

 

# -*- coding: utf-8 -*-
"""
konlpy test
pip install konlpy
"""

from konlpy.tag import Kkma

# Kkma object
kkma = Kkma()

# text -> sentence extraction
para = "??? ??? ?????.?? ??? ?? age? 28? ???."

ex_sent = kkma.sentences(para)
print(ex_sent)
"""
['??? ??? ?????.', '?? ??? ?? age? 28? ???.']
"""

# text -> noun extraction
ex_nouns = kkma.nouns(para)
print(ex_nouns)
"""
['???', '??', '??', '?', '???', '28', '28?', '?']
"""

# text -> POS tagging
ex_pos = kkma.pos(para)
print(ex_pos)  # [(text, POS tag)]
"""
[('???', 'NNG'), ('??', 'NNG'), ('?', 'JKO'), ('??', 'NNG'), ('?', 'VCP'),
('???', 'EFN'), ('.', 'SF'), ('?', 'NP'), ('?', 'JX'), ('???', 'NNG'),
('?', 'VCP'), ('?', 'ECE'), ('age', 'OL'), ('?', 'JX'), ('28', 'NR'),
('?', 'NNM'), ('?', 'VCP'), ('???', 'EFN'), ('.', 'SF')]
"""
"""
Kkma POS tag set (Sejong-based):
NNG general noun        NNP proper noun         NNB bound noun        NR numeral
NP pronoun              VV verb                 VA adjective          VX auxiliary verb
VCP positive copula     VCN negative copula     MM determiner
MAG general adverb      MAJ conjunctive adverb  IC interjection
JKS subject marker      JKC complement marker   JKG adnominal marker
JKO object marker       JKB adverbial marker    JKV vocative marker
JKQ quotative marker    JC conjunctive particle JX auxiliary particle
EP pre-final ending     EF final ending         EC connective ending
ETN nominalizing ending ETM adnominal ending    XPN noun prefix
XSN noun suffix         XSV verb suffix         XSA adjective suffix  XR root
SF period/question/exclamation  SE ellipsis     SS quote/bracket/dash
SP comma/colon/slash    SO hyphen/tilde         SW other symbols
SH Chinese character    SL foreign word         SN number
NF noun-like unknown    NV verb-like unknown    NA unknown
"""

# NNG: general noun, NNP: proper noun, NP: pronoun
ex_pos2 = []  # noun words
for (text, text_class) in ex_pos:  # (text, POS tag)
    if text_class == 'NNG' or text_class == 'NNP' or text_class == 'NP':
        ex_pos2.append(text)

print(ex_pos2)  # ['???', '??', '??', '?', '???']
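
The same noun filter can be written as a list comprehension over a tag set; this is only an idiomatic variant of the loop above, not a different algorithm:

# Keep tokens whose POS tag is one of the noun classes NNG, NNP, NP.
noun_tags = {'NNG', 'NNP', 'NP'}
ex_pos2 = [text for text, text_class in ex_pos if text_class in noun_tags]
print(ex_pos2)  # same result as the loop version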

 

03.WordCloud

ex_nouns

 

# -*- coding: utf-8 -*-
"""
1. read a text file
2. sentence/noun extraction: Kkma
3. preprocessing: drop numbers and single-character words
4. word cloud visualization
"""
from konlpy.tag import Kkma

# object
kkma = Kkma()

# 1. read the text file
file = open("../data/text_data.txt", mode="r", encoding="utf-8")
docs = file.read()  # whole text
file.close()
print(docs)
"""
??? ??? ?????. ?? ??? ??? ?????.
??? ??? ?? ??? ???. Text mining ??? 2000? ??? ??? ????.
"""

# 1) docs -> sentences
ex_sent = kkma.sentences(docs)
print(len(ex_sent))  # 4
print(ex_sent)
"""
['??? ??? ?????.',
'?? ??? ??? ?????.',
'??? ??? ?? ??? ???.',
'Text mining ??? 2000? ??? ??? ????.']
"""

for sent in ex_sent:
    print(sent)

# 2) docs -> nouns
ex_nouns = kkma.nouns(docs)  # unique nouns over the whole text
print(len(ex_nouns))  # 13
print(ex_nouns)
"""
['???', '??', '?', '???', '??', '???',
'??', '2000', '2000?', '?', '??', '??', '??']
"""

from re import match

# 2~3. per-sentence noun extraction -> preprocessing (drop numbers and 1-char words)
nouns_words = []  # list
nouns_count = {}  # dict
for sent in ex_sent:                 # per sentence
    for nouns in kkma.nouns(sent):   # per noun
        # keep words longer than 1 char that do not start with a digit
        if len(str(nouns)) > 1 and not match('^[0-9]', nouns):
            nouns_words.append(nouns)
            # key=word, value=count
            nouns_count[nouns] = nouns_count.get(nouns, 0) + 1

print(len(nouns_words))  # 15 -> 12 after filtering
"""
['???', '??', '???', '??', '??', '???',
 '??', '???', '??', '??', '??', '??']
"""

print(nouns_count)
"""
{'???': 1, '??': 3, '???': 2, '??': 1,
'???': 1, '??': 1, '??': 1, '??': 1}
"""

# 4. word cloud visualization
from collections import Counter

# 1) dict -> Counter
word_count = Counter(nouns_count)

# 2) top words
top5 = word_count.most_common(5)
print(top5)
"""
[('??', 3), ('???', 2), ('???', 1), ('??', 1), ('???', 1)]
"""

# 3) word cloud visualization: required packages
import pytagcloud
"""
In Anaconda Prompt:
  pip install pygame
  pip install pytagcloud
  pip install simplejson
"""

# assign a color, size, and tag to each word
word_count_list = pytagcloud.make_tags(top5, maxsize=80)
# maxsize: maximum font size
print(word_count_list)
"""
[{'color': (91, 34, 34), 'size': 109, 'tag': '??'}, {'color': (95, 159, 59), 'size': 80, 'tag': '???'}, {'color': (194, 214, 193), 'size': 47, 'tag': '???'}]
"""

pytagcloud.create_tag_image(word_count_list,
                            "wordcloud.jpg",
                            size=(900, 600),
                            fontname="korean", rectangular=False)
"""
If a font error occurs while creating the image,
go to C:\Anaconda3\Lib\site-packages\pytagcloud\fonts and:
  1. add an entry to fonts.json:
  [
    {
        "name": "korean",
        "ttf": "malgun.ttf",
  2. copy the 'Malgun Gothic' (malgun.ttf) font from C:\Windows\Fonts into the fonts folder
  3. call create_tag_image(fontname='korean')
"""
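
If the pytagcloud font setup is troublesome, the separate wordcloud package (pip install wordcloud) is a commonly used alternative: it renders straight from the {word: count} dict and only needs the path to any Korean-capable TTF font. A sketch, assuming Malgun Gothic at its usual Windows location:

from wordcloud import WordCloud

# Build the cloud directly from the frequency dict computed above.
wc = WordCloud(font_path="C:/Windows/Fonts/malgun.ttf",  # assumed font path
               width=900, height=600, background_color="white")
wc.generate_from_frequencies(nouns_count)
wc.to_file("wordcloud2.jpg")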

 

news_wordCloud

 

# -*- coding: utf-8 -*-
"""
news crawling data file
    - word cloud visualization
"""

from re import match          # used in the preprocessing step below
from konlpy.tag import Kkma

# object
kkma = Kkma()

# 1. text file load
file = open("../data/crawling_data.txt",
            encoding="utf-8")
crawling_data = file.read()
file.close()
print(crawling_data)
"""
[['???', '"????', '???..中', '?????', '???', '?"'],
['"??', '????..?????', '???', '?', '??"'],
['????', '????', '???', '??..???', '?', '????'],
...
 ['???', '????', '??', '???', '???', '??', '??', '??', '?????']]
"""

# 2. docs -> sentences
ex_sent = kkma.sentences(crawling_data)
print(len(ex_sent))  # 9
print(ex_sent)

# 3. sentences -> nouns
# 4. preprocessing: drop numbers and single-character words
# 5. word count: dict
ex_nouns = []    # list
word_count = {}  # dict
for sent in ex_sent:                 # per sentence
    for nouns in kkma.nouns(sent):   # per noun
        if len(str(nouns)) > 1 and not match('^[0-9]', nouns):
            ex_nouns.append(nouns)
            word_count[nouns] = word_count.get(nouns, 0) + 1
print(len(ex_nouns))  # 439
print(ex_nouns)
print(word_count)

# 5. Counter: extract the top 10 words
from collections import Counter
word_count = Counter(word_count)
top10 = word_count.most_common(10)
print(top10)
"""
[('?', 4), ('???', 3), ('?', 3), ('?', 3), ('?', 3), ('?', 3), ('?', 3), ('???', 3), ('??', 3), ('?', 3)]
"""

# 6. word cloud visualization
import pytagcloud
"""
In Anaconda Prompt:
  pip install pygame
  pip install pytagcloud
  pip install simplejson
"""
# assign a color, size, and tag to each word
word_count_list = pytagcloud.make_tags(top10, maxsize=80)
# maxsize: maximum font size
print(word_count_list)
"""
[{'color': (91, 34, 34), 'size': 109, 'tag': '??'}, {'color': (95, 159, 59), 'size': 80, 'tag': '???'}, {'color': (194, 214, 193), 'size': 47, 'tag': '???'}]
"""

pytagcloud.create_tag_image(word_count_list,
                            "news_wordcloud.jpg",
                            size=(900, 600),
                            fontname="korean", rectangular=False)
"""
If a font error occurs while creating the image,
go to C:\Anaconda3\Lib\site-packages\pytagcloud\fonts and:
  1. add an entry to fonts.json:
  [
    {
        "name": "korean",
        "ttf": "malgun.ttf",
  2. copy the 'Malgun Gothic' (malgun.ttf) font from C:\Windows\Fonts into the fonts folder
  3. call create_tag_image(fontname='korean')
"""
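
Because the crawler saved str(crawling_data), this file holds a Python repr, and passing it straight to kkma.sentences() as above also tokenizes brackets and quote characters. A sketch that recovers the original nested list with ast.literal_eval before tagging (it assumes the file was written by the crawler above):

import ast

# Parse the Python-repr text back into a list of token lists.
with open("../data/crawling_data.txt", encoding="utf-8") as f:
    titles = ast.literal_eval(f.read())

# Re-join tokens so Kkma sees clean headline text, not repr syntax.
docs = ". ".join(" ".join(tokens) for tokens in titles)
ex_sent = kkma.sentences(docs)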

 

04.SparseMatrix

TfidfVectorizer

 

# -*- coding: utf-8 -*-
"""
TfidfVectorizer: sparse matrix weighted by TF-IDF
 1. tokenizing : text -> words
 2. word dict  : {word : index}
 3. sparse matrix : word-frequency weights (TF, TF-IDF)
    1) TF    : term frequency (grows with how often a word occurs)
    2) TF-IDF: term frequency x inverse document frequency
       -> TFiDF = tf(d, t) x log(n/df(t))
"""

from sklearn.feature_extraction.text import TfidfVectorizer

# corpus
sentences = [
    "Mr. Green killed Colonel Mustard in the study with the candlestick. Mr. Green is not a very nice fellow.",
    "Professor Plum has a green plant in his study.",
    "Miss Scarlett watered Professor Plum's green plant while he was away from his office last week."
]

# 1. tokenizing: text -> words
tfidf_fit = TfidfVectorizer().fit(sentences)
print(tfidf_fit)  # object info
"""
TfidfVectorizer(analyzer='word', binary=False, decode_error='strict',
        dtype=<class 'numpy.int64'>, encoding='utf-8', input='content',
        lowercase=True, max_df=1.0, max_features=None, min_df=1,
        ngram_range=(1, 1), norm='l2', preprocessor=None, smooth_idf=True,
        stop_words=None, strip_accents=None, sublinear_tf=False,
        token_pattern='(?u)\\b\\w\\w+\\b', tokenizer=None, use_idf=True,
        vocabulary=None)
"""

# 2. word dict: {word: index}
voca = tfidf_fit.vocabulary_
print("word size=", len(voca))  # word size= 31
print(voca)  # e.g. 'mr': 14 -> index 14
"""
{'mr': 14, 'green': 5, 'killed': 11, 'colonel': 2, 'mustard': 15, 'in': 9,
 'the': 24, 'study': 23, 'with': 30, 'candlestick': 1, 'is': 10, 'not': 17,
 'very': 25, 'nice': 16, 'fellow': 3, 'professor': 21, 'plum': 20, 'has': 6,
 'plant': 19, 'his': 8, 'miss': 13, 'scarlett': 22, 'watered': 27, 'while': 29,
 'he': 7, 'was': 26, 'away': 0, 'from': 4, 'office': 18, 'last': 12, 'week': 28}
"""

# word indexing -> word embedding

# 3. sparse matrix: document-term matrix DTM (rows: D, cols: T)
tfidf = TfidfVectorizer()  # object
sparse_tfidf = tfidf.fit_transform(sentences)
print(type(sparse_tfidf))  # <class 'scipy.sparse.csr.csr_matrix'>
print(sparse_tfidf.shape)  # DTM = (3 docs, 31 terms)
print("1.scipy.sparse.matrix")
print(sparse_tfidf)
"""
  (row:doc, col:term)  weight = Tfidf
  (0, 14)       0.4411657657527482 : 'mr'
  (0, 5)        0.26055960805891015: 'green'
  (1, 5)        0.2690399207469689 : 'green'
  (1, 8)        0.34643788271971976
  (2, 5)        0.15978698032384395
  (2, 21)       0.2057548299742193
  (2, 20)       0.2057548299742193
  ...
"""

print("2.numpy sparse.matrix")
# scipy -> numpy conversion
tfidf_arr = sparse_tfidf.toarray()
print(tfidf_arr.shape)  # (3, 31)
print(type(tfidf_arr))  # <class 'numpy.ndarray'>
print(tfidf_arr)
"""
[[0.         0.22058288 0.22058288 0.22058288 0.         0.26055961
  0.         0.         0.         0.16775897 0.22058288 0.22058288
  0.         0.         0.44116577 0.22058288 0.22058288 0.22058288
  0.         0.         0.         0.         0.         0.16775897
  0.44116577 0.22058288 0.         0.         0.         0.
  0.22058288]
 [0.         0.         0.         0.         0.         0.26903992
  0.45552418 0.         0.34643788 0.34643788 0.         0.
  0.         0.         0.         0.         0.         0.
  0.         0.34643788 0.34643788 0.34643788 0.         0.34643788
  0.         0.         0.         0.         0.         0.
  0.        ]
 [0.27054288 0.         0.         0.         0.27054288 0.15978698
  0.         0.27054288 0.20575483 0.         0.         0.
  0.27054288 0.27054288 0.         0.         0.         0.
  0.27054288 0.20575483 0.20575483 0.20575483 0.27054288 0.
  0.         0.         0.27054288 0.27054288 0.27054288 0.27054288
  0.        ]]
"""

"""
1. scipy sparse matrix
   -> tensorflow model
2. numpy sparse matrix
   -> sklearn model
"""

 
