python [Python] Steam 게임평 크롤링 코드
Posted
tags:
篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了 python [Python] Steam 게임평 크롤링 코드 相关的知识,希望对你有一定的参考价值。
import urllib
import urllib.request
import urllib.parse
import bs4
import re
import os
import time
from concurrent.futures import ThreadPoolExecutor
def deleteTag(x):
    """Return ``x`` with every ``<...>`` markup tag removed.

    A tag is any ``<`` followed by the shortest run of non-``>`` characters
    and a closing ``>``. Text between tags is kept untouched; a ``<`` with no
    matching ``>`` is left in place.

    Args:
        x: The string to clean.

    Returns:
        The string with all matched tags replaced by the empty string.
    """
    return re.sub(r"<[^>]*>", "", x)
def getComments(code):
    """Collect the user reviews listed for one movie on movie.naver.com.

    Requests the ``pointWriteFormList.nhn`` listing for ``code`` page by page,
    starting at page 1. Paging stops when a request or its UTF-8 decoding
    fails, when a page contains no ``.score_result li`` items, or when a
    review id that was already collected shows up again.

    Args:
        code: Integer movie id, sent as the ``code`` query parameter.

    Returns:
        A list of ``(review_id, score, text)`` string tuples in page order.
        ``review_id`` is the first run of digits found in the ``onclick``
        attribute of the item's ``.score_reple em a`` link, or ``''`` when
        there are none. ``score`` and ``text`` are the inner HTML of the
        ``.star_score em`` and ``.score_reple p`` elements, with ``<span>``
        elements removed from ``text``.

    Raises:
        RuntimeError: If a review item has no ``.score_reple em a`` link. The
            page number and the raw page HTML are printed first.
    """
    def makeArgs(code, page):
        # Build the query string for one page of the review listing.
        params = {
            'code': code,
            'type': 'after',
            'isActualPointWriteExecute': 'false',
            'isMileageSubscriptionAlready': 'false',
            'isMileageSubscriptionReject': 'false',
            'page': page
        }
        return urllib.parse.urlencode(params)

    def innerHTML(s, sl=0):
        # Serialize the children of `s` from index `sl` onward. Text nodes
        # are whitespace-stripped; everything else is rendered with str().
        # The type test must be isinstance(): `i is str` compares the node
        # with the `str` class itself and is never true.
        return ''.join(
            i.strip() if isinstance(i, str) else str(i)
            for i in s.contents[sl:]
        )

    def fText(s):
        # Inner HTML of the first selected element, or '' when none matched.
        if s:
            return innerHTML(s[0]).strip()
        return ''

    retList = []
    colSet = set()  # review ids already collected, for repeat detection
    print("Processing: %d" % code)
    page = 1
    while True:
        try:
            # The context manager closes the HTTP response on every path.
            with urllib.request.urlopen(
                    "http://movie.naver.com/movie/bi/mi/pointWriteFormList.nhn?"
                    + makeArgs(code, page)) as f:
                data = f.read().decode('utf-8')
        except Exception:
            # Best-effort crawl: any fetch/decode failure ends paging and
            # keeps what was collected. Unlike a bare `except:`, this does
            # not swallow KeyboardInterrupt or SystemExit.
            break
        # Drop "&#" sequences that are not followed by a digit before parsing.
        soup = bs4.BeautifulSoup(re.sub("&#(?![0-9])", "", data), "html.parser")
        cs = soup.select(".score_result li")
        if not cs:
            break
        for link in cs:
            try:
                url = link.select('.score_reple em a')[0].get('onclick')
            except IndexError as err:
                # Dump the offending page for debugging, then abort.
                print(page)
                print(data)
                raise RuntimeError(
                    "review item without a '.score_reple em a' link "
                    "(movie %s, page %d)" % (code, page)) from err
            # `onclick` may be absent (None); treat that as "no id".
            m = re.search('[0-9]+', url or '')
            url = m.group(0) if m else ''
            # A repeated id ends the crawl for this movie.
            if url in colSet:
                return retList
            colSet.add(url)
            cat = fText(link.select('.star_score em'))
            cont = fText(link.select('.score_reple p'))
            cont = re.sub('<span [^>]+>.+?</span>', '', cont)
            retList.append((url, cat, cont))
        page += 1
    return retList
def fetch(i):
    """Crawl the reviews of movie ``i`` and save them as one SQL statement.

    Does nothing when ``comments/<i>.txt`` already exists and is non-empty,
    or when ``getComments(i)`` returns no reviews. Otherwise writes a single
    ``INSERT IGNORE INTO movie VALUES ...;`` statement with one
    ``(movie_id, review_id, score, 'text')`` row per review, then sleeps for
    one second.

    Args:
        i: Integer movie id.

    Returns:
        None.
    """
    outname = 'comments/%d.txt' % i
    try:
        if os.stat(outname).st_size > 0:
            return
    except OSError:
        # Missing or unreadable output file: fall through and crawl.
        pass
    rs = getComments(i)
    if not rs:
        return
    # Without the output directory every open() below fails.
    os.makedirs(os.path.dirname(outname), exist_ok=True)
    # NOTE(review): only the review text is quoted and escaped; the id and
    # score columns are interpolated as-is and may be empty strings.
    rows = ',\n'.join(
        "(%d,%s,%s,'%s')" % (
            i, r[0], r[1], r[2].replace("'", "''").replace("\\", "\\\\"))
        for r in rs
    )
    with open(outname, 'w', encoding='utf-8') as f:
        f.write('INSERT IGNORE INTO movie VALUES ' + rows + ';\n')
    time.sleep(1)
def _fetchLogged(i):
    """Run ``fetch(i)`` and print any exception before re-raising it.

    The futures returned by ``submit`` below are discarded, so an exception
    raised in a worker would otherwise never be seen.
    """
    try:
        fetch(i)
    except Exception as err:
        print("fetch(%d) failed: %r" % (i, err))
        raise


with ThreadPoolExecutor(max_workers=5) as executor:
    # The valid range of movie ids is unknown, so a rough range is swept.
    for i in range(10000, 200000):
        executor.submit(_fetchLogged, i)
출처: http://bab2min.tistory.com/556 [나의 큰 O는 logx야..]
출처: http://bab2min.tistory.com/556 [나의 큰 O는 logx야..]
以上是关于python [Python] Steam게임평크롤링코드的主要内容,如果未能解决你的问题,请参考以下文章
markdown 私人네임네임맹#python #tutorial
markdown 네임스페이스와스코프#python #tutorial
python爬虫 爬取steam热销游戏
利用Python白玩steam游戏,我是专业的
4-Python游戏编程-贪吃蛇(教程+源码)steam少儿编程课件
2-Python游戏编程-拼图游戏(教程+源码)steam少儿编程课件