查排名
Posted by simadongyang
tags:
篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了查排名相关的知识,希望对你有一定的参考价值。
#!/usr/bin/env python
# -*- coding:utf-8 -*-
from selenium.common.exceptions import TimeoutException
import os, time, random
from multiprocessing import Pool
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.keys import Keys
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.chrome.options import Options
from pyquery import PyQuery as pq
import pymysql
import time
import asdl
import random
import requests
import urllib.request
from urllib import parse
def get_platform(url):
    """Return the host part of *url* (the text between ``://`` and the next ``/``).

    Args:
        url: An absolute URL such as ``https://www.example.com/path``.

    Returns:
        Everything after the scheme separator up to, but not including, the
        first following ``/``. When the URL has no path at all
        (``https://www.example.com``) the whole remainder is returned.

    Raises:
        ValueError: If *url* contains no ``://`` separator.
    """
    # str.index (not find) is deliberate: a URL without a scheme is an error.
    scheme_end = url.index('://') + 3
    # partition never raises, so a URL without a trailing path still works.
    host, _, _ = url[scheme_end:].partition('/')
    return host
def get_url(url):
    """Fetch *url* and extract the address that follows ``url=`` in its body.

    The body is decoded as UTF-8 and searched, case-insensitively, for the
    first ``url=`` marker. The text between that marker and the next double
    quote is returned.

    Args:
        url: Address of the page to download.

    Returns:
        The extracted address, or *url* unchanged when the body contains no
        ``url=`` marker or the marker is not followed by a double quote.

    Raises:
        UnicodeDecodeError: If the body is not valid UTF-8.
        Exception: Whatever ``urllib.request.urlopen`` raises for *url*.
    """
    import re  # local import: ``re`` is not among the file-level imports

    # The context manager closes the response even when read/decode fails.
    with urllib.request.urlopen(url) as resp:
        data = resp.read().decode('utf-8')

    # Searching the original text (instead of an index taken from
    # data.lower()) keeps the offsets valid, and requiring the closing quote
    # avoids slicing with -1 when the quote is missing.
    match = re.search(r'url=([^"]*)"', data, re.IGNORECASE)
    if match is None:
        return url
    return match.group(1)
def saveData(sql, params=None):
    """Execute one write statement against the ``seo`` database and commit it.

    A new connection is opened for every call and is always closed again,
    whether the statement succeeds or fails.

    Args:
        sql: The statement to execute.
        params: Optional values for ``%s`` placeholders in *sql*. When given,
            the driver escapes them; when omitted, *sql* is sent as-is.

    Prints ``ok`` after a successful commit. On failure the transaction is
    rolled back and ``error`` plus the exception are printed; the exception
    is not re-raised.
    """
    # NOTE(review): the connection credentials are hard-coded here and in the
    # __main__ block; they should be moved to configuration.
    db = pymysql.connect(host="47.94.36.26", user="seo", passwd='djAcfKNHxF', db='seo', charset='utf8')
    try:
        cursor = db.cursor(cursor=pymysql.cursors.DictCursor)
        try:
            cursor.execute(sql, params)
            db.commit()
            print('ok')
        except Exception as exc:
            # Best effort by design: report and continue with the next row.
            db.rollback()
            print('error')
            print(repr(exc))
        finally:
            cursor.close()
    finally:
        db.close()
def _sql_escape(value):
    """Return *value* as text that is safe inside a single-quoted SQL literal."""
    # NOTE(review): escaping assumes the server's default backslash-escape
    # mode; passing parameters to cursor.execute would be preferable.
    return pymysql.converters.escape_string(str(value))


def chrom(data):
    """Search Baidu mobile for one keyword and store the matching results.

    Args:
        data: Mapping with the keys ``words`` (search keyword), ``uid``,
            ``rule`` (text a result must contain to be stored) and ``author``.

    For every non-advert result whose text contains ``rule``, the result's
    link is resolved with ``get_url`` and one row is inserted into
    ``ganen_keys_results`` through ``saveData``. ``ranking`` is the result's
    1-based position counting non-advert results only. A failure while
    handling one result is printed and does not stop the remaining results.
    """
    words = data['words']
    uid = data['uid']
    rule = data['rule']
    author = data['author']

    # Build the search URL.
    search_url = 'https://m.baidu.com/s?' + parse.urlencode({'word': words})

    # Original note: "Baidu Tieba bug" -- fixed browser-like headers are sent
    # with the request.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (Khtml, like Gecko) Chrome/63.0.3239.108 Safari/537.36',
        'Cookie': 'gr_user_id=1f9ea7ea-462a-4a6f-9d55-156631fc6d45; bid=vPYpmmD30-k; ll="118282"; ue="codin; __utmz=30149280.1499577720.27.14.utmcsr=douban.com|utmccn=(referral)|utmcmd=referral|utmcct=/doulist/240962/; __utmv=30149280.3049; _vwo_uuid_v2=F04099A9dd; viewed="27607246_26356432"; ap=1; ps=y; push_noty_num=0; push_doumail_num=0; dbcl2="30496987:gZxPfTZW4y0"; ck=13ey; _pk_ref.100001.8cb4=%5B%22%22%2C%22%22%2C1515153574%2C%22https%3A%2F%2Fbook.douban.com%2Fmine%22%5D; __utma=30149280.833870293.1473539740.1514800523.1515153574.50; __utmc=30149280; _pk_id.100001.8cb4=255d8377ad92c57e.1473520329.20.1515153606.1514628010.',
    }
    # A timeout keeps one stalled request from blocking a pool worker forever.
    res = requests.get(search_url, headers=headers, timeout=30)

    # Parse the page and select the result containers.
    doc = pq(res.text)
    results = doc(doc.html()).find('#results').children('div')

    # Walk the results in page order.
    # NOTE(review): the source's indentation was lost; this assumes adverts
    # are skipped entirely rather than only excluded from the counter.
    n = 0
    for item in results:
        doc_i = doc(item)
        if doc_i.find("span:contains('广告')"):
            continue  # advert: neither ranked nor stored
        n += 1
        if not doc_i.find(":contains('" + rule + "')").text():
            continue

        # Title
        title = doc_i.find("h3").text()
        try:
            print('标题:' + title)
            url = get_url(doc_i.find("a").attr('href'))
            platform = get_platform(url)
            ctime = int(time.time())
            ranking = n
            values = (words, ranking, title, url, rule, platform, uid, author, ctime)
            sql = "insert into ganen_keys_results(words,ranking,title,url,rule,platform,uid,author,create_time) "
            # Scraped titles and URLs are untrusted, so escape every value
            # before it is placed between the quotes.
            sql += " values('%s','%s','%s','%s','%s','%s','%s','%s','%s')" % tuple(
                _sql_escape(value) for value in values
            )
            saveData(sql)
        except Exception as exc:
            # Best effort by design: report this result and move on.
            print('error:' + title)
            print(repr(exc))
if __name__ == '__main__':
    # Script entry point: load every keyword row of one author, process the
    # rows in a pool of worker processes, and print the elapsed seconds.
    # NOTE(review): the connection credentials are hard-coded here and in
    # saveData; they should be moved to configuration.
    db = pymysql.connect(host="47.94.36.26", user="seo", passwd='djAcfKNHxF', db='seo', charset='utf8')
    try:
        cursor = db.cursor(cursor=pymysql.cursors.DictCursor)
        try:
            author = '张欢'
            print('当前的用户为:' + author)
            # Let the driver quote the value instead of formatting it in.
            cursor.execute("select * from ganen_keys_cover where author = %s", (author,))
            cover = cursor.fetchall()
        finally:
            cursor.close()
    finally:
        # Closed before the pool starts so workers never inherit the socket.
        db.close()

    stime = int(time.time())
    if cover:
        p = Pool(20)
        for row in cover:
            # Without error_callback an exception in a worker is lost.
            p.apply_async(
                chrom,
                args=(row,),
                error_callback=lambda exc: print('error:' + repr(exc)),
            )
        p.close()
        p.join()
    else:
        print('无数据')
    etime = int(time.time())
    ctime = etime - stime
    print('运行时间:' + str(ctime))
以上是关于查排名的主要内容,如果未能解决你的问题,请参考以下文章