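# Can be deployed on a server as a scheduled task: it monitors a user's Weibo
# activity within a given time window and sends a friendly reminder through an
# SMS platform.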
from datetime import datetime, timedelta
import requests
import sqlite3
from bs4 import BeautifulSoup
from functools import reduce
import re
from uuid import uuid3, NAMESPACE_DNS
import random
import math
import os
import json
# from selenium import webdriver
# from selenium.webdriver.common.keys import Keys
import time
# from selenium.webdriver.support.wait import WebDriverWait
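"""
Known issue: the scheduled task runs from 23:00 until 01:00 the next day, so
deciding whether an SMS should be sent by checking a stored per-day file is
unreliable across midnight.
It is necessary to check the time: when it falls between 00:00 and 01:00, the
date has to be moved back by one day before continuing.
Author: [email protected]
Environment: Python 3, Windows 7 or later, server or personal edition.
"""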
# Working directory. The process changes into it at import time, so the log
# file, the SQLite database and the per-day marker files all live there.
# Raw string: the backslash in the Windows path must not be read as an escape.
cwd = r'E:\pytest'
os.chdir(cwd)
cwd = os.getcwd()
# Name of the run log, created inside ``cwd``.
log_file = 'weibo_notification.log'
# Accounts to watch and the phone number to notify for each one:
# [[uid, phone], [uid, phone], [uid, phone]]
weibo_id = [['your_weibo_id', 'your_relative_user_phone_number'], ['your_weibo_id', 'your_relative_user_phone_number']]
# Weibo widget endpoint; the uid is appended to it.
api = 'http://service.weibo.com/widget/widget_blog.php?uid='
db_path = os.path.join(cwd, 'weibo.db')
# ex: 123ac724f41234ccfa1234aec123432b (fake)
API_KEY = 'your_yunpian_sms_key'
# ex: 2124387 (fake)
TEMPLATE_ID = 'your_yunpian_int_template_id'
SMS_API = 'https://sms.yunpian.com/v2/sms/tpl_single_send.json'
# Time-interval threshold: a post whose relative age is below this many
# minutes triggers a notification.
THRESHOLD = 5
# Pool of text emoticons; one is picked at random for every SMS.
# The values are runtime data and are kept exactly as they were ('?' included).
# Backslashes are written as '\\' so no entry relies on an invalid escape.
EMOJI = ['(′ ▽〃)', '( ′o)', '(′ェ)', '(′ε )', '(=′ー)', '( ′θ)', '(′○)', '( ′-)', '(′?=)', '(′▽)', '(′ノ0)', '( ′ロ )',
         '(′ ︿ )', '(′?)', '(′m) ', '(′0ノ*)', '(@。ε。@)', '(=′?=) ', '(●′ω●)', '(′~●)', '(′へ、 )', '(〃′o)', '( ;′⌒`)',
         '(????)', '(??`ω′?)', '(σ`?д?)', '(o?ω?o)', '( ???)', '(???)', '(〃?o?〃)', '(^??)', '(。???。)', '(?0?)',
         '(?ε?●)', '(??ω?)', '(。?д?。)', '(???)', '(?□?、)', '(?-?。)', '(?▽?。)', '(??。)', '(?O?。)', '(ノ△?。)', '(@???)',
         '(??)', '(?ε?;)', '(? o ?)', '(?ェ?o)', '(′???『)', '(ΘΘ)', '(Θ~Θ〃)', '(ΘoΘ)', '(ΘェΘ)', '(Θ?Θ#)', '(ΘдΘ;)',
         '(Θ皿Θメ)', '(ΘーΘ*)', '(Θ0Θ●)', '(Θ▽Θ)', '(ΘεΘ◎)', '(Θ◇Θ。)', '(ΘへΘ)', '(Θ?Θ=)', '(Θ、Θ)', '(Θ△Θ@)', '(Θ3Θ)',
         '(°ー°〃)', '(#°Д°)', '(。□。)', '(。?。)', '(。?^)', '( ^ー。)', '(@。ー。 @)', '(。?^☆)', '(o。◇。)', '( 。 ▽ 。)', '(#。ε。#)',
         '(。?^d)', '(。?。;)', '(。皿。メ)', '(* 。 3 ^)', '(〃。 o 。〃)', '( °◇ °)', '( 。⊿。)', '(°□°;)', '( ロ)', '(。Д。 ;)',
         '(*。ノO。)', '(; 。。)', '(><)', '(;><)', '(><)', '(>.<)',
         '(>o<)', '(>O<)', '(o>▽<)', '(>◇< )', '(>▽<)', '(;>△<)', '( >з<)', '(o>ェ<)', '(>д<)', '(>皿<)', '(><、)',
         '(/<。)', '(>。;)', '(>。ヘ)', '(ノ<)', '(>。☆)', '(>y<;)',
         '(-ω- )', '(+ω+)', '(ω )', '( ^ω^)', '(;ω;)', '(。ω。)', '(『ω′)', '(?ω?=)', '( ̄ω ̄)', '(〃ω〃)', '(≧ω≦)', '(。?ω?)',
         '(=?ω?=)', '( >ω<)', '(′?ω?`)', '(/ω\\)', '(/ω?\\)', '(/?ω?\\)', '(ΘωΘ)', '(ΦωΦ)']
def log(s, t):
    """
    Append one entry to the run log and echo it to stdout.

    An entry is the tag, the ``ctime()`` timestamp, the message and a newline
    joined with ``' - '``.

    :param s: message to record; non-string values are converted with ``str()``
    :param t: tag index: 0 = blank tag (used for separator lines),
              1 = INFO, 2 = ERROR
    :return: None
    """
    types = [' ', '[INFO]\t', '[ERROR]\t']
    date_ = datetime.today().ctime()
    info = ' - '.join((types[int(t)], date_, str(s), '\n'))
    # Explicit UTF-8: messages carry Chinese text and scraped HTML, which the
    # platform default codec may be unable to encode.
    with open(os.path.join(cwd, log_file), 'at', encoding='utf-8') as f:
        f.write(info)
    print(info)
def _relative_age(t_span):
    """Convert a relative stamp ('N秒前', 'N分钟前', 'N小时前') into a timedelta."""
    amount = int(re.findall(r'(\d+)', t_span)[0])
    if '秒' in t_span:
        return timedelta(seconds=amount)
    if '小时' in t_span:
        return timedelta(hours=amount)
    # Any other unit is read as minutes, which is what the widget normally shows.
    return timedelta(minutes=amount)


def _resolve_timestamp(t_span, now):
    """Turn the widget's time text into ``(date_, time_, age)``.

    ``date_`` is 'YYYY-MM-DD', ``time_`` is 'HH:MM'. ``age`` is a timedelta for
    relative stamps and ``None`` for absolute ones.
    """
    if t_span.startswith('今天'):
        return now.strftime('%Y-%m-%d'), t_span.split()[1], None
    # NOTE(review): '刚刚' ("just now") is handled defensively; confirm whether
    # the widget actually emits it.
    if t_span == '刚刚' or t_span.endswith('前'):
        age = timedelta(0) if t_span == '刚刚' else _relative_age(t_span)
        posted = now - age
        # Date and time both come from the computed instant, so a post made
        # shortly before midnight is not filed under the following day.
        return posted.strftime('%Y-%m-%d'), posted.strftime('%H:%M'), age
    date_part, time_ = t_span.split()
    numbers = [int(n) for n in re.findall(r'\d+', date_part)]
    if len(numbers) == 3:
        year, month, day = numbers
    else:
        month, day = numbers
        year = now.year
        # A month/day later than today can only belong to the previous year.
        if (month, day) > (now.month, now.day):
            year -= 1
    # Zero-padded so the stored date (and the wid derived from it) matches the
    # '%Y-%m-%d' form produced by the other branches.
    return '%04d-%02d-%02d' % (year, month, day), time_, None


def _parse_cell(weibo):
    """Extract ``(content, repost, type_, img, t_span)`` from one 'wgtCell' element."""
    text_cell = weibo(class_='wgtCell_txt', limit=1)[0]
    contents = text_cell.contents
    plains = str(text_cell).split('<br/>')
    type_ = 0 if str(contents[0]).startswith('转发了') else 1
    if type_:
        repost = ''
        if len(contents) == 1:
            # A single node is kept verbatim so wids of stored posts stay stable.
            content = str(contents[0])
        else:
            content = ''.join(str(part).strip() for part in contents)
    else:
        repost = plains[-1]
        content = ''.join(plains[:-1])
    imgs = weibo('img')
    img = imgs[0].get('src', '') if imgs else ''
    t_span = str(weibo(class_='wgtCell_tm', limit=1)[0]('a')[0].text)
    return content, repost, type_, img, t_span


def parse_weibo(uid, mobile):
    """
    Fetch the Weibo widget page of ``uid`` and yield one tuple per post.

    A post whose relative age is below ``THRESHOLD`` minutes triggers
    ``send_notification(mobile)``. Cells that cannot be parsed are logged and
    skipped; if the page has no 'userNm' element nothing is yielded.

    type_: 1 = original post, 0 = repost (first text node starts with '转发了').
    repost: the text after the last '<br/>' for a repost, '' otherwise.

    :param uid: Weibo user id, appended to the widget URL
    :param mobile: phone number passed on to ``send_notification``
    :return: generator of
             (content, repost, time_, date_, type_, img, uid, wid, nick_name)
    """
    url = api + str(uid)
    log(url, 1)
    ua = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (Khtml, like Gecko) Chrome/63.0.3239.108 Safari/537.36'
    header = {'User-Agent': ua}
    # Timeout so an unresponsive server cannot hang the scheduled task.
    res = requests.get(url, headers=header, timeout=30)
    res.encoding = 'utf-8'
    soup = BeautifulSoup(res.text, 'html5lib')
    nick_nodes = soup(class_='userNm')
    if not nick_nodes:
        log('No userNm element in widget page for uid ' + str(uid), 2)
        return
    nick_name = nick_nodes[0].text
    threshold = timedelta(minutes=THRESHOLD)
    for weibo in soup(class_='wgtCell'):
        try:
            content, repost, type_, img, t_span = _parse_cell(weibo)
            date_, time_, age = _resolve_timestamp(t_span, datetime.now())
        except (IndexError, KeyError, ValueError) as e:
            log('Skipping unparsable weibo cell: ' + str(e), 2)
            continue
        if age is not None:
            if age < threshold:
                send_notification(mobile)
            else:
                log('Not less than threshold.' + str(THRESHOLD) + '\t' + str(uid), 1)
        wid = str(gid(date_ + time_ + content + str(uid)))
        yield (content, repost, time_, date_, type_, img, uid, wid, nick_name)
def check_like(uid, mobile):
    """
    Check whether the user's "like" count grew since the previous run today.

    The like page is loaded with Selenium (Firefox). The matching WebDriver
    binary must be available; follow the Selenium error message to get it.
    The highest count seen is kept in '<YYYYMMDD>_<uid>_like.txt' in the
    working directory. Runs between 00:00 and 04:59 are booked on the previous
    day. If the count is higher than the stored one, ``send_notification`` is
    called.

    :param uid: Weibo user id
    :param mobile: phone number to notify
    :return: 0 when the like count could not be read, otherwise None
    """
    # Imported here because the module-level selenium imports are commented out.
    from selenium import webdriver
    from selenium.webdriver.common.by import By
    from selenium.webdriver.common.keys import Keys

    log('ID: ' + uid + '=====>', 1)
    source = ''  # stays empty if every attempt below fails
    browser = webdriver.Firefox()
    try:
        browser.get('https://weibo.com/' + uid + '/like')
        browser.implicitly_wait(40)
        for _ in range(3):
            try:
                browser.find_element(By.CLASS_NAME, 'WB_tab_a').send_keys(Keys.DOWN)
                source = browser.page_source
                log('Successfully get HTML', 1)
                break
            except Exception as e:
                log(str(e), 2)
                time.sleep(2)
    finally:
        # Always release the browser, including on errors and early returns.
        browser.quit()

    now = datetime.now()
    # Early-morning runs belong to the previous day's session. timedelta keeps
    # the date valid on the first day of a month.
    if 0 <= now.hour <= 4:
        now -= timedelta(days=1)
    base_file = now.strftime('%Y%m%d') + '_' + uid + '_like.txt'
    log(base_file, 1)

    # Current number of liked posts, taken from the last '共N条' on the page.
    finds = re.findall(r'共(\d+)条', source)
    if not finds:
        log('获取HTML失败。', 2)
        log(source, 2)
        return 0
    like_num = int(finds[-1])
    log('Success: HTML', 1)
    log('Liked Number: ' + str(like_num), 1)

    last_like_num = None
    if os.path.exists(base_file):
        with open(base_file, 'rt') as f:
            last_like_num = f.readline().strip()
        log('Liked number: ' + str(like_num), 1)
    else:
        log('No exist like num', 1)
    log('Last like num: ' + str(last_like_num), 1)

    flag = False
    if last_like_num:
        try:
            previous = int(last_like_num)
        except ValueError:
            log('Ignoring unreadable like count: ' + last_like_num, 2)
        else:
            if like_num > previous:
                flag = True
            else:
                # Never store a smaller number than the one already recorded.
                like_num = previous
    # Written only after the new value is known, so a failure above cannot
    # leave an empty state file behind.
    with open(base_file, 'wt') as f:
        f.write(str(like_num))
    log(str(like_num), 1)

    if flag:
        send_notification(mobile)
        log('Sent Msg to ' + mobile, 1)
    else:
        log('Not Send Msg.', 1)
def deposit_weibo(item):
    """
    Store one post in the ``weibo`` table of the SQLite database at ``db_path``.

    Expected schema:

        CREATE TABLE weibo (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            nickname TEXT NOT NULL,
            content TEXT,
            repost TEXT,
            time DATETIME NOT NULL,
            date DATE NOT NULL,
            type INT NOT NULL,
            imgs TEXT,
            uid INT NOT NULL,
            wid TEXT NOT NULL
            UNIQUE
        );

    :param item: tuple (content, repost, time, date, type, imgs, uid, wid, nickname)
    :return: True if a new row was inserted, False if it was not (an already
             stored wid, or a database error, which is logged)
    """
    conn = sqlite3.connect(db_path)
    try:
        conn.execute(
            'INSERT INTO weibo (content, repost, time, date, type, imgs, uid, wid, nickname) VALUES (?,?,?,?,?,?,?,?,?)',
            item)
        conn.commit()
        return True
    except sqlite3.IntegrityError as e:
        # A UNIQUE violation on wid means the post is already stored; that is
        # the expected duplicate case. Other constraint failures are reported.
        if 'UNIQUE' not in str(e).upper():
            log('Failed to store weibo: ' + str(e), 2)
        return False
    except sqlite3.Error as e:
        # Real database problems (missing table, bad bindings, locked file)
        # must not be mistaken for duplicates.
        log('Failed to store weibo: ' + str(e), 2)
        return False
    finally:
        conn.close()
def gid(s):
    """
    Build the unique id (wid) of a post.

    The id is the name-based UUID (version 3, DNS namespace) of ``s``, so the
    same text always yields the same id. This is what lets the ``wid UNIQUE``
    column of the SQLite table reject posts that are already stored.

    :param s: string built from the post's text, timestamp and user id
    :return: uuid.UUID
    """
    return uuid3(NAMESPACE_DNS, s)
def send_notification(mobile):
    """
    Send one SMS to ``mobile`` through Yunpian's single template-send API.

    At most one SMS per phone number and day: after a response is received it
    is written to '<YYYYMMDD>_<mobile>.txt' in the working directory, and the
    function does nothing while that file exists. Runs between 00:00 and 03:59
    are booked on the previous day.

    :param mobile: phone number as a string
    :return: None
    """
    from urllib.parse import urlencode

    now = datetime.now()
    # timedelta keeps the date valid on the first of a month. The zero-padded
    # name cannot confuse e.g. January 12th with November 2nd.
    if 0 <= now.hour <= 3:
        now -= timedelta(days=1)
    fname = now.strftime('%Y%m%d') + '_' + mobile + '.txt'
    if os.path.exists(fname):
        log('Already sent sms, do nothing.', 1)
        return

    # Must match the template configured on the SMS platform. Name and value
    # are URL-encoded so emoticons containing '=', '+' or '&' arrive intact.
    message = random.choice(EMOJI) + '么么哒~??'
    tpl_value = urlencode({'#emoji#': message})
    params = {'apikey': API_KEY, 'tpl_id': TEMPLATE_ID, 'tpl_value': tpl_value, 'mobile': mobile}
    # Timeout so an unresponsive gateway cannot hang the scheduled task.
    res = requests.post(SMS_API, data=params, timeout=30)
    result = res.json()
    log(json.dumps(result), 1)
    print(result)
    log('Sent msg to ' + mobile, 1)
    # The marker is written only after the request returned, so a network
    # failure does not suppress today's SMS.
    # NOTE(review): as before, an error response from the API still counts as
    # "sent" -- confirm whether the marker should depend on the response code.
    with open(fname, 'wt') as f:
        json.dump(result, f)
def main():
    """
    Process every configured account once.

    For each ``[uid, phone]`` entry of ``weibo_id`` the widget page is parsed,
    each post is stored, and the number of newly stored posts is logged.
    A failure for one account is logged and the remaining accounts are still
    processed. The like-page check (``check_like``) is not called here.

    :return: None
    """
    log('==========================================', 0)
    for u in weibo_id:
        uid, mobile = u[0], u[1]
        log('Mobile: ' + mobile + ' UID: ' + uid, 1)
        try:
            items = sum(1 for weibo in parse_weibo(uid, mobile) if deposit_weibo(weibo))
        except Exception as e:
            # Top-level boundary of the scheduled task: report and move on, so
            # one broken account does not block the others.
            log('Failed to process UID ' + uid + ': ' + str(e), 2)
            continue
        log('Found new items: ' + str(items), 1)


if __name__ == '__main__':
    main()