python 使用pycurl的cURL登录Fitbit社区组，使用BeautifulSoup刮取页面，发送给Redis（写入获取数据以创建Fitbit仪表板）
Posted
tags:
篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了python 使用pycurl的cURL Fitbit社区组,使用BeautifulSoup刮取页面,发送给Redis(写入获取数据以创建Fitbit仪表板相关的知识,希望对你有一定的参考价值。
#!/usr/bin/python
from bs4 import BeautifulSoup
import pycurl
import re
import os
from urllib import urlencode
from io import BytesIO
from StringIO import StringIO
import sys
import redis
import time
class getFitbitData:
cookieDir = './fbcookie.txt' #where we're storing our cookies
#Config Redis server we're connecting to
pool = redis.ConnectionPool( host='0.0.0.0', port=6379,password='ifneeded',db=12 )
redisServer = redis.Redis( connection_pool=pool )
pipe = redisServer.pipeline()
# 2D array group name used in Redis key, and groups fitbit URL
groups = [["XXXX","https://www.fitbit.com/group/XXXXXX"],["XXXX","https://www.fitbit.com/group/XXXXXX"]]
date = time.strftime("%Y-%m-%d")
def __init__( self ):
#Where we store our Unity Crap
self.password = 'yourpass'
self.user = 'youruser'
def getHTML( self, groupURL, page ):
print "authenticate"
buffer = BytesIO()
c = pycurl.Curl()
c.setopt(c.SSL_VERIFYPEER, False)
c.setopt(c.FOLLOWLOCATION, True)
c.setopt(c.TIMEOUT, 60)
c.setopt(c.USERAGENT, 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:8.0) Gecko/20100101 Firefox/8.0')
c.setopt(c.URL, 'https://www.fitbit.com/login')
c.setopt(c.WRITEFUNCTION, buffer.write)
c.perform()
html = str(buffer.getvalue())
#Get hidden values for post
if "_sourcePage" in html:
rex = re.compile( "input type=\"hidden\" name=\"_sourcePage\" value=\"(.*?)\"")
sourcepage = rex.search( html ).groups()[0]
if "__fp" in html:
rex = re.compile( "input type=\"hidden\" name=\"__fp\" value=\"(.*)\"")
fp = rex.search( html ).groups()[0]
datastuff = {'login':'Log In','disableThirdPartyLogin':'false','email':self.user,'password':self.password,'rememberMe':'true'}
#post datastuff
c.setopt(c.USERAGENT, 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:8.0) Gecko/20100101 Firefox/8.0')
c.setopt(c.URL, 'https://www.fitbit.com/login' )
c.setopt(c.COOKIEJAR, self.cookieDir)
c.setopt(c.COOKIEFILE, self.cookieDir )
c.setopt(c.WRITEFUNCTION, buffer.write)
c.setopt(c.FOLLOWLOCATION, True)
c.setopt(c.POST, True)
c.setopt(c.POSTFIELDS, urlencode( datastuff ))
c.perform()
buffer.flush()
buffer = BytesIO()
c.setopt(c.USERAGENT, 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:8.0) Gecko/20100101 Firefox/8.0')
c.setopt(c.URL, groupURL+'/leaders?timeWindow=CURRENT_MONTH&page='+page)
c.setopt(c.COOKIEJAR, self.cookieDir)
c.setopt(c.COOKIEFILE, self.cookieDir )
c.setopt(c.WRITEFUNCTION, buffer.write)
c.perform()
html = str(buffer.getvalue())
return html
# c.close()
def parseHTML( self, html, group ):
soup = BeautifulSoup(html, "html.parser")
count = 0
# find all a hrefs with class formlink
for leftCell in soup.find_all("div", {"class": "leaderboardCell left"}):
for mylink in leftCell.find_all("div", {"class": "info"}):
for link in mylink.find_all("a"):
name = (link.get_text())
for link in mylink.find_all("li", {"class": "stat ellipsis"}):
t = (link.get_text())
t = "".join(t.split())
t = t[:-5]
steps = int(t.replace(',', ''))
for link in mylink.find_all("li", {"class": "average ellipsis"}):
a = (link.get_text()[:-5])
avg = int(a.replace(',', ''))
print name
print steps
self.redisServer.zadd("all:steps",name,steps)
self.redisServer.zadd(group+":steps",name,steps)
count += 1
return count
fit = getFitbitData()
#Run through groups if more then 25 listings on a page go to the next page
for group in fit.groups:
html = fit.getHTML( group[1], "0" )
listCount = fit.parseHTML( html, group[0] )
page = 1
while listCount == 25:
print "run again"
html = fit.getHTML( group[1], str(page) )
listCount = fit.parseHTML( html, group[0] )
page +=1
# Calculate Average - Pull all data from redis
# get total and start counter for every step count greater then 0
s = 0
d = 0
z = fit.redisServer.zrange(group[0]+':steps',0,-1,withscores=True)
for x in z:
if x[1] > 0:
s = s +x[1]
d += 1
# math for average
avg = s/d
fit.redisServer.delete(group[0]+":avg:"+fit.date)
fit.redisServer.sadd(group[0]+":avg:"+fit.date,avg)
print "main done"
以上是关于python 使用pycurl的cURL登录Fitbit社区组，使用BeautifulSoup刮取页面，发送给Redis（写入获取数据以创建Fitbit仪表板）的主要内容，如果未能解决你的问题，请参考以下文章
请问一下python里的pycurl初始化,pycurl.Curl()必须能够联网才能初始化正确吗? 我本想在局域网内部测试。