python 使用 pycurl(cURL)抓取 Fitbit 社区组页面,使用 BeautifulSoup 解析页面,并发送给 Redis(写入获取的数据以创建 Fitbit 仪表板)



篇首语:本文由小常识网小编为大家整理,主要介绍了 python 使用 pycurl(cURL)抓取 Fitbit 社区组页面、使用 BeautifulSoup 解析页面、并发送给 Redis(写入获取的数据以创建 Fitbit 仪表板)相关的知识,希望对你有一定的参考价值。

from bs4 import BeautifulSoup
import pycurl
import re
import os
from urllib import urlencode
from io import BytesIO
from StringIO import StringIO
import sys
import redis
import time

class getFitbitData:
	"""Log in to Fitbit with pycurl and scrape community-group leaderboards.

	``getHTML`` performs the login round-trip and downloads one leaderboard
	page; ``parseHTML`` walks that page with BeautifulSoup and prints each
	member's name and step count.

	The class also owns a Redis connection (``redisServer`` / ``pipe``) that
	callers may use.
	NOTE(review): nothing in this class writes to Redis, although the script
	below reads the sorted set ``<group>:steps`` — confirm whether a write
	step was lost from this copy of the file.
	"""

	cookieDir	= './fbcookie.txt'	#where we're storing our cookies

	# Login endpoint, used both to fetch the login form and to post the
	# credentials. It is blank in this copy of the file and has to be filled
	# in before getHTML can succeed.
	# NOTE(review): the original used two separate blank URL literals;
	# presumably they were the same address — confirm.
	loginURL = ''

	# Browser identity sent with every request.
	userAgent = 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:8.0) Gecko/20100101 Firefox/8.0'

	#Config Redis server we're connecting to
	pool = redis.ConnectionPool( host='', port=6379,password='ifneeded',db=12 )
	redisServer = redis.Redis( connection_pool=pool )
	pipe = redisServer.pipeline()

	# 2D array group name used in Redis key, and groups fitbit URL
	groups = [["XXXX",""],["XXXX",""]]

	# Day the class was loaded, formatted YYYY-MM-DD.
	date = time.strftime("%Y-%m-%d")

	# Hidden login-form inputs that are echoed back in the credential POST:
	# (form field name, pattern capturing its value). Both captures are
	# non-greedy so a match stops at the closing quote of the value attribute.
	_hiddenFieldPatterns = (
		('_sourcePage', re.compile(r'input type="hidden" name="_sourcePage" value="(.*?)"')),
		('__fp', re.compile(r'input type="hidden" name="__fp" value="(.*?)"')),
	)

	def __init__( self ):
		"""Store the Fitbit account credentials used by getHTML."""
		self.password =	'yourpass'
		self.user = 'youruser'

	def _fetch( self, c, url ):
		"""Run one transfer on curl handle *c* for *url* and return the body.

		A fresh buffer is attached for every transfer so responses from
		earlier requests on the same handle never leak into this one.
		"""
		body = BytesIO()
		c.setopt(c.URL, url)
		c.setopt(c.WRITEFUNCTION, body.write)
		c.perform()
		return body.getvalue()

	def getHTML( self, groupURL, page ):
		"""Authenticate and download one page of a group's leaderboard.

		Args:
			groupURL: base URL of the Fitbit community group.
			page: page index as a string; it is concatenated into the URL.

		Returns:
			The raw response body of
			``<groupURL>/leaders?timeWindow=CURRENT_MONTH&page=<page>``.

		Raises:
			pycurl.error: if any of the three transfers fails.
		"""
		print("authenticate")
		c = pycurl.Curl()
		try:
			# NOTE(review): certificate verification is switched off, so the
			# credentials below are sent without authenticating the server.
			# Kept as in the original; consider enabling verification.
			c.setopt(c.SSL_VERIFYPEER, False)
			c.setopt(c.FOLLOWLOCATION, True)
			c.setopt(c.TIMEOUT, 60)
			c.setopt(c.USERAGENT, self.userAgent)

			# Step 1: fetch the login form to pick up its hidden inputs.
			loginPage = self._fetch(c, self.loginURL)

			datastuff = {'login':'Log In','disableThirdPartyLogin':'false','email':self.user,'password':self.password,'rememberMe':'true'}

			#Get hidden values for post
			for fieldName, pattern in self._hiddenFieldPatterns:
				found = pattern.search(loginPage)
				if found:
					datastuff[fieldName] = found.group(1)

			# Step 2: post the credentials; cookies are persisted from here on.
			c.setopt(c.COOKIEJAR, self.cookieDir)
			c.setopt(c.COOKIEFILE, self.cookieDir)
			c.setopt(c.POST, True)
			c.setopt(c.POSTFIELDS, urlencode( datastuff ))
			self._fetch(c, self.loginURL)

			# Step 3: switch the handle back to GET, otherwise the leaderboard
			# request would be sent as another POST of the login form.
			c.setopt(c.HTTPGET, True)
			return self._fetch(c, groupURL+'/leaders?timeWindow=CURRENT_MONTH&page='+page)
		finally:
			# Always release the handle, even when a transfer raised.
			c.close()

	def parseHTML( self, html, group ):
		"""Print every leaderboard entry found in *html* and count them.

		Args:
			html: leaderboard page markup as returned by getHTML.
			group: group name; accepted for the caller's convenience but not
				used by the parsing itself.

		Returns:
			Number of ``div.info`` entries found inside
			``div.leaderboardCell.left`` cells.

		Raises:
			ValueError: if a step or average figure is not a valid integer
				once its suffix and thousands separators are removed.
		"""
		soup = BeautifulSoup(html, "html.parser")
		count = 0

		for leftCell in soup.find_all("div", {"class": "leaderboardCell left"}):
			for entry in leftCell.find_all("div", {"class": "info"}):
				# Reset per entry so a member with missing markup is reported
				# as None instead of inheriting the previous member's values
				# (or raising NameError on the very first entry).
				name = None
				steps = None

				for link in entry.find_all("a"):
					name = link.get_text()

				for stat in entry.find_all("li", {"class": "stat ellipsis"}):
					# Collapse all whitespace, then drop the last five
					# characters (presumably the unit suffix "steps" — TODO
					# confirm against the page markup) and the commas.
					text = "".join(stat.get_text().split())[:-5]
					steps = int(text.replace(',', ''))

				for stat in entry.find_all("li", {"class": "average ellipsis"}):
					# Parsed the same way but currently unused; kept so that
					# malformed averages still surface as ValueError.
					avg = int(stat.get_text()[:-5].replace(',', ''))

				print(name)
				print(steps)

				count += 1

		return count

# Driver: scrape every configured group, then average the step counts that
# are stored in Redis for that group.
fit = getFitbitData()

#Run through groups if more then 25 listings on a page go to the next page
for group in fit.groups:
	# group[0] is the name used in the Redis key, group[1] the group's URL.
	html = fit.getHTML( group[1], "0" )
	listCount = fit.parseHTML( html, group[0] )
	page = 1
	# A page with exactly 25 entries is treated as full, so keep paging.
	while listCount == 25:
		print("run again")
		html = fit.getHTML( group[1], str(page) )
		listCount = fit.parseHTML( html, group[0] )
		page += 1

	# Calculate Average - Pull all data from redis
	# Only members with a step count greater than 0 take part in the average.
	members = fit.redisServer.zrange(group[0]+':steps',0,-1,withscores=True)
	positiveScores = [score for _member, score in members if score > 0]

	# Guard the division: an empty set (or one with no positive scores)
	# would otherwise raise ZeroDivisionError and abort the remaining groups.
	# NOTE(review): avg is computed but never stored or printed — confirm
	# whether a follow-up step is missing from this copy of the script.
	if positiveScores:
		avg = sum(positiveScores) / len(positiveScores)
	else:
		avg = 0

print("main done")

以上是关于 python 使用 pycurl(cURL)抓取 Fitbit 社区组页面、使用 BeautifulSoup 解析页面、并发送给 Redis(写入获取的数据以创建 Fitbit 仪表板)的主要内容,如果未能解决你的问题,请参考以下文章:





请问一下,python 里的 pycurl 初始化时,pycurl.Curl() 必须能够联网才能正确初始化吗?我本想在局域网内部测试。
