Tumblr标记列表生成器

Posted

tags:

篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了Tumblr标记列表生成器相关的知识,希望对你有一定的参考价值。

The Tumblr taglist generator returns the tags of your Tumblr blog in JSON format. It is written in Python and runs on Google App Engine.
  1. import cgi
  2.  
  3. import md5
  4.  
  5. import time
  6.  
  7. import datetime
  8.  
  9. import math
  10.  
  11. from xml.dom import minidom
  12.  
  13. import urllib
  14.  
  15. import sys
  16.  
  17.  
  18.  
  19. from google.appengine.api import users
  20.  
  21. from google.appengine.ext import webapp
  22.  
  23. from google.appengine.ext.webapp.util import run_wsgi_app
  24.  
  25. from google.appengine.ext import db
  26.  
  27. from google.appengine.api import urlfetch
  28.  
  29. from django.utils import simplejson as json
  30.  
  31.  
  32.  
  33.  
  34.  
  35. # Database Classes
  36.  
  class TumblrCache(db.Model):
      """Datastore entity holding the cached tag list of one Tumblr API URL."""

      # Hex MD5 digest of the Tumblr API URL; the key the cache is looked up by.
      cache_id = db.StringProperty(required=True)

      # JSON-encoded {tag: count} map.  Stored as a TextProperty because a
      # StringProperty is length-limited and the encoded tag list of a blog
      # with many tags can exceed that limit.  TextProperty values are not
      # indexed, which is fine here: queries only filter on ``cache_id``.
      content = db.TextProperty()

      # Time the entity was created; overwritten by the code that refreshes
      # the cached content.
      date = db.DateTimeProperty(auto_now_add=True)
  44.  
  45.  
  46.  
  class Flush(webapp.RequestHandler):
      """Request handler that empties the tag-list cache."""

      def get(self):
          """Delete every ``TumblrCache`` entity, then reply ``FLUSHED!``."""
          self.response.headers['Content-Type'] = 'text/plain'
          out = self.response.out

          # Remove the cached entries one at a time.
          for entry in TumblrCache.all():
              entry.delete()

          out.write('FLUSHED!')
  66.  
  67.  
  68.  
  69.  
  70.  
  class MainPage(webapp.RequestHandler):
      """Request handler for the site root; it produces no response body."""

      def get(self):
          """Mark the response as plain text and write nothing else."""
          headers = self.response.headers
          headers['Content-Type'] = 'text/plain'
  76.  
  77.  
  78.  
  class TagCloud(webapp.RequestHandler):
      """Serve the tag counts of a Tumblr blog as ``var myJSONObject = {...};``.

      The blog is read through its ``/api/read`` endpoint, page by page, and
      the resulting ``{tag: count}`` map is cached in a ``TumblrCache`` entity
      keyed by the MD5 digest of the API URL.
      """

      # Blog used when the request has no ``url`` parameter.  This is the
      # address every request was previously served from, so callers that
      # pass no ``url`` see the same data as before.
      DEFAULT_BLOG_URL = 'http://im.doquangtu.net'

      # Step used when paging through the read API.
      POSTS_PER_PAGE = 20

      # Cached content older than this many seconds is rebuilt.
      CACHE_LIFETIME = 24 * 60 * 60

      # Cached content shorter than this is treated as missing (an empty
      # tag map encodes to ``{}``, i.e. two characters).
      MIN_CONTENT_LENGTH = 5

      def get(self):
          """Write the tag map of the requested blog, refreshing the cache if needed.

          Request parameters:
              url: base address of the Tumblr blog (optional; falls back to
                  ``DEFAULT_BLOG_URL`` when empty).
          """
          self.response.headers['Content-Type'] = 'text/plain'
          out = self.response.out

          # Collect the inputs.  The URL is fetched, never rendered into HTML,
          # so it must not be HTML-escaped (that would corrupt '&' etc.).
          # NOTE(review): the URL is caller-supplied and fetched server-side;
          # consider restricting it to known hosts.
          blog_url = self.request.get('url').encode('utf-8').strip().rstrip('/')
          if not blog_url:
              blog_url = self.DEFAULT_BLOG_URL
          tumblr_api = blog_url + '/api/read'
          cache_id = md5.new(tumblr_api).hexdigest()

          # Reuse the cache entity for this URL if one exists.
          cache = TumblrCache.gql("WHERE cache_id = :1 LIMIT 1", cache_id).get()
          if cache is None:
              cache = TumblrCache(cache_id=cache_id)

          content = cache.content or ''

          if (self._is_expired(cache.date)
                  or len(content) < self.MIN_CONTENT_LENGTH):
              # Cache is missing, too short to be real, or older than 24h:
              # rebuild it from the Tumblr API.
              taglist = self._fetch_tag_counts(tumblr_api)
              content = json.JSONEncoder().encode(taglist)

              cache.content = content
              cache.date = datetime.datetime.today()
              cache.put()

          # Fresh and cached results are emitted in the same format.
          out.write("var myJSONObject = " + content + ";")

      def _is_expired(self, cached_at):
          """Return True when *cached_at* is older than ``CACHE_LIFETIME``."""
          if cached_at is None:
              return True
          # Both operands are naive local datetimes (the cache date is written
          # with ``datetime.today()``), so the difference is a true elapsed time.
          age = datetime.datetime.now() - cached_at
          return age > datetime.timedelta(seconds=self.CACHE_LIFETIME)

      def _fetch_tag_counts(self, api_url):
          """Page through *api_url* and return a ``{tag: count}`` dict.

          Best effort: a failed fetch, an unparsable page or a missing/invalid
          ``total`` attribute ends the paging, and whatever was counted up to
          that point is returned.
          """
          # Imported locally so the block does not depend on the file's
          # top-level import list.
          import logging

          counts = {}
          offset = 0

          while True:
              query = urllib.urlencode([('filter', 'text'), ('start', offset)])
              try:
                  result = urlfetch.fetch(api_url + '?' + query)
                  if result.status_code != 200:
                      logging.warning('Tumblr API %s returned HTTP %s',
                                      api_url, result.status_code)
                      break
                  xdom = minidom.parseString(result.content)
              except Exception:
                  # Network failure, invalid URL or malformed XML.
                  logging.exception('Could not read %s (start=%s)',
                                    api_url, offset)
                  break

              posts_nodes = xdom.getElementsByTagName("posts")
              if not posts_nodes:
                  # Not a Tumblr read-API document.
                  break
              posts_node = posts_nodes[0]

              # Walk every tag of every post on this page.
              for post in posts_node.getElementsByTagName("post"):
                  for tag in post.getElementsByTagName("tag"):
                      name = "".join(
                          node.nodeValue for node in tag.childNodes
                          if node.nodeType == node.TEXT_NODE
                      ).strip()
                      counts[name] = counts.get(name, 0) + 1

              # Decide whether another page remains.
              try:
                  total = int(posts_node.getAttribute("total"))
              except ValueError:
                  break
              offset += self.POSTS_PER_PAGE
              if offset >= total:
                  break

          return counts
  258.  
  259.  
  260.  
  # WSGI application: maps each URL path to its request handler, with the
  # framework's debug mode switched on.
  application = webapp.WSGIApplication(
      [
          ('/', MainPage),
          ('/tagcloud', TagCloud),
          ('/flush', Flush),
      ],
      debug=True)
  270.  
  271.  
  272.  
  273.  
  274.  
  275.  
  276.  
  def main():
      """Hand the module-level ``application`` to ``run_wsgi_app``."""
      run_wsgi_app(application)
  280.  
  281.  
  282.  
  # Start the application when this file is executed as a script.
  if __name__ == '__main__':
      main()

以上是关于Tumblr标记列表生成器的主要内容,如果未能解决你的问题,请参考以下文章

架构丰富的代码片段也应该用于产品列表吗?

json Tumblr用户列表

在列表中存储标记位置

Python代码阅读(第40篇):通过两个列表生成字典

Tumblr 和 HTML5 - Square Grid 的画布?

CSS 显示代码片段:使用PRE标记包裹长行