Tumblr标记列表生成器

Posted 2021-02-26
tags:
篇首语：本文由小常识网(cha138.com)小编为大家整理，主要介绍了Tumblr标记列表生成器相关的知识，希望对你有一定的参考价值。
The Tumblr taglist generator returns the tags of your Tumblr blog in JSON format. It is written in Python and runs on Google App Engine.
import cgi
import datetime
import hashlib
import logging
import math
import md5
import sys
import time
import urllib
from xml.dom import minidom

from django.utils import simplejson as json
from google.appengine.api import urlfetch
from google.appengine.api import users
from google.appengine.ext import db
from google.appengine.ext import webapp
from google.appengine.ext.webapp.util import run_wsgi_app
 
 
 
 
 
# Database Classes
 
class TumblrCache(db.Model):
    """Datastore entity holding the cached tag list for one Tumblr API URL."""

    # MD5 hex digest of the Tumblr API URL this entry was generated from.
    cache_id = db.StringProperty(required=True)

    # JSON-encoded mapping of tag name -> number of posts carrying it.
    # A TextProperty is used because StringProperty rejects values longer
    # than 500 characters, which a tag list for a real blog easily exceeds.
    content = db.TextProperty()

    # When the entry was created or last refreshed; used to expire the cache.
    date = db.DateTimeProperty(auto_now_add=True)
 
 
 
class Flush(webapp.RequestHandler):
    """Handler that empties the tag-list cache."""

    def get(self):
        """Delete every TumblrCache entity, then confirm in plain text."""
        response = self.response
        response.headers['Content-Type'] = 'text/plain'

        # Entities are removed one by one while iterating the query.
        for cached_entry in TumblrCache.all():
            cached_entry.delete()

        response.out.write('FLUSHED!')
 
 
 
 
 
class MainPage(webapp.RequestHandler):
    """Handler for the root path; replies with an empty plain-text body."""

    def get(self):
        """Set the plain-text content type; nothing is written to the body."""
        headers = self.response.headers
        headers['Content-Type'] = 'text/plain'
 
 
 
class TagCloud(webapp.RequestHandler):
    """Serve the tag counts of a Tumblr blog as a JavaScript assignment.

    The response body always has the form ``var myJSONObject = {...};``
    where the object maps each tag name to the number of posts carrying it.
    Results are cached in the datastore (see TumblrCache) per API URL.
    """

    # Blog queried when the request carries no ``url`` parameter.
    DEFAULT_BLOG_URL = 'http://im.doquangtu.net'

    # Step used for the ``start`` offset when paging through the posts.
    POSTS_PER_PAGE = 20

    # Cached results older than this are regenerated.
    CACHE_LIFETIME = datetime.timedelta(hours=24)

    # Shortest cached payload considered usable. An empty tag list encodes
    # to "{}", which is shorter, so an empty result is retried next time.
    MIN_CONTENT_LENGTH = 5

    def get(self):
        """Write the tag list for the requested blog, using the cache if fresh.

        Request parameters:
            url: base URL of the Tumblr blog (optional). When omitted,
                DEFAULT_BLOG_URL is used.
        """
        self.response.headers['Content-Type'] = 'text/plain'
        w = self.response.out

        # Collect the inputs. The URL is only fetched, never rendered into
        # HTML, so it must not be HTML-escaped (that would corrupt '&').
        blog_url = self.request.get('url').encode('utf-8').strip().rstrip('/')
        if not blog_url:
            blog_url = self.DEFAULT_BLOG_URL
        tumblr_api = blog_url + '/api/read'
        cache_id = hashlib.md5(tumblr_api).hexdigest()

        # Look up an existing cache entry; a datastore failure is treated
        # like a cache miss so the request can still be answered.
        try:
            cache = TumblrCache.gql(
                "WHERE cache_id = :1 LIMIT 1", cache_id).get()
        except db.Error:
            logging.exception('Cache lookup failed for %s', tumblr_api)
            cache = None
        if cache is None:
            cache = TumblrCache(cache_id=cache_id)

        content = cache.content
        if content is None:
            content = ''

        # Less than 24 hours old and non-trivial: return the cached data.
        age = datetime.datetime.now() - cache.date
        is_fresh = age <= self.CACHE_LIFETIME
        if is_fresh and len(content) >= self.MIN_CONTENT_LENGTH:
            w.write("var myJSONObject = " + content + ";")
            return

        # Cache is missing, stale or empty: rebuild the tag list.
        taglist = self._fetch_tag_counts(tumblr_api)
        txt = json.dumps(taglist)

        # Caching is best effort; a failed write must not lose the response.
        try:
            cache.content = txt
            cache.date = datetime.datetime.now()
            cache.put()
        except db.Error:
            logging.exception('Could not cache tag list for %s', tumblr_api)

        # Same format as the cached branch, so clients see one shape only.
        w.write("var myJSONObject = " + txt + ";")

    def _fetch_tag_counts(self, tumblr_api):
        """Page through the blog's posts and count how often each tag occurs.

        Args:
            tumblr_api: URL of the blog's ``/api/read`` endpoint.

        Returns:
            A dict mapping tag name to occurrence count. If fetching or
            parsing fails, the error is logged and the counts gathered so
            far (possibly none) are returned.
        """
        tag_counts = {}
        page = 0
        try:
            while True:
                query = urllib.urlencode([
                    ('filter', 'text'),
                    ('start', page * self.POSTS_PER_PAGE),
                ])
                result = urlfetch.fetch(tumblr_api + '?' + query)
                xdom = minidom.parseString(result.content)

                # Searching from the document avoids depending on the
                # root element being the document's first child node.
                posts_nodes = xdom.getElementsByTagName("posts")
                if not posts_nodes:
                    break
                posts_node = posts_nodes[0]

                for post in posts_node.getElementsByTagName("post"):
                    for tag in post.getElementsByTagName("tag"):
                        name = "".join(
                            node.nodeValue for node in tag.childNodes
                            if node.nodeType == node.TEXT_NODE).strip()
                        tag_counts[name] = tag_counts.get(name, 0) + 1

                # Decide whether another page remains, from the "total"
                # attribute; stop if it is missing or not a number.
                try:
                    total = float(posts_node.getAttribute("total"))
                except ValueError:
                    break
                last_page = int(
                    math.ceil(total / float(self.POSTS_PER_PAGE)) - 1)
                if page >= last_page:
                    break
                page += 1
        except Exception:
            # Best effort: keep what was collected, but leave a trace
            # instead of hiding the failure.
            logging.exception('Fetching tags from %s failed', tumblr_api)
        return tag_counts
 
 
 
# WSGI application object: maps each URL path to its request handler.
application = webapp.WSGIApplication(
    [
        ('/', MainPage),
        ('/tagcloud', TagCloud),
        ('/flush', Flush),
    ],
    debug=True)
 
 
 
 
 
 
 
def main():
    """Hand the module-level WSGI application to run_wsgi_app."""
    run_wsgi_app(application)


# Run the application when the module is executed as a script.
if __name__ == '__main__':
    main()
以上是关于Tumblr标记列表生成器的主要内容，如果未能解决你的问题，请参考以下文章