ruby 2ch爬虫

Posted

tags:

篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了ruby 2ch爬虫相关的知识,希望对你有一定的参考价值。

# coding: utf-8
require 'nokogiri'
require 'mongo'
require 'open-uri'
require 'pp'

# Downloads the thread index (subback.html) of the "appli" board and returns
# the ids of the threads whose link text contains "スレ".
#
# The response bytes are treated as CP932 and converted to UTF-8; bytes that
# cannot be converted are replaced instead of raising, the same way
# extract_posts handles them.
#
# Errors raised by open-uri while fetching the page propagate to the caller.
#
# @return [Array<Integer>] the first path segment of each matching link's
#   href, converted with to_i, in page order
def extract_threads
  # URI#read comes from open-uri and works on old and new Rubies alike;
  # Kernel#open stopped accepting URLs in Ruby 3.0.
  raw = URI.parse('http://hayabusa3.2ch.net/appli/subback.html').read
  html = raw.force_encoding('cp932').encode('utf-8', invalid: :replace, undef: :replace)
  doc = Nokogiri::HTML.parse(html)

  thread_links = doc.css('#trad a').select { |node| node.children[0].to_s =~ /.*スレ/ }
  thread_links.map { |node| node['href'].split('/')[0].to_i }
end

# Downloads one thread of the "appli" board, starting at post number +from+,
# and returns the posts found in it.
#
# Each <dd> under ".thread" is a post body; the node right before it is
# expected to be the <dt> header carrying the post number and timestamp.
# Posts whose header cannot be parsed are skipped, and so is post number 1
# (presumably because the server repeats post 1 in every response — confirm).
#
# Errors raised by open-uri while fetching the page propagate to the caller.
#
# @param thread_id [Integer] id of the thread, as returned by extract_threads
# @param from [Integer] first post number to request
# @return [Array<Hash>] one hash per post with the keys
#   :thread_id, :post_id (Integer), :posted_at (Time, local zone) and
#   :body (Array<String>, the post's HTML split at <br>)
def extract_posts(thread_id, from)
  url = "http://hayabusa3.2ch.net/test/read.cgi/appli/#{thread_id}/#{from}-"
  # URI#read comes from open-uri and works on old and new Rubies alike;
  # Kernel#open stopped accepting URLs in Ruby 3.0.
  raw = URI.parse(url).read
  html = raw.force_encoding('cp932').encode('utf-8', invalid: :replace, undef: :replace)
  doc = Nokogiri::HTML.parse(html)

  # Captures: post number, then year/month/day, then hour:minute:second.
  header_pattern = %r{<dt>(\d*).*(\d{4})/(\d{2})/(\d{2}).*(\d{2}):(\d{2}):(\d{2})}

  posts = doc.css('.thread dd').map do |dd|
    match_data = dd.previous.to_s.match(header_pattern)
    next unless match_data

    post_id, year, month, day, hour, minute, second = match_data.captures.map(&:to_i)
    next unless post_id > 1

    body = dd.to_s.gsub(/<\/?dd>/, '').gsub("\n", '').split('<br>')
    {
      thread_id: thread_id,
      post_id: post_id,
      posted_at: Time.local(year, month, day, hour, minute, second),
      body: body
    }
  end

  # Skipped posts yield nil from the block above.
  posts.compact
end

# Crawl loop: every 300 seconds, list the board's threads, look up the highest
# post number already stored for each one, fetch everything after it and
# insert the new posts into the "posts" collection of the "hogehoge" database.
#
# Any StandardError raised during a cycle is printed and the loop carries on
# with the next cycle, so a temporary network or database failure does not
# stop the crawler.
loop do
  connection = nil
  begin
    puts "#{Time.now} crawling..."
    connection = Mongo::Connection.new
    posts = connection.db("hogehoge").collection("posts")

    # The thread list is fetched exactly once per cycle so that each id is
    # looked up and crawled against the same snapshot of the board.
    extract_threads.each do |thread_id|
      last = posts.find(thread_id: thread_id).map { |post| post['post_id'] }.max || 0

      # Presumably 1000 is the board's per-thread post limit, i.e. the thread
      # is already complete — confirm.
      next if last >= 1000

      puts "extracting post thread:#{thread_id}, range:#{last + 1}-"
      extracted_posts = extract_posts(thread_id, last + 1)
      puts "got #{extracted_posts.size} posts"
      extracted_posts.each { |post| posts.insert post }
    end
  rescue => e
    puts e.message
  ensure
    # A fresh connection is opened every cycle; release it so connections do
    # not pile up while the crawler runs.
    connection.close if connection
  end

  sleep 300
end

以上是关于ruby 2ch爬虫的主要内容,如果未能解决你的问题,请参考以下文章

matlab计算铰接式履带车辆转向性能

text craigslist履带式起重机

python SIte履带

在Unity3D中开发的坦克履带模拟器Tank Track Simulator

履带式与刮板式

ActionScript 3 AS3履带式装载机进展