ruby 2ch爬虫
Posted
tags:
篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了ruby 2ch爬虫相关的知识,希望对你有一定的参考价值。
# coding: utf-8
require 'nokogiri'
require 'mongo'
require 'open-uri'
require 'pp'
# Downloads the board's thread index and returns the ids of the threads
# whose link text contains "スレ".
#
# The page is fetched from hayabusa3.2ch.net, re-labelled as CP932 and
# transcoded to UTF-8. Bytes that cannot be transcoded are replaced rather
# than raising, matching what extract_posts does.
#
# @return [Array<Integer>] for every `#trad a` link that matched, the
#   leading path segment of its href converted with `to_i`, in page order
def extract_threads
  # URI#read (provided by open-uri) works on every Ruby version and closes
  # the underlying handle itself; Kernel#open no longer accepts URLs on
  # Ruby 3.0+.
  raw = URI.parse('http://hayabusa3.2ch.net/appli/subback.html').read
  html = raw.force_encoding('cp932').encode('utf-8', invalid: :replace, undef: :replace)
  links = Nokogiri::HTML.parse(html).css('#trad a')
  # Only the first child node of each link is inspected for the marker text.
  matching = links.select { |node| node.children[0].to_s =~ /スレ/ }
  matching.map { |node| node['href'].split('/')[0].to_i }
end
# Matches the serialized <dt> header that precedes a post body. Captures, in
# order: post number, year, month, day, hour, minute, second.
POST_HEADER_PATTERN = %r{<dt>(\d*).*(\d{4})/(\d{2})/(\d{2}).*(\d{2}):(\d{2}):(\d{2})}

# Downloads one thread starting at post number +from+ and returns its posts.
#
# The page is fetched from hayabusa3.2ch.net, re-labelled as CP932 and
# transcoded to UTF-8 with invalid/undefined bytes replaced. Every
# `.thread dd` element is paired with the node immediately before it, which
# is expected to be the <dt> header carrying the post number and timestamp.
#
# @param thread_id [Integer] thread id, interpolated into the request URL
# @param from [Integer] first post number to request (URL range "from-")
# @return [Array<Hash>] one hash per accepted post with the keys
#   :thread_id, :post_id, :posted_at (Time, local zone) and
#   :body (Array<String>, the <dd> markup split on "<br>")
def extract_posts(thread_id, from)
  url = "http://hayabusa3.2ch.net/test/read.cgi/appli/#{thread_id}/#{from}-"
  # URI#read (provided by open-uri) works on every Ruby version and closes
  # the underlying handle itself; Kernel#open no longer accepts URLs on
  # Ruby 3.0+.
  raw = URI.parse(url).read
  html = raw.force_encoding('cp932').encode('utf-8', invalid: :replace, undef: :replace)

  posts = Nokogiri::HTML.parse(html).css('.thread dd').map do |dd|
    # A missing previous node stringifies to "" and simply fails to match.
    header = dd.previous.to_s.match(POST_HEADER_PATTERN)
    next unless header

    post_id, year, month, day, hour, minute, second = header.captures.map(&:to_i)
    # NOTE(review): post 1 is never returned, even when from == 1.
    # Presumably the server echoes the first post on every ranged request
    # and this avoids storing it repeatedly -- confirm before changing.
    next unless post_id > 1

    # Strip the wrapping <dd> tags and newlines, then split into lines.
    body = dd.to_s.gsub(/<\/?dd>/, '').gsub("\n", '').split('<br>')
    {
      thread_id: thread_id,
      post_id: post_id,
      posted_at: Time.local(year, month, day, hour, minute, second),
      body: body
    }
  end
  # Rejected entries are nil (from `next`); drop them.
  posts.compact
end
# Seconds to sleep between two crawl passes.
CRAWL_INTERVAL_SECONDS = 300

# A thread is skipped once its highest stored post id reaches this value.
# NOTE(review): presumably the board's per-thread post limit -- confirm.
POST_ID_CEILING = 1000

# Main loop. Each pass lists the board's threads, looks up the highest
# post id already stored for each thread in the "posts" collection of the
# "hogehoge" database, downloads everything after it and inserts the new
# posts. Any StandardError aborts the current pass, is printed, and the
# loop retries after the sleep.
loop do
  connection = nil
  begin
    puts "#{Time.now} crawling..."
    connection = Mongo::Connection.new
    posts = connection.db("hogehoge").collection("posts")

    # The thread list is fetched exactly once per pass. Fetching it a second
    # time to compute the last-seen ids costs an extra request and can pair
    # ids with the wrong thread if the listing changes in between.
    extract_threads.each do |thread_id|
      last = posts.find(thread_id: thread_id).map { |post| post['post_id'] }.max || 0
      next if last >= POST_ID_CEILING

      puts "extracting post thread:#{thread_id}, range:#{last + 1}-"
      extracted_posts = extract_posts(thread_id, last + 1)
      puts "got #{extracted_posts.size} posts"
      extracted_posts.each { |post| posts.insert(post) }
    end
  rescue => e
    puts e.message
  ensure
    # A connection is opened on every pass; release it so passes do not
    # accumulate open sockets.
    connection.close if connection
  end
  sleep CRAWL_INTERVAL_SECONDS
end
以上是关于ruby 2ch爬虫的主要内容,如果未能解决你的问题,请参考以下文章
matlab计算铰接式履带车辆转向性能
text craigslist履带式起重机
python SIte履带
在Unity3D中开发的坦克履带模拟器Tank Track Simulator
履带式与刮板式
ActionScript 3 AS3履带式装载机进展