# pachong.rb
# Address prefix of a character page; the numeric id is appended to it.
URL = 'bangumi.tv/character/'.freeze

# Collects the ids of everything already filed in the given folders.
#
# An entry contributes an id when its file name starts with digits; entries
# that do not are ignored.
#
# @param dirs [Array<String>] folder names relative to the working directory
# @return [Array<Integer>] the ids found, without duplicates
def ready_ids(dirs)
  dirs.flat_map do |dir|
    Dir.glob("#{dir}/*").map { |path| path[%r{#{dir}/(\d+)}, 1] }.compact.map(&:to_i)
  end.uniq
end

# Ids that already have a file in download/, fail/ or error/.
READY = ready_ids(%w[download fail error]).freeze
# Fetches the page of character +i+ with wget, reads the name/description out
# of its <title> and downloads the cover image the page links to.
#
# Side effects, all in the working directory: wget is expected to leave the
# page in a file named after the id. That file is deleted once a cover link
# was found; otherwise it is moved into fail/ (best effort).
#
# @param i [Integer] character id appended to URL
# @return [String] log text: an "<id>: <name>, <description>" line when a
#   title was seen, then the image URL line, or an empty line when the page
#   has no cover link; '' when wget left no page file at all
def download(i)
  fn = i.to_s
  # Argument-list form of system: no shell is involved.
  system('wget', "#{URL}#{fn}")
  return '' unless File.exist?(fn)

  log = +''
  found = false
  File.readlines(fn).each do |line|
    if line =~ %r{<title>(.+)</title>}
      name, description = Regexp.last_match(1).split('|').map(&:strip)
      log << "#{i}: #{name}, #{description}\n"
    end
    next unless line =~ /href="(.+)" class="cover thickbox"/

    # The scheme is prepended (the href presumably starts with "//") and any
    # query string is dropped.
    url = "http:#{Regexp.last_match(1)}".sub(/\?.+$/, '')
    log << url + "\n"
    # The URL comes from the remote page, so it must never reach a shell.
    system('wget', url)
    File.delete(fn)
    found = true
    break
  end

  unless found
    begin
      File.rename(fn, File.join('fail', fn))
    rescue SystemCallError => e
      # Best effort: a missing fail/ folder must not abort the whole batch.
      warn "could not move #{fn} to fail/: #{e.message}"
    end
    log << "\n"
  end
  log
end
# Entry point: ruby pachong.rb <first_id> <count>
# Tries <count> consecutive ids starting at <first_id>, skipping those in READY.
first_id = ARGV[0].to_i
count = ARGV[1].to_i

log = +''
(first_id...first_id + count).each do |id|
  log << download(id) unless READY.include?(id)
end

# File every image wget left in the working directory under download/.
Dir.glob('*.jpg').each do |jpg|
  begin
    File.rename(jpg, File.join('download', jpg))
  rescue SystemCallError => e
    # Best effort: keep going so the log below is still written.
    warn "could not move #{jpg} to download/: #{e.message}"
  end
end

# Append this run's log to the shared log file.
File.open('pachong.txt', 'a') { |f| f << log }
Before running
- Install wget and Ruby.
- Create the folders `download` and `fail` (run.rb also moves files into an `error` folder, so create that one as well).
- Modify forloop.bat:
  - line 5: set the loop range (start, step = 50, end = start + 1000), i.e. 20 threads;
  - line 7: the second parameter for pachong.rb should be >= step.
- Run forloop.bat.
- When nearly all pictures have been downloaded, run `ruby run.rb 50`.

Tips
- This script may miss some pictures. Just run it again; pictures that are already in the folder will be skipped.
- If any cmd window gets stuck, press Enter to skip the current wget command.
# run.rb
require 'set'

# Second-pass driver. Files away whatever is lying in the working directory,
# prints every run of consecutive ids that has no file yet, and launches
# pachong.rb again on the runs longer than the limit.
#
# Usage: ruby run.rb [limit]   (limit defaults to 50)

# Id range that is scanned for missing entries.
FIRST_ID = 20_001
LAST_ID = 40_000

# Moves +path+ into +dir+. Best effort: a failure is reported, not raised.
def move_into(dir, path)
  File.rename(path, File.join(dir, File.basename(path)))
rescue SystemCallError => e
  warn "could not move #{path} to #{dir}/: #{e.message}"
end

# Entries whose name starts with a digit go to error/ (presumably page files
# left behind by an interrupted pachong.rb run), then remaining images go to
# download/.
Dir.glob('*').each { |f| move_into('error', f) if f =~ /^\d+/ }
Dir.glob('*.jpg').each { |f| move_into('download', f) }

Limit = ARGV[0] ? ARGV[0].to_i : 50

# Ids that already have a file in download/, fail/ or error/.
READY = %w[download fail error].flat_map do |dir|
  Dir.glob("#{dir}/*").map { |path| path[%r{#{dir}/(\d+)}, 1] }.compact.map(&:to_i)
end

# A Set keeps the 20,000 membership tests below cheap.
done = READY.to_set

# start[k] is the first missing id of run k, step[k] the number of missing ids.
start = []
step = []
record_gap = lambda do |from, upto|
  start << from
  step << upto - from
  print "#{from} -> #{upto} : #{upto - from}\n"
end

gap_start = nil
(FIRST_ID..LAST_ID).each do |id|
  if done.include?(id)
    if gap_start
      record_gap.call(gap_start, id)
      gap_start = nil
    end
  elsif gap_start.nil?
    gap_start = id
  end
end
# A run still open at the end of the range is closed at the range boundary, so
# start and step always have the same length.
record_gap.call(gap_start, LAST_ID + 1) if gap_start

print "total: #{step.sum}\n"

launched = 0
idx = 0
# Index loop on purpose: oversized runs append their remainder to both arrays
# while the loop is running.
while idx < start.size
  if step[idx] > Limit
    if step[idx] > 2 * Limit
      # Split: this launch takes 2 * Limit ids, the rest is queued as a new run.
      start << start[idx] + 2 * Limit
      step << step[idx] - 2 * Limit
      step[idx] = 2 * Limit
    end
    # NOTE(review): the launch begins one id after the first missing id;
    # kept exactly as the original did it, confirm this is intended.
    start[idx] += 1
    puts "#{start[idx]} + #{step[idx]}"
    # Windows only: each worker gets its own minimised cmd window.
    system "start /min cmd /c ruby pachong.rb #{start[idx]} #{step[idx]}"
    sleep(1)
    launched += 1
    break if launched > 20
  end
  idx += 1
end