python 第一博客 - 中 - 链接 - 7
Posted
tags:
篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了“python 第一博客 - 中 - 链接 - 7”相关的知识,希望对你有一定的参考价值。
#!/usr/bin/python3
from fake_useragent import UserAgent
import argparse
import colorama
import json
import os
import re
import requests
import sys
def get_valid_filename(s, max_length=200):
    """Return *s* reduced to a string that is safe to use as a file name.

    Leading and trailing whitespace is stripped, the remaining spaces become
    underscores, and every character that is neither a word character nor a
    dot is removed. The result is then truncated to ``max_length`` characters.

    Args:
        s: Value to sanitize; it is converted with ``str()`` first.
        max_length: Maximum number of characters kept. Defaults to 200 to
            avoid "file name too long" errors.

    Returns:
        The sanitized, truncated string. It may be empty when *s* contains
        no permitted characters.
    """
    name = str(s).strip().replace(' ', '_')
    return re.sub(r'[^\w.]', '', name)[:max_length]
def erase_previous_line():
    """Erase the line above the cursor so the next print overwrites it.

    Writes two ANSI escape sequences to ``sys.stdout``: one that moves the
    cursor up one line and one that clears from the cursor to the end of
    that line.
    """
    # cursor up one line
    sys.stdout.write("\033[F")
    # clear to the end of the line
    sys.stdout.write("\033[K")
def get_pictures_from_subreddit(data, subreddit):
    """Download the image posts listed in *data* into the *subreddit* directory.

    Each element of *data* is expected to be a mapping whose ``'data'`` entry
    holds the post's ``'url'`` and ``'title'``. Posts whose URL contains
    neither an image extension nor ``imgur`` are skipped. Downloads are
    best-effort: a failed request, a non-200 response, or a file that cannot
    be created (for example because it already exists) skips that post.

    Args:
        data: Sequence of post wrappers (``{'data': {'url': ..., 'title': ...}}``).
        subreddit: Subreddit name; used for progress output and as the
            target directory, which must already exist.
    """
    total = len(data)
    for index, child in enumerate(data):
        current_post = child['data']
        image_url = current_post['url']

        if '.png' in image_url:
            extension = '.png'
        elif '.jpg' in image_url or '.jpeg' in image_url:
            extension = '.jpeg'
        elif 'imgur' in image_url:
            # imgur link without an image extension: append one to the URL
            image_url += '.jpeg'
            extension = '.jpeg'
        else:
            continue

        erase_previous_line()
        print('downloading pictures from r/' + subreddit + '.. ' + str((index * 100) // total) + '%')

        try:
            # redirects = False prevents thumbnails denoting removed images from getting in
            # timeout keeps one stalled server from hanging the whole run
            image = requests.get(image_url, allow_redirects=False, timeout=30)
        except requests.RequestException:
            continue
        if image.status_code != 200:
            continue

        target = os.path.join(subreddit, get_valid_filename(current_post['title']) + extension)
        try:
            # 'x' refuses to overwrite an existing file; 'with' closes the handle
            with open(target, mode='xb') as output_filehandle:
                output_filehandle.write(image.content)
        except OSError:
            # file already exists or cannot be written: skip this post
            continue
def main():
    """Parse the command line and download top images for each subreddit.

    For every subreddit named with ``-s`` the function requests the
    subreddit's top-posts JSON listing, creates a directory named after the
    subreddit if needed, and hands the listing's children to
    ``get_pictures_from_subreddit``. If a listing request does not succeed,
    an error is printed and the process exits with status 1.
    """
    # presumably enables ANSI escape handling on Windows consoles
    colorama.init()
    ua = UserAgent()

    parser = argparse.ArgumentParser(description='Fetch images from a subreddit (eg: python3 grab_pictures.py -s itookapicture CozyPlaces -n 100 -t all)')
    parser.add_argument('-s', '--subreddit', nargs='+', type=str, metavar='',
                        required=True, help='Exact name of the subreddits you want to grab pictures')
    parser.add_argument('-n', '--number', type=int, metavar='', default=50,
                        help='Optionally specify number of images to be downloaded (default=50)')
    parser.add_argument('-t', '--top', type=str, metavar='', choices=['day', 'week', 'month', 'year', 'all'],
                        default='week', help='Optionally specify whether top posts of [day, week, month, year or all] (default=week)')
    args = parser.parse_args()

    for subreddit in args.subreddit:
        print('Connecting to r/' + subreddit)
        url = ('https://www.reddit.com/r/' + subreddit + '/top/.json?sort=top&t='
               + args.top + '&limit=' + str(args.number))
        # random User-agent header per request; timeout avoids hanging forever
        response = requests.get(url, headers={'User-agent': ua.random}, timeout=30)

        if not response.ok:
            print("Error check the name of the subreddit", response.status_code)
            # sys.exit is always available (unlike the site-provided exit());
            # a non-zero status signals the failure to the caller
            sys.exit(1)

        # exist_ok avoids the check-then-create race of exists() + mkdir()
        os.makedirs(subreddit, exist_ok=True)

        # notify connected and downloading pictures from subreddit
        erase_previous_line()
        print('downloading pictures from r/' + subreddit + '..')

        data = response.json()['data']['children']
        get_pictures_from_subreddit(data, subreddit)

        erase_previous_line()
        print('Downloaded pictures from r/' + subreddit)
# Run the downloader only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
以上是关于“python 第一博客 - 中 - 链接 - 7”的主要内容。如果未能解决你的问题,请参考以下文章: