爬虫大作业

Posted 090伍明航

tags:

篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了爬虫大作业相关的知识,希望对你有一定的参考价值。

词云生成
import jieba
import PIL
from wordcloud import WordCloud
import matplotlib.pyplot as p
import os

# Read the whole input text; the context manager closes the file as soon as
# the contents have been loaded.
with open('wmh.txt', 'r', encoding='utf-8') as source_file:
    info = source_file.read()

# Segment the text with jieba and join the tokens with single spaces.
text = ' '.join(jieba.lcut(info))

# Build the word cloud from the segmented text (at most 50 words).
wc = WordCloud(font_path='./fonts/simhei.ttf', background_color='White', max_words=50)
wc.generate_from_text(text)

# Show the cloud without axes, then write it to an image file.
p.imshow(wc)
p.axis("off")
p.show()
wc.to_file('wmh.jpg')

文本生成

# -*- coding: utf-8 -*-

#   __
#  /__)  _  _     _   _ _/   _
# / (   (- (/ (/ (- _)  /  _)
#          /

"""
Requests HTTP Library
~~~~~~~~~~~~~~~~~~~~~

Requests is an HTTP library, written in Python, for human beings. Basic GET
usage:

   >>> import requests
   >>> r = requests.get('https://www.python.org')
   >>> r.status_code
   200
   >>> 'Python is a programming language' in r.content
   True

... or POST:

   >>> payload = dict(key1='value1', key2='value2')
   >>> r = requests.post('http://httpbin.org/post', data=payload)
   >>> print(r.text)
   {
     ...
     "form": {
       "key2": "value2",
       "key1": "value1"
     },
     ...
   }

The other HTTP methods are supported - see `requests.api`. Full documentation
is at <http://python-requests.org>.

:copyright: (c) 2017 by Kenneth Reitz.
:license: Apache 2.0, see LICENSE for more details.
"""

import urllib3
import chardet
import warnings
from .exceptions import RequestsDependencyWarning


def check_compatibility(urllib3_version, chardet_version):
    """Check that the urllib3 and chardet versions are supported ones.

    The checks raise explicitly instead of using ``assert`` so that they are
    not stripped when Python runs with ``-O``.

    :param urllib3_version: dotted version string such as ``'1.22'``; a
        two-component version is padded with a ``0`` patch level.
    :param chardet_version: dotted version string such as ``'3.0.4'``; only
        the first three components are examined.
    :raises AssertionError: if either version is outside the supported range,
        or if the urllib3 version is the bare string ``'dev'``.
    :raises ValueError: if a version does not split into three components or
        a component is not an integer.
    """
    urllib3_version = urllib3_version.split('.')
    # Verify urllib3 isn't installed from git.
    if urllib3_version == ['dev']:
        raise AssertionError('urllib3 reports a development version')

    # Sometimes, urllib3 only reports its version as 16.1.
    if len(urllib3_version) == 2:
        urllib3_version.append('0')

    # Check urllib3 for compatibility.
    major, minor, patch = urllib3_version  # noqa: F811
    major, minor, patch = int(major), int(minor), int(patch)
    # urllib3 >= 1.21.1, <= 1.22
    # NOTE(review): only major and minor are compared, so 1.21.0 is accepted
    # even though the stated lower bound is 1.21.1 -- confirm this is intended.
    if major != 1 or not 21 <= minor <= 22:
        raise AssertionError('unsupported urllib3 version')

    # Check chardet for compatibility.
    major, minor, patch = chardet_version.split('.')[:3]
    major, minor, patch = int(major), int(minor), int(patch)
    # chardet >= 3.0.2, < 3.1.0
    if major != 3 or minor >= 1 or patch < 2:
        raise AssertionError('unsupported chardet version')


# Validate the versions of the dependencies that were actually imported.
# A mismatch (or an unparseable version string) is reported as a warning
# instead of being raised.
try:
    check_compatibility(urllib3.__version__, chardet.__version__)
except (AssertionError, ValueError):
    warnings.warn(
        "urllib3 ({0}) or chardet ({1}) doesn't match a supported version!".format(
            urllib3.__version__, chardet.__version__
        ),
        RequestsDependencyWarning,
    )

# Attempt to enable urllib3's SNI support, if possible
try:
    from urllib3.contrib import pyopenssl
    pyopenssl.inject_into_urllib3()
except ImportError:
    # Best effort: if the pyopenssl contrib module cannot be imported,
    # continue without injecting it.
    pass

# urllib3's DependencyWarnings should be silenced.
from urllib3.exceptions import DependencyWarning
warnings.simplefilter('ignore', DependencyWarning)

from .__version__ import __title__, __description__, __url__, __version__
from .__version__ import __build__, __author__, __author_email__, __license__
from .__version__ import __copyright__, __cake__

from . import utils
from . import packages
from .models import Request, Response, PreparedRequest
from .api import request, get, head, post, patch, put, delete, options
from .sessions import session, Session
from .status_codes import codes
from .exceptions import (
    RequestException, Timeout, URLRequired,
    TooManyRedirects, HTTPError, ConnectionError,
    FileModeWarning, ConnectTimeout, ReadTimeout
)

# Attach a do-nothing handler to this module's logger to avoid
# "No handler found" warnings.
import logging

if hasattr(logging, 'NullHandler'):  # Python 2.7+
    NullHandler = logging.NullHandler
else:
    class NullHandler(logging.Handler):
        """Fallback handler that discards every record it receives."""

        def emit(self, record):
            return None

logging.getLogger(__name__).addHandler(NullHandler())

# FileModeWarnings go off per the default.
# append=True adds this filter at the end of the filter list.
warnings.simplefilter('default', FileModeWarning, append=True)

 

以上是关于爬虫大作业的主要内容,如果未能解决你的问题,请参考以下文章:

爬虫大作业

爬虫大作业

HTML5期末大作业:餐饮美食网站设计——咖啡(10页) HTML+CSS+JavaScript 学生DW网页设计作业成品 web课程设计网页规划与设计 咖啡网页设计 美食餐饮网页设计...(代码片段)

Python大作业——爬虫+可视化+数据分析+数据库(可视化篇)

Python大作业——爬虫+可视化+数据分析+数据库(数据分析篇)

Python课程设计大作业:利用爬虫获取NBA比赛数据并进行机器学习预测NBA比赛结果