text pyconjp_2018_scraping_samples.ipynb

Posted

tags:

篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了text pyconjp_2018_scraping_samples.ipynb相关的知识,希望对你有一定的参考价值。

{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['Example Domain']\n"
     ]
    }
   ],
   "source": [
    "# BeautifulSoup 4 example: fetch a page and print the text of every <h1>.\n",
    "# Install with: pip install beautifulsoup4\n",
    "import requests\n",
    "from bs4 import BeautifulSoup\n",
    "\n",
    "resp = requests.get('https://www.example.com/')\n",
    "# Parse the raw response bytes with the 'lxml' parser.\n",
    "bs_obj = BeautifulSoup(resp.content, 'lxml')\n",
    "\n",
    "h1_tags = bs_obj.find_all('h1')\n",
    "h1_texts = [tag.text for tag in h1_tags]\n",
    "print(h1_texts)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Example Domain\n"
     ]
    }
   ],
   "source": [
    "# Selenium example: print the page title using headless Chrome.\n",
    "# Install with: pip install selenium\n",
    "# chromedriver is also required, e.g.: brew install --cask chromedriver\n",
    "# (older Homebrew releases used: brew cask install chromedriver)\n",
    "from selenium import webdriver\n",
    "\n",
    "options = webdriver.ChromeOptions()\n",
    "options.add_argument('--headless')\n",
    "driver = webdriver.Chrome(options=options)\n",
    "\n",
    "try:\n",
    "    driver.get('http://www.example.com/')\n",
    "    print(driver.title)\n",
    "finally:\n",
    "    # Always shut the browser down, even when loading the page fails,\n",
    "    # so no orphaned Chrome/chromedriver process is left behind.\n",
    "    driver.quit()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Example Domain\n"
     ]
    }
   ],
   "source": [
    "# Same title lookup, with the driver managed by a `with` block.\n",
    "# Relies on `webdriver` and `options` defined in the previous Selenium cell.\n",
    "with webdriver.Chrome(options=options) as driver:\n",
    "    driver.get('http://www.example.com/')\n",
    "    page_title = driver.title\n",
    "    print(page_title)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['Example Domain']\n"
     ]
    }
   ],
   "source": [
    "# requests-html example: print the text of every <h1> on the page.\n",
    "# Install with: pip install requests_html\n",
    "from requests_html import HTMLSession\n",
    "\n",
    "session = HTMLSession()\n",
    "resp = session.get('http://www.example.com/')\n",
    "\n",
    "h1_elems = resp.html.find('h1')\n",
    "print([elem.text for elem in h1_elems])\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Example Domain\n"
     ]
    }
   ],
   "source": [
    "# pyppeteer example: print the page title from a browser driven by pyppeteer.\n",
    "# Install with: pip install pyppeteer\n",
    "import asyncio\n",
    "from pyppeteer import launch\n",
    "\n",
    "\n",
    "async def main():\n",
    "    \"\"\"Open http://example.com in a launched browser and print its title.\"\"\"\n",
    "    browser = await launch()\n",
    "    try:\n",
    "        page = await browser.newPage()\n",
    "        await page.goto('http://example.com')\n",
    "        print(await page.title())\n",
    "    finally:\n",
    "        # Close the browser even if navigation fails, so the browser\n",
    "        # process is not leaked.\n",
    "        await browser.close()\n",
    "\n",
    "\n",
    "# `loop` stays a notebook-level name because a later cell reuses it.\n",
    "loop = asyncio.get_event_loop()\n",
    "loop.run_until_complete(main())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Example Domain\n"
     ]
    }
   ],
   "source": [
    "# pyquery example: print the text of the <h1> element(s).\n",
    "from pyquery import PyQuery\n",
    "\n",
    "pq = PyQuery(url='https://example.com')\n",
    "h1_nodes = pq.find('h1')\n",
    "print(h1_nodes.text())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "collapsed": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['株式会社SQUEEZE',\n '株式会社MonotaRO',\n 'LINE株式会社',\n 'Retty株式会社',\n 'iRidge, Inc.',\n '株式会社いい生活',\n 'ミラクル・リナックス株式会社',\n '株式会社スカラコミュニケーションズ',\n '株式会社ビザスク',\n '株式会社ヌーラボ',\n 'TIS株式会社',\n 'PAY.JP',\n 'カラフル・ボード株式会社',\n '株式会社ビープラウド',\n '株式会社リーディング・エッジ社',\n '株式会社Nexedi',\n 'タロスカイ',\n 'ワイアーチ',\n '株式会社日本システム技研',\n 'SideCI',\n '日本経済新聞社',\n 'Elastic',\n '株式会社データミックス',\n 'XICA',\n '株式会社 ARISE analytics',\n '株式会社ミクシィ・リクルートメント',\n '株式会社JX通信社',\n 'シルバーエッグ・テクノロジー株式会社',\n 'Gandi.net',\n 'GROOVE X株式会社',\n 'HDE, Inc.',\n '株式会社DataSign',\n 'Credit Engine, Inc.',\n '富士通クラウドテクノロジーズ株式会社',\n 'Supership株式会社',\n '株式会社ブレインパッド',\n '株式会社CMSコミュニケーションズ',\n '株式会社slideship',\n '日本マイクロソフト株式会社',\n 'スフィアリンクス株式会社',\n 'gihyo.jp',\n 'Think IT',\n '株式会社オライリー・ジャパン',\n 'Togetter',\n 'CodeZine',\n 'エンジニアtype']"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Scrape a page with requests-html: the PyCon JP 2017 sponsors list.\n",
    "\n",
    "from requests_html import HTMLSession\n",
    "\n",
    "session = HTMLSession()\n",
    "resp = session.get('https://pycon.jp/2017/ja/sponsors/')\n",
    "\n",
    "# CSS selector for the <h4> headings inside .sponsor-content blocks.\n",
    "sel = '.sponsor-content h4'\n",
    "elems = resp.html.find(sel)\n",
    "\n",
    "# Last expression of the cell: displayed as the cell result.\n",
    "[elem.text for elem in elems]\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "collapsed": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['株式会社SQUEEZE',\n '株式会社MonotaRO',\n 'LINE株式会社',\n 'Retty株式会社',\n 'iRidge, Inc.',\n '株式会社いい生活',\n 'ミラクル・リナックス株式会社',\n '株式会社スカラコミュニケーションズ',\n '株式会社ビザスク',\n '株式会社ヌーラボ',\n 'TIS株式会社',\n 'PAY.JP',\n 'カラフル・ボード株式会社',\n '株式会社ビープラウド',\n '株式会社リーディング・エッジ社',\n '株式会社Nexedi',\n 'タロスカイ',\n 'ワイアーチ',\n '株式会社日本システム技研',\n 'SideCI',\n '日本経済新聞社',\n 'Elastic',\n '株式会社データミックス',\n 'XICA',\n '株式会社 ARISE analytics',\n '株式会社ミクシィ・リクルートメント',\n '株式会社JX通信社',\n 'シルバーエッグ・テクノロジー株式会社',\n 'Gandi.net',\n 'GROOVE X株式会社',\n 'HDE, Inc.',\n '株式会社DataSign',\n 'Credit Engine, Inc.',\n '富士通クラウドテクノロジーズ株式会社',\n 'Supership株式会社',\n '株式会社ブレインパッド',\n '株式会社CMSコミュニケーションズ',\n '株式会社slideship',\n '日本マイクロソフト株式会社',\n 'スフィアリンクス株式会社',\n 'gihyo.jp',\n 'Think IT',\n '株式会社オライリー・ジャパン',\n 'Togetter',\n 'CodeZine',\n 'エンジニアtype']"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Fetch the PyCon JP 2017 sponsors page with retries and error reporting.\n",
    "# NOTE: ConnectionError here is the requests exception; it shadows the builtin.\n",
    "from requests.exceptions import (ConnectionError, HTTPError,\n",
    "                                 RequestException, TooManyRedirects)\n",
    "from requests_html import HTMLSession\n",
    "from retry import retry\n",
    "\n",
    "\n",
    "@retry(tries=3, delay=2, backoff=2)\n",
    "def get_resp():\n",
    "    \"\"\"Return the sponsors page response, retrying on failure.\n",
    "\n",
    "    The decorator allows up to 3 attempts, with the delay starting at 2 s\n",
    "    and doubling after each failure. Every handled failure is reported and\n",
    "    re-raised so that the decorator can retry.\n",
    "\n",
    "    Raises:\n",
    "        requests.exceptions.RequestException: when the last attempt fails\n",
    "            (connection error, redirect loop, timeout, or 4xx/5xx status).\n",
    "    \"\"\"\n",
    "    try:\n",
    "        session = HTMLSession()\n",
    "        # A timeout keeps a stalled connection from blocking forever.\n",
    "        page_resp = session.get('https://pycon.jp/2017/ja/sponsors/', timeout=10)\n",
    "        # requests only raises HTTPError on request; without this call the\n",
    "        # HTTPError handler below could never run for 4xx/5xx replies.\n",
    "        page_resp.raise_for_status()\n",
    "        return page_resp\n",
    "    except ConnectionError:\n",
    "        print('NetworkError')\n",
    "        raise\n",
    "    except TooManyRedirects:\n",
    "        print('TooManyRedirects')\n",
    "        raise\n",
    "    except HTTPError:\n",
    "        print('BadResponse')\n",
    "        raise\n",
    "\n",
    "\n",
    "try:\n",
    "    resp = get_resp()\n",
    "except RequestException:\n",
    "    # Catch request failures only (not KeyboardInterrupt or programming\n",
    "    # errors) and do not fall through to an undefined or stale `resp`.\n",
    "    print('Response not found')\n",
    "    elems = []\n",
    "else:\n",
    "    sel = '.sponsor-content h4'\n",
    "    elems = resp.html.find(sel)\n",
    "\n",
    "[i.text for i in elems]\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {
    "collapsed": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['1.\\n99\\nThe Legend of Zelda: Ocarina of Time (N64)\\nUser: 9.1\\nNov 23, 1998', \"2.\\n98\\nTony Hawk's Pro Skater 2 (PS)\\nUser: 7.4\\nSep 20, 2000\", '3.\\n98\\nGrand Theft Auto IV (PS3)\\nUser: 7.5\\nApr 29, 2008', '4.\\n98\\nSoulCalibur (DC)\\nUser: 8.7\\nSep 8, 1999', '5.\\n98\\nGrand Theft Auto IV (X360)\\nUser: 7.9\\nApr 29, 2008', '6.\\n97\\nSuper Mario Galaxy (WII)\\nUser: 9.0\\nNov 12, 2007', '7.\\n97\\nSuper Mario Galaxy 2 (WII)\\nUser: 9.1\\nMay 23, 2010', '8.\\n97\\nGrand Theft Auto V (XONE)\\nUser: 7.8\\nNov 18, 2014', '9.\\n97\\nGrand Theft Auto V (PS3)\\nUser: 8.3\\nSep 17, 2013', '10.\\n97\\nGrand Theft Auto V (X360)\\nUser: 8.3\\nSep 17, 2013', \"11.\\n97\\nTony Hawk's Pro Skater 2 (DC)\\nUser: 6.2\\nNov 6, 2000\", '12.\\n97\\nThe Legend of Zelda: Breath of the Wild (Switch)\\nUser: 8.5\\nMar 3, 2017', \"13.\\n97\\nTony Hawk's Pro Skater 3 (PS2)\\nUser: 7.4\\nOct 28, 2001\", '14.\\n97\\nPerfect Dark (N64)\\nUser: 8.9\\nMay 22, 2000', '15.\\n97\\nGrand Theft Auto V (PS4)\\nUser: 8.3\\nNov 18, 2014', '16.\\n97\\nMetroid Prime (GC)\\nUser: 9.2\\nNov 17, 2002', '17.\\n97\\nGrand Theft Auto III (PS2)\\nUser: 8.5\\nOct 22, 2001', '18.\\n97\\nSuper Mario Odyssey (Switch)\\nUser: 8.9\\nOct 27, 2017', '19.\\n97\\nHalo: Combat Evolved (XBOX)\\nUser: 8.6\\nNov 14, 2001', '20.\\n97\\nNFL 2K1 (DC)\\nUser: 6.4\\nSep 7, 2000', '21.\\n96\\nHalf-Life 2 (PC)\\nUser: 9.1\\nNov 16, 2004', '22.\\n96\\nGrand Theft Auto V (PC)\\nUser: 7.7\\nApr 14, 2015', '23.\\n96\\nThe Legend of Zelda: Breath of the Wild (WIIU)\\nUser: 8.2\\nMar 3, 2017', '24.\\n96\\nBioShock (X360)\\nUser: 8.9\\nAug 21, 2007', '25.\\n96\\nGoldenEye 007 (N64)\\nUser: 9.1\\nAug 25, 1997', '26.\\n96\\nUncharted 2: Among Thieves (PS3)\\nUser: 8.9\\nOct 13, 2009', '27.\\n96\\nResident Evil 4 (GC)\\nUser: 9.4\\nJan 11, 2005', '28.\\n96\\nThe Orange Box (X360)\\nUser: 8.8\\nOct 10, 2007', '29.\\n96\\nThe Orange Box (PC)\\nUser: 9.2\\nOct 10, 2007', '30.\\n96\\nBatman: Arkham City (PS3)\\nUser: 
8.7\\nOct 18, 2011', '31.\\n96\\nTekken 3 (PS)\\nUser: 9.1\\nApr 29, 1998', '32.\\n96\\nMass Effect 2 (X360)\\nUser: 8.9\\nJan 26, 2010', '33.\\n96\\nThe Legend of Zelda: Twilight Princess (GC)\\nUser: 8.9\\nDec 11, 2006', '34.\\n96\\nThe Elder Scrolls V: Skyrim (X360)\\nUser: 8.5\\nNov 11, 2011', '35.\\n96\\nHalf-Life (PC)\\nUser: 9.1\\nNov 19, 1998', '36.\\n96\\nResident Evil 4 (PS2)\\nUser: 8.9\\nOct 25, 2005', '37.\\n96\\nThe Legend of Zelda: The Wind Waker (GC)\\nUser: 8.9\\nMar 24, 2003', '38.\\n96\\nGran Turismo (PS)\\nUser: 8.6\\nApr 30, 1998', '39.\\n96\\nBioShock (PC)\\nUser: 8.5\\nAug 21, 2007', '40.\\n96\\nMetal Gear Solid 2: Sons of Liberty (PS2)\\nUser: 8.8\\nNov 12, 2001', '41.\\n96\\nGrand Theft Auto Double Pack (XBOX)\\nUser: 7.9\\nOct 31, 2003', \"42.\\n95\\nBaldur's Gate II: Shadows of Amn (PC)\\nUser: 9.2\\nSep 24, 2000\", '43.\\n95\\nGrand Theft Auto: San Andreas (PS2)\\nUser: 9.0\\nOct 26, 2004', '44.\\n95\\nGrand Theft Auto: Vice City (PS2)\\nUser: 8.8\\nOct 27, 2002', '45.\\n95\\nLittleBigPlanet (PS3)\\nUser: 6.8\\nOct 27, 2008', '46.\\n95\\nDivinity: Original Sin II - Definitive Edition (XONE)\\nUser: 8.3\\nAug 31, 2018', \"47.\\n95\\nThe Legend of Zelda Collector's Edition (GC)\\nUser: 9.0\\nNov 17, 2003\", '48.\\n95\\nRed Dead Redemption (PS3)\\nUser: 8.9\\nMay 18, 2010', '49.\\n95\\nGran Turismo 3: A-Spec (PS2)\\nUser: 8.4\\nJul 9, 2001', '50.\\n95\\nHalo 2 (XBOX)\\nUser: 8.2\\nNov 9, 2004', '51.\\n95\\nThe Legend of Zelda: A Link to the Past (GBA)\\nUser: 8.9\\nDec 3, 2002', \"52.\\n95\\nThe Legend of Zelda: Majora's Mask (N64)\\nUser: 9.1\\nOct 25, 2000\", '53.\\n95\\nThe Last of Us (PS3)\\nUser: 9.2\\nJun 14, 2013', '54.\\n95\\nThe Legend of Zelda: Twilight Princess (WII)\\nUser: 9.0\\nNov 19, 2006', '55.\\n95\\nMadden NFL 2003 (PS2)\\nUser: 7.4\\nAug 12, 2002', \"56.\\n95\\nTony Hawk's Pro Skater 2 (GBA)\\nUser: 7.0\\nMay 30, 2001\", '57.\\n95\\nThe Last of Us Remastered (PS4)\\nUser: 9.1\\nJul 29, 2014', '58.\\n95\\nPortal 2 
(PC)\\nUser: 9.0\\nApr 18, 2011', '59.\\n95\\nRed Dead Redemption (X360)\\nUser: 9.0\\nMay 18, 2010', '60.\\n95\\nPortal 2 (X360)\\nUser: 8.7\\nApr 19, 2011', '61.\\n95\\nMetal Gear Solid V: The Phantom Pain (XONE)\\nUser: 7.4\\nSep 1, 2015', '62.\\n95\\nPortal 2 (PS3)\\nUser: 8.5\\nApr 19, 2011', '63.\\n94\\nWorld of Goo (WII)\\nUser: 8.7\\nOct 13, 2008', '64.\\n94\\nBioShock Infinite (PS3)\\nUser: 8.5\\nMar 26, 2013', '65.\\n94\\nFinal Fantasy IX (PS)\\nUser: 8.9\\nNov 13, 2000', '66.\\n94\\nCall of Duty: Modern Warfare 2 (PS3)\\nUser: 6.4\\nNov 10, 2009', '67.\\n94\\nGod of War (PS4)\\nUser: 9.2\\nApr 20, 2018', \"68.\\n94\\nTony Hawk's Pro Skater 4 (PS2)\\nUser: 8.4\\nOct 23, 2002\", '69.\\n94\\nDevil May Cry (PS2)\\nUser: 8.6\\nOct 16, 2001', '70.\\n94\\nCall of Duty 4: Modern Warfare (PS3)\\nUser: 8.4\\nNov 5, 2007', '71.\\n94\\nMadden NFL 2002 (PS2)\\nUser: 7.4\\nAug 19, 2001', '72.\\n94\\nBatman: Arkham City (X360)\\nUser: 8.7\\nOct 18, 2011', '73.\\n94\\nThe Elder Scrolls V: Skyrim (PC)\\nUser: 8.2\\nNov 10, 2011', '74.\\n94\\nMass Effect 2 (PC)\\nUser: 8.8\\nJan 26, 2010', '75.\\n94\\nThe Legend of Zelda: Ocarina of Time 3D (3DS)\\nUser: 8.9\\nJun 19, 2011', '76.\\n94\\nChrono Cross (PS)\\nUser: 8.2\\nAug 15, 2000', '77.\\n94\\nBioShock (PS3)\\nUser: 8.7\\nOct 21, 2008', '78.\\n94\\nMass Effect 2 (PS3)\\nUser: 8.5\\nJan 17, 2011', '79.\\n94\\nGrand Theft Auto: Vice City (PC)\\nUser: 8.8\\nMay 12, 2003', '80.\\n94\\nMadden NFL 2004 (PS2)\\nUser: 8.4\\nAug 12, 2003', '81.\\n94\\nGears of War (X360)\\nUser: 8.3\\nNov 7, 2006', '82.\\n94\\nThe Elder Scrolls IV: Oblivion (X360)\\nUser: 8.7\\nMar 20, 2006', \"83.\\n94\\nSid Meier's Civilization II (PC)\\nUser: 8.9\\nFeb 29, 1996\", '84.\\n94\\nQuake (PC)\\nUser: 8.8\\nJun 22, 1996', '85.\\n94\\nCall of Duty 4: Modern Warfare (X360)\\nUser: 8.5\\nNov 5, 2007', '86.\\n94\\nBioShock Infinite (PC)\\nUser: 8.6\\nMar 25, 2013', '87.\\n94\\nHalo 3 (X360)\\nUser: 7.9\\nSep 25, 2007', '88.\\n94\\nNinja Gaiden Black 
(XBOX)\\nUser: 8.7\\nSep 20, 2005', '89.\\n94\\nThe Elder Scrolls IV: Oblivion (PC)\\nUser: 8.1\\nMar 20, 2006', '90.\\n94\\nStreet Fighter IV (PS3)\\nUser: 7.3\\nFeb 17, 2009', '91.\\n94\\nCeleste (XONE)\\nUser: 6.5\\nJan 26, 2018', '92.\\n94\\nSuper Mario Advance 4: Super Mario Bros. 3 (GBA)\\nUser: 8.9\\nOct 21, 2003', '93.\\n94\\nJet Grind Radio (DC)\\nUser: 8.0\\nOct 30, 2000', '94.\\n94\\nMetal Gear Solid (PS)\\nUser: 9.2\\nOct 21, 1998', '95.\\n94\\nGrim Fandango (PC)\\nUser: 9.1\\nOct 14, 1998', \"96.\\n94\\nTom Clancy's Splinter Cell Chaos Theory (XBOX)\\nUser: 8.9\\nMar 28, 2005\", '97.\\n94\\nBurnout 3: Takedown (XBOX)\\nUser: 7.4\\nSep 7, 2004', '98.\\n94\\nDiablo (PC)\\nUser: 8.7\\nDec 31, 1996', '99.\\n94\\nMetal Gear Solid 3: Subsistence (PS2)\\nUser: 8.9\\nMar 14, 2006', '100.\\n94\\nCall of Duty: Modern Warfare 2 (X360)\\nUser: 6.4\\nNov 10, 2009']\n"
     ]
    },
    {
     "ename": "KeyboardInterrupt",
     "evalue": "",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mTypeError\u001b[0m                                 Traceback (most recent call last)",
      "\u001b[0;32m~/workrepo/pycon2018/.venv/lib/python3.6/site-packages/urllib3/connectionpool.py\u001b[0m in \u001b[0;36m_make_request\u001b[0;34m(self, conn, method, url, timeout, chunked, **httplib_request_kw)\u001b[0m\n\u001b[1;32m    376\u001b[0m             \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m  \u001b[0;31m# Python 2.7, use buffering of HTTP responses\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 377\u001b[0;31m                 \u001b[0mhttplib_response\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mconn\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mgetresponse\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mbuffering\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    378\u001b[0m             \u001b[0;32mexcept\u001b[0m \u001b[0mTypeError\u001b[0m\u001b[0;34m:\u001b[0m  \u001b[0;31m# Python 2.6 and older, Python 3\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;31mTypeError\u001b[0m: getresponse() got an unexpected keyword argument 'buffering'",
      "\nDuring handling of the above exception, another exception occurred:\n",
      "\u001b[0;31mKeyboardInterrupt\u001b[0m                         Traceback (most recent call last)",
      "\u001b[0;32m<ipython-input-12-2de06c06d363>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m      7\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mpage_num\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mrange\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m200\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      8\u001b[0m     \u001b[0msession\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mHTMLSession\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 9\u001b[0;31m     \u001b[0mresp\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0msession\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34mf'{base_url}?{qs}{page_num}'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m     10\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     11\u001b[0m     \u001b[0melems\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mresp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mhtml\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfind\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'.product_row'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m~/workrepo/pycon2018/.venv/lib/python3.6/site-packages/requests/sessions.py\u001b[0m in \u001b[0;36mget\u001b[0;34m(self, url, **kwargs)\u001b[0m\n\u001b[1;32m    523\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    524\u001b[0m         \u001b[0mkwargs\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msetdefault\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'allow_redirects'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;32mTrue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 525\u001b[0;31m         \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrequest\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'GET'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0murl\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    526\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    527\u001b[0m     \u001b[0;32mdef\u001b[0m \u001b[0moptions\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0murl\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m~/workrepo/pycon2018/.venv/lib/python3.6/site-packages/requests_html.py\u001b[0m in \u001b[0;36mrequest\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m    670\u001b[0m         \"\"\"\n\u001b[1;32m    671\u001b[0m         \u001b[0;31m# Convert Request object into HTTPRequest object.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 672\u001b[0;31m         \u001b[0mr\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0msuper\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mHTMLSession\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrequest\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    673\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    674\u001b[0m         \u001b[0;32mreturn\u001b[0m \u001b[0mHTMLResponse\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_from_response\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mr\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m~/workrepo/pycon2018/.venv/lib/python3.6/site-packages/requests/sessions.py\u001b[0m in \u001b[0;36mrequest\u001b[0;34m(self, method, url, params, data, headers, cookies, files, auth, timeout, allow_redirects, proxies, hooks, stream, verify, cert, json)\u001b[0m\n\u001b[1;32m    510\u001b[0m         }\n\u001b[1;32m    511\u001b[0m         \u001b[0msend_kwargs\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mupdate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msettings\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 512\u001b[0;31m         \u001b[0mresp\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mprep\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0msend_kwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    513\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    514\u001b[0m         \u001b[0;32mreturn\u001b[0m \u001b[0mresp\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m~/workrepo/pycon2018/.venv/lib/python3.6/site-packages/requests/sessions.py\u001b[0m in \u001b[0;36msend\u001b[0;34m(self, request, **kwargs)\u001b[0m\n\u001b[1;32m    620\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    621\u001b[0m         \u001b[0;31m# Send the request\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 622\u001b[0;31m         \u001b[0mr\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0madapter\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mrequest\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    623\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    624\u001b[0m         \u001b[0;31m# Total elapsed time of the request (approximately)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m~/workrepo/pycon2018/.venv/lib/python3.6/site-packages/requests/adapters.py\u001b[0m in \u001b[0;36msend\u001b[0;34m(self, request, stream, timeout, verify, cert, proxies)\u001b[0m\n\u001b[1;32m    443\u001b[0m                     \u001b[0mdecode_content\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mFalse\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    444\u001b[0m                     \u001b[0mretries\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmax_retries\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 445\u001b[0;31m                     \u001b[0mtimeout\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mtimeout\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    446\u001b[0m                 )\n\u001b[1;32m    447\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m~/workrepo/pycon2018/.venv/lib/python3.6/site-packages/urllib3/connectionpool.py\u001b[0m in \u001b[0;36murlopen\u001b[0;34m(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)\u001b[0m\n\u001b[1;32m    598\u001b[0m                                                   \u001b[0mtimeout\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mtimeout_obj\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    599\u001b[0m                                                   \u001b[0mbody\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mbody\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mheaders\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mheaders\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 600\u001b[0;31m                                                   chunked=chunked)\n\u001b[0m\u001b[1;32m    601\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    602\u001b[0m             \u001b[0;31m# If we're going to release the connection in ``finally:``, then\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m~/workrepo/pycon2018/.venv/lib/python3.6/site-packages/urllib3/connectionpool.py\u001b[0m in \u001b[0;36m_make_request\u001b[0;34m(self, conn, method, url, timeout, chunked, **httplib_request_kw)\u001b[0m\n\u001b[1;32m    378\u001b[0m             \u001b[0;32mexcept\u001b[0m \u001b[0mTypeError\u001b[0m\u001b[0;34m:\u001b[0m  \u001b[0;31m# Python 2.6 and older, Python 3\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    379\u001b[0m                 \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 380\u001b[0;31m                     \u001b[0mhttplib_response\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mconn\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mgetresponse\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    381\u001b[0m                 \u001b[0;32mexcept\u001b[0m \u001b[0mException\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    382\u001b[0m                     \u001b[0;31m# Remove the TypeError from the exception chain in Python 3;\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m~/.anyenv/envs/pyenv/versions/3.6.4/lib/python3.6/http/client.py\u001b[0m in \u001b[0;36mgetresponse\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m   1329\u001b[0m         \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1330\u001b[0m             \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1331\u001b[0;31m                 \u001b[0mresponse\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbegin\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m   1332\u001b[0m             \u001b[0;32mexcept\u001b[0m \u001b[0mConnectionError\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1333\u001b[0m                 \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mclose\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m~/.anyenv/envs/pyenv/versions/3.6.4/lib/python3.6/http/client.py\u001b[0m in \u001b[0;36mbegin\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m    295\u001b[0m         \u001b[0;31m# read until we get a non-100 response\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    296\u001b[0m         \u001b[0;32mwhile\u001b[0m \u001b[0;32mTrue\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 297\u001b[0;31m             \u001b[0mversion\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mstatus\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mreason\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_read_status\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    298\u001b[0m             \u001b[0;32mif\u001b[0m \u001b[0mstatus\u001b[0m \u001b[0;34m!=\u001b[0m \u001b[0mCONTINUE\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    299\u001b[0m                 \u001b[0;32mbreak\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m~/.anyenv/envs/pyenv/versions/3.6.4/lib/python3.6/http/client.py\u001b[0m in \u001b[0;36m_read_status\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m    256\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    257\u001b[0m     \u001b[0;32mdef\u001b[0m \u001b[0m_read_status\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 258\u001b[0;31m         \u001b[0mline\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mstr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mreadline\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0m_MAXLINE\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"iso-8859-1\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    259\u001b[0m         \u001b[0;32mif\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mline\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m>\u001b[0m \u001b[0m_MAXLINE\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    260\u001b[0m             \u001b[0;32mraise\u001b[0m \u001b[0mLineTooLong\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"status line\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m~/.anyenv/envs/pyenv/versions/3.6.4/lib/python3.6/socket.py\u001b[0m in \u001b[0;36mreadinto\u001b[0;34m(self, b)\u001b[0m\n\u001b[1;32m    584\u001b[0m         \u001b[0;32mwhile\u001b[0m \u001b[0;32mTrue\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    585\u001b[0m             \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 586\u001b[0;31m                 \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_sock\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrecv_into\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mb\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    587\u001b[0m             \u001b[0;32mexcept\u001b[0m \u001b[0mtimeout\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    588\u001b[0m                 \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_timeout_occurred\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mTrue\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m~/.anyenv/envs/pyenv/versions/3.6.4/lib/python3.6/ssl.py\u001b[0m in \u001b[0;36mrecv_into\u001b[0;34m(self, buffer, nbytes, flags)\u001b[0m\n\u001b[1;32m   1007\u001b[0m                   \u001b[0;34m\"non-zero flags not allowed in calls to recv_into() on %s\"\u001b[0m \u001b[0;34m%\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1008\u001b[0m                   self.__class__)\n\u001b[0;32m-> 1009\u001b[0;31m             \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mread\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnbytes\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbuffer\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m   1010\u001b[0m         \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1011\u001b[0m             \u001b[0;32mreturn\u001b[0m \u001b[0msocket\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrecv_into\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbuffer\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnbytes\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mflags\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m~/.anyenv/envs/pyenv/versions/3.6.4/lib/python3.6/ssl.py\u001b[0m in \u001b[0;36mread\u001b[0;34m(self, len, buffer)\u001b[0m\n\u001b[1;32m    869\u001b[0m             \u001b[0;32mraise\u001b[0m \u001b[0mValueError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"Read on closed or unwrapped SSL socket.\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    870\u001b[0m         \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 871\u001b[0;31m             \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_sslobj\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mread\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlen\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbuffer\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    872\u001b[0m         \u001b[0;32mexcept\u001b[0m \u001b[0mSSLError\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mx\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    873\u001b[0m             \u001b[0;32mif\u001b[0m \u001b[0mx\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0mSSL_ERROR_EOF\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msuppress_ragged_eofs\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m~/.anyenv/envs/pyenv/versions/3.6.4/lib/python3.6/ssl.py\u001b[0m in \u001b[0;36mread\u001b[0;34m(self, len, buffer)\u001b[0m\n\u001b[1;32m    629\u001b[0m         \"\"\"\n\u001b[1;32m    630\u001b[0m         \u001b[0;32mif\u001b[0m \u001b[0mbuffer\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 631\u001b[0;31m             \u001b[0mv\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_sslobj\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mread\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlen\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbuffer\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    632\u001b[0m         \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    633\u001b[0m             \u001b[0mv\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_sslobj\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mread\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlen\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;31mKeyboardInterrupt\u001b[0m: "
     ],
     "output_type": "error"
    }
   ],
   "source": [
    "from requests_html import HTMLSession\n",
    "import time\n",
    "\n",
    "base_url = 'https://www.metacritic.com/browse/games/score/metascore/all/all/filtered'\n",
    "qs = 'sort=desc&page='\n",
    "\n",
    "for page_num in range(200):\n",
    "    session = HTMLSession()\n",
    "    resp = session.get(f'{base_url}?{qs}{page_num}')\n",
    "\n",
    "    elems = resp.html.find('.product_row')\n",
    "    print([i.text for i in elems])\n",
    "\n",
    "    time.sleep(2)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "# pyppeteerでログインしてmoneyfowardの会員ページを取得する\n",
    "async def main():\n",
    "    browser = await launch()\n",
    "    page = await browser.newPage()\n",
    "    await page.goto('https://moneyforward.com/users/sign_in')\n",
    "    \n",
    "    await page.type('#sign_in_session_service_email', 'sample@sample.com')\n",
    "    await page.type('#sign_in_session_service_password', 'sample_pass')\n",
    "    btn_elem = await page.querySelector('#login-btn-sumit')\n",
    "    await btn_elem.click()\n",
    "    \n",
    "    await page.waitFor(5000)\n",
    "    await page.screenshot({'path': 'logined.png', 'fullPage': True})\n",
    "    await browser.close()\n",
    "\n",
    "\n",
    "loop.run_until_complete(main())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {
    "collapsed": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['「リモートペアプロでマントルを突き抜けろ!」AWS Cloud9でリモートペアプロ&楽々サーバーレス開発', '1次元畳み込みフィルターを利用した音楽データのオートエンコーダ', 'AltJSとしてのPython - フロントエンドをPythonで書こう', 'Applying serverless architecture pattern to distributed data processing', 'Build text classification models ( CBOW and Skip-gram) with FastText in python', 'Building Maintainable Python Web App using Flask', 'C拡張と共に乗り切るPython 2→3移行術', 'Django REST Framework におけるAPI実装プラクティス', 'Django を Zappaで構築してServerless Python のベストプラクティスを探る', 'Djangoアプリケーションにおけるトイル撲滅戦記', 'Djangoだってカンバンつくれるもん(Django Channels + Vue)', 'DjangoではじめるPyCharm実践入門', 'From Data to Web Application: Anime Character Image Recognition with Transfer Learning', 'Fun with Python and Kanji', 'HomeSecurity with Python', 'How to Data Wrangling? Tips for using python libraries for big-data analysis including scikit-learn.', 'Integrate Full-text Search service with Django', 'Interactive Network Visualization using Python 〜 NetworkX + BokehでPEPの参照関係を可視化する', 'Jupyterで広がるPythonの可能性', 'JVM上で動くPython3処理系cafebabepyの実装詳解', 'Make a Drone using RaspberryPi and Google VoiceKit by Python', 'Migrating from Py2 application to Py3: first trial in MonotaRO / Python2 から Python3 への移植: MonotaRO での取り組み', 'niconicoにおけるコンテンツレコメンドの取り組み', 'Notebook as Web API: Turn your notebook into Web API', 'PyCon JP における子ども向けワークショップの活動事例と実施の意義', 'Python, AWS and FinTech', 'Pythonistaに贈るコンテナ入門', 'Pythonistaの選球眼(せんきゅうがん) - エンジニアリングと野球の目利きになる技術', 'Pythonで「お絵描きパズル」を解いてみた。', 'Pythonでざっくり学ぶUnixプロセス', 'Pythonで解く大学入試数学', 'Pythonで始めるウェブスクレイピング実践入門', 'Pythonで時系列のデータを分析してみよう。', 'Pythonによる異常検知入門', 'Pythonを使ったハードウェア開発について', 'Python研修の作り方-Teaching Is Learning-', 'REST API に疲れたあなたへ贈る GraphQL 入門', 'Rust と Python', 'Sphinx-2.0 とドキュメントの未来', 'SymPyによる数式処理', 'The Modern OAuth 2.0', 'Webアプリケーションの仕組み', 'Why you should care about types: Python Typing in the Facebook Backend', 'WILDCAT SDKは量子コンピュータビジネスの味方となるのか!?', 'あなたと私いますぐパッケージン', 'オンザフライ高速化パッケージの比較:Numba, TensorFlow, Dask, etc', 'メルカリにおける AI 活用事例', '契約書データ関連のAI開発に伴う、前処理及び匿名化処理についての実例', 
'自分が欲しいものをPythonで書く方法(Python for Myself)', '実践・競馬データサイエンス', '複数アプリケーションのプロセスとログを管理するための新しいツールと手法', '料理写真が美味しく撮れる! 開発現場から覗くAI料理カメラの裏側', 'Artisanal Async Adventures', 'Introduce syntax and history of Python from 2.4 to 3.7', 'Interpretable Machine Learning, making black box models explainable with Python!', 'Python Boot Campで全国にPythonの環を広げよう!(2018年版)', 'Matplotlibで三角グラフを描く', 'プロダクトを安定稼動させるためのソフトウェア構成管理の事例のご紹介', 'NLP(自然言語処理)初心者のための単語分割/品詞タグ付けツールの紹介', 'Python in Finance: Circle in MUFG Investor Services', 'ばーちゃんとAlexa × チャットを実現した話', 'データ分析ライブラリを用いた最適化モデル', 'Python x 金融のコミュニティ fin-pyの活動事例', 'ドキュメンテーションビルダー「Sphinx」をはじめよう', '積雪メソッド, Snowfall Accumulation', 'PyCon JPで転職してみた人のホンネ', 'テキストマイニングによるTwitter個人アカウントの性格推定', 'Why your Django account registration should use a Turing test...', '医学研究者が深層学習環境の立ち上げの際に苦労した話', '暗号通貨技術・ブロックチェーン技術を活用するCrypto-Fintech Lab.', '安全なサンドボックス構築の裏側 ~投資アルゴリズム構築環境QuantX Factoryの事例~', 'diff 最小化原理で導く Zen of Python', 'Python × Investment ~投資信託をPythonで分析して、その結果を公開するサービス作った話~', 'Pythonの軽量フレームワークによるシンプルで高速なWebAPIの作り方', 'システム開発素人が深層学習を用いた画像認識で麻雀点数計算するLINEbot作った話', '【poke2vec】ポケモンの役割ベクトルの学習とその分析・可視化', 'asyncio + aiohttp で作るウェブサービス', 'PyCon JP 傾向と対策']\n"
     ]
    }
   ],
   "source": [
    "from requests_html import HTMLSession\n",
    "\n",
    "session = HTMLSession()\n",
    "resp = session.get('https://pycon.jp/2018/event/sessions')\n",
    "\n",
    "resp.html.render(sleep=5)\n",
    "\n",
    "sel = '.session-summary h3'\n",
    "elems = resp.html.find(sel)\n",
    "\n",
    "print([i.text for i in elems])\n"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}

以上是关于text pyconjp_2018_scraping_samples.ipynb的主要内容,如果未能解决你的问题,请参考以下文章:

text cloud_comparsion_2018

text 2018-04-08_SpecOps-TheLine_syslog.text

text 2018-01-21_12-35-45.txt

text 2018-01-21_06-40-33.txt

text 2018-01-21_06-40-03.txt

Shangbang Long_ECCV2018_TextSnake_A Flexible Representation for Detecting Text of Arbitrary Shapes(示