text pyconjp_2018_scraping_samples.ipynb
Posted
tags:
篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了text pyconjp_2018_scraping_samples.ipynb相关的知识,希望对你有一定的参考价值。
{
"cells": [
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"['Example Domain']\n"
]
}
],
"source": [
"# BS4\n",
"# pip install beautifulsoup4\n",
"from bs4 import BeautifulSoup\n",
"import requests\n",
"\n",
"resp = requests.get('https://www.example.com/')\n",
"bs_obj = BeautifulSoup(resp.content, 'lxml')\n",
"\n",
"print([i.text for i in bs_obj.find_all('h1')])\n"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"collapsed": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Example Domain\n"
]
}
],
"source": [
"# Selenium\n",
"# pip install selenium\n",
"# brew install --cask chromedriver\n",
"from selenium import webdriver\n",
"\n",
"options = webdriver.ChromeOptions()\n",
"options.add_argument('--headless')\n",
"driver = webdriver.Chrome(options=options)\n",
"\n",
"driver.get('http://www.example.com/')\n",
"print(driver.title)\n",
"driver.quit()"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Example Domain\n"
]
}
],
"source": [
"with webdriver.Chrome(options=options) as driver:\n",
" driver.get('http://www.example.com/')\n",
" print(driver.title)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"['Example Domain']\n"
]
}
],
"source": [
"# pip install requests_html\n",
"from requests_html import HTMLSession\n",
"\n",
"session = HTMLSession()\n",
"resp = session.get('http://www.example.com/')\n",
"print([i.text for i in resp.html.find('h1')])\n"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Example Domain\n"
]
}
],
"source": [
"# pip install pyppeteer\n",
"import asyncio\n",
"from pyppeteer import launch\n",
"\n",
"\n",
"async def main():\n",
" browser = await launch()\n",
" page = await browser.newPage()\n",
" await page.goto('http://example.com')\n",
" print(await page.title())\n",
" await browser.close()\n",
" \n",
"loop = asyncio.get_event_loop()\n",
"loop.run_until_complete(main())"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Example Domain\n"
]
}
],
"source": [
"from pyquery import PyQuery\n",
"\n",
"pq = PyQuery(url='https://example.com')\n",
"print(pq.find('h1').text())"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {
"collapsed": true
},
"outputs": [
{
"data": {
"text/plain": [
"['株式会社SQUEEZE',\n '株式会社MonotaRO',\n 'LINE株式会社',\n 'Retty株式会社',\n 'iRidge, Inc.',\n '株式会社いい生活',\n 'ミラクル・リナックス株式会社',\n '株式会社スカラコミュニケーションズ',\n '株式会社ビザスク',\n '株式会社ヌーラボ',\n 'TIS株式会社',\n 'PAY.JP',\n 'カラフル・ボード株式会社',\n '株式会社ビープラウド',\n '株式会社リーディング・エッジ社',\n '株式会社Nexedi',\n 'タロスカイ',\n 'ワイアーチ',\n '株式会社日本システム技研',\n 'SideCI',\n '日本経済新聞社',\n 'Elastic',\n '株式会社データミックス',\n 'XICA',\n '株式会社 ARISE analytics',\n '株式会社ミクシィ・リクルートメント',\n '株式会社JX通信社',\n 'シルバーエッグ・テクノロジー株式会社',\n 'Gandi.net',\n 'GROOVE X株式会社',\n 'HDE, Inc.',\n '株式会社DataSign',\n 'Credit Engine, Inc.',\n '富士通クラウドテクノロジーズ株式会社',\n 'Supership株式会社',\n '株式会社ブレインパッド',\n '株式会社CMSコミュニケーションズ',\n '株式会社slideship',\n '日本マイクロソフト株式会社',\n 'スフィアリンクス株式会社',\n 'gihyo.jp',\n 'Think IT',\n '株式会社オライリー・ジャパン',\n 'Togetter',\n 'CodeZine',\n 'エンジニアtype']"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# requests-htmlでPyCon JP 2017のスポンサー一覧ページをスクレイピングする\n",
"\n",
"from requests_html import HTMLSession\n",
"\n",
"session = HTMLSession()\n",
"resp = session.get('https://pycon.jp/2017/ja/sponsors/')\n",
"sel = '.sponsor-content h4'\n",
"elems = resp.html.find(sel)\n",
"[i.text for i in elems]\n"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {
"collapsed": true
},
"outputs": [
{
"data": {
"text/plain": [
"['株式会社SQUEEZE',\n '株式会社MonotaRO',\n 'LINE株式会社',\n 'Retty株式会社',\n 'iRidge, Inc.',\n '株式会社いい生活',\n 'ミラクル・リナックス株式会社',\n '株式会社スカラコミュニケーションズ',\n '株式会社ビザスク',\n '株式会社ヌーラボ',\n 'TIS株式会社',\n 'PAY.JP',\n 'カラフル・ボード株式会社',\n '株式会社ビープラウド',\n '株式会社リーディング・エッジ社',\n '株式会社Nexedi',\n 'タロスカイ',\n 'ワイアーチ',\n '株式会社日本システム技研',\n 'SideCI',\n '日本経済新聞社',\n 'Elastic',\n '株式会社データミックス',\n 'XICA',\n '株式会社 ARISE analytics',\n '株式会社ミクシィ・リクルートメント',\n '株式会社JX通信社',\n 'シルバーエッグ・テクノロジー株式会社',\n 'Gandi.net',\n 'GROOVE X株式会社',\n 'HDE, Inc.',\n '株式会社DataSign',\n 'Credit Engine, Inc.',\n '富士通クラウドテクノロジーズ株式会社',\n 'Supership株式会社',\n '株式会社ブレインパッド',\n '株式会社CMSコミュニケーションズ',\n '株式会社slideship',\n '日本マイクロソフト株式会社',\n 'スフィアリンクス株式会社',\n 'gihyo.jp',\n 'Think IT',\n '株式会社オライリー・ジャパン',\n 'Togetter',\n 'CodeZine',\n 'エンジニアtype']"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from requests.exceptions import ConnectionError, TooManyRedirects, HTTPError\n",
"from requests_html import HTMLSession\n",
"from retry import retry\n",
"\n",
"@retry(tries=3, delay=2, backoff=2)\n",
"def get_resp():\n",
" try:\n",
" session = HTMLSession()\n",
" return session.get('https://pycon.jp/2017/ja/sponsors/')\n",
" except ConnectionError:\n",
" print('NetworkError')\n",
" raise\n",
" except TooManyRedirects:\n",
" print('TooManyRedirects')\n",
" raise\n",
" except HTTPError:\n",
" print('BadResponse')\n",
" raise\n",
"\n",
"\n",
"try:\n",
" resp = get_resp()\n",
"except:\n",
" print('Response not found')\n",
"\n",
"sel = '.sponsor-content h4'\n",
"elems = resp.html.find(sel)\n",
"[i.text for i in elems]\n"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {
"collapsed": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"['1.\\n99\\nThe Legend of Zelda: Ocarina of Time (N64)\\nUser: 9.1\\nNov 23, 1998', \"2.\\n98\\nTony Hawk's Pro Skater 2 (PS)\\nUser: 7.4\\nSep 20, 2000\", '3.\\n98\\nGrand Theft Auto IV (PS3)\\nUser: 7.5\\nApr 29, 2008', '4.\\n98\\nSoulCalibur (DC)\\nUser: 8.7\\nSep 8, 1999', '5.\\n98\\nGrand Theft Auto IV (X360)\\nUser: 7.9\\nApr 29, 2008', '6.\\n97\\nSuper Mario Galaxy (WII)\\nUser: 9.0\\nNov 12, 2007', '7.\\n97\\nSuper Mario Galaxy 2 (WII)\\nUser: 9.1\\nMay 23, 2010', '8.\\n97\\nGrand Theft Auto V (XONE)\\nUser: 7.8\\nNov 18, 2014', '9.\\n97\\nGrand Theft Auto V (PS3)\\nUser: 8.3\\nSep 17, 2013', '10.\\n97\\nGrand Theft Auto V (X360)\\nUser: 8.3\\nSep 17, 2013', \"11.\\n97\\nTony Hawk's Pro Skater 2 (DC)\\nUser: 6.2\\nNov 6, 2000\", '12.\\n97\\nThe Legend of Zelda: Breath of the Wild (Switch)\\nUser: 8.5\\nMar 3, 2017', \"13.\\n97\\nTony Hawk's Pro Skater 3 (PS2)\\nUser: 7.4\\nOct 28, 2001\", '14.\\n97\\nPerfect Dark (N64)\\nUser: 8.9\\nMay 22, 2000', '15.\\n97\\nGrand Theft Auto V (PS4)\\nUser: 8.3\\nNov 18, 2014', '16.\\n97\\nMetroid Prime (GC)\\nUser: 9.2\\nNov 17, 2002', '17.\\n97\\nGrand Theft Auto III (PS2)\\nUser: 8.5\\nOct 22, 2001', '18.\\n97\\nSuper Mario Odyssey (Switch)\\nUser: 8.9\\nOct 27, 2017', '19.\\n97\\nHalo: Combat Evolved (XBOX)\\nUser: 8.6\\nNov 14, 2001', '20.\\n97\\nNFL 2K1 (DC)\\nUser: 6.4\\nSep 7, 2000', '21.\\n96\\nHalf-Life 2 (PC)\\nUser: 9.1\\nNov 16, 2004', '22.\\n96\\nGrand Theft Auto V (PC)\\nUser: 7.7\\nApr 14, 2015', '23.\\n96\\nThe Legend of Zelda: Breath of the Wild (WIIU)\\nUser: 8.2\\nMar 3, 2017', '24.\\n96\\nBioShock (X360)\\nUser: 8.9\\nAug 21, 2007', '25.\\n96\\nGoldenEye 007 (N64)\\nUser: 9.1\\nAug 25, 1997', '26.\\n96\\nUncharted 2: Among Thieves (PS3)\\nUser: 8.9\\nOct 13, 2009', '27.\\n96\\nResident Evil 4 (GC)\\nUser: 9.4\\nJan 11, 2005', '28.\\n96\\nThe Orange Box (X360)\\nUser: 8.8\\nOct 10, 2007', '29.\\n96\\nThe Orange Box (PC)\\nUser: 9.2\\nOct 10, 2007', '30.\\n96\\nBatman: Arkham City (PS3)\\nUser: 
8.7\\nOct 18, 2011', '31.\\n96\\nTekken 3 (PS)\\nUser: 9.1\\nApr 29, 1998', '32.\\n96\\nMass Effect 2 (X360)\\nUser: 8.9\\nJan 26, 2010', '33.\\n96\\nThe Legend of Zelda: Twilight Princess (GC)\\nUser: 8.9\\nDec 11, 2006', '34.\\n96\\nThe Elder Scrolls V: Skyrim (X360)\\nUser: 8.5\\nNov 11, 2011', '35.\\n96\\nHalf-Life (PC)\\nUser: 9.1\\nNov 19, 1998', '36.\\n96\\nResident Evil 4 (PS2)\\nUser: 8.9\\nOct 25, 2005', '37.\\n96\\nThe Legend of Zelda: The Wind Waker (GC)\\nUser: 8.9\\nMar 24, 2003', '38.\\n96\\nGran Turismo (PS)\\nUser: 8.6\\nApr 30, 1998', '39.\\n96\\nBioShock (PC)\\nUser: 8.5\\nAug 21, 2007', '40.\\n96\\nMetal Gear Solid 2: Sons of Liberty (PS2)\\nUser: 8.8\\nNov 12, 2001', '41.\\n96\\nGrand Theft Auto Double Pack (XBOX)\\nUser: 7.9\\nOct 31, 2003', \"42.\\n95\\nBaldur's Gate II: Shadows of Amn (PC)\\nUser: 9.2\\nSep 24, 2000\", '43.\\n95\\nGrand Theft Auto: San Andreas (PS2)\\nUser: 9.0\\nOct 26, 2004', '44.\\n95\\nGrand Theft Auto: Vice City (PS2)\\nUser: 8.8\\nOct 27, 2002', '45.\\n95\\nLittleBigPlanet (PS3)\\nUser: 6.8\\nOct 27, 2008', '46.\\n95\\nDivinity: Original Sin II - Definitive Edition (XONE)\\nUser: 8.3\\nAug 31, 2018', \"47.\\n95\\nThe Legend of Zelda Collector's Edition (GC)\\nUser: 9.0\\nNov 17, 2003\", '48.\\n95\\nRed Dead Redemption (PS3)\\nUser: 8.9\\nMay 18, 2010', '49.\\n95\\nGran Turismo 3: A-Spec (PS2)\\nUser: 8.4\\nJul 9, 2001', '50.\\n95\\nHalo 2 (XBOX)\\nUser: 8.2\\nNov 9, 2004', '51.\\n95\\nThe Legend of Zelda: A Link to the Past (GBA)\\nUser: 8.9\\nDec 3, 2002', \"52.\\n95\\nThe Legend of Zelda: Majora's Mask (N64)\\nUser: 9.1\\nOct 25, 2000\", '53.\\n95\\nThe Last of Us (PS3)\\nUser: 9.2\\nJun 14, 2013', '54.\\n95\\nThe Legend of Zelda: Twilight Princess (WII)\\nUser: 9.0\\nNov 19, 2006', '55.\\n95\\nMadden NFL 2003 (PS2)\\nUser: 7.4\\nAug 12, 2002', \"56.\\n95\\nTony Hawk's Pro Skater 2 (GBA)\\nUser: 7.0\\nMay 30, 2001\", '57.\\n95\\nThe Last of Us Remastered (PS4)\\nUser: 9.1\\nJul 29, 2014', '58.\\n95\\nPortal 2 
(PC)\\nUser: 9.0\\nApr 18, 2011', '59.\\n95\\nRed Dead Redemption (X360)\\nUser: 9.0\\nMay 18, 2010', '60.\\n95\\nPortal 2 (X360)\\nUser: 8.7\\nApr 19, 2011', '61.\\n95\\nMetal Gear Solid V: The Phantom Pain (XONE)\\nUser: 7.4\\nSep 1, 2015', '62.\\n95\\nPortal 2 (PS3)\\nUser: 8.5\\nApr 19, 2011', '63.\\n94\\nWorld of Goo (WII)\\nUser: 8.7\\nOct 13, 2008', '64.\\n94\\nBioShock Infinite (PS3)\\nUser: 8.5\\nMar 26, 2013', '65.\\n94\\nFinal Fantasy IX (PS)\\nUser: 8.9\\nNov 13, 2000', '66.\\n94\\nCall of Duty: Modern Warfare 2 (PS3)\\nUser: 6.4\\nNov 10, 2009', '67.\\n94\\nGod of War (PS4)\\nUser: 9.2\\nApr 20, 2018', \"68.\\n94\\nTony Hawk's Pro Skater 4 (PS2)\\nUser: 8.4\\nOct 23, 2002\", '69.\\n94\\nDevil May Cry (PS2)\\nUser: 8.6\\nOct 16, 2001', '70.\\n94\\nCall of Duty 4: Modern Warfare (PS3)\\nUser: 8.4\\nNov 5, 2007', '71.\\n94\\nMadden NFL 2002 (PS2)\\nUser: 7.4\\nAug 19, 2001', '72.\\n94\\nBatman: Arkham City (X360)\\nUser: 8.7\\nOct 18, 2011', '73.\\n94\\nThe Elder Scrolls V: Skyrim (PC)\\nUser: 8.2\\nNov 10, 2011', '74.\\n94\\nMass Effect 2 (PC)\\nUser: 8.8\\nJan 26, 2010', '75.\\n94\\nThe Legend of Zelda: Ocarina of Time 3D (3DS)\\nUser: 8.9\\nJun 19, 2011', '76.\\n94\\nChrono Cross (PS)\\nUser: 8.2\\nAug 15, 2000', '77.\\n94\\nBioShock (PS3)\\nUser: 8.7\\nOct 21, 2008', '78.\\n94\\nMass Effect 2 (PS3)\\nUser: 8.5\\nJan 17, 2011', '79.\\n94\\nGrand Theft Auto: Vice City (PC)\\nUser: 8.8\\nMay 12, 2003', '80.\\n94\\nMadden NFL 2004 (PS2)\\nUser: 8.4\\nAug 12, 2003', '81.\\n94\\nGears of War (X360)\\nUser: 8.3\\nNov 7, 2006', '82.\\n94\\nThe Elder Scrolls IV: Oblivion (X360)\\nUser: 8.7\\nMar 20, 2006', \"83.\\n94\\nSid Meier's Civilization II (PC)\\nUser: 8.9\\nFeb 29, 1996\", '84.\\n94\\nQuake (PC)\\nUser: 8.8\\nJun 22, 1996', '85.\\n94\\nCall of Duty 4: Modern Warfare (X360)\\nUser: 8.5\\nNov 5, 2007', '86.\\n94\\nBioShock Infinite (PC)\\nUser: 8.6\\nMar 25, 2013', '87.\\n94\\nHalo 3 (X360)\\nUser: 7.9\\nSep 25, 2007', '88.\\n94\\nNinja Gaiden Black 
(XBOX)\\nUser: 8.7\\nSep 20, 2005', '89.\\n94\\nThe Elder Scrolls IV: Oblivion (PC)\\nUser: 8.1\\nMar 20, 2006', '90.\\n94\\nStreet Fighter IV (PS3)\\nUser: 7.3\\nFeb 17, 2009', '91.\\n94\\nCeleste (XONE)\\nUser: 6.5\\nJan 26, 2018', '92.\\n94\\nSuper Mario Advance 4: Super Mario Bros. 3 (GBA)\\nUser: 8.9\\nOct 21, 2003', '93.\\n94\\nJet Grind Radio (DC)\\nUser: 8.0\\nOct 30, 2000', '94.\\n94\\nMetal Gear Solid (PS)\\nUser: 9.2\\nOct 21, 1998', '95.\\n94\\nGrim Fandango (PC)\\nUser: 9.1\\nOct 14, 1998', \"96.\\n94\\nTom Clancy's Splinter Cell Chaos Theory (XBOX)\\nUser: 8.9\\nMar 28, 2005\", '97.\\n94\\nBurnout 3: Takedown (XBOX)\\nUser: 7.4\\nSep 7, 2004', '98.\\n94\\nDiablo (PC)\\nUser: 8.7\\nDec 31, 1996', '99.\\n94\\nMetal Gear Solid 3: Subsistence (PS2)\\nUser: 8.9\\nMar 14, 2006', '100.\\n94\\nCall of Duty: Modern Warfare 2 (X360)\\nUser: 6.4\\nNov 10, 2009']\n"
]
},
{
"ename": "KeyboardInterrupt",
"evalue": "",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m~/workrepo/pycon2018/.venv/lib/python3.6/site-packages/urllib3/connectionpool.py\u001b[0m in \u001b[0;36m_make_request\u001b[0;34m(self, conn, method, url, timeout, chunked, **httplib_request_kw)\u001b[0m\n\u001b[1;32m 376\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0;31m# Python 2.7, use buffering of HTTP responses\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 377\u001b[0;31m \u001b[0mhttplib_response\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mconn\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mgetresponse\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mbuffering\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 378\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mTypeError\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0;31m# Python 2.6 and older, Python 3\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mTypeError\u001b[0m: getresponse() got an unexpected keyword argument 'buffering'",
"\nDuring handling of the above exception, another exception occurred:\n",
"\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-12-2de06c06d363>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m 7\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mpage_num\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mrange\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m200\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 8\u001b[0m \u001b[0msession\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mHTMLSession\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 9\u001b[0;31m \u001b[0mresp\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0msession\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34mf'{base_url}?{qs}{page_num}'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 10\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 11\u001b[0m \u001b[0melems\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mresp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mhtml\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfind\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'.product_row'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/workrepo/pycon2018/.venv/lib/python3.6/site-packages/requests/sessions.py\u001b[0m in \u001b[0;36mget\u001b[0;34m(self, url, **kwargs)\u001b[0m\n\u001b[1;32m 523\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 524\u001b[0m \u001b[0mkwargs\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msetdefault\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'allow_redirects'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;32mTrue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 525\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrequest\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'GET'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0murl\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 526\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 527\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0moptions\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0murl\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/workrepo/pycon2018/.venv/lib/python3.6/site-packages/requests_html.py\u001b[0m in \u001b[0;36mrequest\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 670\u001b[0m \"\"\"\n\u001b[1;32m 671\u001b[0m \u001b[0;31m# Convert Request object into HTTPRequest object.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 672\u001b[0;31m \u001b[0mr\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0msuper\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mHTMLSession\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrequest\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 673\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 674\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mHTMLResponse\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_from_response\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mr\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/workrepo/pycon2018/.venv/lib/python3.6/site-packages/requests/sessions.py\u001b[0m in \u001b[0;36mrequest\u001b[0;34m(self, method, url, params, data, headers, cookies, files, auth, timeout, allow_redirects, proxies, hooks, stream, verify, cert, json)\u001b[0m\n\u001b[1;32m 510\u001b[0m }\n\u001b[1;32m 511\u001b[0m \u001b[0msend_kwargs\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mupdate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msettings\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 512\u001b[0;31m \u001b[0mresp\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mprep\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0msend_kwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 513\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 514\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mresp\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/workrepo/pycon2018/.venv/lib/python3.6/site-packages/requests/sessions.py\u001b[0m in \u001b[0;36msend\u001b[0;34m(self, request, **kwargs)\u001b[0m\n\u001b[1;32m 620\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 621\u001b[0m \u001b[0;31m# Send the request\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 622\u001b[0;31m \u001b[0mr\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0madapter\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mrequest\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 623\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 624\u001b[0m \u001b[0;31m# Total elapsed time of the request (approximately)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/workrepo/pycon2018/.venv/lib/python3.6/site-packages/requests/adapters.py\u001b[0m in \u001b[0;36msend\u001b[0;34m(self, request, stream, timeout, verify, cert, proxies)\u001b[0m\n\u001b[1;32m 443\u001b[0m \u001b[0mdecode_content\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mFalse\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 444\u001b[0m \u001b[0mretries\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmax_retries\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 445\u001b[0;31m \u001b[0mtimeout\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mtimeout\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 446\u001b[0m )\n\u001b[1;32m 447\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/workrepo/pycon2018/.venv/lib/python3.6/site-packages/urllib3/connectionpool.py\u001b[0m in \u001b[0;36murlopen\u001b[0;34m(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)\u001b[0m\n\u001b[1;32m 598\u001b[0m \u001b[0mtimeout\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mtimeout_obj\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 599\u001b[0m \u001b[0mbody\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mbody\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mheaders\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mheaders\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 600\u001b[0;31m chunked=chunked)\n\u001b[0m\u001b[1;32m 601\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 602\u001b[0m \u001b[0;31m# If we're going to release the connection in ``finally:``, then\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/workrepo/pycon2018/.venv/lib/python3.6/site-packages/urllib3/connectionpool.py\u001b[0m in \u001b[0;36m_make_request\u001b[0;34m(self, conn, method, url, timeout, chunked, **httplib_request_kw)\u001b[0m\n\u001b[1;32m 378\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mTypeError\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0;31m# Python 2.6 and older, Python 3\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 379\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 380\u001b[0;31m \u001b[0mhttplib_response\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mconn\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mgetresponse\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 381\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mException\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 382\u001b[0m \u001b[0;31m# Remove the TypeError from the exception chain in Python 3;\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/.anyenv/envs/pyenv/versions/3.6.4/lib/python3.6/http/client.py\u001b[0m in \u001b[0;36mgetresponse\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 1329\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1330\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1331\u001b[0;31m \u001b[0mresponse\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbegin\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1332\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mConnectionError\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1333\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mclose\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/.anyenv/envs/pyenv/versions/3.6.4/lib/python3.6/http/client.py\u001b[0m in \u001b[0;36mbegin\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 295\u001b[0m \u001b[0;31m# read until we get a non-100 response\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 296\u001b[0m \u001b[0;32mwhile\u001b[0m \u001b[0;32mTrue\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 297\u001b[0;31m \u001b[0mversion\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mstatus\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mreason\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_read_status\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 298\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mstatus\u001b[0m \u001b[0;34m!=\u001b[0m \u001b[0mCONTINUE\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 299\u001b[0m \u001b[0;32mbreak\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/.anyenv/envs/pyenv/versions/3.6.4/lib/python3.6/http/client.py\u001b[0m in \u001b[0;36m_read_status\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 256\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 257\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m_read_status\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 258\u001b[0;31m \u001b[0mline\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mstr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mreadline\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0m_MAXLINE\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"iso-8859-1\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 259\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mline\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m>\u001b[0m \u001b[0m_MAXLINE\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 260\u001b[0m \u001b[0;32mraise\u001b[0m \u001b[0mLineTooLong\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"status line\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/.anyenv/envs/pyenv/versions/3.6.4/lib/python3.6/socket.py\u001b[0m in \u001b[0;36mreadinto\u001b[0;34m(self, b)\u001b[0m\n\u001b[1;32m 584\u001b[0m \u001b[0;32mwhile\u001b[0m \u001b[0;32mTrue\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 585\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 586\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_sock\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrecv_into\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mb\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 587\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mtimeout\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 588\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_timeout_occurred\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mTrue\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/.anyenv/envs/pyenv/versions/3.6.4/lib/python3.6/ssl.py\u001b[0m in \u001b[0;36mrecv_into\u001b[0;34m(self, buffer, nbytes, flags)\u001b[0m\n\u001b[1;32m 1007\u001b[0m \u001b[0;34m\"non-zero flags not allowed in calls to recv_into() on %s\"\u001b[0m \u001b[0;34m%\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1008\u001b[0m self.__class__)\n\u001b[0;32m-> 1009\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mread\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnbytes\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbuffer\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1010\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1011\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0msocket\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrecv_into\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbuffer\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnbytes\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mflags\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/.anyenv/envs/pyenv/versions/3.6.4/lib/python3.6/ssl.py\u001b[0m in \u001b[0;36mread\u001b[0;34m(self, len, buffer)\u001b[0m\n\u001b[1;32m 869\u001b[0m \u001b[0;32mraise\u001b[0m \u001b[0mValueError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"Read on closed or unwrapped SSL socket.\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 870\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 871\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_sslobj\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mread\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlen\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbuffer\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 872\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mSSLError\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mx\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 873\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mx\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0mSSL_ERROR_EOF\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msuppress_ragged_eofs\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/.anyenv/envs/pyenv/versions/3.6.4/lib/python3.6/ssl.py\u001b[0m in \u001b[0;36mread\u001b[0;34m(self, len, buffer)\u001b[0m\n\u001b[1;32m 629\u001b[0m \"\"\"\n\u001b[1;32m 630\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mbuffer\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 631\u001b[0;31m \u001b[0mv\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_sslobj\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mread\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlen\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbuffer\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 632\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 633\u001b[0m \u001b[0mv\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_sslobj\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mread\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlen\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mKeyboardInterrupt\u001b[0m: "
],
"output_type": "error"
}
],
"source": [
"from requests_html import HTMLSession\n",
"import time\n",
"\n",
"base_url = 'https://www.metacritic.com/browse/games/score/metascore/all/all/filtered'\n",
"qs = 'sort=desc&page='\n",
"\n",
"for page_num in range(200):\n",
" session = HTMLSession()\n",
" resp = session.get(f'{base_url}?{qs}{page_num}')\n",
"\n",
" elems = resp.html.find('.product_row')\n",
" print([i.text for i in elems])\n",
"\n",
" time.sleep(2)\n"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
"# pyppeteerでログインしてmoneyforwardの会員ページを取得する\n",
"async def main():\n",
" browser = await launch()\n",
" page = await browser.newPage()\n",
" await page.goto('https://moneyforward.com/users/sign_in')\n",
" \n",
" await page.type('#sign_in_session_service_email', 'sample@sample.com')\n",
" await page.type('#sign_in_session_service_password', 'sample_pass')\n",
" btn_elem = await page.querySelector('#login-btn-sumit')\n",
" await btn_elem.click()\n",
" \n",
" await page.waitFor(5000)\n",
" await page.screenshot({'path': 'logined.png', 'fullPage': True})\n",
" await browser.close()\n",
"\n",
"\n",
"loop.run_until_complete(main())"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {
"collapsed": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"['「リモートペアプロでマントルを突き抜けろ!」AWS Cloud9でリモートペアプロ&楽々サーバーレス開発', '1次元畳み込みフィルターを利用した音楽データのオートエンコーダ', 'AltJSとしてのPython - フロントエンドをPythonで書こう', 'Applying serverless architecture pattern to distributed data processing', 'Build text classification models ( CBOW and Skip-gram) with FastText in python', 'Building Maintainable Python Web App using Flask', 'C拡張と共に乗り切るPython 2→3移行術', 'Django REST Framework におけるAPI実装プラクティス', 'Django を Zappaで構築してServerless Python のベストプラクティスを探る', 'Djangoアプリケーションにおけるトイル撲滅戦記', 'Djangoだってカンバンつくれるもん(Django Channels + Vue)', 'DjangoではじめるPyCharm実践入門', 'From Data to Web Application: Anime Character Image Recognition with Transfer Learning', 'Fun with Python and Kanji', 'HomeSecurity with Python', 'How to Data Wrangling? Tips for using python libraries for big-data analysis including scikit-learn.', 'Integrate Full-text Search service with Django', 'Interactive Network Visualization using Python 〜 NetworkX + BokehでPEPの参照関係を可視化する', 'Jupyterで広がるPythonの可能性', 'JVM上で動くPython3処理系cafebabepyの実装詳解', 'Make a Drone using RaspberryPi and Google VoiceKit by Python', 'Migrating from Py2 application to Py3: first trial in MonotaRO / Python2 から Python3 への移植: MonotaRO での取り組み', 'niconicoにおけるコンテンツレコメンドの取り組み', 'Notebook as Web API: Turn your notebook into Web API', 'PyCon JP における子ども向けワークショップの活動事例と実施の意義', 'Python, AWS and FinTech', 'Pythonistaに贈るコンテナ入門', 'Pythonistaの選球眼(せんきゅうがん) - エンジニアリングと野球の目利きになる技術', 'Pythonで「お絵描きパズル」を解いてみた。', 'Pythonでざっくり学ぶUnixプロセス', 'Pythonで解く大学入試数学', 'Pythonで始めるウェブスクレイピング実践入門', 'Pythonで時系列のデータを分析してみよう。', 'Pythonによる異常検知入門', 'Pythonを使ったハードウェア開発について', 'Python研修の作り方-Teaching Is Learning-', 'REST API に疲れたあなたへ贈る GraphQL 入門', 'Rust と Python', 'Sphinx-2.0 とドキュメントの未来', 'SymPyによる数式処理', 'The Modern OAuth 2.0', 'Webアプリケーションの仕組み', 'Why you should care about types: Python Typing in the Facebook Backend', 'WILDCAT SDKは量子コンピュータビジネスの味方となるのか!?', 'あなたと私いますぐパッケージン', 'オンザフライ高速化パッケージの比較:Numba, TensorFlow, Dask, etc', 'メルカリにおける AI 活用事例', '契約書データ関連のAI開発に伴う、前処理及び匿名化処理についての実例', 
'自分が欲しいものをPythonで書く方法(Python for Myself)', '実践・競馬データサイエンス', '複数アプリケーションのプロセスとログを管理するための新しいツールと手法', '料理写真が美味しく撮れる! 開発現場から覗くAI料理カメラの裏側', 'Artisanal Async Adventures', 'Introduce syntax and history of Python from 2.4 to 3.7', 'Interpretable Machine Learning, making black box models explainable with Python!', 'Python Boot Campで全国にPythonの環を広げよう!(2018年版)', 'Matplotlibで三角グラフを描く', 'プロダクトを安定稼動させるためのソフトウェア構成管理の事例のご紹介', 'NLP(自然言語処理)初心者のための単語分割/品詞タグ付けツールの紹介', 'Python in Finance: Circle in MUFG Investor Services', 'ばーちゃんとAlexa × チャットを実現した話', 'データ分析ライブラリを用いた最適化モデル', 'Python x 金融のコミュニティ fin-pyの活動事例', 'ドキュメンテーションビルダー「Sphinx」をはじめよう', '積雪メソッド, Snowfall Accumulation', 'PyCon JPで転職してみた人のホンネ', 'テキストマイニングによるTwitter個人アカウントの性格推定', 'Why your Django account registration should use a Turing test...', '医学研究者が深層学習環境の立ち上げの際に苦労した話', '暗号通貨技術・ブロックチェーン技術を活用するCrypto-Fintech Lab.', '安全なサンドボックス構築の裏側 ~投資アルゴリズム構築環境QuantX Factoryの事例~', 'diff 最小化原理で導く Zen of Python', 'Python × Investment ~投資信託をPythonで分析して、その結果を公開するサービス作った話~', 'Pythonの軽量フレームワークによるシンプルで高速なWebAPIの作り方', 'システム開発素人が深層学習を用いた画像認識で麻雀点数計算するLINEbot作った話', '【poke2vec】ポケモンの役割ベクトルの学習とその分析・可視化', 'asyncio + aiohttp で作るウェブサービス', 'PyCon JP 傾向と対策']\n"
]
}
],
"source": [
"from requests_html import HTMLSession\n",
"\n",
"session = HTMLSession()\n",
"resp = session.get('https://pycon.jp/2018/event/sessions')\n",
"\n",
"resp.html.render(sleep=5)\n",
"\n",
"sel = '.session-summary h3'\n",
"elems = resp.html.find(sel)\n",
"\n",
"print([i.text for i in elems])\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.4"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
以上是关于text pyconjp_2018_scraping_samples.ipynb的主要内容,如果未能解决你的问题,请参考以下文章
text 2018-04-08_SpecOps-TheLine_syslog.text
Shangbang Long_ECCV2018_TextSnake_A Flexible Representation for Detecting Text of Arbitrary Shapes(示