raise ValueError(err) - 在 Python 中使用 concurrent.future 实现多线程
Posted
技术标签:
【中文标题】raise ValueError(err) - 在 Python 中使用 concurrent.future 实现多线程【英文标题】:raise ValueError(err) - Implementation of multithreading using concurrent.future in Python 【发布时间】:2021-10-26 12:46:15 【问题描述】:我编写了一个从网站上抓取信息的 python 代码。我试图在我的代码中应用多线程方法。这是我在应用多线程之前的代码:它在我的 PC 上完美运行。
import time

import investpy
import pandas as pd
import requests
from bs4 import BeautifulSoup
def getCurrencyHistorical():
    """Scrape the historical-data table of each currency pair into one DataFrame.

    Every URL in ``links`` is fetched sequentially with ``requests``; the
    first ``<table>`` of each page is read row by row.

    Returns:
        pandas.DataFrame: the stripped ``<td>`` texts of every table row
        except the first row of each table, with the pair name appended as
        a final ``Currency`` column. Column names are the ``<th>`` texts of
        the first row of the first table.
    """
    t1 = time.perf_counter()
    headers = {
        'Accept-Language': 'en-US,en;q=0.9',
        'Upgrade-Insecure-Requests': '1',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (Khtml, like Gecko) Chrome/88.0.4324.150 Safari/537.36 Edg/88.0.705.63',
        # "*/*" restored: the bare ",/;" in the posted text is not a valid media range.
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
        'Cache-Control': 'max-age=0',
        'Connection': 'keep-alive',
    }
    links = {
        "USD-IDR": "https://www.investing.com/currencies/usd-idr-historical-data",
        "USD-JPY": "https://www.investing.com/currencies/usd-jpy-historical-data",
        "USD-CNY": "https://www.investing.com/currencies/usd-cny-historical-data",
    }
    column = []
    output = []
    for key, value in links.items():
        page = requests.get(value, headers=headers)
        soup = BeautifulSoup(page.content, 'html.parser')
        table = soup.select('table')[0]
        table_rows = []
        for row in table.find_all('tr'):
            column.append([item.text.strip() for item in row.find_all('th')])
            outs = [item.text.strip() for item in row.find_all('td')]
            outs.append(key)
            table_rows.append(outs)
        # The first <tr> of each table is assumed to be the header row (it
        # yields no <td> cells, only the key), so it is dropped per table
        # rather than once for the whole result; slicing is also safe when
        # a table has no rows at all.
        output.extend(table_rows[1:])
    column[0].append('Currency')
    df = pd.DataFrame(output, columns=column[0])
    t2 = time.perf_counter()
    print(f'Finished in {t2-t1} seconds')
    return df
但是,当我把代码改成下面这样时,出现了一些错误。这是应用多线程后的代码:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import time
import concurrent.futures
from functools import partial
import psutil
def process_data(key, page):
    """Parse *page* and return the cells of the last row of its first table.

    Args:
        key: Label appended as the final element of the data-cell list.
        page: HTML markup handed to BeautifulSoup.

    Returns:
        tuple: ``(header_cells, data_cells)`` -- the stripped ``<th>`` texts
        and the stripped ``<td>`` texts (plus ``key``) of one row.
    """
    document = BeautifulSoup(page, 'html.parser')
    first_table = document.select('table')[0]
    # NOTE: both lists are rebound on every pass and nothing is accumulated,
    # so only the values computed for the final <tr> reach the return.
    for tr in first_table.find_all('tr'):
        header_cells = [th.text.strip() for th in tr.find_all('th')]
        data_cells = [td.text.strip() for td in tr.find_all('td')]
        data_cells.append(key)
    return header_cells, data_cells
def getCurrencyHistorical(session, pool_executor, item):
    """Download one page and have ``process_data`` parse it in the pool.

    Args:
        session: Object whose ``get(url)`` returns a response with ``content``.
        pool_executor: Executor that receives the ``process_data`` call.
        item: ``(key, url)`` pair for a single currency.

    Returns:
        Whatever ``process_data(key, content)`` returns; the call waits on
        the submitted future's ``result()``.
    """
    pair_name, url = item
    response = session.get(url)
    pending = pool_executor.submit(process_data, pair_name, response.content)
    return pending.result()
def main():
    """Fetch all currency pages concurrently and print the combined DataFrame.

    A thread pool (one worker per link) runs ``getCurrencyHistorical`` for
    each ``(key, url)`` pair; that function forwards the downloaded markup
    to a process pool sized by ``psutil.cpu_count(logical=False)``.

    Each worker result is appended as a single entry to ``column`` and
    ``output``; the first collected ``output`` entry is then discarded and
    the first ``column`` entry is used as the DataFrame's column names.
    """
    t1 = time.perf_counter()
    links = {
        "USD-IDR": "https://www.investing.com/currencies/usd-idr-historical-data",
        "USD-JPY": "https://www.investing.com/currencies/usd-jpy-historical-data",
        "USD-CNY": "https://www.investing.com/currencies/usd-cny-historical-data",
    }
    with requests.Session() as session:
        user_agent = "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.88 Safari/537.37"
        session.headers = {'User-Agent': user_agent}
        column = []
        output = []
        with concurrent.futures.ProcessPoolExecutor(psutil.cpu_count(logical=False)) as pool_executor, \
                concurrent.futures.ThreadPoolExecutor(max_workers=len(links)) as executor:
            fetch = partial(getCurrencyHistorical, session, pool_executor)
            for return_value in executor.map(fetch, links.items()):
                cols, outs = return_value
                column.append(cols)
                output.append(outs)
    del output[0]
    column[0].append('Currency')
    df = pd.DataFrame(output, columns=column[0])
    t2 = time.perf_counter()
    print(f'Finished in {t2-t1} seconds')
    print(df)


# Required for Windows:
if __name__ == '__main__':
    main()
我收到错误 `raise ValueError(err) from err`,具体为 `ValueError: 1 columns passed, passed data had 7 columns.`,它来自 `df = pd.DataFrame(output, columns = column[0])` 这一行。哪里出了问题?谢谢。
【问题讨论】:
只是一点一般性的评论:我知道有些帖子声称,当任务是纯 CPU 运算时(就像这里的情况),进程池的大小永远不应该超过你拥有的*物理*核心数。但我发现事实并非如此。我可以给出一个 100% 纯 CPU 的工作函数,把它的 8 个实例提交到大小为 8 的进程池(我有 8 个逻辑处理器和 4 个物理处理器),它完成所需的时间会比我把池大小指定为 4 时更短。无论如何,您只有 3 个 URL,因此您应该使用 `min(len(links), os.cpu_count())`。
@Booboo 我只是举例展示了 3 个网址。
我知道。我的意思是,如果您碰巧有 4 个物理处理器,那么您创建的进程池会比实际需要多出一个进程,这会耗费更多的资源和时间。
【参考方案1】:
`process_data` 应该和非多处理的情况一样,只不过它只处理一个键值对,但您并没有这样做。主进程现在必须对 `process_data` 返回的列表执行 `extend` 操作。
更新
您没有取到键 “USD-JPY” 的数据项,因为您查看的不是正确的表格。您应该查看 id 为 “curr_table” 的表格。根据我对您问题的评论,我还更新了多处理池的大小。
import requests
from bs4 import BeautifulSoup
import pandas as pd
import time
import concurrent.futures
from functools import partial
from os import cpu_count
def process_data(key, page):
    """Extract the header and data rows of the ``curr_table`` table of a page.

    Args:
        key: Currency-pair label appended as the last cell of every data row.
        page: HTML markup handed to BeautifulSoup.

    Returns:
        tuple: ``(column, output)``. ``column`` holds one list of stripped
        ``<th>`` texts per ``<tr>``; ``output`` holds one list of stripped
        ``<td>`` texts (plus ``key``) per ``<tr>``, excluding the first row.

    Raises:
        ValueError: if the page contains no table with id ``curr_table``.
    """
    soup = BeautifulSoup(page, 'html.parser')
    table = soup.find('table', {'id': 'curr_table'})
    if table is None:
        # Fail with a message that names the pair instead of an
        # AttributeError on None further down.
        raise ValueError(f"no table with id 'curr_table' found for {key}")
    column = []
    output = []
    for row in table.find_all('tr'):
        column.append([item.text.strip() for item in row.find_all('th')])
        outs = [item.text.strip() for item in row.find_all('td')]
        outs.append(key)
        output.append(outs)
    # The first <tr> is assumed to be the header row (no <td> cells, so its
    # entry is just [key]); slicing drops it and tolerates an empty table.
    return column, output[1:]
def getCurrencyHistorical(session, pool_executor, item):
    """Download one currency page and parse it in the process pool.

    Args:
        session: ``requests.Session`` (or compatible) used for the GET.
        pool_executor: Executor that receives the ``process_data`` call.
        item: ``(key, url)`` pair for a single currency.

    Returns:
        The value returned by ``process_data(key, content)``; the call
        waits on the submitted future's ``result()``.

    Raises:
        requests.HTTPError: if the server answers with a 4xx/5xx status.
    """
    key, value = item
    page = session.get(value)
    # Stop here on an HTTP error instead of parsing an error page.
    page.raise_for_status()
    f = pool_executor.submit(process_data, key, page.content)
    return f.result()
def main():
    """Fetch all currency pages concurrently and print the combined DataFrame.

    A thread pool (one worker per link) runs ``getCurrencyHistorical`` for
    each ``(key, url)`` pair; that function forwards the downloaded markup
    to a process pool for parsing. The per-page header lists and data rows
    are concatenated, and the first header list (with ``Currency``
    appended) supplies the column names.
    """
    t1 = time.perf_counter()
    links = {
        "USD-IDR": "https://www.investing.com/currencies/usd-idr-historical-data",
        "USD-JPY": "https://www.investing.com/currencies/usd-jpy-historical-data",
        "USD-CNY": "https://www.investing.com/currencies/usd-cny-historical-data",
    }
    with requests.Session() as session:
        user_agent = "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.88 Safari/537.37"
        session.headers = {'User-Agent': user_agent}
        column = []
        output = []
        # cpu_count() may return None when the count cannot be determined;
        # fall back to 1 so min() never compares an int with None.
        n_processes = min(len(links), cpu_count() or 1)
        with concurrent.futures.ProcessPoolExecutor(n_processes) as pool_executor, \
                concurrent.futures.ThreadPoolExecutor(max_workers=len(links)) as executor:
            fetch = partial(getCurrencyHistorical, session, pool_executor)
            for return_value in executor.map(fetch, links.items()):
                cols, outs = return_value
                column.extend(cols)
                output.extend(outs)
    column[0].append('Currency')
    df = pd.DataFrame(output, columns=column[0])
    t2 = time.perf_counter()
    print(f'Finished in {t2-t1} seconds')
    pd.set_option("display.max_rows", None, "display.max_columns", None)
    print(df)


# Required for Windows:
if __name__ == '__main__':
    main()
打印:
Finished in 2.1944901 seconds
Date Price Open High Low Change % Currency
0 Aug 26, 2021 14,417.5 14,425.0 14,430.0 14,411.0 0.16% USD-IDR
1 Aug 25, 2021 14,395.0 14,405.0 14,421.0 14,387.5 0.03% USD-IDR
2 Aug 24, 2021 14,390.0 14,395.0 14,407.5 14,377.5 -0.14% USD-IDR
3 Aug 23, 2021 14,410.0 14,435.0 14,438.5 14,404.0 -0.28% USD-IDR
4 Aug 20, 2021 14,450.0 14,475.0 14,485.0 14,422.5 0.35% USD-IDR
5 Aug 19, 2021 14,400.0 14,405.0 14,425.0 14,392.5 0.21% USD-IDR
6 Aug 18, 2021 14,370.0 14,387.5 14,400.0 14,372.5 0.00% USD-IDR
7 Aug 16, 2021 14,370.0 14,390.0 14,395.0 14,371.5 -0.10% USD-IDR
8 Aug 13, 2021 14,385.0 14,382.5 14,395.0 14,366.0 0.03% USD-IDR
9 Aug 12, 2021 14,380.0 14,395.0 14,407.5 14,366.0 0.00% USD-IDR
10 Aug 10, 2021 14,380.0 14,375.0 14,402.0 14,375.0 0.14% USD-IDR
11 Aug 09, 2021 14,360.0 14,370.0 14,387.5 14,357.5 0.07% USD-IDR
12 Aug 06, 2021 14,350.0 14,360.0 14,377.5 14,347.5 0.07% USD-IDR
13 Aug 05, 2021 14,340.0 14,330.0 14,360.0 14,321.0 0.21% USD-IDR
14 Aug 04, 2021 14,310.0 14,325.0 14,347.5 14,304.5 -0.21% USD-IDR
15 Aug 03, 2021 14,340.0 14,375.0 14,388.0 14,338.5 -0.55% USD-IDR
16 Aug 02, 2021 14,420.0 14,465.0 14,472.5 14,422.5 -0.28% USD-IDR
17 Jul 30, 2021 14,460.0 14,435.0 14,477.5 14,434.5 -0.14% USD-IDR
18 Jul 29, 2021 14,480.0 14,490.0 14,502.5 14,482.5 -0.03% USD-IDR
19 Jul 28, 2021 14,485.0 14,500.0 14,512.5 14,485.0 -0.03% USD-IDR
20 Jul 27, 2021 14,490.0 14,473.5 14,497.5 14,465.0 0.07% USD-IDR
21 Jul 26, 2021 14,480.0 14,510.0 14,522.5 14,470.0 -0.07% USD-IDR
22 Aug 26, 2021 110.10 109.98 110.23 109.93 0.10% USD-JPY
23 Aug 25, 2021 109.99 109.64 110.13 109.61 0.34% USD-JPY
24 Aug 24, 2021 109.62 109.69 109.89 109.41 -0.05% USD-JPY
25 Aug 23, 2021 109.68 109.81 110.15 109.65 -0.11% USD-JPY
26 Aug 20, 2021 109.80 109.75 109.89 109.57 0.07% USD-JPY
27 Aug 19, 2021 109.72 109.76 110.23 109.49 -0.02% USD-JPY
28 Aug 18, 2021 109.74 109.57 110.07 109.47 0.16% USD-JPY
29 Aug 17, 2021 109.57 109.22 109.66 109.12 0.31% USD-JPY
30 Aug 16, 2021 109.23 109.71 109.76 109.11 -0.31% USD-JPY
31 Aug 13, 2021 109.57 110.39 110.46 109.54 -0.73% USD-JPY
32 Aug 12, 2021 110.38 110.42 110.55 110.31 -0.02% USD-JPY
33 Aug 11, 2021 110.40 110.58 110.81 110.31 -0.14% USD-JPY
34 Aug 10, 2021 110.56 110.29 110.60 110.28 0.25% USD-JPY
35 Aug 09, 2021 110.28 110.26 110.36 110.02 0.03% USD-JPY
36 Aug 06, 2021 110.25 109.77 110.36 109.69 0.46% USD-JPY
37 Aug 05, 2021 109.74 109.49 109.79 109.40 0.25% USD-JPY
38 Aug 04, 2021 109.47 109.07 109.68 108.72 0.39% USD-JPY
39 Aug 03, 2021 109.04 109.32 109.36 108.88 -0.22% USD-JPY
40 Aug 02, 2021 109.28 109.69 109.79 109.18 -0.38% USD-JPY
41 Jul 30, 2021 109.70 109.49 109.83 109.36 0.22% USD-JPY
42 Jul 29, 2021 109.46 109.91 109.96 109.42 -0.40% USD-JPY
43 Jul 28, 2021 109.90 109.75 110.29 109.74 0.13% USD-JPY
44 Jul 27, 2021 109.76 110.36 110.41 109.58 -0.53% USD-JPY
45 Jul 26, 2021 110.34 110.57 110.59 110.11 -0.18% USD-JPY
46 Aug 26, 2021 6.4815 6.4725 6.4866 6.4725 0.09% USD-CNY
47 Aug 25, 2021 6.4756 6.4714 6.4811 6.4707 0.07% USD-CNY
48 Aug 24, 2021 6.4710 6.4790 6.4851 6.4676 -0.15% USD-CNY
49 Aug 23, 2021 6.4805 6.4915 6.4973 6.4788 -0.32% USD-CNY
50 Aug 20, 2021 6.5012 6.4960 6.5057 6.4935 0.11% USD-CNY
51 Aug 19, 2021 6.4942 6.4847 6.4997 6.4840 0.16% USD-CNY
52 Aug 18, 2021 6.4841 6.4861 6.4872 6.4776 -0.02% USD-CNY
53 Aug 17, 2021 6.4854 6.4787 6.4889 6.4759 0.17% USD-CNY
54 Aug 16, 2021 6.4742 6.4774 6.4810 6.4719 -0.04% USD-CNY
55 Aug 13, 2021 6.4768 6.4778 6.4854 6.4749 -0.02% USD-CNY
56 Aug 12, 2021 6.4782 6.4767 6.4811 6.4719 -0.00% USD-CNY
57 Aug 11, 2021 6.4783 6.4846 6.4894 6.4752 -0.11% USD-CNY
58 Aug 10, 2021 6.4852 6.4826 6.4875 6.4774 -0.01% USD-CNY
59 Aug 09, 2021 6.4857 6.4835 6.4895 6.4731 0.05% USD-CNY
60 Aug 06, 2021 6.4825 6.4660 6.4848 6.4622 0.34% USD-CNY
61 Aug 05, 2021 6.4608 6.4671 6.4677 6.4595 -0.07% USD-CNY
62 Aug 04, 2021 6.4655 6.4662 6.4673 6.4555 -0.07% USD-CNY
63 Aug 03, 2021 6.4700 6.4656 6.4710 6.4604 0.12% USD-CNY
64 Aug 02, 2021 6.4620 6.4615 6.4693 6.4580 0.02% USD-CNY
65 Jul 30, 2021 6.4609 6.4645 6.4693 6.4506 0.07% USD-CNY
66 Jul 29, 2021 6.4562 6.4908 6.4908 6.4544 -0.53% USD-CNY
67 Jul 28, 2021 6.4905 6.5095 6.5101 6.4891 -0.31% USD-CNY
68 Jul 27, 2021 6.5104 6.4760 6.5132 6.4735 0.43% USD-CNY
69 Jul 26, 2021 6.4825 6.4790 6.4875 6.4785 0.03% USD-CNY
【讨论】:
感谢@Booboo 的回答。你能帮忙回答另一个问题吗? ***.com/questions/68993281/…以上是关于raise ValueError(err) - 在 Python 中使用 concurrent.future 实现多线程的主要内容,如果未能解决你的问题,请参考以下文章
如何解决 raise ValueError("columns must have matching element counts") ValueError: columns mus
raise ValueError("Unknown label type: %s" % repr(ys)) ValueError: Unknown label type: (arr
raise ValueError(“{0} format is not supported“.format(y_type))ValueError: continuous format
raise ValueError(“{0} format is not supported“.format(y_type))ValueError: continuous format
LSTM调用tensorflow提示 raise ValueError("Ambiguous dimension: %s" % value),怎么解决?