查询执行期间资源超出(Resources Exceeded)。BigQuery
Posted
技术标签:
【中文标题】查询执行期间资源超出。BigQuery【英文标题】:Resources Exceeded during query execution. BigQuery 【发布时间】:2017-01-19 20:09:37 【问题描述】:大家好,
我一直在尝试让我们的一些较大的分析代码在 BigQuery 上运行,但在不少州(state)上,由于现有的数据量很大,我不断遇到问题。我们谈论的是多年的数据。可能是我的查询写得不够好,但我需要根据特定的 GROUP BY 分组来求和。
为了防止资源超出错误,我需要在查询中进行哪些更改?
-- Quote-competitiveness statistics per company / rating class at ZIP3 and ZIP5
-- granularity (BigQuery legacy SQL).
-- The inner query computes every metric with analytic (OVER) functions layered
-- on a GROUP BY; the outer query converts the counts into ratios and collapses
-- the repeated rows with a second GROUP BY.
SELECT
  COMPANY_NAME,
  RATING_CLASS,
  COMPANY_KEY,
  -- State Info & Calculations
  -- OVER () is an unpartitioned window, so this sums the ZIP5 quote count
  -- over every row this query returns.
  -- NOTE(review): the same ZIP5 count repeats on each company / rating-class
  -- row, so this total may over-count quotes; confirm it is the intended
  -- state-level figure.
  SUM(ZIP5_MED_SUPP_TOOL_NUM_QUOTE) OVER () AS STATE_MED_SUPP_TOOL_NUM_QUOTE,
  -- ZIP3 Info & Calculations
  ZIP3,
  ZIP3_MED_SUPP_TOOL_NUM_QUOTE AS ZIP3_MED_SUPP_TOOL_NUM_QUOTE,
  ZIP3_TOTAL_RESULT_APPEARANCE,
  ZIP3_LOWEST_COUNT/ZIP3_TOTAL_RESULT_APPEARANCE AS ZIP3_LOWEST,
  ZIP3_AVG_RATIO_TO_LOWEST AS ZIP3_AVG_RATIO_TO_LOWEST,
  ZIP3_AVG_RANK AS ZIP3_AVG_RANK,
  ZIP3_TOP5_COUNT/ZIP3_TOTAL_RESULT_APPEARANCE AS ZIP3_TOP5,
  ZIP3_TOP10_COUNT/ZIP3_TOTAL_RESULT_APPEARANCE AS ZIP3_TOP10,
  ZIP3_AVG_CENT_DIFF AS ZIP3_AVG_CENT_DIFF,
  ZIP3_DISCOUNTED_LOWEST_COUNT/ZIP3_TOTAL_RESULT_APPEARANCE AS ZIP3_DISCOUNTED_LOWEST,
  ZIP3_DISCOUNTED_AVG_RATIO_TO_LOWEST AS ZIP3_DISCOUNTED_AVG_RATIO_TO_LOWEST,
  ZIP3_DISCOUNTED_AVG_RANK AS ZIP3_DISCOUNTED_AVG_RANK,
  ZIP3_DISCOUNTED_TOP5_COUNT/ZIP3_TOTAL_RESULT_APPEARANCE AS ZIP3_DISCOUNTED_TOP5,
  ZIP3_DISCOUNTED_TOP10_COUNT/ZIP3_TOTAL_RESULT_APPEARANCE AS ZIP3_DISCOUNTED_TOP10,
  ZIP3_DISCOUNTED_AVG_CENT_DIFF AS ZIP3_DISCOUNTED_AVG_CENT_DIFF,
  -- ZIP5 Info & Calculations
  ZIP5,
  ZIP5_MED_SUPP_TOOL_NUM_QUOTE AS ZIP5_MED_SUPP_TOOL_NUM_QUOTE,
  ZIP5_TOTAL_RESULT_APPEARANCE,
  ZIP5_LOWEST_COUNT/ZIP5_TOTAL_RESULT_APPEARANCE AS ZIP5_LOWEST,
  ZIP5_AVG_RATIO_TO_LOWEST AS ZIP5_AVG_RATIO_TO_LOWEST,
  ZIP5_AVG_RANK AS ZIP5_AVG_RANK,
  ZIP5_TOP5_COUNT/ZIP5_TOTAL_RESULT_APPEARANCE AS ZIP5_TOP5,
  ZIP5_TOP10_COUNT/ZIP5_TOTAL_RESULT_APPEARANCE AS ZIP5_TOP10,
  ZIP5_AVG_CENT_DIFF AS ZIP5_AVG_CENT_DIFF,
  ZIP5_DISCOUNTED_LOWEST_COUNT/ZIP5_TOTAL_RESULT_APPEARANCE AS ZIP5_DISCOUNTED_LOWEST,
  ZIP5_DISCOUNTED_AVG_RATIO_TO_LOWEST AS ZIP5_DISCOUNTED_AVG_RATIO_TO_LOWEST,
  ZIP5_DISCOUNTED_AVG_RANK AS ZIP5_DISCOUNTED_AVG_RANK,
  ZIP5_DISCOUNTED_TOP5_COUNT/ZIP5_TOTAL_RESULT_APPEARANCE AS ZIP5_DISCOUNTED_TOP5,
  ZIP5_DISCOUNTED_TOP10_COUNT/ZIP5_TOTAL_RESULT_APPEARANCE AS ZIP5_DISCOUNTED_TOP10,
  ZIP5_DISCOUNTED_AVG_CENT_DIFF AS ZIP5_DISCOUNTED_AVG_CENT_DIFF
FROM (
  SELECT
    COMPANY_NAME,
    COMPANY_KEY,
    RATING_CLASS,
    -- ZIP3: the quote count is per ZIP3; every other metric is per
    -- (ZIP3, company_key, rating_class).
    ZIP3,
    COUNT(DISTINCT logging_key) OVER (PARTITION BY ZIP3) AS ZIP3_MED_SUPP_TOOL_NUM_QUOTE,
    COUNT(*) OVER (PARTITION BY ZIP3, company_key, rating_class) AS ZIP3_TOTAL_RESULT_APPEARANCE,
    SUM(CASE WHEN lowest = TRUE THEN 1 ELSE 0 END) OVER (PARTITION BY ZIP3, company_key, rating_class) AS ZIP3_LOWEST_COUNT,
    -- The undiscounted averages read the undiscounted columns; only the
    -- ZIP3_DISCOUNTED_* averages below read the discounted_* columns.
    AVG(ratio_to_min) OVER (PARTITION BY ZIP3, company_key, rating_class) AS ZIP3_AVG_RATIO_TO_LOWEST,
    AVG(rate_order) OVER (PARTITION BY ZIP3, company_key, rating_class) AS ZIP3_AVG_RANK,
    SUM(CASE WHEN top5 = TRUE THEN 1 ELSE 0 END) OVER (PARTITION BY ZIP3, company_key, rating_class) AS ZIP3_TOP5_COUNT,
    SUM(CASE WHEN top10 = TRUE THEN 1 ELSE 0 END) OVER (PARTITION BY ZIP3, company_key, rating_class) AS ZIP3_TOP10_COUNT,
    AVG(cent_diff) OVER (PARTITION BY ZIP3, company_key, rating_class) AS ZIP3_AVG_CENT_DIFF,
    SUM(CASE WHEN DISCOUNTED_lowest = TRUE THEN 1 ELSE 0 END) OVER (PARTITION BY ZIP3, company_key, rating_class) AS ZIP3_DISCOUNTED_LOWEST_COUNT,
    AVG(discounted_ratio_to_min) OVER (PARTITION BY ZIP3, company_key, rating_class) AS ZIP3_DISCOUNTED_AVG_RATIO_TO_LOWEST,
    AVG(discounted_rate_order) OVER (PARTITION BY ZIP3, company_key, rating_class) AS ZIP3_DISCOUNTED_AVG_RANK,
    SUM(CASE WHEN DISCOUNTED_top5 = TRUE THEN 1 ELSE 0 END) OVER (PARTITION BY ZIP3, company_key, rating_class) AS ZIP3_DISCOUNTED_TOP5_COUNT,
    SUM(CASE WHEN DISCOUNTED_top10 = TRUE THEN 1 ELSE 0 END) OVER (PARTITION BY ZIP3, company_key, rating_class) AS ZIP3_DISCOUNTED_TOP10_COUNT,
    AVG(discounted_cent_diff) OVER (PARTITION BY ZIP3, company_key, rating_class) AS ZIP3_DISCOUNTED_AVG_CENT_DIFF,
    -- ZIP5: same metrics, partitioned by ZIP5 instead of ZIP3.
    ZIP5,
    COUNT(DISTINCT logging_key) OVER (PARTITION BY ZIP5) AS ZIP5_MED_SUPP_TOOL_NUM_QUOTE,
    COUNT(*) OVER (PARTITION BY ZIP5, company_key, rating_class) AS ZIP5_TOTAL_RESULT_APPEARANCE,
    SUM(CASE WHEN lowest = TRUE THEN 1 ELSE 0 END) OVER (PARTITION BY ZIP5, company_key, rating_class) AS ZIP5_LOWEST_COUNT,
    AVG(ratio_to_min) OVER (PARTITION BY ZIP5, company_key, rating_class) AS ZIP5_AVG_RATIO_TO_LOWEST,
    AVG(rate_order) OVER (PARTITION BY ZIP5, company_key, rating_class) AS ZIP5_AVG_RANK,
    SUM(CASE WHEN top5 = TRUE THEN 1 ELSE 0 END) OVER (PARTITION BY ZIP5, company_key, rating_class) AS ZIP5_TOP5_COUNT,
    SUM(CASE WHEN top10 = TRUE THEN 1 ELSE 0 END) OVER (PARTITION BY ZIP5, company_key, rating_class) AS ZIP5_TOP10_COUNT,
    AVG(cent_diff) OVER (PARTITION BY ZIP5, company_key, rating_class) AS ZIP5_AVG_CENT_DIFF,
    SUM(CASE WHEN DISCOUNTED_lowest = TRUE THEN 1 ELSE 0 END) OVER (PARTITION BY ZIP5, company_key, rating_class) AS ZIP5_DISCOUNTED_LOWEST_COUNT,
    AVG(discounted_ratio_to_min) OVER (PARTITION BY ZIP5, company_key, rating_class) AS ZIP5_DISCOUNTED_AVG_RATIO_TO_LOWEST,
    AVG(discounted_rate_order) OVER (PARTITION BY ZIP5, company_key, rating_class) AS ZIP5_DISCOUNTED_AVG_RANK,
    SUM(CASE WHEN DISCOUNTED_top5 = TRUE THEN 1 ELSE 0 END) OVER (PARTITION BY ZIP5, company_key, rating_class) AS ZIP5_DISCOUNTED_TOP5_COUNT,
    SUM(CASE WHEN DISCOUNTED_top10 = TRUE THEN 1 ELSE 0 END) OVER (PARTITION BY ZIP5, company_key, rating_class) AS ZIP5_DISCOUNTED_TOP10_COUNT,
    AVG(discounted_cent_diff) OVER (PARTITION BY ZIP5, company_key, rating_class) AS ZIP5_DISCOUNTED_AVG_CENT_DIFF
  FROM
    [csgapi:qh_med_supp_tool.v2_TX]
  WHERE
    SEARCH_TIMESTAMP >= TIMESTAMP('2016-01-01T00:00:00.000Z')
    AND SEARCH_TIMESTAMP <= TIMESTAMP('2017-01-01T00:00:00.000Z')
    AND user_key NOT IN ("6522869941010432", "6277136540237824")
  GROUP BY
    COMPANY_NAME,
    COMPANY_KEY,
    RATING_CLASS,
    ZIP3,
    ZIP5,
    LOWEST,
    RATIO_TO_MIN,
    RATE_ORDER,
    TOP5,
    TOP10,
    CENT_DIFF,
    DISCOUNTED_LOWEST,
    DISCOUNTED_RATIO_TO_MIN,
    DISCOUNTED_RATE_ORDER,
    DISCOUNTED_TOP5,
    DISCOUNTED_TOP10,
    DISCOUNTED_CENT_DIFF,
    LOGGING_KEY )
GROUP BY
  COMPANY_NAME,
  COMPANY_KEY,
  RATING_CLASS,
  -- ZIP3 General
  ZIP3,
  ZIP3_MED_SUPP_TOOL_NUM_QUOTE,
  ZIP3_TOTAL_RESULT_APPEARANCE,
  ZIP3_LOWEST,
  ZIP3_AVG_RATIO_TO_LOWEST,
  ZIP3_AVG_RANK,
  ZIP3_TOP5,
  ZIP3_TOP10,
  ZIP3_AVG_CENT_DIFF,
  ZIP3_DISCOUNTED_LOWEST,
  ZIP3_DISCOUNTED_AVG_RATIO_TO_LOWEST,
  ZIP3_DISCOUNTED_AVG_RANK,
  ZIP3_DISCOUNTED_TOP5,
  ZIP3_DISCOUNTED_TOP10,
  ZIP3_DISCOUNTED_AVG_CENT_DIFF,
  -- ZIP5 General
  ZIP5,
  ZIP5_MED_SUPP_TOOL_NUM_QUOTE,
  ZIP5_TOTAL_RESULT_APPEARANCE,
  ZIP5_LOWEST,
  ZIP5_AVG_RATIO_TO_LOWEST,
  ZIP5_AVG_RANK,
  ZIP5_TOP5,
  ZIP5_TOP10,
  ZIP5_AVG_CENT_DIFF,
  ZIP5_DISCOUNTED_LOWEST,
  ZIP5_DISCOUNTED_AVG_RATIO_TO_LOWEST,
  ZIP5_DISCOUNTED_AVG_RANK,
  ZIP5_DISCOUNTED_TOP5,
  ZIP5_DISCOUNTED_TOP10,
  ZIP5_DISCOUNTED_AVG_CENT_DIFF
采用建议的修复后更新的查询:
-- Reworked query (BigQuery legacy SQL): one de-duplicated key set (main) is
-- LEFT JOINed to five pre-aggregated subqueries over the same table, replacing
-- analytic functions with plain GROUP BY aggregates.
-- The same row filter is written out in every subquery; keep all six copies
-- identical so the keys and the aggregates describe the same population of
-- rows.
SELECT
  main.COMPANY_NAME AS COMPANY_NAME,
  main.COMPANY_KEY AS COMPANY_KEY,
  main.RATING_CLASS AS RATING_CLASS,
  STATE_COUNT.STATE_MED_SUPP_TOOL_NUM_QUOTE AS STATE_MED_SUPP_TOOL_NUM_QUOTE,
  -- ZIP3
  main.ZIP3 AS ZIP3,
  ZIP3_COUNT.ZIP3_MED_SUPP_TOOL_NUM_QUOTE AS ZIP3_MED_SUPP_TOOL_NUM_QUOTE,
  -- NOTE(review): this column (and its ZIP5 twin) has no explicit alias,
  -- unlike its neighbours; confirm the resulting output column name is the
  -- one consumers expect.
  ZIP3_SUB.ZIP3_TOTAL_RESULT_APPEARANCE,
  ZIP3_SUB.ZIP3_AVG_RATIO_TO_LOWEST AS ZIP3_AVG_RATIO_TO_LOWEST,
  ZIP3_SUB.ZIP3_TOP5_COUNT/ZIP3_SUB.ZIP3_TOTAL_RESULT_APPEARANCE AS ZIP3_TOP5,
  ZIP3_SUB.ZIP3_LOWEST_COUNT/ZIP3_SUB.ZIP3_TOTAL_RESULT_APPEARANCE AS ZIP3_LOWEST,
  ZIP3_SUB.ZIP3_TOP10_COUNT/ZIP3_SUB.ZIP3_TOTAL_RESULT_APPEARANCE AS ZIP3_TOP10,
  ZIP3_SUB.ZIP3_AVG_RANK AS ZIP3_AVG_RANK,
  ZIP3_SUB.ZIP3_AVG_CENT_DIFF AS ZIP3_AVG_CENT_DIFF,
  ZIP3_SUB.ZIP3_DISCOUNTED_AVG_RATIO_TO_LOWEST AS ZIP3_DISCOUNTED_AVG_RATIO_TO_LOWEST,
  ZIP3_SUB.ZIP3_DISCOUNTED_TOP5_COUNT/ZIP3_SUB.ZIP3_TOTAL_RESULT_APPEARANCE AS ZIP3_DISCOUNTED_TOP5,
  ZIP3_SUB.ZIP3_DISCOUNTED_LOWEST_COUNT/ZIP3_SUB.ZIP3_TOTAL_RESULT_APPEARANCE AS ZIP3_DISCOUNTED_LOWEST,
  ZIP3_SUB.ZIP3_DISCOUNTED_TOP10_COUNT/ZIP3_SUB.ZIP3_TOTAL_RESULT_APPEARANCE AS ZIP3_DISCOUNTED_TOP10,
  ZIP3_SUB.ZIP3_DISCOUNTED_AVG_RANK AS ZIP3_DISCOUNTED_AVG_RANK,
  ZIP3_SUB.ZIP3_DISCOUNTED_AVG_CENT_DIFF AS ZIP3_DISCOUNTED_AVG_CENT_DIFF,
  -- ZIP5
  main.ZIP5 AS ZIP5,
  ZIP5_COUNT.ZIP5_MED_SUPP_TOOL_NUM_QUOTE AS ZIP5_MED_SUPP_TOOL_NUM_QUOTE,
  ZIP5_SUB.ZIP5_TOTAL_RESULT_APPEARANCE,
  ZIP5_SUB.ZIP5_AVG_RATIO_TO_LOWEST AS ZIP5_AVG_RATIO_TO_LOWEST,
  ZIP5_SUB.ZIP5_TOP5_COUNT/ZIP5_SUB.ZIP5_TOTAL_RESULT_APPEARANCE AS ZIP5_TOP5,
  ZIP5_SUB.ZIP5_LOWEST_COUNT/ZIP5_SUB.ZIP5_TOTAL_RESULT_APPEARANCE AS ZIP5_LOWEST,
  ZIP5_SUB.ZIP5_TOP10_COUNT/ZIP5_SUB.ZIP5_TOTAL_RESULT_APPEARANCE AS ZIP5_TOP10,
  ZIP5_SUB.ZIP5_AVG_RANK AS ZIP5_AVG_RANK,
  ZIP5_SUB.ZIP5_AVG_CENT_DIFF AS ZIP5_AVG_CENT_DIFF,
  ZIP5_SUB.ZIP5_DISCOUNTED_AVG_RATIO_TO_LOWEST AS ZIP5_DISCOUNTED_AVG_RATIO_TO_LOWEST,
  ZIP5_SUB.ZIP5_DISCOUNTED_TOP5_COUNT/ZIP5_SUB.ZIP5_TOTAL_RESULT_APPEARANCE AS ZIP5_DISCOUNTED_TOP5,
  ZIP5_SUB.ZIP5_DISCOUNTED_LOWEST_COUNT/ZIP5_SUB.ZIP5_TOTAL_RESULT_APPEARANCE AS ZIP5_DISCOUNTED_LOWEST,
  ZIP5_SUB.ZIP5_DISCOUNTED_TOP10_COUNT/ZIP5_SUB.ZIP5_TOTAL_RESULT_APPEARANCE AS ZIP5_DISCOUNTED_TOP10,
  ZIP5_SUB.ZIP5_DISCOUNTED_AVG_RANK AS ZIP5_DISCOUNTED_AVG_RANK,
  ZIP5_SUB.ZIP5_DISCOUNTED_AVG_CENT_DIFF AS ZIP5_DISCOUNTED_AVG_CENT_DIFF
FROM (
  -- main: the distinct (company, rating class, ZIP3, ZIP5, state) combinations
  -- that drive the output rows.
  SELECT
    COMPANY_NAME,
    COMPANY_KEY,
    RATING_CLASS,
    ZIP3,
    ZIP5,
    STATE
  FROM
    [csgapi:qh_med_supp_tool.v2_TX]
  WHERE
    SEARCH_TIMESTAMP >= TIMESTAMP('2016-01-01T06:00:00.000Z')
    AND SEARCH_TIMESTAMP <= TIMESTAMP('2017-01-01T05:00:00.000Z')
    AND user_key NOT IN ("6522869941010432",
      "6277136540237824",
      "4872666167115776",
      "6396348765044736",
      "6139303562313728",
      "4988973881491456")
    AND portal_key NOT IN ("5878607637381120")
  GROUP BY
    COMPANY_NAME,
    COMPANY_KEY,
    RATING_CLASS,
    ZIP3,
    ZIP5,
    STATE
  ) AS main
LEFT JOIN (
  -- ZIP3_SUB: metrics per (ZIP3, company_key, rating_class). The grouping
  -- columns are exactly the join keys, so each main row matches at most one
  -- aggregate row.
  SELECT
    ZIP3,
    COUNT(*) AS ZIP3_TOTAL_RESULT_APPEARANCE,
    COMPANY_KEY,
    RATING_CLASS,
    -- Undiscounted averages read the undiscounted columns; only the
    -- ZIP3_DISCOUNTED_* averages read the discounted_* columns.
    AVG(ratio_to_min) AS ZIP3_AVG_RATIO_TO_LOWEST,
    SUM(CASE WHEN TOP5 = TRUE THEN 1 ELSE 0 END) AS ZIP3_TOP5_COUNT,
    SUM(CASE WHEN LOWEST = TRUE THEN 1 ELSE 0 END) AS ZIP3_LOWEST_COUNT,
    SUM(CASE WHEN TOP10 = TRUE THEN 1 ELSE 0 END) AS ZIP3_TOP10_COUNT,
    AVG(rate_order) AS ZIP3_AVG_RANK,
    AVG(cent_diff) AS ZIP3_AVG_CENT_DIFF,
    AVG(discounted_ratio_to_min) AS ZIP3_DISCOUNTED_AVG_RATIO_TO_LOWEST,
    SUM(CASE WHEN DISCOUNTED_TOP5 = TRUE THEN 1 ELSE 0 END) AS ZIP3_DISCOUNTED_TOP5_COUNT,
    SUM(CASE WHEN DISCOUNTED_LOWEST = TRUE THEN 1 ELSE 0 END) AS ZIP3_DISCOUNTED_LOWEST_COUNT,
    SUM(CASE WHEN DISCOUNTED_TOP10 = TRUE THEN 1 ELSE 0 END) AS ZIP3_DISCOUNTED_TOP10_COUNT,
    AVG(discounted_rate_order) AS ZIP3_DISCOUNTED_AVG_RANK,
    AVG(discounted_cent_diff) AS ZIP3_DISCOUNTED_AVG_CENT_DIFF
  FROM
    [csgapi:qh_med_supp_tool.v2_TX]
  WHERE
    SEARCH_TIMESTAMP >= TIMESTAMP('2016-01-01T06:00:00.000Z')
    AND SEARCH_TIMESTAMP <= TIMESTAMP('2017-01-01T05:00:00.000Z')
    AND user_key NOT IN ("6522869941010432",
      "6277136540237824",
      "4872666167115776",
      "6396348765044736",
      "6139303562313728",
      "4988973881491456")
    AND portal_key NOT IN ("5878607637381120")
  GROUP BY
    ZIP3,
    COMPANY_KEY,
    RATING_CLASS
  ) AS ZIP3_SUB
ON
  main.ZIP3 = ZIP3_SUB.ZIP3
  AND main.COMPANY_KEY = ZIP3_SUB.COMPANY_KEY
  AND main.RATING_CLASS = ZIP3_SUB.RATING_CLASS
LEFT JOIN (
  -- ZIP3_COUNT: exact number of distinct quotes (logging_key) per ZIP3.
  SELECT
    ZIP3,
    EXACT_COUNT_DISTINCT(logging_key) AS ZIP3_MED_SUPP_TOOL_NUM_QUOTE
  FROM
    [csgapi:qh_med_supp_tool.v2_TX]
  WHERE
    SEARCH_TIMESTAMP >= TIMESTAMP('2016-01-01T06:00:00.000Z')
    AND SEARCH_TIMESTAMP <= TIMESTAMP('2017-01-01T05:00:00.000Z')
    AND user_key NOT IN ("6522869941010432",
      "6277136540237824",
      "4872666167115776",
      "6396348765044736",
      "6139303562313728",
      "4988973881491456")
    AND portal_key NOT IN ("5878607637381120")
  GROUP BY
    ZIP3 ) AS ZIP3_COUNT
ON
  main.ZIP3 = ZIP3_COUNT.ZIP3
LEFT JOIN (
  -- ZIP5_SUB: same metrics as ZIP3_SUB, per (ZIP5, company_key, rating_class).
  SELECT
    ZIP5,
    COUNT(*) AS ZIP5_TOTAL_RESULT_APPEARANCE,
    COMPANY_KEY,
    RATING_CLASS,
    AVG(ratio_to_min) AS ZIP5_AVG_RATIO_TO_LOWEST,
    SUM(CASE WHEN TOP5 = TRUE THEN 1 ELSE 0 END) AS ZIP5_TOP5_COUNT,
    SUM(CASE WHEN LOWEST = TRUE THEN 1 ELSE 0 END) AS ZIP5_LOWEST_COUNT,
    SUM(CASE WHEN TOP10 = TRUE THEN 1 ELSE 0 END) AS ZIP5_TOP10_COUNT,
    AVG(rate_order) AS ZIP5_AVG_RANK,
    AVG(cent_diff) AS ZIP5_AVG_CENT_DIFF,
    AVG(discounted_ratio_to_min) AS ZIP5_DISCOUNTED_AVG_RATIO_TO_LOWEST,
    SUM(CASE WHEN DISCOUNTED_TOP5 = TRUE THEN 1 ELSE 0 END) AS ZIP5_DISCOUNTED_TOP5_COUNT,
    SUM(CASE WHEN DISCOUNTED_LOWEST = TRUE THEN 1 ELSE 0 END) AS ZIP5_DISCOUNTED_LOWEST_COUNT,
    SUM(CASE WHEN DISCOUNTED_TOP10 = TRUE THEN 1 ELSE 0 END) AS ZIP5_DISCOUNTED_TOP10_COUNT,
    AVG(discounted_rate_order) AS ZIP5_DISCOUNTED_AVG_RANK,
    AVG(discounted_cent_diff) AS ZIP5_DISCOUNTED_AVG_CENT_DIFF
  FROM
    [csgapi:qh_med_supp_tool.v2_TX]
  WHERE
    SEARCH_TIMESTAMP >= TIMESTAMP('2016-01-01T06:00:00.000Z')
    AND SEARCH_TIMESTAMP <= TIMESTAMP('2017-01-01T05:00:00.000Z')
    AND user_key NOT IN ("6522869941010432",
      "6277136540237824",
      "4872666167115776",
      "6396348765044736",
      "6139303562313728",
      "4988973881491456")
    AND portal_key NOT IN ("5878607637381120")
  GROUP BY
    ZIP5,
    COMPANY_KEY,
    RATING_CLASS
  ) AS ZIP5_SUB
ON
  main.ZIP5 = ZIP5_SUB.ZIP5
  AND main.COMPANY_KEY = ZIP5_SUB.COMPANY_KEY
  AND main.RATING_CLASS = ZIP5_SUB.RATING_CLASS
LEFT JOIN (
  -- ZIP5_COUNT: exact number of distinct quotes (logging_key) per ZIP5.
  SELECT
    ZIP5,
    EXACT_COUNT_DISTINCT(logging_key) AS ZIP5_MED_SUPP_TOOL_NUM_QUOTE
  FROM
    [csgapi:qh_med_supp_tool.v2_TX]
  WHERE
    SEARCH_TIMESTAMP >= TIMESTAMP('2016-01-01T06:00:00.000Z')
    AND SEARCH_TIMESTAMP <= TIMESTAMP('2017-01-01T05:00:00.000Z')
    AND user_key NOT IN ("6522869941010432",
      "6277136540237824",
      "4872666167115776",
      "6396348765044736",
      "6139303562313728",
      "4988973881491456")
    AND portal_key NOT IN ("5878607637381120")
  GROUP BY
    ZIP5 ) AS ZIP5_COUNT
ON
  main.ZIP5 = ZIP5_COUNT.ZIP5
LEFT JOIN (
  -- STATE_COUNT: exact number of distinct quotes (logging_key) per state.
  SELECT
    STATE,
    EXACT_COUNT_DISTINCT(logging_key) AS STATE_MED_SUPP_TOOL_NUM_QUOTE
  FROM
    [csgapi:qh_med_supp_tool.v2_TX]
  WHERE
    SEARCH_TIMESTAMP >= TIMESTAMP('2016-01-01T06:00:00.000Z')
    AND SEARCH_TIMESTAMP <= TIMESTAMP('2017-01-01T05:00:00.000Z')
    AND user_key NOT IN ("6522869941010432",
      "6277136540237824",
      "4872666167115776",
      "6396348765044736",
      "6139303562313728",
      "4988973881491456")
    AND portal_key NOT IN ("5878607637381120")
  GROUP BY
    STATE ) AS STATE_COUNT
ON
  main.STATE = STATE_COUNT.STATE
说明: 由于 BigQuery 是一种共享资源,BigQuery 会估算查询所需的资源量并相应地分配资源。把查询拆分为对同一张表的多个子查询再连接起来之后,执行计算时可以使用的资源比最初估算的更多。有关为什么这样限制查询效果更好的更多技术解释,请参阅 Jordan Tigani 在 Stack Overflow 上的回答(原文中 here 处为链接)。
【问题讨论】:
只是想知道,这个查询大概处理多少 GB? 我对您的查询为何如此昂贵有一些想法。原因之一可能是它被严重过度设计了。但是,如果不清楚您在这里想要达到的具体目标(从逻辑上讲),就很难下最终结论,也很容易判断错误。我建议您向我们详细解释您想通过此查询得到什么,这样我们才能真正帮到您,而不是像现在这样盲目猜测 :o) 【参考方案1】:不知何故,我觉得下面就是您需要的。我可能是错的,因为这实际上只是相对盲目地对您的逻辑进行逆向工程,所以如果我弄错了,请不要过于苛责。我无法测试,但我觉得它应该能够成功运行,而且开销会很小
-- Suggested rewrite (BigQuery legacy SQL): a de-duplicated key set (main) is
-- LEFT JOINed to four pre-aggregated subqueries over the same table, so each
-- metric is a plain GROUP BY aggregate instead of an analytic function.
SELECT
  main.COMPANY_NAME AS COMPANY_NAME,
  main.COMPANY_KEY AS COMPANY_KEY,
  main.RATING_CLASS AS RATING_CLASS,
  -- OVER () is an unpartitioned window, so this sums the ZIP5 quote count
  -- over every row this query returns.
  -- NOTE(review): the same ZIP5 count repeats on each company / rating-class
  -- row, so this total may over-count quotes; confirm it is the intended
  -- state-level figure.
  SUM(ZIP5_MED_SUPP_TOOL_NUM_QUOTE) OVER () AS STATE_MED_SUPP_TOOL_NUM_QUOTE,
  -- ZIP3
  main.ZIP3 AS ZIP3,
  ZIP3_MED_SUPP_TOOL_NUM_QUOTE AS ZIP3_MED_SUPP_TOOL_NUM_QUOTE,
  ZIP3_TOTAL_RESULT_APPEARANCE,
  ZIP3_LOWEST_COUNT/ZIP3_TOTAL_RESULT_APPEARANCE AS ZIP3_LOWEST,
  ZIP3_AVG_RATIO_TO_LOWEST AS ZIP3_AVG_RATIO_TO_LOWEST,
  ZIP3_AVG_RANK AS ZIP3_AVG_RANK,
  ZIP3_TOP5_COUNT/ZIP3_TOTAL_RESULT_APPEARANCE AS ZIP3_TOP5,
  ZIP3_TOP10_COUNT/ZIP3_TOTAL_RESULT_APPEARANCE AS ZIP3_TOP10,
  ZIP3_AVG_CENT_DIFF AS ZIP3_AVG_CENT_DIFF,
  ZIP3_DISCOUNTED_LOWEST_COUNT/ZIP3_TOTAL_RESULT_APPEARANCE AS ZIP3_DISCOUNTED_LOWEST,
  ZIP3_DISCOUNTED_AVG_RATIO_TO_LOWEST AS ZIP3_DISCOUNTED_AVG_RATIO_TO_LOWEST,
  ZIP3_DISCOUNTED_AVG_RANK AS ZIP3_DISCOUNTED_AVG_RANK,
  ZIP3_DISCOUNTED_TOP5_COUNT/ZIP3_TOTAL_RESULT_APPEARANCE AS ZIP3_DISCOUNTED_TOP5,
  ZIP3_DISCOUNTED_TOP10_COUNT/ZIP3_TOTAL_RESULT_APPEARANCE AS ZIP3_DISCOUNTED_TOP10,
  ZIP3_DISCOUNTED_AVG_CENT_DIFF AS ZIP3_DISCOUNTED_AVG_CENT_DIFF,
  -- ZIP5
  main.ZIP5 AS ZIP5,
  ZIP5_MED_SUPP_TOOL_NUM_QUOTE AS ZIP5_MED_SUPP_TOOL_NUM_QUOTE,
  ZIP5_TOTAL_RESULT_APPEARANCE,
  ZIP5_LOWEST_COUNT/ZIP5_TOTAL_RESULT_APPEARANCE AS ZIP5_LOWEST,
  ZIP5_AVG_RATIO_TO_LOWEST AS ZIP5_AVG_RATIO_TO_LOWEST,
  ZIP5_AVG_RANK AS ZIP5_AVG_RANK,
  ZIP5_TOP5_COUNT/ZIP5_TOTAL_RESULT_APPEARANCE AS ZIP5_TOP5,
  ZIP5_TOP10_COUNT/ZIP5_TOTAL_RESULT_APPEARANCE AS ZIP5_TOP10,
  ZIP5_AVG_CENT_DIFF AS ZIP5_AVG_CENT_DIFF,
  ZIP5_DISCOUNTED_LOWEST_COUNT/ZIP5_TOTAL_RESULT_APPEARANCE AS ZIP5_DISCOUNTED_LOWEST,
  ZIP5_DISCOUNTED_AVG_RATIO_TO_LOWEST AS ZIP5_DISCOUNTED_AVG_RATIO_TO_LOWEST,
  ZIP5_DISCOUNTED_AVG_RANK AS ZIP5_DISCOUNTED_AVG_RANK,
  ZIP5_DISCOUNTED_TOP5_COUNT/ZIP5_TOTAL_RESULT_APPEARANCE AS ZIP5_DISCOUNTED_TOP5,
  ZIP5_DISCOUNTED_TOP10_COUNT/ZIP5_TOTAL_RESULT_APPEARANCE AS ZIP5_DISCOUNTED_TOP10,
  ZIP5_DISCOUNTED_AVG_CENT_DIFF AS ZIP5_DISCOUNTED_AVG_CENT_DIFF
FROM (
  -- main: the distinct (company, rating class, ZIP3, ZIP5) combinations that
  -- drive the output rows.
  SELECT COMPANY_NAME, COMPANY_KEY, RATING_CLASS, ZIP3, ZIP5
  FROM [csgapi:qh_med_supp_tool.v2_TX]
  WHERE SEARCH_TIMESTAMP >= TIMESTAMP('2016-01-01T00:00:00.000Z')
    AND SEARCH_TIMESTAMP <= TIMESTAMP('2017-01-01T00:00:00.000Z')
    AND user_key NOT IN ("6522869941010432", "6277136540237824")
  GROUP BY COMPANY_NAME, COMPANY_KEY, RATING_CLASS, ZIP3, ZIP5
) AS main
LEFT JOIN (
  -- zip3_sub: metrics per (ZIP3, company_key, rating_class).
  SELECT
    ZIP3, company_key, rating_class,
    COUNT(*) AS ZIP3_TOTAL_RESULT_APPEARANCE,
    SUM(CASE WHEN lowest = TRUE THEN 1 ELSE 0 END) AS ZIP3_LOWEST_COUNT,
    -- Undiscounted averages read the undiscounted columns; only the
    -- ZIP3_DISCOUNTED_* averages read the discounted_* columns.
    AVG(ratio_to_min) AS ZIP3_AVG_RATIO_TO_LOWEST,
    AVG(rate_order) AS ZIP3_AVG_RANK,
    SUM(CASE WHEN top5 = TRUE THEN 1 ELSE 0 END) AS ZIP3_TOP5_COUNT,
    SUM(CASE WHEN top10 = TRUE THEN 1 ELSE 0 END) AS ZIP3_TOP10_COUNT,
    AVG(cent_diff) AS ZIP3_AVG_CENT_DIFF,
    SUM(CASE WHEN DISCOUNTED_lowest = TRUE THEN 1 ELSE 0 END) AS ZIP3_DISCOUNTED_LOWEST_COUNT,
    AVG(discounted_ratio_to_min) AS ZIP3_DISCOUNTED_AVG_RATIO_TO_LOWEST,
    AVG(discounted_rate_order) AS ZIP3_DISCOUNTED_AVG_RANK,
    SUM(CASE WHEN DISCOUNTED_top5 = TRUE THEN 1 ELSE 0 END) AS ZIP3_DISCOUNTED_TOP5_COUNT,
    SUM(CASE WHEN DISCOUNTED_top10 = TRUE THEN 1 ELSE 0 END) AS ZIP3_DISCOUNTED_TOP10_COUNT,
    AVG(discounted_cent_diff) AS ZIP3_DISCOUNTED_AVG_CENT_DIFF
  FROM [csgapi:qh_med_supp_tool.v2_TX]
  WHERE SEARCH_TIMESTAMP >= TIMESTAMP('2016-01-01T00:00:00.000Z')
    AND SEARCH_TIMESTAMP <= TIMESTAMP('2017-01-01T00:00:00.000Z')
    AND user_key NOT IN ("6522869941010432", "6277136540237824")
  GROUP BY ZIP3, company_key, rating_class
) AS zip3_sub
ON main.ZIP3 = zip3_sub.ZIP3 AND main.company_key = zip3_sub.company_key AND main.rating_class = zip3_sub.rating_class
LEFT JOIN (
  -- zip5_sub: same metrics as zip3_sub, per (ZIP5, company_key, rating_class).
  SELECT
    ZIP5, company_key, rating_class,
    COUNT(*) AS ZIP5_TOTAL_RESULT_APPEARANCE,
    SUM(CASE WHEN lowest = TRUE THEN 1 ELSE 0 END) AS ZIP5_LOWEST_COUNT,
    AVG(ratio_to_min) AS ZIP5_AVG_RATIO_TO_LOWEST,
    AVG(rate_order) AS ZIP5_AVG_RANK,
    SUM(CASE WHEN top5 = TRUE THEN 1 ELSE 0 END) AS ZIP5_TOP5_COUNT,
    SUM(CASE WHEN top10 = TRUE THEN 1 ELSE 0 END) AS ZIP5_TOP10_COUNT,
    AVG(cent_diff) AS ZIP5_AVG_CENT_DIFF,
    SUM(CASE WHEN DISCOUNTED_lowest = TRUE THEN 1 ELSE 0 END) AS ZIP5_DISCOUNTED_LOWEST_COUNT,
    AVG(discounted_ratio_to_min) AS ZIP5_DISCOUNTED_AVG_RATIO_TO_LOWEST,
    AVG(discounted_rate_order) AS ZIP5_DISCOUNTED_AVG_RANK,
    SUM(CASE WHEN DISCOUNTED_top5 = TRUE THEN 1 ELSE 0 END) AS ZIP5_DISCOUNTED_TOP5_COUNT,
    SUM(CASE WHEN DISCOUNTED_top10 = TRUE THEN 1 ELSE 0 END) AS ZIP5_DISCOUNTED_TOP10_COUNT,
    AVG(discounted_cent_diff) AS ZIP5_DISCOUNTED_AVG_CENT_DIFF
  FROM [csgapi:qh_med_supp_tool.v2_TX]
  WHERE SEARCH_TIMESTAMP >= TIMESTAMP('2016-01-01T00:00:00.000Z')
    AND SEARCH_TIMESTAMP <= TIMESTAMP('2017-01-01T00:00:00.000Z')
    AND user_key NOT IN ("6522869941010432", "6277136540237824")
  GROUP BY ZIP5, company_key, rating_class
) AS zip5_sub
ON main.ZIP5 = zip5_sub.ZIP5 AND main.company_key = zip5_sub.company_key AND main.rating_class = zip5_sub.rating_class
LEFT JOIN (
  -- zip3_count: distinct quotes (logging_key) per ZIP3. In legacy SQL,
  -- COUNT(DISTINCT) is a statistical approximation; EXACT_COUNT_DISTINCT
  -- returns exact numbers at a higher cost.
  SELECT ZIP3, COUNT(DISTINCT logging_key) AS ZIP3_MED_SUPP_TOOL_NUM_QUOTE
  FROM [csgapi:qh_med_supp_tool.v2_TX]
  WHERE SEARCH_TIMESTAMP >= TIMESTAMP('2016-01-01T00:00:00.000Z')
    AND SEARCH_TIMESTAMP <= TIMESTAMP('2017-01-01T00:00:00.000Z')
    AND user_key NOT IN ("6522869941010432", "6277136540237824")
  GROUP BY ZIP3
) AS zip3_count
ON main.ZIP3 = zip3_count.ZIP3
LEFT JOIN (
  -- zip5_count: distinct quotes (logging_key) per ZIP5 (same approximation
  -- caveat as zip3_count).
  SELECT ZIP5, COUNT(DISTINCT logging_key) AS ZIP5_MED_SUPP_TOOL_NUM_QUOTE
  FROM [csgapi:qh_med_supp_tool.v2_TX]
  WHERE SEARCH_TIMESTAMP >= TIMESTAMP('2016-01-01T00:00:00.000Z')
    AND SEARCH_TIMESTAMP <= TIMESTAMP('2017-01-01T00:00:00.000Z')
    AND user_key NOT IN ("6522869941010432", "6277136540237824")
  GROUP BY ZIP5
) AS zip5_count
ON main.ZIP5 = zip5_count.ZIP5
另外,请注意:在 BigQuery 旧版 SQL 中,COUNT(DISTINCT) 函数是概率性的,它给出的是统计近似值,不保证精确。您可以改用 EXACT_COUNT_DISTINCT() 函数,它会返回精确的数字,但在后端的开销会更大一些
当然,也可以用 BigQuery 标准 SQL 重写整个查询: 在标准 SQL 中 COUNT(DISTINCT) 返回精确计数,而且 BigQuery 团队也建议优先使用标准 SQL
【讨论】:
哇。你简直让我大吃一惊。我想就 BigQuery 及其分配资源的方式而言,这是完全合理的。感谢您与我们分享这一点。我已发布更新后的查询来说明您建议的更改。它可以正常运行,平均运行时间从 86 秒缩短到了 30 秒。以上是关于“查询执行期间资源超出。BigQuery”的主要内容,如果未能解决你的问题,请参考以下文章