在 BigQuery 中按 Google Analytics 自定义维度过滤
Posted
技术标签:
【中文标题】在 BigQuery 中按 Google Analytics 自定义维度过滤【英文标题】:Filtering By Google Analytics Custom Dimension in BigQuery 【发布时间】:2018-11-16 17:06:02 【问题描述】:我一直在尝试将自定义维度过滤器添加到我在 BigQuery 中的工作查询中。自定义维度及其嵌套肯定会给查询增加一些复杂性,但我过去已经能够解决这个问题。这一次我运气不好。
查询很长,但本质上是我们正在跟踪的结帐渠道。我已将自定义维度添加到我认为必需的 SELECT 中,但出现错误“无法识别的名称:[274:9] 处的实践名称”。
#standardSQL
WITH
ga_tables AS (
SELECT
CAST(CONCAT(SUBSTR(date,1,4),'-',SUBSTR(date,5,2),'-',SUBSTR(date,7,2)) AS DATE) AS Date,
COUNT(DISTINCT s3_fullVisitorId) AS users,
COUNT(s0_firstHit) AS product_views,
COUNT(s1_firstHit) AS carts,
COUNT(s2_firstHit) AS order_confirmation
FROM (
SELECT
IFNULL(s3.date,
IFNULL(s0.date,
IFNULL(s1.date,
s2.date))) AS date,
s3.fullVisitorId s3_fullVisitorId,
s0.fullVisitorId s0_fullVisitorId,
s0.visitId,
s0.firstHit s0_firstHit,
s1.firstHit s1_firstHit,
s2.firstHit s2_firstHit
FROM (
# user subquery
SELECT
date,
fullVisitorId,
visitId,
(
SELECT
MAX(IF(index = 27,
value,
''))
FROM
UNNEST(customDimensions)) AS PracticeName
FROM
`big-query-project-34643.162968675.ga_sessions_*` ga
WHERE
_TABLE_SUFFIX BETWEEN FORMAT_DATE('%Y%m%d', DATE_SUB(CURRENT_DATE(), INTERVAL 100 DAY))
AND FORMAT_DATE('%Y%m%d', DATE_SUB(CURRENT_DATE(), INTERVAL 0 DAY))
AND totals.visits = 1
GROUP BY
date,
PracticeName,
fullVisitorId,
visitId) s3
FULL OUTER JOIN ((
# first subquery
SELECT
date,
fullVisitorId,
visitId,
MIN(h.hitNumber) AS firstHit,
(
SELECT
MAX(IF(index = 27,
value,
''))
FROM
UNNEST(ga.customDimensions)) AS PracticeName
FROM
`big-query-project-34643.162968675.ga_sessions_*` ga,
UNNEST(hits) AS h
WHERE
_TABLE_SUFFIX BETWEEN FORMAT_DATE('%Y%m%d', DATE_SUB(CURRENT_DATE(), INTERVAL 100 DAY))
AND FORMAT_DATE('%Y%m%d', DATE_SUB(CURRENT_DATE(), INTERVAL 0 DAY))
AND REGEXP_CONTAINS(h.page.pagePath, '/p/')
AND totals.visits = 1
GROUP BY
date,
PracticeName,
fullVisitorId,
visitId)) s0
ON
s3.fullVisitorId = s0.fullVisitorId
AND s3.visitId = s0.visitId
FULL OUTER JOIN ((
# Second Subquery
SELECT
date,
fullVisitorId,
visitId,
MIN(h.hitNumber) AS firstHit,
(
SELECT
MAX(IF(index = 27,
value,
''))
FROM
UNNEST(ga.customDimensions)) AS PracticeName
FROM
`big-query-project-34643.162968675.ga_sessions_*` ga,
UNNEST(hits) AS h
WHERE
_TABLE_SUFFIX BETWEEN FORMAT_DATE('%Y%m%d', DATE_SUB(CURRENT_DATE(), INTERVAL 100 DAY))
AND FORMAT_DATE('%Y%m%d', DATE_SUB(CURRENT_DATE(), INTERVAL 0 DAY))
AND REGEXP_CONTAINS(h.page.pagePath, '/cart')
AND totals.visits = 1
GROUP BY
date,
PracticeName,
fullVisitorId,
visitId)) s1
ON
s0.fullVisitorId = s1.fullVisitorId
AND s0.visitId = s1.visitId
FULL OUTER JOIN ((
# Third Subquery
SELECT
date,
fullVisitorId,
visitId,
MIN(h.hitNumber) AS firstHit,
(
SELECT
MAX(IF(index = 27,
value,
''))
FROM
UNNEST(ga.customDimensions)) AS PracticeName
FROM
`big-query-project-34643.162968675.ga_sessions_*` ga,
UNNEST(hits) AS h
WHERE
_TABLE_SUFFIX BETWEEN FORMAT_DATE('%Y%m%d', DATE_SUB(CURRENT_DATE(), INTERVAL 100 DAY))
AND FORMAT_DATE('%Y%m%d', DATE_SUB(CURRENT_DATE(), INTERVAL 0 DAY))
AND REGEXP_CONTAINS(h.page.pagePath, '/orderconfirmation')
AND totals.visits = 1
GROUP BY
date,
PracticeName,
fullVisitorId,
visitId)) s2
ON
s1.fullVisitorId = s2.fullVisitorId
AND s1.visitId = s2.visitId)
GROUP BY
date
UNION ALL
SELECT
CAST(CONCAT(SUBSTR(date,1,4),'-',SUBSTR(date,5,2),'-',SUBSTR(date,7,2)) AS DATE) AS Date,
COUNT(DISTINCT s3_fullVisitorId) AS users,
COUNT(s0_firstHit) AS product_views,
COUNT(s1_firstHit) AS order_details,
COUNT(s2_firstHit) AS order_confirmation
FROM (
SELECT
IFNULL(s3.date,
IFNULL(s0.date,
IFNULL(s1.date,
s2.date))) AS date,
s3.fullVisitorId s3_fullVisitorId,
s0.fullVisitorId s0_fullVisitorId,
s0.visitId,
s0.firstHit s0_firstHit,
s1.firstHit s1_firstHit,
s2.firstHit s2_firstHit
FROM (
# user subquery
SELECT
date,
fullVisitorId,
visitId,
(
SELECT
MAX(IF(index = 27,
value,
''))
FROM
UNNEST(gart.customDimensions)) AS PracticeName
FROM
`big-query-project-34643.162968675.ga_exportKey_view_2` gart
WHERE
totals.visits = 1
GROUP BY
date,
PracticeName,
fullVisitorId,
visitId) s3
FULL OUTER JOIN ((
# first subquery
SELECT
date,
fullVisitorId,
visitId,
MIN(h.hitNumber) AS firstHit,
(
SELECT
MAX(IF(index = 27,
value,
''))
FROM
UNNEST(gart.customDimensions)) AS PracticeName
FROM
`big-query-project-34643.162968675.ga_exportKey_view_2` gart,
UNNEST(hits) AS h
WHERE
REGEXP_CONTAINS(h.page.pagePath, '/p/')
AND totals.visits = 1
GROUP BY
date,
PracticeName,
fullVisitorId,
visitId)) s0
ON
s3.fullVisitorId = s0.fullVisitorId
AND s3.visitId = s0.visitId
FULL OUTER JOIN ((
# Second Subquery
SELECT
date,
fullVisitorId,
visitId,
MIN(h.hitNumber) AS firstHit,
(
SELECT
MAX(IF(index = 27,
value,
''))
FROM
UNNEST(gart.customDimensions)) AS PracticeName
FROM
`big-query-project-34643.162968675.ga_exportKey_view_2` gart,
UNNEST(hits) AS h
WHERE
REGEXP_CONTAINS(h.page.pagePath, '/cart')
AND totals.visits = 1
GROUP BY
date,
PracticeName,
fullVisitorId,
visitId)) s1
ON
s0.fullVisitorId = s1.fullVisitorId
AND s0.visitId = s1.visitId
FULL OUTER JOIN ((
# Third Subquery
SELECT
date,
fullVisitorId,
visitId,
MIN(h.hitNumber) AS firstHit,
(
SELECT
MAX(IF(index = 27,
value,
''))
FROM
UNNEST(gart.customDimensions)) AS PracticeName
FROM
`big-query-project-34643.162968675.ga_exportKey_view_2` gart,
UNNEST(hits) AS h
WHERE
REGEXP_CONTAINS(h.page.pagePath, '/orderconfirmation')
AND totals.visits = 1
GROUP BY
date,
PracticeName,
fullVisitorId,
visitId)) s2
ON
s1.fullVisitorId = s2.fullVisitorId
AND s1.visitId = s2.visitId)
GROUP BY
date)
SELECT
Date AS Date,
SUM(users) AS users,
SUM(product_views) AS product_views,
SUM(carts) AS Carts,
SUM(order_confirmation) AS order_confirmation
FROM
ga_tables
WHERE
LOWER(PracticeName) NOT LIKE '%demo%'
AND PracticeName NOT LIKE 'asdf'
GROUP BY
Date
ORDER BY
Date DESC
任何解决此问题的帮助将不胜感激。
【问题讨论】:
【参考方案1】:您正在尝试在 where 子句中使用 practicename,但它在 ga_tables 的最终版本中不可用。查看代码的顶部。看起来你有工作漏斗,然后你决定在那里添加自定义维度,但你没有将它添加到它应该在的所有地方。
WITH
ga_tables AS (
SELECT
CAST(CONCAT(SUBSTR(date,1,4),'-',SUBSTR(date,5,2),'-',SUBSTR(date,7,2)) AS date) AS date,
PracticeName,
COUNT(DISTINCT s3_fullvisitorid) AS users,
COUNT(s0_firsthit) AS product_views,
COUNT(s1_firsthit) AS carts,
COUNT(s2_firsthit) AS order_confirmation
FROM (
SELECT
ifnull(s3.date,
ifnull(s0.date,
ifnull(s1.date,
s2.date))) AS date,
s0.practicename PracticeName,
s3.fullvisitorid s3_fullvisitorid,
s0.fullvisitorid s0_fullvisitorid,
s0.visitid,
s0.firsthit s0_firsthit,
s1.firsthit s1_firsthit,
s2.firsthit s2_firsthit
FROM (
SELECT
date,
fullvisitorid,
visitid,
(
SELECT
MAX(IF(index = 27,
value,
''))
FROM
UNNEST(customdimensions)) AS practicename
FROM
`virtual-core-194015.157925963.ga_sessions_*` ga
WHERE
_table_suffix BETWEEN FORMAT_DATE('%Y%m%d', DATE_SUB(CURRENT_DATE(), INTERVAL 100 day))
AND FORMAT_DATE('%Y%m%d', DATE_SUB(CURRENT_DATE(), INTERVAL 0 day))
AND totals.visits = 1
GROUP BY
date,
practicename,
fullvisitorid,
visitid) s3
FULL OUTER JOIN ((
SELECT
date,
fullvisitorid,
visitid,
MIN(h.hitnumber) AS firsthit,
(
SELECT
MAX(IF(index = 27,
value,
''))
FROM
UNNEST(ga.customdimensions)) AS practicename
FROM
`virtual-core-194015.157925963.ga_sessions_*` ga,
UNNEST(hits) AS h
WHERE
_table_suffix BETWEEN FORMAT_DATE('%Y%m%d', DATE_SUB(CURRENT_DATE(), INTERVAL 100 day))
AND FORMAT_DATE('%Y%m%d', DATE_SUB(CURRENT_DATE(), INTERVAL 0 day))
AND REGEXP_CONTAINS(h.page.pagepath, '/p/')
AND totals.visits = 1
GROUP BY
date,
practicename,
fullvisitorid,
visitid)) s0
ON
s3.fullvisitorid = s0.fullvisitorid
AND s3.visitid = s0.visitid
FULL OUTER JOIN ((
SELECT
date,
fullvisitorid,
visitid,
MIN(h.hitnumber) AS firsthit,
(
SELECT
MAX(IF(index = 27,
value,
''))
FROM
UNNEST(ga.customdimensions)) AS practicename
FROM
`virtual-core-194015.157925963.ga_sessions_*` ga,
UNNEST(hits) AS h
WHERE
_table_suffix BETWEEN FORMAT_DATE('%Y%m%d', DATE_SUB(CURRENT_DATE(), INTERVAL 100 day))
AND FORMAT_DATE('%Y%m%d', DATE_SUB(CURRENT_DATE(), INTERVAL 0 day))
AND REGEXP_CONTAINS(h.page.pagepath, '/cart')
AND totals.visits = 1
GROUP BY
date,
practicename,
fullvisitorid,
visitid)) s1
ON
s0.fullvisitorid = s1.fullvisitorid
AND s0.visitid = s1.visitid
FULL OUTER JOIN (( # third subquery
SELECT
date,
fullvisitorid,
visitid,
MIN(h.hitnumber) AS firsthit,
(
SELECT
MAX(IF(index = 27,
value,
''))
FROM
UNNEST(ga.customdimensions)) AS practicename
FROM
`virtual-core-194015.157925963.ga_sessions_*` ga,
UNNEST(hits) AS h
WHERE
_table_suffix BETWEEN FORMAT_DATE('%Y%m%d', DATE_SUB(CURRENT_DATE(), INTERVAL 100 day))
AND FORMAT_DATE('%Y%m%d', DATE_SUB(CURRENT_DATE(), INTERVAL 0 day))
AND REGEXP_CONTAINS(h.page.pagepath, '/orderconfirmation')
AND totals.visits = 1
GROUP BY
date,
practicename,
fullvisitorid,
visitid)) s2
ON
s1.fullvisitorid = s2.fullvisitorid
AND s1.visitid = s2.visitid)
GROUP BY
date,2
UNION ALL
SELECT
CAST(CONCAT(SUBSTR(date,1,4),'-',SUBSTR(date,5,2),'-',SUBSTR(date,7,2)) AS date) AS date,
practicename,
COUNT(DISTINCT s3_fullvisitorid) AS users,
COUNT(s0_firsthit) AS product_views,
COUNT(s1_firsthit) AS order_details,
COUNT(s2_firsthit) AS order_confirmation
FROM (
SELECT
ifnull(s3.date,
ifnull(s0.date,
ifnull(s1.date,
s2.date))) AS date,
s0.practicename practicename,
s3.fullvisitorid s3_fullvisitorid,
s0.fullvisitorid s0_fullvisitorid,
s0.visitid,
s0.firsthit s0_firsthit,
s1.firsthit s1_firsthit,
s2.firsthit s2_firsthit
FROM ( # USER subquery
SELECT
date,
fullvisitorid,
visitid,
(
SELECT
MAX(IF(index = 27,
value,
''))
FROM
UNNEST(gart.customdimensions)) AS practicename
FROM
`virtual-core-194015.157925963.ga_sessions_*` gart
WHERE
totals.visits = 1
GROUP BY
date,
practicename,
fullvisitorid,
visitid) s3
FULL OUTER JOIN (( # first subquery
SELECT
date,
fullvisitorid,
visitid,
MIN(h.hitnumber) AS firsthit,
(
SELECT
MAX(IF(index = 27,
value,
''))
FROM
UNNEST(gart.customdimensions)) AS practicename
FROM
`virtual-core-194015.157925963.ga_sessions_*` gart,
UNNEST(hits) AS h
WHERE
REGEXP_CONTAINS(h.page.pagepath, '/p/')
AND totals.visits = 1
GROUP BY
date,
practicename,
fullvisitorid,
visitid)) s0
ON
s3.fullvisitorid = s0.fullvisitorid
AND s3.visitid = s0.visitid
FULL OUTER JOIN (( # second subquery
SELECT
date,
fullvisitorid,
visitid,
MIN(h.hitnumber) AS firsthit,
(
SELECT
MAX(IF(index = 27,
value,
''))
FROM
UNNEST(gart.customdimensions)) AS practicename
FROM
`virtual-core-194015.157925963.ga_sessions_*` gart,
UNNEST(hits) AS h
WHERE
REGEXP_CONTAINS(h.page.pagepath, '/cart')
AND totals.visits = 1
GROUP BY
date,
practicename,
fullvisitorid,
visitid)) s1
ON
s0.fullvisitorid = s1.fullvisitorid
AND s0.visitid = s1.visitid
FULL OUTER JOIN (( # third subquery
SELECT
date,
fullvisitorid,
visitid,
MIN(h.hitnumber) AS firsthit,
(
SELECT
MAX(IF(index = 27,
value,
''))
FROM
UNNEST(gart.customdimensions)) AS practicename
FROM
`virtual-core-194015.157925963.ga_sessions_*` gart,
UNNEST(hits) AS h
WHERE
REGEXP_CONTAINS(h.page.pagepath, '/orderconfirmation')
AND totals.visits = 1
GROUP BY
date,
practicename,
fullvisitorid,
visitid)) s2
ON
s1.fullvisitorid = s2.fullvisitorid
AND s1.visitid = s2.visitid)
GROUP BY
date,practicename)
SELECT
date AS date,
SUM(users) AS users,
SUM(product_views) AS product_views,
SUM(carts) AS carts,
SUM(order_confirmation) AS order_confirmation
FROM
ga_tables
WHERE
LOWER(PracticeName) NOT LIKE '%demo%'
AND PracticeName NOT LIKE 'asdf'
GROUP BY
date
ORDER BY
date DESC
【讨论】:
以上是关于在 BigQuery 中按 Google Analytics 自定义维度过滤的主要内容,如果未能解决你的问题,请参考以下文章