在 BigQuery 中按 Google Analytics 自定义维度过滤

Posted

技术标签:

【中文标题】在 BigQuery 中按 Google Analytics 自定义维度过滤【英文标题】:Filtering By Google Analytics Custom Dimension in BigQuery 【发布时间】:2018-11-16 17:06:02 【问题描述】:

我一直在尝试将自定义维度过滤器添加到我在 BigQuery 中的工作查询中。自定义维度及其嵌套肯定会给查询增加一些复杂性,但我过去已经能够解决这个问题。这一次我运气不好。

查询很长,但本质上是我们正在跟踪的结帐渠道。我已将自定义维度添加到我认为必需的 SELECT 中,但出现错误“无法识别的名称:[274:9] 处的实践名称”。

#standardSQL
WITH
  ga_tables AS (
  SELECT
    CAST(CONCAT(SUBSTR(date,1,4),'-',SUBSTR(date,5,2),'-',SUBSTR(date,7,2)) AS DATE) AS Date,
    COUNT(DISTINCT s3_fullVisitorId) AS users,
    COUNT(s0_firstHit) AS product_views,
    COUNT(s1_firstHit) AS carts,
    COUNT(s2_firstHit) AS order_confirmation
  FROM (
    SELECT
      IFNULL(s3.date,
        IFNULL(s0.date,
          IFNULL(s1.date,
            s2.date))) AS date,
      s3.fullVisitorId s3_fullVisitorId,
      s0.fullVisitorId s0_fullVisitorId,
      s0.visitId,
      s0.firstHit s0_firstHit,
      s1.firstHit s1_firstHit,
      s2.firstHit s2_firstHit
    FROM (
        # user subquery
      SELECT
        date,
        fullVisitorId,
        visitId,
        (
        SELECT
          MAX(IF(index = 27,
              value,
              ''))
        FROM
          UNNEST(customDimensions)) AS PracticeName
      FROM
        `big-query-project-34643.162968675.ga_sessions_*` ga
      WHERE
        _TABLE_SUFFIX BETWEEN FORMAT_DATE('%Y%m%d', DATE_SUB(CURRENT_DATE(), INTERVAL 100 DAY))
        AND FORMAT_DATE('%Y%m%d', DATE_SUB(CURRENT_DATE(), INTERVAL 0 DAY))
        AND totals.visits = 1
      GROUP BY
        date,
        PracticeName,
        fullVisitorId,
        visitId) s3
    FULL OUTER JOIN ((
          # first subquery
        SELECT
          date,
          fullVisitorId,
          visitId,
          MIN(h.hitNumber) AS firstHit,
          (
          SELECT
            MAX(IF(index = 27,
                value,
                ''))
          FROM
            UNNEST(ga.customDimensions)) AS PracticeName
        FROM
          `big-query-project-34643.162968675.ga_sessions_*` ga,
          UNNEST(hits) AS h
        WHERE
          _TABLE_SUFFIX BETWEEN FORMAT_DATE('%Y%m%d', DATE_SUB(CURRENT_DATE(), INTERVAL 100 DAY))
          AND FORMAT_DATE('%Y%m%d', DATE_SUB(CURRENT_DATE(), INTERVAL 0 DAY))
          AND REGEXP_CONTAINS(h.page.pagePath, '/p/')
          AND totals.visits = 1
        GROUP BY
          date,
          PracticeName,
          fullVisitorId,
          visitId)) s0
    ON
      s3.fullVisitorId = s0.fullVisitorId
      AND s3.visitId = s0.visitId
    FULL OUTER JOIN ((
          # Second Subquery
        SELECT
          date,
          fullVisitorId,
          visitId,
          MIN(h.hitNumber) AS firstHit,
          (
          SELECT
            MAX(IF(index = 27,
                value,
                ''))
          FROM
            UNNEST(ga.customDimensions)) AS PracticeName
        FROM
          `big-query-project-34643.162968675.ga_sessions_*` ga,
          UNNEST(hits) AS h
        WHERE
          _TABLE_SUFFIX BETWEEN FORMAT_DATE('%Y%m%d', DATE_SUB(CURRENT_DATE(), INTERVAL 100 DAY))
          AND FORMAT_DATE('%Y%m%d', DATE_SUB(CURRENT_DATE(), INTERVAL 0 DAY))
          AND REGEXP_CONTAINS(h.page.pagePath, '/cart')
          AND totals.visits = 1
        GROUP BY
          date,
          PracticeName,
          fullVisitorId,
          visitId)) s1
    ON
      s0.fullVisitorId = s1.fullVisitorId
      AND s0.visitId = s1.visitId
    FULL OUTER JOIN ((
          # Third Subquery
        SELECT
          date,
          fullVisitorId,
          visitId,
          MIN(h.hitNumber) AS firstHit,
          (
          SELECT
            MAX(IF(index = 27,
                value,
                ''))
          FROM
            UNNEST(ga.customDimensions)) AS PracticeName
        FROM
          `big-query-project-34643.162968675.ga_sessions_*` ga,
          UNNEST(hits) AS h
        WHERE
          _TABLE_SUFFIX BETWEEN FORMAT_DATE('%Y%m%d', DATE_SUB(CURRENT_DATE(), INTERVAL 100 DAY))
          AND FORMAT_DATE('%Y%m%d', DATE_SUB(CURRENT_DATE(), INTERVAL 0 DAY))
          AND REGEXP_CONTAINS(h.page.pagePath, '/orderconfirmation')
          AND totals.visits = 1
        GROUP BY
          date,
          PracticeName,
          fullVisitorId,
          visitId)) s2
    ON
      s1.fullVisitorId = s2.fullVisitorId
      AND s1.visitId = s2.visitId)
  GROUP BY
    date
  UNION ALL
  SELECT
    CAST(CONCAT(SUBSTR(date,1,4),'-',SUBSTR(date,5,2),'-',SUBSTR(date,7,2)) AS DATE) AS Date,
    COUNT(DISTINCT s3_fullVisitorId) AS users,
    COUNT(s0_firstHit) AS product_views,
    COUNT(s1_firstHit) AS order_details,
    COUNT(s2_firstHit) AS order_confirmation
  FROM (
    SELECT
      IFNULL(s3.date,
        IFNULL(s0.date,
          IFNULL(s1.date,
            s2.date))) AS date,
      s3.fullVisitorId s3_fullVisitorId,
      s0.fullVisitorId s0_fullVisitorId,
      s0.visitId,
      s0.firstHit s0_firstHit,
      s1.firstHit s1_firstHit,
      s2.firstHit s2_firstHit
    FROM (
        # user subquery
      SELECT
        date,
        fullVisitorId,
        visitId,
        (
        SELECT
          MAX(IF(index = 27,
              value,
              ''))
        FROM
          UNNEST(gart.customDimensions)) AS PracticeName
      FROM
        `big-query-project-34643.162968675.ga_exportKey_view_2` gart
      WHERE
        totals.visits = 1
      GROUP BY
        date,
        PracticeName,
        fullVisitorId,
        visitId) s3
    FULL OUTER JOIN ((
          # first subquery
        SELECT
          date,
          fullVisitorId,
          visitId,
          MIN(h.hitNumber) AS firstHit,
          (
          SELECT
            MAX(IF(index = 27,
                value,
                ''))
          FROM
            UNNEST(gart.customDimensions)) AS PracticeName
        FROM
          `big-query-project-34643.162968675.ga_exportKey_view_2` gart,
          UNNEST(hits) AS h
        WHERE
          REGEXP_CONTAINS(h.page.pagePath, '/p/')
          AND totals.visits = 1
        GROUP BY
          date,
          PracticeName,
          fullVisitorId,
          visitId)) s0
    ON
      s3.fullVisitorId = s0.fullVisitorId
      AND s3.visitId = s0.visitId
    FULL OUTER JOIN ((
          # Second Subquery
        SELECT
          date,
          fullVisitorId,
          visitId,
          MIN(h.hitNumber) AS firstHit,
          (
          SELECT
            MAX(IF(index = 27,
                value,
                ''))
          FROM
            UNNEST(gart.customDimensions)) AS PracticeName
        FROM
          `big-query-project-34643.162968675.ga_exportKey_view_2` gart,
          UNNEST(hits) AS h
        WHERE
          REGEXP_CONTAINS(h.page.pagePath, '/cart')
          AND totals.visits = 1
        GROUP BY
          date,
          PracticeName,
          fullVisitorId,
          visitId)) s1
    ON
      s0.fullVisitorId = s1.fullVisitorId
      AND s0.visitId = s1.visitId
    FULL OUTER JOIN ((
          # Third Subquery
        SELECT
          date,
          fullVisitorId,
          visitId,
          MIN(h.hitNumber) AS firstHit,
          (
          SELECT
            MAX(IF(index = 27,
                value,
                ''))
          FROM
            UNNEST(gart.customDimensions)) AS PracticeName
        FROM
          `big-query-project-34643.162968675.ga_exportKey_view_2` gart,
          UNNEST(hits) AS h
        WHERE
          REGEXP_CONTAINS(h.page.pagePath, '/orderconfirmation')
          AND totals.visits = 1
        GROUP BY
          date,
          PracticeName,
          fullVisitorId,
          visitId)) s2
    ON
      s1.fullVisitorId = s2.fullVisitorId
      AND s1.visitId = s2.visitId)
  GROUP BY
    date)
SELECT
  Date AS Date,
  SUM(users) AS users,
  SUM(product_views) AS product_views,
  SUM(carts) AS Carts,
  SUM(order_confirmation) AS order_confirmation
FROM
  ga_tables
WHERE
  LOWER(PracticeName) NOT LIKE '%demo%'
  AND PracticeName NOT LIKE 'asdf'
GROUP BY
  Date
ORDER BY
  Date DESC

任何解决此问题的帮助将不胜感激。

【问题讨论】:

【参考方案1】:

您正在尝试在 where 子句中使用 practicename,但它在 ga_tables 的最终版本中不可用。查看代码的顶部。看起来你有工作漏斗,然后你决定在那里添加自定义维度,但你没有将它添加到它应该在的所有地方。

WITH
  ga_tables AS (
  SELECT
    CAST(CONCAT(SUBSTR(date,1,4),'-',SUBSTR(date,5,2),'-',SUBSTR(date,7,2)) AS date) AS date,
    PracticeName,
    COUNT(DISTINCT s3_fullvisitorid) AS users,
    COUNT(s0_firsthit) AS product_views,
    COUNT(s1_firsthit) AS carts,
    COUNT(s2_firsthit) AS order_confirmation
  FROM (
    SELECT
      ifnull(s3.date,
        ifnull(s0.date,
          ifnull(s1.date,
            s2.date))) AS date,
      s0.practicename PracticeName,
      s3.fullvisitorid s3_fullvisitorid,
      s0.fullvisitorid s0_fullvisitorid,
      s0.visitid,
      s0.firsthit s0_firsthit,
      s1.firsthit s1_firsthit,
      s2.firsthit s2_firsthit
    FROM (
      SELECT
        date,
        fullvisitorid,
        visitid,
        (
        SELECT
          MAX(IF(index = 27,
              value,
              ''))
        FROM
          UNNEST(customdimensions)) AS practicename
      FROM
        `virtual-core-194015.157925963.ga_sessions_*` ga
      WHERE
        _table_suffix BETWEEN FORMAT_DATE('%Y%m%d', DATE_SUB(CURRENT_DATE(), INTERVAL 100 day))
        AND FORMAT_DATE('%Y%m%d', DATE_SUB(CURRENT_DATE(), INTERVAL 0 day))
        AND totals.visits = 1
      GROUP BY
        date,
        practicename,
        fullvisitorid,
        visitid) s3
    FULL OUTER JOIN ((
        SELECT
          date,
          fullvisitorid,
          visitid,
          MIN(h.hitnumber) AS firsthit,
          (
          SELECT
            MAX(IF(index = 27,
                value,
                ''))
          FROM
            UNNEST(ga.customdimensions)) AS practicename
        FROM
          `virtual-core-194015.157925963.ga_sessions_*` ga,
          UNNEST(hits) AS h
        WHERE
          _table_suffix BETWEEN FORMAT_DATE('%Y%m%d', DATE_SUB(CURRENT_DATE(), INTERVAL 100 day))
          AND FORMAT_DATE('%Y%m%d', DATE_SUB(CURRENT_DATE(), INTERVAL 0 day))
          AND REGEXP_CONTAINS(h.page.pagepath, '/p/')
          AND totals.visits = 1
        GROUP BY
          date,
          practicename,
          fullvisitorid,
          visitid)) s0
    ON
      s3.fullvisitorid = s0.fullvisitorid
      AND s3.visitid = s0.visitid
    FULL OUTER JOIN ((
        SELECT
          date,
          fullvisitorid,
          visitid,
          MIN(h.hitnumber) AS firsthit,
          (
          SELECT
            MAX(IF(index = 27,
                value,
                ''))
          FROM
            UNNEST(ga.customdimensions)) AS practicename
        FROM
          `virtual-core-194015.157925963.ga_sessions_*` ga,
          UNNEST(hits) AS h
        WHERE
          _table_suffix BETWEEN FORMAT_DATE('%Y%m%d', DATE_SUB(CURRENT_DATE(), INTERVAL 100 day))
          AND FORMAT_DATE('%Y%m%d', DATE_SUB(CURRENT_DATE(), INTERVAL 0 day))
          AND REGEXP_CONTAINS(h.page.pagepath, '/cart')
          AND totals.visits = 1
        GROUP BY
          date,
          practicename,
          fullvisitorid,
          visitid)) s1
    ON
      s0.fullvisitorid = s1.fullvisitorid
      AND s0.visitid = s1.visitid
    FULL OUTER JOIN (( # third subquery
        SELECT
          date,
          fullvisitorid,
          visitid,
          MIN(h.hitnumber) AS firsthit,
          (
          SELECT
            MAX(IF(index = 27,
                value,
                ''))
          FROM
            UNNEST(ga.customdimensions)) AS practicename
        FROM
          `virtual-core-194015.157925963.ga_sessions_*` ga,
          UNNEST(hits) AS h
        WHERE
          _table_suffix BETWEEN FORMAT_DATE('%Y%m%d', DATE_SUB(CURRENT_DATE(), INTERVAL 100 day))
          AND FORMAT_DATE('%Y%m%d', DATE_SUB(CURRENT_DATE(), INTERVAL 0 day))
          AND REGEXP_CONTAINS(h.page.pagepath, '/orderconfirmation')
          AND totals.visits = 1
        GROUP BY
          date,
          practicename,
          fullvisitorid,
          visitid)) s2
    ON
      s1.fullvisitorid = s2.fullvisitorid
      AND s1.visitid = s2.visitid)
  GROUP BY
    date,2
  UNION ALL
  SELECT
    CAST(CONCAT(SUBSTR(date,1,4),'-',SUBSTR(date,5,2),'-',SUBSTR(date,7,2)) AS date) AS date,
    practicename,
    COUNT(DISTINCT s3_fullvisitorid) AS users,
    COUNT(s0_firsthit) AS product_views,
    COUNT(s1_firsthit) AS order_details,
    COUNT(s2_firsthit) AS order_confirmation
  FROM (
    SELECT
      ifnull(s3.date,
        ifnull(s0.date,
          ifnull(s1.date,
            s2.date))) AS date,
      s0.practicename practicename,
      s3.fullvisitorid s3_fullvisitorid,
      s0.fullvisitorid s0_fullvisitorid,
      s0.visitid,
      s0.firsthit s0_firsthit,
      s1.firsthit s1_firsthit,
      s2.firsthit s2_firsthit
    FROM ( # USER subquery
      SELECT
        date,
        fullvisitorid,
        visitid,

        (
        SELECT
          MAX(IF(index = 27,
              value,
              ''))
        FROM
          UNNEST(gart.customdimensions)) AS practicename
      FROM
        `virtual-core-194015.157925963.ga_sessions_*` gart
      WHERE
        totals.visits = 1
      GROUP BY
        date,
        practicename,
        fullvisitorid,
        visitid) s3

    FULL OUTER JOIN (( # first subquery
        SELECT
          date,
          fullvisitorid,
          visitid,
          MIN(h.hitnumber) AS firsthit,
          (
          SELECT
            MAX(IF(index = 27,
                value,
                ''))
          FROM
            UNNEST(gart.customdimensions)) AS practicename
        FROM
          `virtual-core-194015.157925963.ga_sessions_*` gart,
          UNNEST(hits) AS h
        WHERE
          REGEXP_CONTAINS(h.page.pagepath, '/p/')
          AND totals.visits = 1
        GROUP BY
          date,
          practicename,
          fullvisitorid,
          visitid)) s0
    ON
      s3.fullvisitorid = s0.fullvisitorid
      AND s3.visitid = s0.visitid
    FULL OUTER JOIN (( # second subquery
        SELECT
          date,
          fullvisitorid,
          visitid,
          MIN(h.hitnumber) AS firsthit,
          (
          SELECT
            MAX(IF(index = 27,
                value,
                ''))
          FROM
            UNNEST(gart.customdimensions)) AS practicename
        FROM
          `virtual-core-194015.157925963.ga_sessions_*` gart,
          UNNEST(hits) AS h
        WHERE
          REGEXP_CONTAINS(h.page.pagepath, '/cart')
          AND totals.visits = 1
        GROUP BY
          date,
          practicename,
          fullvisitorid,
          visitid)) s1
    ON
      s0.fullvisitorid = s1.fullvisitorid
      AND s0.visitid = s1.visitid
    FULL OUTER JOIN (( # third subquery
        SELECT
          date,
          fullvisitorid,
          visitid,
          MIN(h.hitnumber) AS firsthit,
          (
          SELECT
            MAX(IF(index = 27,
                value,
                ''))
          FROM
            UNNEST(gart.customdimensions)) AS practicename
        FROM
          `virtual-core-194015.157925963.ga_sessions_*` gart,
          UNNEST(hits) AS h
        WHERE
          REGEXP_CONTAINS(h.page.pagepath, '/orderconfirmation')
          AND totals.visits = 1
        GROUP BY
          date,
          practicename,
          fullvisitorid,
          visitid)) s2
    ON
      s1.fullvisitorid = s2.fullvisitorid
      AND s1.visitid = s2.visitid)
  GROUP BY
    date,practicename)
SELECT
  date AS date,
  SUM(users) AS users,
  SUM(product_views) AS product_views,
  SUM(carts) AS carts,
  SUM(order_confirmation) AS order_confirmation
FROM
  ga_tables
WHERE
  LOWER(PracticeName) NOT LIKE '%demo%'
  AND PracticeName NOT LIKE 'asdf'
GROUP BY
  date
ORDER BY
  date DESC

【讨论】:

以上是关于在 BigQuery 中按 Google Analytics 自定义维度过滤的主要内容,如果未能解决你的问题,请参考以下文章

如何在bigquery中按月创建分区

在 BigQuery 中按字母顺序连接列

在 bigquery 中按月显示数据

如何在bigquery中按月/年汇总

在 Bigquery 中按用户计算登录之间的时间

unnest 是不是始终在 Bigquery 中按命中数的升序显示数据