如何修复此 BigQuery 表架构的更新查询?

Posted

技术标签:

【中文标题】如何修复此 BigQuery 表架构的更新查询?【英文标题】:How can I fix the update query for this BigQuery table schema? 【发布时间】:2021-09-18 09:42:52 【问题描述】:

我有以下数据结构:

[
   {
      "mode": "NULLABLE",
      "name": "id",
      "type": "STRING"
   },
   {
    "fields": [
      {
        "mode": "NULLABLE",
        "name": "key",
        "type": "STRING"
      },
      {
        "mode": "NULLABLE",
        "name": "lifeid",
        "type": "STRING"
      },
      {
        "mode": "NULLABLE",
        "name": "monthly",
        "type": "STRING"
      },
      {
        "mode": "NULLABLE",
        "name": "thingtype",
        "type": "STRING"
      },
      {
        "mode": "NULLABLE",
        "name": "thingvalue",
        "type": "INTEGER"
      },
      {
        "fields": [
          {
            "mode": "NULLABLE",
            "name": "premium",
            "type": "STRING"
          },
          {
            "mode": "NULLABLE",
            "name": "provider",
            "type": "STRING"
          },
          {
            "mode": "NULLABLE",
            "name": "rank",
            "type": "STRING"
          }
        ],
        "mode": "NULLABLE",
        "name": "provider",
        "type": "RECORD"
      },
      {
        "fields": [
          {
            "mode": "NULLABLE",
            "name": "someBool",
            "type": "BOOLEAN"
          },
          {
            "fields": [
              {
                "mode": "NULLABLE",
                "name": "code",
                "type": "STRING"
              },
              {
                "mode": "NULLABLE",
                "name": "description",
                "type": "STRING"
              },
              {
                "mode": "NULLABLE",
                "name": "listName",
                "type": "STRING"
              }
            ],
            "mode": "NULLABLE",
            "name": "anotherBool",
            "type": "RECORD"
          },
          {
            "fields": [
              {
                "mode": "NULLABLE",
                "name": "caseid",
                "type": "STRING"
              },
              {
                "mode": "NULLABLE",
                "name": "dateCreated",
                "type": "TIMESTAMP"
              },
              {
                "mode": "NULLABLE",
                "name": "dateLastModified",
                "type": "TIMESTAMP"
              },
              {
                "mode": "NULLABLE",
                "name": "interviewTerminated",
                "type": "BOOLEAN"
              },
              {
                "mode": "NULLABLE",
                "name": "status",
                "type": "STRING"
              }
            ],
            "mode": "NULLABLE",
            "name": "casedata",
            "type": "RECORD"
          },
          {
            "mode": "NULLABLE",
            "name": "hasAttachments",
            "type": "BOOLEAN"
          },
          {
            "mode": "NULLABLE",
            "name": "hasX",
            "type": "BOOLEAN"
          },
          {
            "mode": "NULLABLE",
            "name": "lifeId",
            "type": "STRING"
          },
          {
            "mode": "NULLABLE",
            "name": "lifeIdentifier",
            "type": "STRING"
          },
          {
            "fields": [
              {
                "mode": "NULLABLE",
                "name": "code",
                "type": "STRING"
              },
              {
                "mode": "NULLABLE",
                "name": "description",
                "type": "STRING"
              },
              {
                "mode": "NULLABLE",
                "name": "listName",
                "type": "STRING"
              }
            ],
            "mode": "NULLABLE",
            "name": "status",
            "type": "RECORD"
          },
          {
            "mode": "NULLABLE",
            "name": "mibComplete",
            "type": "BOOLEAN"
          },
          {
            "mode": "NULLABLE",
            "name": "thingid",
            "type": "STRING"
          },
          {
            "fields": [
              {
                "mode": "NULLABLE",
                "name": "code",
                "type": "STRING"
              },
              {
                "mode": "NULLABLE",
                "name": "description",
                "type": "STRING"
              },
              {
                "mode": "NULLABLE",
                "name": "listname",
                "type": "STRING"
              }
            ],
            "mode": "REPEATED",
            "name": "riskTypes",
            "type": "RECORD"
          }
        ],
        "mode": "NULLABLE",
        "name": "randomcompany",
        "type": "RECORD"
      },
      {
        "mode": "NULLABLE",
        "name": "termlength",
        "type": "STRING"
      },
      {
        "mode": "NULLABLE",
        "name": "timestamp",
        "type": "TIMESTAMP"
      }
    ],
    "mode": "REPEATED",
    "name": "things",
    "type": "RECORD"
   }
]

我正在尝试为给定的id 更新things,使用:

UPDATE `table` SET things = things ||  [
        struct<key STRING, lifeid STRING, monthly STRING, thingtype STRING, thingvalue INTEGER, 
            provider struct<premium STRING, provider STRING, rank STRING>, 
            randomcompany struct< 
                someBool BOOLEAN,
                anotherBool struct<code STRING, description STRING, listName STRING>,
                casedata struct<caseid STRING, dateCreated STRING, dateLastModified STRING, interviewTerminated BOOLEAN, status STRING>,
                hasAttachments BOOLEAN,
                hasX BOOLEAN,
                lifeId STRING,
                lifeIdentifier STRING,
                status struct<code STRING, description STRING, listName STRING>,
                mibComplete BOOLEAN,
                thingid STRING,
                riskTypes array<struct<code STRING, description STRING, listName STRING>>
            >, 
            termlength STRING, timestamp TIMESTAMP>
        ('example key', 'example lifeid', 'example monthly', 'example thingtype', 4, 
            struct('example premium' as premium, 'example provider' as provider, 'example rank' as rank), 
                (true,
                    struct('a code' as code, 'a description' as description, 'a listName' as listname),
                    struct('6752' as caseid, NULL as dateCreated, NULL as dateLastModified, false as interviewTerminated, 'OPEN' as status),
                    false,
                    false,
                    'LV25APL37',
                    'LV25APL37',
                    struct('a code' as code, 'a description' as description, 'a listName' as listname),
                    false,
                    'ST11006752',
                    [struct('a code' as code, 'a description' as description, 'a listName' as listname)]
                ), 
                'example termlength', NULL
        )
    ]
WHERE id = '7PHh1dN0HdVGCAzIDIVkPWQ4GjI3';

这不起作用,我收到错误:

No matching signature for operator || for argument types: ARRAY<STRUCT<key STRING, lifeid STRING, monthly STRING, ...>>, ARRAY<STRUCT<key STRING, lifeid STRING, monthly STRING, ...>>. Supported signatures: STRING || STRING; BYTES || BYTES; ARRAY || ARRAY at [1:77]

由于错误信息被截断,我无法找出究竟是哪里出了问题。如果我能看到完整的错误信息,就能知道更新语法的哪一部分不正确,这样我就不必到 Stack Overflow 上提问,而可以自己解决问题。我听说在控制台中运行以下命令可以显示完整的错误信息,但它似乎不起作用,显示的仍然是同样被截断的错误:

bq --format=prettyjson show -j project:US.bquxjob_...

请问这里更新的正确语法是什么?

另外,有没有办法调试这样的问题,例如,看到一个完整的错误。

在编写这样的语句时,使用别名的经验法则是什么?何时需要在查询的后半部分显式写出 struct( 或 [struct(?前半部分不是已经提供了有关类型的全部信息吗?

最后,我是否错过了 BigQuery 的要点?我应该把当前的数据结构分解成不同的表吗?结构肯定没那么复杂,应该很容易更新吧?!

【问题讨论】:

您需要确保被连接的两个数组具有完全相同的架构:所有字段的名称和类型都必须一致,否则就会出现这个错误。在您之前提出并已得到解答的两个类似问题中,解决办法正是如此,这里也需要采用同样的做法。如果仍然有问题并且需要帮助,请提供一条能够重现您数据的 CREATE TABLE 语句,以便我们进行测试 【参考方案1】:

只是粗略看了一下——请试试下面的写法(我发现有两处的类型写成了 STRING,但应该是 TIMESTAMP)

UPDATE `table` SET things = things ||  [
        struct<key STRING, lifeid STRING, monthly STRING, thingtype STRING, thingvalue INTEGER, 
            provider struct<premium STRING, provider STRING, rank STRING>, 
            randomcompany struct< 
                someBool BOOLEAN,
                anotherBool struct<code STRING, description STRING, listName STRING>,
                casedata struct<caseid STRING, dateCreated TIMESTAMP, dateLastModified TIMESTAMP, interviewTerminated BOOLEAN, status STRING>,
                hasAttachments BOOLEAN,
                hasX BOOLEAN,
                lifeId STRING,
                lifeIdentifier STRING,
                status struct<code STRING, description STRING, listName STRING>,
                mibComplete BOOLEAN,
                thingid STRING,
                riskTypes array<struct<code STRING, description STRING, listName STRING>>
            >, 
            termlength STRING, timestamp TIMESTAMP>
        ('example key', 'example lifeid', 'example monthly', 'example thingtype', 4, 
            struct('example premium' as premium, 'example provider' as provider, 'example rank' as rank), 
                (true,
                    struct('a code' as code, 'a description' as description, 'a listName' as listname),
                    struct('6752' as caseid, NULL as dateCreated, NULL as dateLastModified, false as interviewTerminated, 'OPEN' as status),
                    false,
                    false,
                    'LV25APL37',
                    'LV25APL37',
                    struct('a code' as code, 'a description' as description, 'a listName' as listname),
                    false,
                    'ST11006752',
                    [struct('a code' as code, 'a description' as description, 'a listName' as listname)]
                ), 
                'example termlength', NULL
        )
    ]
WHERE id = '7PHh1dN0HdVGCAzIDIVkPWQ4GjI3';

【讨论】:

以上是关于如何修复此 BigQuery 表架构的更新查询?的主要内容,如果未能解决你的问题,请参考以下文章

如何修复 BigQuery 表中意外重复的数据?

从 Bigquery 中的查询复制表

从表一的比较更新表二,bigQuery

BigQuery SQL:将视图 A 中的子查询作为嵌套表嵌入视图 B

如何在 bigquery 标准 sql 中展平结构?

如何通过 BigQuery 中的 WebUI 导出现有表的架构?