使用 Apache Drill 查询嵌套的复杂 json 结构

Posted

技术标签:

【中文标题】使用 Apache Drill 查询嵌套的复杂 json 结构【英文标题】:Query nested complex json structure with Apache Drill 【发布时间】:2017-06-06 19:24:01 【问题描述】:

我有一个 JSON 文件,其中包含许多如下所示的 JSON 对象:

{
"created_time": "2015-04-15T15:12:43+0000",
"updated_time": "2015-04-15T15:12:43+0000",
"comments": {
    "data": [
        {
            "created_time": "2015-04-15T18:09:06+0000",
            "can_remove": false,
            "like_count": 4,
            "user_likes": false,
            "from": {
                "name": "Ana Paula Mandlaze Gomez",
                "id": "1413157615397359"
            },
            "id": "981539778532568_981613971858482",
            "message": "\"Many people, especially ignorant people, want to punish you for speaking the truth, for being correct, for being you. Never apologize for being correct, or for being years ahead of your time. If you' re right and you know it, speak your mind. Even if you are a minority  of one, the truth is still the truth.\"Gandhi"
        },
        {
            "created_time": "2015-04-15T16:33:27+0000",
            "can_remove": false,
            "like_count": 2,
            "user_likes": false,
            "from": {
                "name": "Ana Paula Mandlaze Gomez",
                "id": "1413157615397359"
            },
            "id": "981539778532568_981574091862470",
            "message": "I love I love GHANDI"
        },
        {
            "created_time": "2015-04-22T19:47:59+0000",
            "can_remove": false,
            "like_count": 0,
            "user_likes": false,
            "from": {
                "name": "Fardous Alhorafi",
                "id": "1800049323548924"
            },
            "id": "981539778532568_985418828144663",
            "message": "كان عبقري"
        },
        {
            "created_time": "2015-04-15T21:06:35+0000",
            "can_remove": false,
            "like_count": 0,
            "user_likes": false,
            "from": {
                "name": "Jamal Abdulrahman",
                "id": "1366198393416315"
            },
            "id": "981539778532568_981681661851713",
            "message": "This great leader"
        },
        {
            "created_time": "2015-04-15T15:35:42+0000",
            "can_remove": false,
            "like_count": 0,
            "user_likes": false,
            "from": {
                "name": "Sergio Pellicciotta",
                "id": "1307515282677785"
            },
            "id": "981539778532568_981548471865032",
            "message": "ci vuole la pazienza"
        },
        {
            "created_time": "2015-04-15T16:31:44+0000",
            "can_remove": false,
            "like_count": 0,
            "user_likes": false,
            "from": {
                "name": "Merve Tacirli",
                "id": "773109599535260"
            },
            "id": "981539778532568_981573545195858",
            "message": "Key word: patient"
        },
        {
            "created_time": "2015-08-15T21:36:38+0000",
            "can_remove": false,
            "like_count": 0,
            "user_likes": false,
            "from": {
                "name": "Tu Abuela",
                "id": "10212662776625548"
            },
            "id": "981539778532568_1046913331995212",
            "message": ":)"
        },
        {
            "created_time": "2015-04-15T19:38:53+0000",
            "can_remove": false,
            "like_count": 0,
            "user_likes": false,
            "from": {
                "name": "Vikash Tiwari",
                "id": "977320485738496"
            },
            "id": "981539778532568_981645241855355",
            "message": "sadar naman bapu"
        },
        {
            "created_time": "2015-04-15T15:29:21+0000",
            "can_remove": false,
            "like_count": 0,
            "user_likes": false,
            "from": {
                "name": "Nikunj Patel",
                "id": "1370944359688528"
            },
            "id": "981539778532568_981545948531951",
            "message": "Aaapko khiladi ka salaaM"
        },
        {
            "created_time": "2015-04-16T06:01:33+0000",
            "can_remove": false,
            "like_count": 0,
            "user_likes": false,
            "from": {
                "name": "Satya Prakash",
                "id": "791386747691183"
            },
            "id": "981539778532568_981861258500420",
            "message": "......patience es depth of laef.."
        },
        {
            "created_time": "2015-04-27T20:46:37+0000",
            "can_remove": false,
            "like_count": 0,
            "user_likes": false,
            "from": {
                "name": "Javier Arce",
                "id": "1353955594657881"
            },
            "id": "981539778532568_988593714493841",
            "message": "COMIC - THE DAY GANDHI DIE - COMIC https://www.youtube.com/watch?v=_g7KpUx0qSQ"
        },
        {
            "created_time": "2015-04-15T15:28:59+0000",
            "can_remove": false,
            "like_count": 0,
            "user_likes": false,
            "from": {
                "name": "Nikunj Patel",
                "id": "1370944359688528"
            },
            "id": "981539778532568_981545825198630",
            "message": "Bhai"
        }
    ],
    "paging": {"cursors": {
        "before": "MTYZD",
        "after": "MQZDZD"
    }}
},
"status_type": "added_photos",
"link": "https://www.facebook.com/FatherofNation/photos/a.279218472098039.73934.173835512636336/981539778532568/?type=3",
"icon": "https://www.facebook.com/images/icons/photo.gif",
"is_hidden": false,
"privacy": {
    "allow": "",
    "deny": "",
    "description": "",
    "value": "",
    "friends": ""
},
"message": "You cannot achieve durable reform by becoming impatient. - Mahatma Gandhi",
"type": "photo",
"object_id": "981539778532568",
"picture": "https://fb-s-c-a.akamaihd.net/h-ak-fbx/v/t1.0-0/s130x130/10300287_981539778532568_5512778811597106826_n.jpg?oh=900c8f181224cef2b2b93688a0ef9d88&oe=599C0E7D&__gda__=1508137453_7f9508a338248a38665e21299f8410bb",
"shares": {"count": 140},
"is_expired": false,
"name": "Timeline Photos",
"from": {
    "name": "Mahatma Gandhi",
    "id": "173835512636336",
    "category": "Public Figure"
},
"id": "173835512636336_981539778532568",
"likes": {
    "data": [
        {
            "name": "Rai Santi",
            "id": "699214820279365"
        },
        {
            "name": "Rabari Bhavesh Bhandu",
            "id": "197158197474003"
        },
        {
            "name": "Cristiano Hussaini",
            "id": "469179660091433"
        },
        {
            "name": "D Pák Ský",
            "id": "342182466199173"
        },
        {
            "name": "Joseph Muendo",
            "id": "893394437480382"
        },
        {
            "name": "Kati Vali",
            "id": "10209489431495182"
        },
        {
            "name": "Neelam Gupta",
            "id": "278625325934685"
        },
        {
            "name": "Rahul Patel",
            "id": "298644327257948"
        },
        {
            "name": "Niti Jani",
            "id": "1520081698036960"
        },
        {
            "name": "Sara Maria Crespo Echeandía",
            "id": "10155452589526055"
        },
        {
            "name": "Sonia Quintana Castillo",
            "id": "485471615125223"
        },
        {
            "name": "Tello Maweng",
            "id": "720777431451520"
        },
        {
            "name": "David Figueroa",
            "id": "1886084661665963"
        },
        {
            "name": "Aphle Grace Celestre Devera",
            "id": "1517425511643699"
        },
        {
            "name": "Angel Legaspi Alquizar",
            "id": "327218104364636"
        },
        {
            "name": "Sanjeev Mishra",
            "id": "1207468336065542"
        },
        {
            "name": "Patricia Fastinger",
            "id": "1956586287688483"
        },
        {
            "name": "Bojana Simonovska",
            "id": "10155588604414614"
        },
        {
            "name": "Nath Schmidt",
            "id": "1337540319645871"
        },
        {
            "name": "Santoshi Nallanchakravartula",
            "id": "1364678026914798"
        },
        {
            "name": "Pabitra Kumar",
            "id": "631689807038744"
        },
        {
            "name": "Thaís Araujo",
            "id": "1350769495004631"
        },
        {
            "name": "Dilip Singh Rathore",
            "id": "1711441295817067"
        },
        {
            "name": "Kamlesh Saini",
            "id": "1987451464817042"
        },
        {
            "name": "SaDhu Ka",
            "id": "1490063764348943"
        }
    ],
    "paging": {
        "next": "https://graph.facebook.com/v2.3/173835512636336_981539778532568/likes?access_token=166536670200421%7Cb8948bb3c57f35a94afee5d5869854b7&limit=25&after=MTQ5MDA2Mzc2NDM0ODk0MwZDZD",
        "cursors": {
            "before": "Njk5MjE0ODIwMjc5MzY1",
            "after": "MTQ5MDA2Mzc2NDM0ODk0MwZDZD"
        }
    }
}
}

我想使用 Apache Drill 执行 SQL 查询来访问嵌套对象,例如 comments.data[0].message。我尝试了各种方法,也按照官方指南查询复杂的 JSON 数据,但都没有奏效。

SELECT comments.data[0].message FROM dfs.`/home/dhaker/Desktop/GhandiOffcialPage/Facebookposts.json`

这是来自 jdbc 的错误:

VALIDATION ERROR: From line 1, column 8 to line 1, column 15: Table 'comments' not found

SQL Query null

[Error Id: c50f3051-79f8-4f35-b9bd-fd51c06ecb61 on dhaker-X550V:31010]

【问题讨论】:

【参考方案1】:

我必须为路径添加一个别名,然后像这样调用:

SELECT posts.comments.data[0].message FROM dfs.`/home/dhaker/Desktop/GhandiOffcialPage/Facebookposts.json` posts;

这样可以正常工作。

【讨论】:

以上是关于使用 Apache Drill 查询嵌套的复杂 json 结构的主要内容,如果未能解决你的问题,请参考以下文章

Apache Drill 与 mongodb。在地图中查询具有特定键和值的文档

* 使用 apache Drill 将 saiku 与 mongo 连接时代替键

无法使用 apache Drill 1.2 配置 postgreSQL JDBC 驱动程序

我可以在 Raspberry Pi 上运行 Apache Drill 并发现查询计划的物理成本吗?

如何在 Apache Drill 中加入不等式?

Apache Drill - hiveserver2 jdbc 错误