hive的特殊函数named_struct、inline及特殊类型map<string, array<string>>

Posted 风路丞

tags:

篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了hive的特殊函数named_struct、inline及特殊类型map<string, array<string>>相关的知识,希望对你有一定的参考价值。

spark-sql> desc test2;
userid int NULL
date string NULL
list_01 array<string> NULL
target string NULL
group_num int NULL
Time taken: 0.526 seconds, Fetched 5 row(s)

spark-sql> select * from test2;
1 20220303 ["1:20220202","1:20220202","2:20220202","3:20220303","3:20220303","3:20220303","3:20220303","1:20220303"] 1:20220303 1
1 20220202 ["1:20220202","1:20220202","2:20220202","3:20220303","3:20220303","3:20220303","3:20220303","1:20220303"] 1:20220202 1
1 20220202 ["1:20220202","1:20220202","2:20220202","3:20220303","3:20220303","3:20220303","3:20220303","1:20220303"] 1:20220202 1
2 20220202 ["1:20220202","1:20220202","2:20220202","3:20220303","3:20220303","3:20220303","3:20220303","1:20220303"] 2:20220202 2
3 20220303 ["1:20220202","1:20220202","2:20220202","3:20220303","3:20220303","3:20220303","3:20220303","1:20220303"] 3:20220303 2
3 20220303 ["1:20220202","1:20220202","2:20220202","3:20220303","3:20220303","3:20220303","3:20220303","1:20220303"] 3:20220303 2
3 20220303 ["1:20220202","1:20220202","2:20220202","3:20220303","3:20220303","3:20220303","3:20220303","1:20220303"] 3:20220303 3
3 20220303 ["1:20220202","1:20220202","2:20220202","3:20220303","3:20220303","3:20220303","3:20220303","1:20220303"] 3:20220303 3

spark-sql> select named_struct('a',target, 'b',list_01) from test2 limit 1;
"a":"1:20220303","b":["1:20220202","1:20220202","2:20220202","3:20220303","3:20220303","3:20220303","3:20220303","1:20220303"]

1、构建 类型 struct<key:string,value:array<string>>
spark-sql> 
    drop table if exists test3;
    create table test3 as 
    select 
        named_struct('a',target, 'b',list_01) as content_shi_group,
        group_num
    from 
        test2;
"a":"1:20220303","b":["1:20220202","1:20220202","2:20220202","3:20220303","3:20220303","3:20220303","3:20220303","1:20220303"] 1
"a":"1:20220202","b":["1:20220202","1:20220202","2:20220202","3:20220303","3:20220303","3:20220303","3:20220303","1:20220303"] 1
"a":"1:20220202","b":["1:20220202","1:20220202","2:20220202","3:20220303","3:20220303","3:20220303","3:20220303","1:20220303"] 1
"a":"2:20220202","b":["1:20220202","1:20220202","2:20220202","3:20220303","3:20220303","3:20220303","3:20220303","1:20220303"] 2
"a":"3:20220303","b":["1:20220202","1:20220202","2:20220202","3:20220303","3:20220303","3:20220303","3:20220303","1:20220303"] 2
"a":"3:20220303","b":["1:20220202","1:20220202","2:20220202","3:20220303","3:20220303","3:20220303","3:20220303","1:20220303"] 2
"a":"3:20220303","b":["1:20220202","1:20220202","2:20220202","3:20220303","3:20220303","3:20220303","3:20220303","1:20220303"] 3
"a":"3:20220303","b":["1:20220202","1:20220202","2:20220202","3:20220303","3:20220303","3:20220303","3:20220303","1:20220303"] 3

2、按group_num聚合,生成 类型array<struct<key:string,value:array<string>>>
spark-sql> 
    drop table if exists test4;
    create table test4 as 
    select 
        group_num, collect_list(content_shi_group) as list_struct
    from test3 
    group by group_num;
1 ["a":"1:20220303","b":["1:20220202","1:20220202","2:20220202","3:20220303","3:20220303","3:20220303","3:20220303","1:20220303"],"a":"1:20220202","b":["1:20220202","1:20220202","2:20220202","3:20220303","3:20220303","3:20220303","3:20220303","1:20220303"],"a":"1:20220202","b":["1:20220202","1:20220202","2:20220202","3:20220303","3:20220303","3:20220303","3:20220303","1:20220303"]]
3 ["a":"3:20220303","b":["1:20220202","1:20220202","2:20220202","3:20220303","3:20220303","3:20220303","3:20220303","1:20220303"],"a":"3:20220303","b":["1:20220202","1:20220202","2:20220202","3:20220303","3:20220303","3:20220303","3:20220303","1:20220303"]]
2 ["a":"2:20220202","b":["1:20220202","1:20220202","2:20220202","3:20220303","3:20220303","3:20220303","3:20220303","1:20220303"],"a":"3:20220303","b":["1:20220202","1:20220202","2:20220202","3:20220303","3:20220303","3:20220303","3:20220303","1:20220303"],"a":"3:20220303","b":["1:20220202","1:20220202","2:20220202","3:20220303","3:20220303","3:20220303","3:20220303","1:20220303"]]

3、类型 array<struct<key:string,value:array<string>>> 转 map<string,array<string>>
spark-sql> 
    drop table if exists test5;
    create table test5 as 
    select 
        group_num, map(k1,v1) as map_list
    from test4 
    lateral view inline(list_struct) t1 as k1,v1;
1 "1:20220303":["1:20220202","1:20220202","2:20220202","3:20220303","3:20220303","3:20220303","3:20220303","1:20220303"]
1 "1:20220202":["1:20220202","1:20220202","2:20220202","3:20220303","3:20220303","3:20220303","3:20220303","1:20220303"]
1 "1:20220202":["1:20220202","1:20220202","2:20220202","3:20220303","3:20220303","3:20220303","3:20220303","1:20220303"]
3 "3:20220303":["1:20220202","1:20220202","2:20220202","3:20220303","3:20220303","3:20220303","3:20220303","1:20220303"]
3 "3:20220303":["1:20220202","1:20220202","2:20220202","3:20220303","3:20220303","3:20220303","3:20220303","1:20220303"]
2 "2:20220202":["1:20220202","1:20220202","2:20220202","3:20220303","3:20220303","3:20220303","3:20220303","1:20220303"]
2 "3:20220303":["1:20220202","1:20220202","2:20220202","3:20220303","3:20220303","3:20220303","3:20220303","1:20220303"]
2 "3:20220303":["1:20220202","1:20220202","2:20220202","3:20220303","3:20220303","3:20220303","3:20220303","1:20220303"]

spark-sql> select inline(list_struct) from test4 where group_num=1;
1:20220303 ["1:20220202","1:20220202","2:20220202","3:20220303","3:20220303","3:20220303","3:20220303","1:20220303"]
1:20220202 ["1:20220202","1:20220202","2:20220202","3:20220303","3:20220303","3:20220303","3:20220303","1:20220303"]
1:20220202 ["1:20220202","1:20220202","2:20220202","3:20220303","3:20220303","3:20220303","3:20220303","1:20220303"]

4、直接生成hive map<string, array<string>>类型
spark-sql> 
    drop table if exists test3_02;
    create table test3_02 as 
    select 
        map(target, list_01) as content_group,
        group_num
    from 
    test2 ;
"1:20220303":["1:20220202","1:20220202","2:20220202","3:20220303","3:20220303","3:20220303","3:20220303","1:20220303"] 1
"1:20220202":["1:20220202","1:20220202","2:20220202","3:20220303","3:20220303","3:20220303","3:20220303","1:20220303"] 1
"1:20220202":["1:20220202","1:20220202","2:20220202","3:20220303","3:20220303","3:20220303","3:20220303","1:20220303"] 1
"2:20220202":["1:20220202","1:20220202","2:20220202","3:20220303","3:20220303","3:20220303","3:20220303","1:20220303"] 2
"3:20220303":["1:20220202","1:20220202","2:20220202","3:20220303","3:20220303","3:20220303","3:20220303","1:20220303"] 2
"3:20220303":["1:20220202","1:20220202","2:20220202","3:20220303","3:20220303","3:20220303","3:20220303","1:20220303"] 2
"3:20220303":["1:20220202","1:20220202","2:20220202","3:20220303","3:20220303","3:20220303","3:20220303","1:20220303"] 3
"3:20220303":["1:20220202","1:20220202","2:20220202","3:20220303","3:20220303","3:20220303","3:20220303","1:20220303"] 3
spark-sql> desc test3_02;
content_group map<string,array<string>> NULL
group_num int NULL

以上是关于hive的特殊函数named_struct、inline及特殊类型map<string, array<string>>的主要内容,如果未能解决你的问题,请参考以下文章

Hive入门函数入门

hive一些函数总结

编写scala版hive的自定义函数

hive函数之~行转列与列转行

hive怎样取出字段包含的所有特殊字符

hive distinct关键字