hive的特殊函数named_struct、inline及特殊类型map<string, array<string>>
Posted 风路丞
tags:
篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了hive的特殊函数named_struct、inline及特殊类型map<string, array<string>>相关的知识,希望对你有一定的参考价值。
spark-sql> desc test2;
userid int NULL
date string NULL
list_01 array<string> NULL
target string NULL
group_num int NULL
Time taken: 0.526 seconds, Fetched 5 row(s)
spark-sql> select * from test2;
1 20220303 ["1:20220202","1:20220202","2:20220202","3:20220303","3:20220303","3:20220303","3:20220303","1:20220303"] 1:20220303 1
1 20220202 ["1:20220202","1:20220202","2:20220202","3:20220303","3:20220303","3:20220303","3:20220303","1:20220303"] 1:20220202 1
1 20220202 ["1:20220202","1:20220202","2:20220202","3:20220303","3:20220303","3:20220303","3:20220303","1:20220303"] 1:20220202 1
2 20220202 ["1:20220202","1:20220202","2:20220202","3:20220303","3:20220303","3:20220303","3:20220303","1:20220303"] 2:20220202 2
3 20220303 ["1:20220202","1:20220202","2:20220202","3:20220303","3:20220303","3:20220303","3:20220303","1:20220303"] 3:20220303 2
3 20220303 ["1:20220202","1:20220202","2:20220202","3:20220303","3:20220303","3:20220303","3:20220303","1:20220303"] 3:20220303 2
3 20220303 ["1:20220202","1:20220202","2:20220202","3:20220303","3:20220303","3:20220303","3:20220303","1:20220303"] 3:20220303 3
3 20220303 ["1:20220202","1:20220202","2:20220202","3:20220303","3:20220303","3:20220303","3:20220303","1:20220303"] 3:20220303 3
spark-sql> select named_struct('a',target, 'b',list_01) from test2 limit 1;
"a":"1:20220303","b":["1:20220202","1:20220202","2:20220202","3:20220303","3:20220303","3:20220303","3:20220303","1:20220303"]
1、构建 类型 struct<key:string,value:array<string>>
spark-sql>
-- Step 1: build a struct column of type struct<a:string, b:array<string>>.
-- named_struct pairs each row's target (string) with its list_01
-- (array<string>), producing one struct per source row; group_num is
-- carried along unchanged so the next step can aggregate by it.
drop table if exists test3;
create table test3 as
select
named_struct('a',target, 'b',list_01) as content_shi_group,
group_num
from
test2;
"a":"1:20220303","b":["1:20220202","1:20220202","2:20220202","3:20220303","3:20220303","3:20220303","3:20220303","1:20220303"] 1
"a":"1:20220202","b":["1:20220202","1:20220202","2:20220202","3:20220303","3:20220303","3:20220303","3:20220303","1:20220303"] 1
"a":"1:20220202","b":["1:20220202","1:20220202","2:20220202","3:20220303","3:20220303","3:20220303","3:20220303","1:20220303"] 1
"a":"2:20220202","b":["1:20220202","1:20220202","2:20220202","3:20220303","3:20220303","3:20220303","3:20220303","1:20220303"] 2
"a":"3:20220303","b":["1:20220202","1:20220202","2:20220202","3:20220303","3:20220303","3:20220303","3:20220303","1:20220303"] 2
"a":"3:20220303","b":["1:20220202","1:20220202","2:20220202","3:20220303","3:20220303","3:20220303","3:20220303","1:20220303"] 2
"a":"3:20220303","b":["1:20220202","1:20220202","2:20220202","3:20220303","3:20220303","3:20220303","3:20220303","1:20220303"] 3
"a":"3:20220303","b":["1:20220202","1:20220202","2:20220202","3:20220303","3:20220303","3:20220303","3:20220303","1:20220303"] 3
2、按group_num聚合,生成 类型array<struct<key:string,value:array<string>>>
spark-sql>
-- Step 2: aggregate the structs per group_num, producing a column of type
-- array<struct<a:string, b:array<string>>>.
-- NOTE(review): collect_list keeps duplicate structs and does not guarantee
-- element order within the resulting array.
drop table if exists test4;
create table test4 as
select
group_num, collect_list(content_shi_group) as list_struct
from test3
group by group_num;
1 ["a":"1:20220303","b":["1:20220202","1:20220202","2:20220202","3:20220303","3:20220303","3:20220303","3:20220303","1:20220303"],"a":"1:20220202","b":["1:20220202","1:20220202","2:20220202","3:20220303","3:20220303","3:20220303","3:20220303","1:20220303"],"a":"1:20220202","b":["1:20220202","1:20220202","2:20220202","3:20220303","3:20220303","3:20220303","3:20220303","1:20220303"]]
3 ["a":"3:20220303","b":["1:20220202","1:20220202","2:20220202","3:20220303","3:20220303","3:20220303","3:20220303","1:20220303"],"a":"3:20220303","b":["1:20220202","1:20220202","2:20220202","3:20220303","3:20220303","3:20220303","3:20220303","1:20220303"]]
2 ["a":"2:20220202","b":["1:20220202","1:20220202","2:20220202","3:20220303","3:20220303","3:20220303","3:20220303","1:20220303"],"a":"3:20220303","b":["1:20220202","1:20220202","2:20220202","3:20220303","3:20220303","3:20220303","3:20220303","1:20220303"],"a":"3:20220303","b":["1:20220202","1:20220202","2:20220202","3:20220303","3:20220303","3:20220303","3:20220303","1:20220303"]]
3、类型 array<struct<key:string,value:array<string>>> 转 map<string,array<string>>
spark-sql>
-- Step 3: convert array<struct<...>> to map<string, array<string>>.
-- inline(list_struct) explodes each struct element into its own row with
-- two columns (aliased k1, v1); map(k1, v1) then wraps each pair as a
-- single-entry map.
-- NOTE(review): this yields one single-entry map per struct, NOT one merged
-- map per group_num — duplicate keys within a group stay on separate rows,
-- as the output below shows.
drop table if exists test5;
create table test5 as
select
group_num, map(k1,v1) as map_list
from test4
lateral view inline(list_struct) t1 as k1,v1;
1 "1:20220303":["1:20220202","1:20220202","2:20220202","3:20220303","3:20220303","3:20220303","3:20220303","1:20220303"]
1 "1:20220202":["1:20220202","1:20220202","2:20220202","3:20220303","3:20220303","3:20220303","3:20220303","1:20220303"]
1 "1:20220202":["1:20220202","1:20220202","2:20220202","3:20220303","3:20220303","3:20220303","3:20220303","1:20220303"]
3 "3:20220303":["1:20220202","1:20220202","2:20220202","3:20220303","3:20220303","3:20220303","3:20220303","1:20220303"]
3 "3:20220303":["1:20220202","1:20220202","2:20220202","3:20220303","3:20220303","3:20220303","3:20220303","1:20220303"]
2 "2:20220202":["1:20220202","1:20220202","2:20220202","3:20220303","3:20220303","3:20220303","3:20220303","1:20220303"]
2 "3:20220303":["1:20220202","1:20220202","2:20220202","3:20220303","3:20220303","3:20220303","3:20220303","1:20220303"]
2 "3:20220303":["1:20220202","1:20220202","2:20220202","3:20220303","3:20220303","3:20220303","3:20220303","1:20220303"]
spark-sql> select inline(list_struct) from test4 where group_num=1;
1:20220303 ["1:20220202","1:20220202","2:20220202","3:20220303","3:20220303","3:20220303","3:20220303","1:20220303"]
1:20220202 ["1:20220202","1:20220202","2:20220202","3:20220303","3:20220303","3:20220303","3:20220303","1:20220303"]
1:20220202 ["1:20220202","1:20220202","2:20220202","3:20220303","3:20220303","3:20220303","3:20220303","1:20220303"]
4、直接生成hive map<string, array<string>>类型
spark-sql>
-- Step 4 (alternative): build map<string, array<string>> directly with
-- map(target, list_01), one single-entry map per source row — this skips
-- the intermediate struct / collect_list / inline round-trip when no
-- per-group aggregation is needed.
drop table if exists test3_02;
create table test3_02 as
select
map(target, list_01) as content_group,
group_num
from
test2 ;
"1:20220303":["1:20220202","1:20220202","2:20220202","3:20220303","3:20220303","3:20220303","3:20220303","1:20220303"] 1
"1:20220202":["1:20220202","1:20220202","2:20220202","3:20220303","3:20220303","3:20220303","3:20220303","1:20220303"] 1
"1:20220202":["1:20220202","1:20220202","2:20220202","3:20220303","3:20220303","3:20220303","3:20220303","1:20220303"] 1
"2:20220202":["1:20220202","1:20220202","2:20220202","3:20220303","3:20220303","3:20220303","3:20220303","1:20220303"] 2
"3:20220303":["1:20220202","1:20220202","2:20220202","3:20220303","3:20220303","3:20220303","3:20220303","1:20220303"] 2
"3:20220303":["1:20220202","1:20220202","2:20220202","3:20220303","3:20220303","3:20220303","3:20220303","1:20220303"] 2
"3:20220303":["1:20220202","1:20220202","2:20220202","3:20220303","3:20220303","3:20220303","3:20220303","1:20220303"] 3
"3:20220303":["1:20220202","1:20220202","2:20220202","3:20220303","3:20220303","3:20220303","3:20220303","1:20220303"] 3
spark-sql> desc test3_02;
content_group map<string,array<string>> NULL
group_num int NULL
以上是关于hive的特殊函数named_struct、inline及特殊类型map<string, array<string>>的主要内容,如果未能解决你的问题,请参考以下文章