text Hive array,map,struct复杂数据结构
-- 链接内含: map, struct, array的创建和访问方式
-- 建议建立一个dummy表, 只插入一条数据即可, 便于select操作使用
-- struct类型的任何字段不支持null赋值, 必须给每个字段手动置'', 0.0这种的, 如果实在想用null, 那么需要cast(null as string)
-- Experiment: a STRUCT column that contains a nested ARRAY<STRUCT>.
-- The INSERT ... SELECT statements below read tmp.test_struct_array purely as a
-- row source (constant projection + LIMIT 1), so that table must already exist
-- and contain at least one row, otherwise nothing is inserted.
-- NOTE(review): in this file tmp.test_struct_array is (re)created further down;
-- confirm the intended execution order.
DROP TABLE IF EXISTS tmp.test_struct_array_struct;

CREATE TABLE tmp.test_struct_array_struct (
    `field_0`     STRING,
    `charge_data` STRUCT<fake_price: DOUBLE, red_envelope: ARRAY<STRUCT<id: STRING, price: DOUBLE>>>
)
COMMENT '测试结构体-数组嵌套'
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY '\t';

-- Row whose nested array holds two fully populated structs.
INSERT INTO TABLE tmp.test_struct_array_struct
SELECT
    '1',
    NAMED_STRUCT(
        'fake_price', 100.0,
        'red_envelope', ARRAY(
            NAMED_STRUCT('id', 're10', 'price', 10.0),
            NAMED_STRUCT('id', 're11', 'price', 11.0)
        )
    )
FROM tmp.test_struct_array
LIMIT 1;

-- TODO: how should an empty ARRAY be handled?
-- This variant inserts a single placeholder struct ('' / 0.0) instead of an
-- empty array.
INSERT INTO TABLE tmp.test_struct_array_struct
SELECT
    '2',
    NAMED_STRUCT(
        'fake_price', 200.0,
        'red_envelope', ARRAY(
            NAMED_STRUCT('id', '', 'price', 0.0)
        )
    )
FROM tmp.test_struct_array
LIMIT 1;

-- TODO: does not pass (author's note). An argument-less NAMED_STRUCT() inside
-- the array is rejected; the statement is kept as a record of the failing attempt.
INSERT INTO TABLE tmp.test_struct_array_struct
SELECT
    '2',
    NAMED_STRUCT(
        'fake_price', 200.0,
        'red_envelope', ARRAY(NAMED_STRUCT())
    )
FROM tmp.test_struct_array
LIMIT 1;
-- Experiment: an ARRAY<STRUCT> column.
DROP TABLE IF EXISTS tmp.test_struct_array;

CREATE TABLE tmp.test_struct_array (
    `field_0`     STRING,
    `charge_data` ARRAY<STRUCT<id: STRING, price: DOUBLE>>
)
COMMENT '测试结构体-数组嵌套'
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY '\t';

-- Test data (passed, per the author's note). These are rows of the data file,
-- not SQL, so they are kept here as comments:
--   1 re10:10.0,re11:11.0
--   2 re2:20.0
-- NOTE(review): the sample rows appear to use ',' between array items and ':'
-- between struct fields, while the DDL above only declares the field
-- delimiter ('\t') -- confirm which delimiters the real data file uses.

-- Building nested structures through INSERT ... SELECT (passed).
-- The FROM table is read only as a row source; it must hold at least one row.
INSERT INTO TABLE tmp.test_struct_array
SELECT
    '1',
    ARRAY(NAMED_STRUCT('id', 're100', 'price', 100.0))
FROM tmp.test_struct_array
LIMIT 1;

-- Copy the nested array out of the struct column of tmp.test_struct_array_struct.
INSERT INTO TABLE tmp.test_struct_array
SELECT
    '1',
    charge_data.red_envelope
FROM tmp.test_struct_array_struct
LIMIT 1;
-- Experiment: a MAP<STRING, STRING> column.
DROP TABLE IF EXISTS tmp.test_map;

CREATE TABLE tmp.test_map (
    `field_0`     STRING,
    `charge_data` MAP<STRING, STRING>
)
COMMENT '测试map'
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY '\t';

-- Insert one row whose map column is an empty map().
-- tmp.test_struct_array_struct is read only as a row source (LIMIT 1) and must
-- contain at least one row for anything to be inserted.
INSERT INTO TABLE tmp.test_map
SELECT
    '1',
    MAP()
FROM tmp.test_struct_array_struct
LIMIT 1;
-- Experiment: an ARRAY<STRING> column.
DROP TABLE IF EXISTS tmp.test_array;

CREATE TABLE tmp.test_array (
    `field_0`     STRING,
    `charge_data` ARRAY<STRING>
)
COMMENT '测试数组'
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY '\t';

-- Author's note: when the array element type is string, inserting an empty
-- array() passes and the column reads back as [].
-- tmp.test_struct_array is read only as a row source (LIMIT 1).
INSERT INTO TABLE tmp.test_array
SELECT
    '1',
    ARRAY()
FROM tmp.test_struct_array
LIMIT 1;
-- Author's note: a struct cannot be assigned NULL or struct().
-- This statement records that attempt; tmp.test_struct is not defined in the
-- visible part of this file.
INSERT INTO TABLE tmp.test_struct PARTITION (dt = '2018-06-27')
SELECT
    NULL
FROM tmp.test_struct_array
LIMIT 1;
-- Experiment: an ARRAY column whose elements are MAP<STRING, STRING>.
DROP TABLE IF EXISTS tmp.test_array_map;

CREATE TABLE tmp.test_array_map (
    `field_0`     STRING,
    `charge_data` ARRAY<MAP<STRING, STRING>>
)
COMMENT '测试array, map嵌套'
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY '\t';

-- Insert one row whose array holds a single empty map().
-- tmp.test_struct_array_struct is read only as a row source (LIMIT 1).
INSERT INTO TABLE tmp.test_array_map
SELECT
    '1',
    ARRAY(MAP())
FROM tmp.test_struct_array_struct
LIMIT 1;
-- Experiment: a STRUCT column containing an ARRAY of MAP<STRING, STRING>.
DROP TABLE IF EXISTS tmp.test_struct_array_map;

CREATE TABLE tmp.test_struct_array_map (
    `field_0`     STRING,
    `charge_data` STRUCT<fake_price: DOUBLE, red_envelope: ARRAY<MAP<STRING, STRING>>>
)
COMMENT '测试struct, array, map嵌套'
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY '\t';

-- Test fails (author's note): an empty map() nested inside the struct's array.
INSERT INTO TABLE tmp.test_struct_array_map
SELECT
    '1',
    NAMED_STRUCT(
        'fake_price', 100.0,
        'red_envelope', ARRAY(MAP())
    )
FROM tmp.test_struct_array_struct
LIMIT 1;

-- Same shape, but with two populated maps; every map value is a string,
-- including the prices.
INSERT INTO TABLE tmp.test_struct_array_map
SELECT
    '1',
    NAMED_STRUCT(
        'fake_price', 100.0,
        'red_envelope', ARRAY(
            MAP('id', 're10', 'price', '10.0'),
            MAP('id', 're11', 'price', '11.0')
        )
    )
FROM tmp.test_struct_array
LIMIT 1;

-- Test passes (author's note): read the string prices of the first two array
-- elements and cast them to DOUBLE.
SELECT
    CAST(charge_data.red_envelope[0]['price'] AS DOUBLE),
    CAST(charge_data.red_envelope[1]['price'] AS DOUBLE)
FROM tmp.test_struct_array_map;

参考链接2: https://blog.csdn.net/sl1992/article/details/53894481
关于struct的讨论文章, 以及为什么不支持null的解释:https://technicaltidbit.blogspot.com/2013/02/hive-struct-tips.html
关于hive嵌套数据结构分隔符问题的解释(总结: 一共内置8级, 仅支持手动覆盖前三级, 默认分隔符依次为字节值 1 到 8, 即 \u0001 到 \u0008, 八进制写法为 \001 到 \010):
As answered in: HIVE nested ARRAY in MAP data type, you can only override the first three delimiters in hive, while hive actually supports 8. In nested data structures, for each nesting level, a consequent delimiter is used.
In your hive table, the delimiter between fields in the struct that is inside the address map is \u0004 (Unicode code point 4), and it can't be overridden.
