各种数据格式的Hive建表语句

Posted

tags:

篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了各种数据格式的Hive建表语句相关的知识,希望对你有一定的参考价值。

XML格式

-- External Hive table over XML documents, read with the IBM SPSS XmlSerDe.
-- Each column is filled from the XPath expression registered for it under
-- "column.xpath.<column>" in SERDEPROPERTIES; SoapContent takes the child
-- nodes (/*) of its element, the other columns take the element text.
-- Data is partitioned by dt.
CREATE EXTERNAL TABLE Gateway_pmsarisoap (
    BookingSoapLogID STRING,
    GuidNo           STRING,
    SoapType         STRING,
    SoapContent      STRING,
    InsertDate       STRING,
    SourceOpsType    STRING
)
PARTITIONED BY (`dt` STRING)
ROW FORMAT SERDE 'com.ibm.spss.hive.serde2.xml.XmlSerDe'
WITH SERDEPROPERTIES (
    "column.xpath.BookingSoapLogID" = "/HWSoapBase/BookingSoapLogID/text()",
    "column.xpath.GuidNo"           = "/HWSoapBase/GuidNo/text()",
    "column.xpath.SoapType"         = "/HWSoapBase/SoapType/text()",
    "column.xpath.SoapContent"      = "/HWSoapBase/SoapContent/*",
    "column.xpath.InsertDate"       = "/HWSoapBase/InsertDate/text()",
    "column.xpath.SourceOpsType"    = "/HWSoapBase/SourceOpsType/text()"
)
STORED AS
    INPUTFORMAT  'com.ibm.spss.hive.serde2.xml.XmlInputFormat'
    OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat'
LOCATION 'hdfs://ns1/wh/source/hw/Gateway/PmsARISoap'
-- Start/end markers of one <HWSoapBase> record; the start marker omits the
-- closing '>' (presumably so a tag carrying attributes still matches).
TBLPROPERTIES (
    "xmlinput.start" = "<HWSoapBase",
    "xmlinput.end"   = "</HWSoapBase>"
);

JSON格式

-- External Hive table over JSON records, deserialized with the Cloudera
-- JSONSerDe and read through the LZO text input format. Partitioned by dt.
-- `Start` and `End` are backquoted because they are reserved words in Hive
-- (END in all versions, START from Hive 2.0); quoting does not change the
-- column names.
CREATE EXTERNAL TABLE QuhuhuGateway_pmsinvcountnotify (
    CountType STRING,
    Count     STRING,
    HotelCode STRING,
    `Start`   STRING,
    `End`     STRING
)
PARTITIONED BY (dt STRING)
ROW FORMAT SERDE 'com.cloudera.hive.serde.JSONSerDe'
STORED AS
    INPUTFORMAT  'com.hadoop.mapred.DeprecatedLzoTextInputFormat'
    OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
LOCATION 'hdfs://ns1/wh/source/hw/QuhuhuGateway/PmsInvCountNotify';

ORC格式

-- External Hive table stored as ORC. Not partitioned.
-- NOTE(review): the ROW FORMAT DELIMITED clause is kept from the original
-- statement; ORC is a binary columnar format, so the tab delimiter most
-- likely has no effect on how data is read or written — confirm and drop
-- it if so.
CREATE EXTERNAL TABLE BWAdmin_Log (
    `LogID`       BIGINT,
    `AccountID`   BIGINT,
    `VHotelID`    BIGINT,
    `LogType`     STRING,
    `LogComment`  STRING,
    `OperateTime` INT
)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY '\t'
STORED AS ORC
LOCATION 'hdfs://ns1/wh/source/bw/hotel/admin_log';

AVRO格式

-- External Hive table over Avro container files. Partitioned by dt.
-- No column list is declared here: the AvroSerDe takes the table schema
-- from the .avsc file referenced by 'avro.schema.url'.
CREATE EXTERNAL TABLE `hotel_list`
PARTITIONED BY (`dt` STRING)
ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe'
WITH SERDEPROPERTIES (
    'avro.schema.url' = 'hdfs://ns1/wh/config/schema/web/online/hotel_list.avsc'
)
STORED AS
    INPUTFORMAT  'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat'
    OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat'
LOCATION 'hdfs://ns1/wh/format/online_search';

LZO格式

-- External Hive table over tab-delimited, newline-terminated text read
-- through the LZO text input format. Partitioned by dt.
CREATE EXTERNAL TABLE online_test (
    sid  INT,
    pvid INT,
    ts   BIGINT
)
PARTITIONED BY (dt STRING)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY '\t'
    LINES TERMINATED BY '\n'
STORED AS
    INPUTFORMAT  'com.hadoop.mapred.DeprecatedLzoTextInputFormat'
    OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
LOCATION 'hdfs://ns1/test/online';

TEXT格式

-- External Hive table over plain tab-delimited, newline-terminated text
-- files. Partitioned by dt.
CREATE EXTERNAL TABLE `order_currenthis` (
    `orderid` STRING,
    `room`    INT
)
PARTITIONED BY (`dt` STRING)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY '\t'
    LINES TERMINATED BY '\n'
STORED AS
    INPUTFORMAT  'org.apache.hadoop.mapred.TextInputFormat'
    OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
LOCATION 'hdfs://ns1/wh/format/otb/order_currenthis';

以上是关于各种数据格式的Hive建表语句的主要内容,如果未能解决你的问题,请参考以下文章:

hive的几种文件格式

hive 非正确json格式字段造成查询错误

hive 建表方式及参数详解

hive建表详注小记(备忘)

Hive 文件格式

大数据入门-五分钟读懂Hive