hive映射Elasticsearch时间date类型问题
Posted 上官沐雪
tags:
篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了hive映射Elasticsearch时间date类型问题相关的知识,希望对你有一定的参考价值。
hive映射Elasticsearch时间date类型问题
Hive 映射 Elasticsearch 的 date 类型字段时,读取 yyyy-MM-dd HH:mm:ss 格式的时间会出现问题,需要自定义 ValueReader 类来解析时间类型,步骤如下:
1.建立hive与es映射表
-- External Hive table mapped onto the Elasticsearch resource named in 'es.resource' below.
-- NOTE(review): the 'last_modified_*' and 'transient_lastDdlTime' properties suggest this text
-- was copied from SHOW CREATE TABLE output - confirm they are wanted when re-running the DDL.
CREATE EXTERNAL TABLE `hive_es_dt_basic4_20210724_02`(
`id` string COMMENT 'from deserializer',
`country` string COMMENT 'from deserializer',
`regstatus` string COMMENT 'from deserializer',
`districtcode` string COMMENT 'from deserializer',
`opfrom` string COMMENT 'from deserializer',
`taxpayeridentifynum` string COMMENT 'from deserializer',
`brief` string COMMENT 'from deserializer',
`address` string COMMENT 'from deserializer',
`brands` string COMMENT 'from deserializer',
-- The only TIMESTAMP column; this is the date field the custom value reader configured below is meant to parse.
`updateTime` TIMESTAMP COMMENT 'from deserializer',
`dataSources` int COMMENT 'from deserializer')
ROW FORMAT SERDE
'org.elasticsearch.hadoop.hive.EsSerDe'
STORED BY
'org.elasticsearch.hadoop.hive.EsStorageHandler'
WITH SERDEPROPERTIES (
'serialization.format'='1')
TBLPROPERTIES (
'es.index.auto.create'='false',
'es.mapping.id'='id',
-- NOTE(review): several names listed here (e.g. regCapital, city, emailList) are not declared
-- as columns above, and some declared columns (e.g. opfrom, brief) are not listed - verify.
'es.mapping.names'='country:country,regStatus:regStatus,districtCode:districtCode,regCapital:regCapital,city:city,emailList:emailList,cityCode:cityCode,regOrg:regOrg,location:location,registerNum:registerNum,smallIndustryCode:smallIndustryCode,from:from,size:size,shortName:shortName,tmName:tmName,bigIndustryCode:bigIndustryCode,provinceCode:provinceCode,createTime:createTime,updateTime:updateTime,dataSources:dataSources',
-- Placeholder host list; replace ip1..ip5 with the real Elasticsearch node addresses.
'es.nodes'='ip1,ip2,ip3,ip4,ip5',
'es.port'='9200',
'es.read.metadata'='true',
'es.read.metadata.field'='_metadata',
'es.resource'='dt_basic4/dt_basic',
'last_modified_by'='root',
-- Read by com.zhendao.MyESReader.setSettings; the reader only applies its own date parsing
-- when this value matches yyyy-MM-dd HH:mm:ss.
'es.date.format'='yyyy-MM-dd HH:mm:ss',
-- Custom value reader class used when reading from Elasticsearch (defined in step 2).
'es.ser.reader.value.class'='com.zhendao.MyESReader',
'last_modified_time'='1617801697',
'transient_lastDdlTime'='1617801697');
参数:
'es.date.format'='yyyy-MM-dd HH:mm:ss':定义 ES 中日期字段的格式(由自定义读取类读取该配置)
'es.ser.reader.value.class'='com.zhendao.MyESReader':指定用自定义类读取 ES 中的值
2.自定义com.zhendao.MyESReader类
package com.zhendao;
import org.apache.hadoop.hive.serde2.io.TimestampWritable;
import org.elasticsearch.hadoop.cfg.Settings;
import org.elasticsearch.hadoop.hive.HiveValueReader;
import javax.xml.bind.DatatypeConverter;
import java.sql.Timestamp;
import java.text.ParseException;
import java.text.ParsePosition;
import java.text.SimpleDateFormat;
import java.util.Date;
/**
 * Hive value reader that parses Elasticsearch date fields stored as
 * {@code yyyy-MM-dd HH:mm:ss} strings.
 *
 * <p>The custom parsing is only applied when the {@code es.date.format} setting equals
 * {@code yyyy-MM-dd HH:mm:ss} (compared case-insensitively). In every other case, and whenever
 * a value does not match that pattern, parsing is delegated to {@link HiveValueReader} so that
 * tables using this reader without the setting keep the inherited behaviour.
 */
public class MyESReader extends HiveValueReader {

    /** The only date pattern this reader parses itself. */
    private static final String DEFAULT_DATE_FORMAT = "yyyy-MM-dd HH:mm:ss";

    /** Value of the {@code es.date.format} setting as read in {@link #setSettings(Settings)}. */
    private String dateFormat;

    /**
     * Passes the settings to the parent reader and remembers the configured date format.
     *
     * @param settings connector settings; {@code es.date.format} is read from it
     */
    @Override
    public void setSettings(Settings settings) {
        super.setSettings(settings);
        dateFormat = settings.getProperty("es.date.format");
    }

    /**
     * Parses a date string coming from Elasticsearch.
     *
     * <p>When {@code es.date.format} is {@code yyyy-MM-dd HH:mm:ss} and the trimmed value is
     * longer than 11 characters, the value is parsed with that pattern. If it does not match
     * the pattern (for example because the field holds a differently formatted date), the
     * parent implementation is used instead of failing with a {@code NullPointerException}.
     *
     * @param value    raw field value, may be {@code null}
     * @param richDate whether a timestamp object (rather than a plain string) is requested
     * @return a {@link TimestampWritable} for rich dates parsed here, the result of
     *         {@code parseString(value)} for non-rich dates, otherwise whatever the parent
     *         reader returns
     */
    @Override
    protected Object parseDate(String value, boolean richDate) {
        if (value != null && DEFAULT_DATE_FORMAT.equalsIgnoreCase(dateFormat)) {
            String trimmed = value.trim();
            // Short values (e.g. date-only strings) cannot match the pattern; leave them to the parent.
            if (trimmed.length() > 11) {
                if (!richDate) {
                    return parseString(value);
                }
                // Parse the trimmed text: surrounding whitespace would otherwise make the
                // "whole input consumed" check fail.
                Date parsed = parseOrNull(trimmed, DEFAULT_DATE_FORMAT);
                if (parsed != null) {
                    return new TimestampWritable(new Timestamp(parsed.getTime()));
                }
                // Not in the configured format: fall through to the default parser.
            }
        }
        // Without the date format setting (or on a mismatch) use the default behaviour, so that
        // other external tables are not affected by switching to this ValueReader.
        return super.parseDate(value, richDate);
    }

    /**
     * Parses {@code stringDate} with a single pattern, returning {@code null} instead of throwing.
     *
     * @param stringDate text to parse, may be {@code null}
     * @param format     {@link SimpleDateFormat} pattern
     * @return the parsed date, or {@code null} if the input is {@code null} or does not match
     */
    private static Date parseOrNull(String stringDate, String format) {
        if (stringDate == null) {
            return null;
        }
        try {
            return parseDate(stringDate, new String[] {format});
        } catch (ParseException ignored) {
            // A mismatch is an expected outcome here; the caller decides how to fall back.
            return null;
        }
    }

    /**
     * Parses a string by trying each pattern in turn; the first pattern that consumes the whole
     * string wins. Parsing is lenient, so out-of-range fields roll over instead of failing.
     *
     * @param str           text to parse, must not be {@code null}
     * @param parsePatterns {@link SimpleDateFormat} patterns to try, must not be {@code null}
     * @return the parsed date
     * @throws IllegalArgumentException if {@code str} or {@code parsePatterns} is {@code null}
     * @throws ParseException           if no pattern matches the whole string
     */
    public static Date parseDate(String str, String... parsePatterns) throws ParseException {
        return parseDateWithLeniency(str, parsePatterns, true);
    }

    /**
     * Tries every pattern against {@code str} and returns the first complete match.
     *
     * @param str           text to parse
     * @param parsePatterns patterns to try, in order
     * @param lenient       leniency flag passed to {@link SimpleDateFormat#setLenient(boolean)}
     * @return the parsed date
     * @throws IllegalArgumentException if {@code str} or {@code parsePatterns} is {@code null}
     * @throws ParseException           if no pattern matches the whole string
     */
    private static Date parseDateWithLeniency(
            String str, String[] parsePatterns, boolean lenient) throws ParseException {
        if (str == null || parsePatterns == null) {
            throw new IllegalArgumentException("Date and Patterns must not be null");
        }

        // A fresh formatter per call: SimpleDateFormat is not thread-safe, so it is never shared.
        SimpleDateFormat parser = new SimpleDateFormat();
        parser.setLenient(lenient);
        ParsePosition pos = new ParsePosition(0);

        for (String parsePattern : parsePatterns) {
            boolean colonZone = parsePattern.endsWith("ZZ");

            // A trailing "ZZ" is reduced to "Z" before the pattern reaches SimpleDateFormat,
            // and a trailing "+hh:mm" offset in the input is rewritten to "+hhmm" to match it.
            String pattern = colonZone
                    ? parsePattern.substring(0, parsePattern.length() - 1)
                    : parsePattern;
            String candidate = colonZone
                    ? str.replaceAll("([-+][0-9][0-9]):([0-9][0-9])$", "$1$2")
                    : str;

            parser.applyPattern(pattern);
            pos.setIndex(0);

            Date date = parser.parse(candidate, pos);
            // Only accept a match that consumed the entire input.
            if (date != null && pos.getIndex() == candidate.length()) {
                return date;
            }
        }
        throw new ParseException("读取错误: " + str, -1);
    }
}
pom依赖
<!-- Maven dependencies used to build the custom reader class com.zhendao.MyESReader. -->
<dependencies>
<!-- Supplies the org.elasticsearch.hadoop.* classes the reader imports (HiveValueReader, Settings). -->
<dependency>
<groupId>org.elasticsearch</groupId>
<artifactId>elasticsearch-hadoop</artifactId>
<version>6.3.0</version>
</dependency>
<!-- Presumably supplies org.apache.hadoop.hive.serde2.io.TimestampWritable; confirm the version matches the cluster's Hive. -->
<dependency>
<groupId>org.apache.hive</groupId>
<artifactId>hive-exec</artifactId>
<version>1.2.1</version>
<exclusions>
<!-- NOTE(review): presumably excluded because this transitive artifact cannot be resolved from the default repositories; confirm. -->
<exclusion>
<groupId>org.pentaho</groupId>
<artifactId>pentaho-aggdesigner-algorithm</artifactId>
</exclusion>
</exclusions>
</dependency>
<!-- NOTE(review): presumably needed for Hadoop base types used by the Hive classes; confirm the version matches the cluster's Hadoop. -->
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-common</artifactId>
<version>2.7.4</version>
</dependency>
</dependencies>
以上是关于hive映射Elasticsearch时间date类型问题的主要内容,如果未能解决你的问题,请参考以下文章
Spark hive to ES Elasticsearch