根据JSON创建对应的HIVE表

Posted gabry.wu

tags:

篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了根据JSON创建对应的HIVE表相关的知识,希望对你有一定的参考价值。

  本文提供一种用SCALA把JSON串转换为HIVE表的方法,由于比较简单,只贴代码,不做解释。有问题可以留言探讨

package com.gabry.hive
import org.json4s._
import org.json4s.native.JsonMethods._
import scala.io.Source
class Json2Hive {
  /**
    * Converts a json4s AST into the column list of a Hive CREATE TABLE statement.
    *
    * json4s AST reference:
    *   sealed abstract class JValue
    *   case object JNothing extends JValue // 'zero' for JValue
    *   case object JNull extends JValue
    *   case class JString(s: String) extends JValue
    *   case class JDouble(num: Double) extends JValue
    *   case class JDecimal(num: BigDecimal) extends JValue
    *   case class JInt(num: BigInt) extends JValue
    *   case class JBool(value: Boolean) extends JValue
    *   case class JObject(obj: List[JField]) extends JValue
    *   case class JArray(arr: List[JValue]) extends JValue
    *   type JField = (String, JValue)
    *
    * Example target DDL: create table student_test(id INT, info struct< name:string,age:INT >)
    * Example input JSON: { "people_type":1,"people":{"person_id": 5,"test_count": 5,"para":{"name":"jack","age":6}}}
    */

  /** Separator between a field name and its type. Traversal levels: root object is 0,
    * its fields are 1, their values are 2 — so level 2 means a top-level column
    * ("col type", space-separated) while deeper levels are struct members ("field:type"). */
  private def fieldDelimiter(level: Int) = if (level == 2) " " else ":"

  /**
    * Recursively appends the Hive type description of `jv` to `hql`.
    *
    * @param jv    a JValue or JField node of the parsed JSON tree
    * @param level current traversal depth (root JObject is passed level 0)
    * @param hql   accumulator for the generated column/type text
    */
  private def decodeJson(jv: Any, level: Int, hql: StringBuilder): Unit = {
    jv match {
      case _: JString  => hql.append(fieldDelimiter(level) + "string,")
      case _: JDouble  => hql.append(fieldDelimiter(level) + "double,")
      case _: JDecimal => hql.append(fieldDelimiter(level) + "decimal,")
      case _: JInt     => hql.append(fieldDelimiter(level) + "bigint,")
      // NOTE(review): Hive has a native BOOLEAN type; "int" is kept for backward
      // compatibility with the original mapping — confirm with downstream consumers.
      case _: JBool    => hql.append(fieldDelimiter(level) + "int,")
      case jf: JField =>
        // A field contributes its name, then recurses into its value for the type.
        hql.append(jf._1)
        decodeJson(jf._2, level + 1, hql)
      case ja: JArray =>
        // FIX: the original appended the numeric `level` into the DDL ("1 struct<...")
        // and concatenated every element's type, yielding invalid Hive syntax.
        // Emit a proper Hive array type, inferring the element type from the
        // first element (JSON arrays are assumed homogeneous).
        hql.append(fieldDelimiter(level) + "array<")
        val mark = hql.length
        ja.arr.headOption.foreach(decodeJson(_, level + 1, hql))
        // Element types are emitted with a leading name/type delimiter meant for
        // named fields; strip it inside array<>.
        if (hql.length > mark && (hql.charAt(mark) == ':' || hql.charAt(mark) == ' '))
          hql.deleteCharAt(mark)
        if (hql.endsWith(",")) hql.deleteCharAt(hql.length - 1)
        hql.append(">,")
      case jo: JObject =>
        // FIX: the original always emitted " struct<" (space-separated) at every
        // depth, but Hive requires "name:struct<...>" for a struct nested inside
        // another struct — the space form only parses for top-level columns.
        if (level != 0) hql.append(fieldDelimiter(level) + "struct<")
        jo.obj.foreach(decodeJson(_, level + 1, hql))
        // Drop the trailing comma left by the last member before closing the struct.
        if (hql.endsWith(",")) hql.deleteCharAt(hql.length - 1)
        if (level != 0) hql.append(">,")
      // JSON null carries no type information; default to the most permissive Hive type.
      case JNull => hql.append(fieldDelimiter(level) + "string,")
      case other => println(other)
    }
  }

  /**
    * Builds a Hive CREATE TABLE statement whose schema mirrors `jsonStr`.
    *
    * @param jsonStr   a single JSON object serialized as one line
    * @param tableName name of the Hive table to create
    * @return the CREATE TABLE DDL string
    */
  def toHive(jsonStr: String, tableName: String): String = {
    val jsonObj = parse(jsonStr)
    val hql = new StringBuilder()
    decodeJson(jsonObj, 0, hql)
    "create table %s ( %s )".format(tableName, hql.toString())
  }
}
object Json2Hive {
  val json2hive = new Json2Hive()

  /**
    * Entry point: reads one JSON object per line from the given file and prints
    * the corresponding Hive CREATE TABLE statement for each line.
    *
    * Usage: json2hive <jsonFile> <hiveTableName>
    */
  def main(args: Array[String]): Unit = {
    if (args.length != 2) {
      // FIX: the original printed the usage message but fell through to args(0),
      // throwing ArrayIndexOutOfBoundsException on wrong argument counts.
      println("usage : json2hive jsonFile hiveTableName")
      return
    }
    val jsonFile = args(0)
    val hiveTableName = args(1)
    // The JSON is read from a file because a raw JSON string is awkward to pass
    // as a shell argument (quoting/escaping issues).
    val file = Source.fromFile(jsonFile, "UTF-8")
    // FIX: close the source even when parsing/printing throws (original leaked
    // the handle on any exception).
    try {
      file.getLines().foreach(line => println(json2hive.toHive(line, hiveTableName)))
    } finally {
      file.close()
    }
  }
}

  

以下是测试结果

 

create table example ( people_type bigint,people_num double,people struct<person_id:bigint,test_count:bigint,para struct<name:string,age:bigint>>,gender bigint )

以上是关于根据JSON创建对应的HIVE表的主要内容,如果未能解决你的问题,请参考以下文章

创建function实现hive表结果导出到mysql

Hive 创建表:解析 json 文件时解析错误缺少“>”

使用 pyspark 来自 JSON 数据的 Hive 表

实时即未来,大数据项目车联网之原始数据实时ELT流式任务流程总结

实时即未来,大数据项目车联网之原始数据实时ELT流式任务流程总结

Hive处理Json数据