hive元数据查询
Posted 盛源_01
tags:
篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了hive元数据查询相关的知识,希望对你有一定的参考价值。
一、概念参考
二、查询库信息
-- 简单查询
select
`DB_ID`
,`NAME`
,`DB_LOCATION_URI`
,`DESC`
from
HDBS
where
NAME = 'db_name'
;
三、查询表信息
-- 查询表信息
select
*
from
TBLS
where
TBL_NAME = 'eu-west-1'
limit 2
;
-- 查询某些表的ID
select
*
from (
select
TBL_ID
,concat(NAME,'.',TBL_NAME) as name
from ( -- 表信息
select
DB_ID
,TBL_ID
,TBL_NAME
from
TBLS
where
REGION = 'eu-west-1'
) aa
join ( -- 库信息
select
DB_ID
,NAME
from
DBS
where
REGION = 'eu-west-1'
) bb
on aa.DB_ID = bb.DB_ID
) aa
where
name in ('ssjt.shy', 'ssjt.shy2')
;
-- 选择某个库下的一批表
select
aa.TBL_ID
,bb.NAME as db_name
,aa.TBL_NAME as tb_name
,concat(bb.NAME,'.',aa.TBL_NAME) as name
from (
select
DB_ID
,TBL_ID
,TBL_NAME
from
TBLS
where
REGION = 'eu-west-1'
AND DELETE_TIME = 0
) aa
join (
select
DB_ID
,NAME
from
DBS
where
REGION = 'eu-west-1'
and NAME = 'tranods'
) bb
on aa.DB_ID = bb.DB_ID
limit 50
;
-- 每个库下有多少张表
select
bb.NAME
,aa.cnt
,aa.cnt2
,aa.error
from (
select
DB_ID
,COUNT(TBL_NAME) cnt
,COUNT( DISTINCT TBL_NAME) cnt2
,COUNT(TBL_NAME) - COUNT( DISTINCT TBL_NAME) error
from
TBLS
where
REGION = 'eu-west-1'
AND DELETE_TIME = 0
AND TBL_NAME NOT LIKE 'values__tmp__table__%' -- 指定表的限制条件
group by
DB_ID
) aa
join (
select
DB_ID
,NAME
from
DBS
where
REGION = 'eu-west-1'
) bb
on aa.DB_ID = bb.DB_ID
order by
aa.cnt desc
;
-- 查询表的路径
select
concat(bb.NAME, '.', aa.TBL_NAME) as name
,bb.NAME db_name
,aa.TBL_NAME tb_name
,cc.LOCATION as location
from
TBLS aa
join
DBS bb
on aa.DB_ID = bb.DB_ID
join
SDS cc
on aa.SD_ID = cc.SD_ID
where
aa.DELETE_TIME = 0
AND aa.REGION = 'eu-west-1'
AND bb.REGION = 'eu-west-1'
AND cc.DELETE_TIME = 0
AND cc.LOCATION like '%-ind%'
;
-- 表分区的最新修改时间
select
concat(dd.NAME, '.', cc.TBL_NAME) as name
,dd.NAME db_name
,cc.TBL_NAME tb_name
,aa.last_time
,bb.PARAM_VALUE last_modified
from ( -- 分区最新更新时间
select
TBL_ID
,max(UPDATE_TIME) last_time
from
HIVE_PARTITIONS
where
DELETE_TIME = 0
group by
TBL_ID
) aa
join -- 最后修改者
HIVE_TABLE_PARAMS bb
on aa.TBL_ID = bb.TBL_ID
join -- 表名信息
HIVE_TBLS cc
on aa.TBL_ID = cc.TBL_ID
join -- 库名信息
HIVE_DBS dd
on cc.DB_ID = dd.DB_ID
where
aa.last_time < 1659283200000 -- 过滤2022-08-01后没再更新的表
AND bb.PARAM_KEY = 'last_modified_by'
;
四、查询分区信息
-- 查询某些表的分区数
-- 限定表统计分区,然后再关联出库名、表名
select
concat(cc.NAME, '.', bb.TBL_NAME) as name
,aa.cnt
from (
select
TBL_ID
,count(1) as cnt
from
HIVE_PARTITIONS
where
DELETE_TIME = 0
AND TBL_ID IN ( -- 指定要查询的表
select
TBL_ID
from (
select
TBL_ID
,concat(NAME,'.',TBL_NAME) as name
from (
select
DB_ID
,TBL_ID
,TBL_NAME
from
HIVE_TBLS
where
REGION = 'eu-west-1'
AND DELETE_TIME = 0
) aa
join (
select
DB_ID
,NAME
from
HIVE_DBS
where
REGION = 'eu-west-1'
) bb
on aa.DB_ID = bb.DB_ID
) aa
where
name in ('ssjt.shy', 'ssjt.shy')
)
group by
TBL_ID
) aa
join
HIVE_TBLS bb
on aa.TBL_ID = bb.TBL_ID
join
HIVE_DBS cc
on bb.DB_ID = cc.DB_ID
;
-- 不限定表查询分区数
select
concat(cc.NAME, '.', bb.TBL_NAME) as name
,aa.cnt
from (
select
TBL_ID
,count(1) as cnt
from
HIVE_PARTITIONS
where
DELETE_TIME = 0
group by
TBL_ID
) aa
join
HIVE_TBLS bb
on aa.TBL_ID = bb.TBL_ID
join
HIVE_DBS cc
on bb.DB_ID = cc.DB_ID
where
bb.REGION = 'eu-west-1'
AND bb.DELETE_TIME = 0
AND cc.REGION = 'eu-west-1'
order by
aa.cnt desc
limit 100
;
五、查询函数信息
-- 查询函数
select
*
from
funcs aa
where
FUNC_NAME='getwordandtranslate'
;
-- 查询资源
select
*
from
func_ru
where
FUNC_ID = 28
;
-- 查询详细信息
select
bb.NAME
,aa.FUNC_NAME
,aa.CLASS_NAME
,cc.RESOURCE_URI
from
funcs aa
join
dbs bb
on aa.DB_ID = bb.DB_ID
join
func_ru cc
on aa.FUNC_ID = cc.FUNC_ID
;
以上是关于hive元数据查询的主要内容,如果未能解决你的问题,请参考以下文章