hive元数据查询

Posted 盛源_01

tags:

篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了hive元数据查询相关的知识,希望对你有一定的参考价值。

一、概念参考

Hive的元数据存储与元数据表

二、查询库信息

-- 简单查询
select
    `DB_ID`
    ,`NAME`
    ,`DB_LOCATION_URI`
    ,`DESC`
from
    HDBS
where
    NAME = 'db_name' 
;

三、查询表信息

-- 查询表信息
select
    *
from
    TBLS
where
    TBL_NAME = 'eu-west-1'
limit 2
;


-- 查询某些表的ID
select
    *
from (

    select
        TBL_ID
        ,concat(NAME,'.',TBL_NAME) as name
    from ( -- 表信息
            select
                DB_ID
                ,TBL_ID
                ,TBL_NAME
            from
                TBLS
            where
                REGION = 'eu-west-1'
        ) aa
    join ( -- 库信息
            select
                DB_ID
                ,NAME
            from
                DBS
            where
                REGION = 'eu-west-1'
        ) bb
        on aa.DB_ID = bb.DB_ID
) aa
where
    name in ('ssjt.shy', 'ssjt.shy2')
;


-- 选择某个库下的一批表
select
    aa.TBL_ID
    ,bb.NAME as db_name
    ,aa.TBL_NAME as tb_name
    ,concat(bb.NAME,'.',aa.TBL_NAME) as name
from (
        select
            DB_ID
            ,TBL_ID
            ,TBL_NAME
        from
            TBLS
        where
            REGION = 'eu-west-1'
            AND DELETE_TIME = 0
    ) aa
join (
        select
            DB_ID
            ,NAME
        from
            DBS
        where
            REGION = 'eu-west-1'
            and NAME = 'tranods'
    ) bb
    on aa.DB_ID = bb.DB_ID
limit 50
;


-- 每个库下有多少张表
select
    bb.NAME
    ,aa.cnt
    ,aa.cnt2
    ,aa.error
from (
        select
            DB_ID
            ,COUNT(TBL_NAME) cnt
            ,COUNT( DISTINCT TBL_NAME) cnt2
            ,COUNT(TBL_NAME) - COUNT( DISTINCT TBL_NAME) error
        from
            TBLS
        where
            REGION = 'eu-west-1'
            AND DELETE_TIME = 0
            AND TBL_NAME NOT LIKE 'values__tmp__table__%'  -- 指定表的限制条件
        group by
            DB_ID
    ) aa
join (
        select
            DB_ID
            ,NAME
        from
            DBS
        where
            REGION = 'eu-west-1'
    ) bb
    on aa.DB_ID = bb.DB_ID
order by
    aa.cnt desc
; 


-- 查询表的路径
select
    concat(bb.NAME, '.', aa.TBL_NAME) as name
    ,bb.NAME db_name
    ,aa.TBL_NAME tb_name
    ,cc.LOCATION as location
from
    TBLS aa
join
    DBS bb
    on aa.DB_ID = bb.DB_ID
join
    SDS cc
    on aa.SD_ID = cc.SD_ID
where 
    aa.DELETE_TIME = 0
    AND aa.REGION = 'eu-west-1'
    AND bb.REGION = 'eu-west-1' 
    AND cc.DELETE_TIME = 0
    AND cc.LOCATION like '%-ind%'
;


-- 表分区的最新修改时间
select
    concat(dd.NAME, '.', cc.TBL_NAME) as name
    ,dd.NAME db_name
    ,cc.TBL_NAME tb_name
    ,aa.last_time
    ,bb.PARAM_VALUE last_modified
from ( -- 分区最新更新时间
    select
        TBL_ID
        ,max(UPDATE_TIME) last_time
    from
        HIVE_PARTITIONS
    where 
        DELETE_TIME = 0
    group by
        TBL_ID
) aa
join -- 最后修改者
    HIVE_TABLE_PARAMS bb
    on aa.TBL_ID = bb.TBL_ID
join  -- 表名信息
    HIVE_TBLS cc
    on aa.TBL_ID = cc.TBL_ID
join  -- 库名信息
    HIVE_DBS dd
    on cc.DB_ID = dd.DB_ID    
where 
    aa.last_time < 1659283200000 -- 过滤2022-08-01后没再更新的表
    AND bb.PARAM_KEY = 'last_modified_by'
;

四、查询分区信息

-- 查询某些表的分区数
-- 限定表统计分区,然后再关联出库名、表名
select 
    concat(cc.NAME, '.', bb.TBL_NAME) as name
    ,aa.cnt
from (
        select
            TBL_ID
            ,count(1) as cnt
        from
            HIVE_PARTITIONS
        where
            DELETE_TIME = 0
            AND TBL_ID IN (  -- 指定要查询的表
                    select
                        TBL_ID
                    from (

                        select
                            TBL_ID
                            ,concat(NAME,'.',TBL_NAME) as name
                        from (
                                select
                                    DB_ID
                                    ,TBL_ID
                                    ,TBL_NAME
                                from
                                    HIVE_TBLS
                                where
                                    REGION = 'eu-west-1'
                                    AND DELETE_TIME = 0
                            ) aa
                        join (
                                select
                                    DB_ID
                                    ,NAME
                                from
                                    HIVE_DBS
                                where
                                    REGION = 'eu-west-1'
                            ) bb
                            on aa.DB_ID = bb.DB_ID
                    ) aa
                    where
                        name in ('ssjt.shy', 'ssjt.shy')
                )
        group by
            TBL_ID
    ) aa
join
    HIVE_TBLS bb
    on aa.TBL_ID = bb.TBL_ID
join 
    HIVE_DBS cc
    on bb.DB_ID = cc.DB_ID
;

-- 不限定表查询分区数
select 
    concat(cc.NAME, '.', bb.TBL_NAME) as name
    ,aa.cnt
from (
        select
            TBL_ID
            ,count(1) as cnt
        from
            HIVE_PARTITIONS
        where
            DELETE_TIME = 0
        group by
            TBL_ID
    ) aa
join
    HIVE_TBLS bb
    on aa.TBL_ID = bb.TBL_ID
join 
    HIVE_DBS cc
    on bb.DB_ID = cc.DB_ID
where
    bb.REGION = 'eu-west-1'
    AND bb.DELETE_TIME = 0
    AND cc.REGION = 'eu-west-1'
order by
    aa.cnt desc
limit 100
;

五、查询函数信息

-- 查询函数
select
    *
from
    funcs aa
where
    FUNC_NAME='getwordandtranslate'
;

-- 查询资源
select
    *
from
    func_ru
where
    FUNC_ID = 28
;

-- 查询详细信息
select
    bb.NAME
    ,aa.FUNC_NAME
    ,aa.CLASS_NAME
    ,cc.RESOURCE_URI
from
    funcs aa
join
    dbs bb
    on aa.DB_ID = bb.DB_ID
join
    func_ru cc
    on aa.FUNC_ID = cc.FUNC_ID
;

以上是关于hive元数据查询的主要内容,如果未能解决你的问题,请参考以下文章

大数据技术之Hive基本概念安装数据类型

大数据:从入门到XX

安利:Hive初始化元数据库教程

Hive入门

青出于蓝 | Impala和Hive

Hive---浅谈Hive