数据仓库工具Hive——系统内置函数

Posted 2022-05-27 小企鹅推雪球!

tags:

篇首语：本文由小常识网(cha138.com)小编为大家整理，主要介绍了数据仓库工具Hive——系统内置函数相关的知识，希望对你有一定的参考价值。

文章目录

- 系统内置函数

系统内置函数

-- List the functions available in the current Hive session
SHOW FUNCTIONS;

-- Show the short usage description of a function
DESCRIBE FUNCTION upper;
-- EXTENDED prints the extended description of the same function
DESCRIBE FUNCTION EXTENDED upper;

日期函数：

-- Current date
SELECT current_date;
-- Current time as a Unix timestamp (seconds)
SELECT unix_timestamp();

-- Prefer current_timestamp; it may be written with or without parentheses
SELECT current_timestamp();

-- Unix timestamp -> date string (optionally with an output pattern)
SELECT from_unixtime(1505456567);
SELECT from_unixtime(1505456567, 'yyyyMMdd');
SELECT from_unixtime(1505456567, 'yyyy-MM-dd HH:mm:ss');

-- Date string -> Unix timestamp
SELECT unix_timestamp('2019-09-15 14:23:00');

-- Difference in days between two dates (first argument minus second argument)
SELECT datediff('2020-04-18', '2019-11-21');
SELECT datediff('2019-11-21', '2020-04-18');

-- Day of the current month
SELECT dayofmonth(current_date);

-- Last day of the current month
select last_day(current_date);

-- First day of the current month: step back (day-of-month - 1) days from today
select date_sub(current_date, dayofmonth(current_date) - 1);

-- First day of next month: first day of the current month plus one month
select add_months(date_sub(current_date, dayofmonth(current_date) - 1), 1);

-- String -> date. The string must start with a yyyy-MM-dd date;
-- the second example passes a full date-time string.
SELECT to_date('2020-01-01');
SELECT to_date('2020-01-01 12:12:12');

-- Format a timestamp, a date or a date string with the given output pattern
SELECT date_format(current_timestamp(), 'yyyy-MM-dd HH:mm:ss');
SELECT date_format(current_date(), 'yyyyMMdd');
SELECT date_format('2020-06-01', 'yyyy-MM-dd HH:mm:ss');

-- Length of service of every employee in emp:
-- days since hiredate divided by 365, rounded to one decimal place
SELECT
    *,
    round(datediff(current_date, hiredate) / 365, 1) AS workingyears
FROM emp;

字符串函数

-- Convert to lower case: lower
select lower("HELLO WORLD");
select lower(ename), ename from emp;

-- Convert to upper case: upper
select upper("hello world");

-- String length: length
SELECT length(ename), ename
FROM emp;

-- String concatenation: concat / ||
SELECT empno || " " || ename AS idname
FROM emp;
SELECT concat(empno, " ", ename) AS idname
FROM emp;

-- Concatenation with a separator: concat_ws(separator, [string | array(string)]+)
SELECT concat_ws('.', 'www', array('lagou', 'com'));
SELECT concat_ws(" ", ename, job)
FROM emp;

-- Substring: substr(str, start[, length]); positions are 1-based
SELECT substr('www.lagou.com', 5);     -- from the 5th character to the end: lagou.com
SELECT substr('www.lagou.com', -5);    -- a negative start counts from the end: u.com
SELECT substr('www.lagou.com', 5, 5);  -- 5 characters starting at the 5th: lagou

-- Split a string: split(str, regex). The separator is a regular expression,
-- so a literal '.' must be escaped. The string literal "\\." is unescaped to
-- the regex \. which matches exactly one dot.
select split("www.lagou.com", "\\.");

数学函数

-- Rounding: round(x[, d]); d is the number of decimal places,
-- a negative d rounds to the left of the decimal point
SELECT round(314.15926);
SELECT round(314.15926, 2);
SELECT round(314.15926, -2);

-- Round up to the nearest integer: ceil
SELECT ceil(3.1415926);
-- Round down to the nearest integer: floor
SELECT floor(3.1415926);

条件函数

-- if(boolean testCondition, T valueTrue, T valueFalseOrNull)
SELECT
    sal,
    if(sal < 1500, 1, if(sal < 3000, 2, 3))
FROM emp;

-- Classify the salaries in emp into levels: 0-1500, 1500-3000, above 3000
-- (nested if version)
SELECT
    sal,
    if(sal <= 1500, 1, if(sal <= 3000, 2, 3))
FROM emp;

-- CASE WHEN a THEN b [WHEN c THEN d]* [ELSE e] END
-- For multi-branch conditions CASE WHEN reads more clearly than nested if
SELECT
    sal,
    CASE
        WHEN sal <= 1500 THEN 1
        WHEN sal <= 3000 THEN 2
        ELSE 3
    END AS sallevel
FROM emp;

-- The two statements below are equivalent.
-- Form 1: simple CASE, comparing deptno against each value
SELECT
    ename,
    deptno,
    CASE deptno
        WHEN 10 THEN 'accounting'
        WHEN 20 THEN 'research'
        WHEN 30 THEN 'sales'
        ELSE 'unknown'
    END AS deptname
FROM emp;

-- Form 2: searched CASE, with an explicit condition per branch
SELECT
    ename,
    deptno,
    CASE
        WHEN deptno = 10 THEN 'accounting'
        WHEN deptno = 20 THEN 'research'
        WHEN deptno = 30 THEN 'sales'
        ELSE 'unknown'
    END AS deptname
FROM emp;

-- COALESCE(T v1, T v2, ...): returns the first non-NULL argument,
-- or NULL when every argument is NULL
SELECT sal, coalesce(comm, 0)
FROM emp;

-- isnull(a) / isnotnull(a)
SELECT *
FROM emp
WHERE isnull(comm);

SELECT *
FROM emp
WHERE isnotnull(comm);

-- nvl(T value, T default_value)
SELECT
    empno,
    ename,
    job,
    mgr,
    hiredate,
    deptno,
    sal + nvl(comm, 0) AS sumsal
FROM emp;

-- nullif(x, y): returns NULL when x equals y, otherwise returns x
SELECT
    nullif("b", "b"),
    nullif("b", "a");

UDTF函数：UDTF : User Defined Table-Generating Functions。用户定义表生成函数，一行输入，多行输出。

-- explode: expands a complex array or map value held in one row into multiple rows
-- (one row per array element, one key/value row per map entry)
SELECT explode(array('A', 'B', 'C')) AS col;
SELECT explode(map('a', 8, 'b', 88, 'c', 888));

-- lateral view is usually combined with a table-generating function such as explode
-- lateral view syntax:
--   lateralView: LATERAL VIEW udtf(expression) tableAlias AS columnAlias (',' columnAlias)*
--   fromClause: FROM baseTable (lateralView)*

-- Basic usage of lateral view.
-- split takes a regular expression, so the literal dot is escaped as '\\.'
with t1 as (select 'OK' cola, split('www.lagou.com', '\\.') colb)
select cola, colc
from t1
lateral view explode(colb) t2 as colc;

UDTF 案例1：

-- 数据(id tags)：
1 1,2,3
2 2,3
3 1,2
--编写sql,实现如下结果：
1 1
1 2
1 3
2 2
2 3
3 1
3 2

-- Create the table (fields separated by a tab character) and load the data file into it
create table tab1(id int, tags string)
row format delimited fields terminated by '\t';
load data local inpath '/home/hadoop/data/tab1.dat' into table tab1;

-- Step 1: split the comma-separated tags into an array (still one row per id)
SELECT id, split(tags, ',')
FROM tab1;

-- Step 2: explode the array through a lateral view to get one row per (id, tag)
SELECT id, tag
FROM tab1
LATERAL VIEW explode(split(tags, ",")) t1 AS tag;

UDTF 案例2：

-- Create the table: '|' separates fields, ',' separates map entries,
-- ':' separates a map key from its value
CREATE TABLE studscore (
    name  string,
    score map<string, string>
)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY '|'
    COLLECTION ITEMS TERMINATED BY ','
    MAP KEYS TERMINATED BY ':';

-- Load the data, replacing any existing contents of the table
LOAD DATA LOCAL INPATH '/home/hadoop/data/score.dat'
OVERWRITE INTO TABLE studscore;

-- Goal: find the best score of each student
-- Step 1: use explode to split the map into one (subject, score) row per entry
select explode(score) as (subject, score) from studscore;
-- The student name is missing from the result above, but adding it to the select
-- list fails: a UDTF cannot be selected together with other expressions.
-- The statement below is invalid and is kept commented out for reference only.
-- select name, explode(score) as (subject, score) from studscore;

-- Step 2: explode is usually combined with lateral view; together they can be joined with other columns
select name, subject, score1 as score from studscore lateral view explode(score) t1 as subject, score1;

-- Step 3: find the best score of each student
-- NOTE(review): score is declared as map<string, string>, so max(mark) compares
-- strings; confirm whether the marks should be cast to a numeric type first.
select name, max(mark) maxscore
from (select name, subject, mark
      from studscore lateral view explode(score) t1 as subject, mark) t1
group by name;

-- The same query written with a CTE instead of a subquery
with tmp as (
    select name, subject, mark
    from studscore lateral view explode(score) t1 as subject, mark
)
select name, max(mark) maxscore from tmp group by name;

explode 将一行数据转换成多行数据，可以用于 array 和 map 类型的数据；
lateral view 与 explode 联用，解决 UDTF 不能添加额外列的问题

以上是关于数据仓库工具Hive——系统内置函数的主要内容，如果未能解决你的问题，请参考以下文章