hive的简单语句
Posted 醉卧千山下,诗酒趁年华。
tags:
篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了hive的简单语句相关的知识,希望对你有一定的参考价值。
create table bike (duration int,startdate TIMESTAMP,enddate TIMESTAMP, startnum int,startstation string,endnum int,endstation string,bikenum string,type string) row format delimited fields terminated by ‘,’;
1
insert overwrite local directory ‘/root/bike01/000000_0/’ row format delimited fields terminated by ‘\\t’ select count(distinct bikenum) from bike;
2
insert overwrite local directory ‘/root/bike02/000000_0/’ row format delimited fields terminated by ‘\\t’ select round(avg(duration)/60000) from bike;
3
insert overwrite local directory ‘/root/bike03/000000_0/’ row format delimited fields terminated by ‘\\t’ select startstation,count(*) as sum from bike group by startstation order by sum desc ,startstation asc limit 10;
5,题给出共享单车单日租赁排行榜
mkdir - p /root/bike04/000000_0/
create table day as select to_date(startdate) startdate from bike;
insert overwrite local directory ‘/root/bike04/000000_0/’ row format delimited fields terminated by ‘\\t’ select startdate ,count(*) as sum from day group by startdate order by sum desc limit 5;
6,修车
mkdir -p /root/bike05/000000_0
insert overwrite local directory ‘/root/bike05/000000_0/’ row format delimited fields terminated by ‘\\t’ select bikenum ,count(*) as sum from bike group by bikenum order by sum desc ,bikenum asc limit 10;
7 可以会员推广的地区
mkdir -p /root/bike06/000000_0
insert overwrite local directory ‘/root/bike06/000000_0/’ row format delimited fields terminated by ‘\\t’ select startstation, count(*) as sum from bike group by startstation order by sum desc ,startstation asc limit 10;
insert overwrite local directory ‘/root/bike06/’ row format delimited fields terminated by ‘\\t’ select startstation, count(*) as sum from bike group by startstation order by sum desc ,startstation asc limit 10;
8,
mkdir -p /root/bike07/000000_0/
insert overwrite local directory ‘/root/bike07/’ row format delimited fields terminated by ‘\\t’ select endstation, count(*) as sum from bike group by endstation order by sum desc ,endstation asc limit 10;
insert overwrite local directory ‘/root/bike07/000000_0/’ row format delimited fields terminated by ‘\\t’ select endstation, count(*) as sum from bike group by endstation order by sum desc ,endstation asc limit 10;
============================================
1,将数据上传
创建HDFS下路径/college
hadoop fs -mkdir -p /college #补充命令
将数据上传至HDFS
hadoop fs -put /root/college/person.csv /college/
查看HDFS文件
hadoop fs -ls /college/
2,建表 导数
进入hive客户端,创建数据库hive,并使用;
创建person表
create table person (
age double,
workclass string,
fnlwgt string,
edu string,
edu_num double,
marital_status string,
occupation string,
relationship string,
race string,
sex string,
gain string,
loss string,
hours double,
native string,
income string
) ??? ; # 补充命令,设置分隔符
load data local inpath ‘/root/college/person.csv’ into table ???;
insert overwrite local directory ‘/root/college000/01’
row format delimited fields ??? by ‘\\t’ # 补充命令
select count(*) from person;
3,计算较高收入人群占整体数据的比例,结果写入本地/root/college011/。
set hive.strict.checks.cartesian.product=false; # 默认不允许笛卡尔操作,这里允许操作
insert overwrite local directory ‘/root/college011/’
SELECT round((t2.v / t4.s),2)
FROM (SELECT count() AS v FROM person t1 where t1.income = ‘>50K’) t2
JOIN (SELECT count() as s FROM person t3) t4;
4,计算学历为本科的人员在调查中的占比,结果写入本地/root/college012/。
insert overwrite local directory ‘/root/college012/’
SELECT round((t2.v / t4.s),2)
FROM (SELECT count() AS v FROM person t1 where t1.edu=‘Bachelors’) t2
JOIN (SELECT count() as s FROM person t3) t4;
5,计算青年群体中高收入年龄层排行
insert overwrite local directory ‘/root/college013/’
row format delimited fields terminated by ‘\\t’
select age ,count(*) as sum from person
where age>=15 and age <=34 and income = ‘>50K’
group by age
order by sum desc,age asc limit 10;
6,计算男性群体中高收入职业排行,
insert overwrite local directory ‘/root/college014/’
row format delimited fields terminated by ‘\\t’
select occupation,count(*) as sum from person
where sex = ‘Male’ and income = ‘>50K’
group by occupation
order by sum desc,occupation asc limit 5;
7,对未婚人群高收入职业排行,
insert overwrite local directory ‘/root/college015/’
row format delimited fields terminated by ‘\\t’
select occupation,count(*) as sum from person
where marital_status = ‘Never-married’ and income = ‘>50K’
group by occupation
order by sum desc,occupation asc limit 5;
8,统计性别对于收入的影响,
insert overwrite local directory ‘/root/college016/’
row format delimited fields terminated by ‘\\t’
SELECT t2.sex ,round((t2.v / t4.s),2)
FROM (SELECT t1.sex as sex , count() AS v FROM person t1 where t1.income = ‘>50K’ group by sex order by v desc ) t2
JOIN (SELECT count() as s FROM person t3 where t3.income = ‘>50K’) t4;
9,统计教育程度对于收入的影响,
先按照数量进行降序排列,再按照教育程度名称升序排列;
insert overwrite local directory ‘/root/college017/’
row format delimited fields terminated by ‘\\t’
select edu ,count(*) as sum from person
where income = ‘>50K’
group by edu
order by sum desc ,edu asc ;
10,计算不同收入的平均工作时间,
格式:对于结果中的二维数据,要求使用“\\t”作为声明文件分隔符;
复合排列:先按照数量进行降序排列,再按照教育程度名称升序排列;
其他说明:根据收入类型对平均工时进行计算,结果四舍五入,保留整数。
insert overwrite local directory ‘/root/college018/’
row format delimited fields terminated by ‘\\t’
select income ,round(avg(hours)) as sum from person
group by income
order by sum desc ;
以上是关于hive的简单语句的主要内容,如果未能解决你的问题,请参考以下文章