hive-it十八掌

Posted 2021-01-10 jeasonit
tags:
篇首语：本文由小常识网(cha138.com)小编为大家整理，主要介绍了hive-it十八掌相关的知识，希望对你有一定的参考价值。
  1 回顾：
  2 hive
  3 --------------------------------
  4 数据仓库OLAP   在线分析处理，延迟较高
  5 数据库：OLTP   在线事务处理，事务支持
  6 
  7 运行在hadoop 类sql运行，sql，hql，mr运算
  8 结构化数据
  9 schema（模式，元信息，存放到数据库中）HDFS 文件  derby，mysql
 10 数据库和表都是文件夹路径。
 11 hive
 12 ------------------------------------------
 13 类似mysql  结构化数据
 14 配置hive   bin/env.sh
 15 schematool -initSchema -dbType derby   //初始化模式
 16 create database
 17 
 18 常见hive命令：
 19 0.启动hadoop
 20 1.初始化schema库
 21   hive/bin/schematool -initSchema -dbType derby
 22 2.完成后，在当前目录下创建一个metastore_db（元数据库）
 23 3.进入hive shell
 24       hive
 25 hive常见命令，类似于mysql
 26 ------------------------------------------
 27      >show databases;
 28      >create database myhive;
 29      >use myhive;
 30      >show tables;
 31      //创建表
 32      >create table if not exists myhive.employee
 33      (eid int, name string, salary string, destination string)
 34                   comment 'employee details'
 35                   row format delimited
 36                   fields terminated by '\t'
 37                   lines terminated by '\n'
 38                   stored as textfile;
 39 //加载数据到hive（hdfs）
 40       >load data (local) inpath 'filepath' (overwrite) into table tablename (partition (partcol1=val1,partcol2=val2...))
 41       准备数据employee 数据
 42       1201  gopal       40000    technical  manager
 43       1202     manisha     45000    proof      reader
 44       1203     masthancali 400000   technical  writer
 45       1204     krian       4000     hr         admin
 46       1205    kranthi     30000    op         admin
 47           /t
 48 show tables;  查看表
 49 desc employee;
 50 eid             int
 51 name            string
 52 salary          string
 53 destination     string
 54 //加载数据
 55 ---------------------------------------------------------
 56 hive>load data local inpath '/home/ubuntu/desktop/employees.txt'  into table employee;
 57 //插入数据=====上传
 58 insert 
 59 select * from employee
 60 
 61 hdfs dfs -cat /user/hive/warehouse/myhive.db/employee/employees.txt
 62 
 63 select eid,name from employee order by eid limit 2,4;
 64 
 65 //支持插入，不支持删除和更新
 66 
 67 //修改表alter table
 68 启动hiveserver2服务-----接受多个客户端连接请求
 69 hive --service hiveserver2 start
 70 查看作业
 71 jobs
 72 netstat -ano|more 10000号端口
 73 
 74 在eclipse中创建maven项目，使用jdbc连接操作hive数据仓库
 75 -------------------------------------------------------
 76 
 77 查看表
 78 dfs -lsr /;
 79 
 80 常用聚集函数
 81 ------------------------------------------------------
 82 count()
 83 sum()
 84 max()
 85 min()
 86 
 87 解决beeline命令行终端的上下键导航历史命令的bug
 88 -----------------------------------------------------
 89 修改行
 90 if [[ ! $(ps -o stat= -p $$) =~ + ]]; then
 91 改为：
 92 if [[ ! $(ps -o stat= -p $$) =~ "+" ]]; then
 93 
 94 hive命令
 95 -----------------------------------------------------
 96 $hive>dfs -lsr                                 //执行dfs命令
 97 $hive>!clear;                                  //执行shell命令
 98 $hive -e "select * from test"                  //-e execute 执行的意思
 99 $hive -S -e "select * from test" >/tmp/myquery  //重定向   -S silent 静默模式  输出的时候去掉ok等行
100 $hive >  tab键tab键                            //528个可能么 y(显示函数和关键字)
101 $hive >this is a comment 
102 $hive >hive -f /x/x/a.sql                      //-f 执行一个文件，通常用于批处理，调度
103 $hive>--this is a comment                      //显示所有命令
104 $hive>set hive.cli.print.header=true           //显示字段名称
105 
106 $hive>create database if not exists xxx
107 $hive>drop database if exists xxx               //存在即删除
108 $hive>alter database hive3 set dbproperties('author'='you')
109 $hive>drop database if exists xxx               //存在即删除
110 $hive>drop database if exists xxx cascade       //级联删除
111 $hive>create database hive2 location '/user/ubuntu/'
112 $hive>create database hive3 with dbproperties('author'='xupc','createtime'='today')   //创建数据库，指定属性
113 
114 [创建表语法]
115 create (temporary)(external)table(if not exists)(db_name.)table_name
116 ((col_name data_type (comment col_comment),....))
117 (comment table_comment)
118 (row format row_format)
119 (stored as file_format)
120 
121 例如：
122 create table if not exists employee(eid int,name string,salary string,destination string)
123       comment 'employee details'   //注释
124       row format delimited fields terminated by '\t'   //字段结束符
125       lines terminated by '\n'   //行结束符
126       stored as textfile;        //存储何种文件
127       
128 [带分区的创建表]
129 create  table hive1.test5(id int,name string,age int )
130 partitioned by (province string,city string)
131       row format delimited fields terminated by '\t'   //字段结束符
132       lines terminated by '\n'   //行结束符
133       stored as textfile;
134    
135 [加载数据===insert]
136 load data (local) inpath 'filepath' (overwrite) into table tablename (partition (partcol1=val1,partcol2=val2...))
137 [加载数据===例子]
138 load data local inpath '/home/user/sample/txt' overwrite into table employee partition (province='shanxi',city='jizhong');
139 
140 
141 [创建分区]
142 create  table hive1.test5(id int,name string,age int )partitioned by (province string,city string);//按照省份和城市分区
143 [加载数据到指定分区]
144 load data local inpath '/home/user/sample/employee' overwrite into table hive1.test5 partition (province='shanxi',city='jizhong');
145 
146 
147 [手动增加分区]
148 alter table hive1.test5 add partition(province='henan',city='pingdingshan');
149 alter table hive1.test5 add partition(area='hebei',province='henan',city='pingdingshan');//增加不存在的分区列，是非法的。
150 
151 [修改表]
152 $hive>alter table hive1.test5 rename to hive1.test6;                                               //重命名
153 $hive>alter table hive1.test5 add partition(province='hebei',city='zhangjiakou')  location 'xxx'      //添加多个分区
154                             partition(province='hebei',city='zhangjiakou')
155                             partition(province='hebei',city='zhangjiakou')
156                             partition(province='hebei',city='zhangjiakou')
157 $hive>alter table hive1.test5 partition(province='hebei',city='zhangjiakou') set  location 'xxx'  //移动分区
158 $hive>alter table hive1.test6 add columns(birth string,fire string);                                     //增加列
159 $hive>alter table hive1.test6 replace columns(birth string,fire string);                                 //替换列
160 $hive>alter table hive1.test5 set tblproperties('a'='x',......)          //修改表属性
161 
162 [启用归档]
163 $hive>set hive.archive.enabled=true    //设置可归档，默认false
164 insert into hive2.test5 partition(province='hebei',city='baoding') select * from hive1.test2;//
165 insert overwrite table employees partition(country='us',state='or')  select * from stated_employee se where se.cnty='us' and se.st='or';//字段个数要相同
166 查询时候，分区通过where子句指定
167 //插入时，分区用partition指定   overwrite 覆盖
168 [动态分区]
169 insert overwrite table  hive1.test6 partition(province,city) select id,...,province,city from table2; //动态分区
170 hdfs dfs   -lsr /; 查看路径
171 
172 [查看数据hdfs]
173 /user/hive/warehouse/hive1.db/test5/province=hebei/city=baoding/employee.txt
174 [分区表的查询模式:strict/nonstrict]
175 set hive.mapred.mode=strict //严格模式,默认是nonstrict
176 show partitions hive1.test5;
177 show partitions hive1.test5 partition(province='hebei');
178 
179 
180 
181 $hive>desc extended hive1.test1; //显示扩展信息
182 $hive>desc database hive2;       //描述数据库
183 $hive>desc formatted hive1.test1;   //显示格式化信息
184 $hive>create table hive2.test4 like hive1.test1;//复制表
185 $hive>show tables in hive2;  //显示指定数据库的表的集合
186 外部表：hive只控制元数据，删除外部表时，数据不被删除
187 
188 
189 $hive>create external table hive1.test3 like hive1.test1;             //创建外部表external，只复制表结构，没有数据
190 $hive>create external table hive1.test4 as select * from hive1.test1; //创建外部表external，复制表结构，并且有数据
191 
192 
193 $hive> use hive3
194 使用beeline客户端可以实现远程jdbc连接
195 -----------------------------------------------------
196 1.连接
197        $hive --service beeline -u jdbc:hive2://s100:10000/hive1
198        $beeline -u jdbc:hive2://s100:10000/hive1
199        $beeline>!sh  clear
200        $beeline>show databases;
201        $beeline>!help      //帮助
202        $beeline>!dbinfo    //查看数据库信息
203                            
204 配置hive的仓库位置
205 -----------------------------------------------------
206 hive-site.xml
207 hive.metastore.warehouse.dir=/user/hive/warehouse/
208 
209 hive的数据类型
210 ------------------------------------------------------
211 bytes：                           案例
212 tinyint       1                     20  
213 smallint      2                     20
214 int           4                     20
215 bigint        8                     20
216 boolean       true or false         true
217 float         single precision floating point   3.14
218 double        double precision floating point   3.14
219 
220 string ‘now is the time‘,‘for all good men‘
221 timestamp
222 binary   字节数组
223 集合类型
224 struct struct('john','doe')
225 map    map('first','john','last','doe')
226 array  array ('john','doe')
227 mysql>select * from tbls
228 hive>show tables;
229 创建表：
230 create table hive.test1(id int,name string,age int) tblproperties(‘author‘=‘you‘);
231 
232 hive 所谓的读模式
233 ------------------------------------------------------
234 
235 托管表
236 --------------------------------------------------------
237 hive默认的表都是托管表，hive控制数据的生命周期，删除托管表时候，元数据和数据都被删除
238 外部表
239 -------------------
240 hive只控制元数据，删除外部表时，数据不被删除
241 create external table hive1.test3 like hive1.test1;
242 create external table hive1.test4 as select * from hive1.test1; 有数据
243 
244 -------------------------------------------------------------------------------
245 创建分区表
246 --------------
247 create  external table hive1.test2(id int,name string,age int)
248         partitioned by (province string,city string)
249         row format delimited
250             fields terminated by '\t'
251             lines terminated by '\n'
252         stored as textfile;
253         
254 手动添加分区
255 -------------------------------------------------------
256 alter table hive1.test2 add partition(province='hebei',city='baoding')
257 插入数据
258 ------------------------------------------------------
259 insert into hive1.test2 partition(province='hebei',city='baoding')
260 select * from hive1.test6 where province='hebei' and city='shijiazhuang' and id >5
261 
262 ---------------------------------------------------------------------------------
263 insert overwrite table test2 partition(province='hebei',city='baoding')select id,name,age from test1;
264 
265 
266 动态分区：
267 --------------------------------------------------------------------------------------
268 创建test3分区表
269 create table test3(id int,name string,age int)
270 partitioned by (province string,city string)
271             row format delimited
272             fields terminated by '\t'
273             lines terminated by '\n'
274             stored as textfile;
275 动态分区，复制一个表数据到分区表，动态创建分区
276 如果两个都是动态分区需要关闭严格模式
277 set hive.exec.dynamic.partition.mode=nonstrict;  //关闭动态分区的严格模式
278 insert overwrite table hive1.test6 partition(province,city) select id,name,age,'henan' as province,'kaifeng' as city from table2;
279 使用分区的动态和静态的混合（静态分区列写在partition中，select只需给出动态分区列）
280 insert overwrite table hive1.test6 partition(province='henan',city) select id,name,age,'kaifeng' as city from table2;
281 
282 
283 查询期间动态创建，并将数据写入创建表中
284 create table test3 as select id ,name from test2 where province='hebei' and city='baoding'
285 
286 导出hive的数据到本地目录(下载)
287 ---------------------------------------------------------------------------------------------
288 insert overwrite local directory '/home/ubuntu/a.txt' select * from test2 where province='hebei'
289 
290 导出hive的数据到HDFS目录(下载)
291 ---------------------------------------------------------------------------------------------
292 insert overwrite directory 'hdfs://s100:8020/user/ubuntu/xxx' select * from test2 where province='hebei'
293 
294 
295 查询数据向多个目录同时输出
296 from test2 t
297 insert overwrite local directory ‘/home/ubuntu/hebei‘ select * where t.province=‘hebei‘
298 insert overwrite local directory ‘/home/ubuntu/henan‘ select * where t.province=‘henan‘;
299 查询
300 ---------------------
301 查询，投影查询
302 select col1,col2,...from table t;
303 查询，投影查询，指定表的别名
304 select upper(name) from test2;
305 select lower(name) from test2;
306 
307 select id,name,-age from table
308 数学函数
309 --------------------------------------------
310 select round(12.345)  //四舍五入  12
311 select ceil(12.345)   13  //天花板
312 select floor(12.345)  12  //地板
313 select rand(10)  随机数（10为随机种子）
314 
315 聚集函数
316 ------------------------------------------------
317 select count(*) from test2;
318 select max(age) from test2;
319 select min(age) from test2;
320 select avg(age) from test2;
321 select sum(age) from test2;
322 
323 去重，distinct;
324 --------------------------------------------------
325 select count(distinct name) from test2;
326 表生成函数
327 -----------------------------------------------
328 select explode(array(‘tom‘,‘tomas‘,‘tomslee‘)) from test2;   tom   tomas  tomslee
329 
330 ascii函数，字符串首个字母ascii值
331 ----------------------------------------------------------------------------------
332 select ascii(‘abc‘);  返回第一个97
333 
334 base64字符串编码,需要二进制数
335 binary 函数，可以将字符串转换成二进制数据
336 select base64(binary(‘http://localhost:8080/helloworld‘));
337 
338 类型转换
339 ----------------
340 select cast(‘120‘ as bigint)
341 字符串连接
342 -------------------------------------
343 select concat(‘120‘,‘200‘)  120200
344 
345 limit 分页查询
346 --------------------------------------
347 select * from test2,limit1,3  //offset,length
348 嵌套查询
349 ------------------------------------------
350 from(select * from test2 where province=‘hebei‘)e select e.id,e.name,e.age where e.city=‘baoding‘;
351 case when then
352 ---------------------------------------------
353 select id,name,
354 case  when age<=12 then ‘young‘
355       when age>12 and age<=13 then ‘middle‘ 
356       when age>13 and age<=15 then ‘old‘
357       else ‘too old‘
358       end as yearstate from test2;
359       
360 select id ,name n,age from test2 where n like '%t'  //语法错误，where中不能使用字段别名
361 
362 浮点数比较的规避的方案
363 ----------------------------------------------------------------------
364 select cast(0.2 as float);
365 
366 hive 的join操作，只支持等值连接
367 
368 创建customers和orders表 一对多关系
369 customers
370 create table customers(id int,name string,age int)
371 row format delimited
372 fields terminated by '\t'
373 lines terminated by '\n'
374 stored as textfile;
375 
376 orders
377 create table orders(id int,orderno string,price float,cid int)
378 row format delimited
379 fields terminated by '\t'
380 lines terminated by '\n'
381 stored as textfile;
382 
383 customers.txt 数据
384 
385 1  tom1  12
386 2  tom2  13
387 3  tom3  14
388 
389 orders.txt 数据
390 
391 1  NO001    121.34   1
392 2  NO002    123      1
393 3  NO003    232      1
394 4  NO004    32       2
395 5  NO005    324      2
396 6  NO006    234      2
397 7  NO007    5654     null
398 
399 
400 内连接  join  on
401 --------------------------------------------------------------------------------------
402 select a.id,a.name,b.id,b.orderno,b.price from customers a join orders b on a.id=b.cid;
403 
404 连接查询的优化手段，查询表的大小从左到右是递增的。
405 select c.id,c.name,c.age,o.orderno,o.price from customers  c join orders o on c.id=o.cid where  ... //right
406 select c.id,c.name,c.age,o.orderno,o.price from orders o join customers  c on c.id=o.cid where  ... //wrong
407 ------使用查询暗示
408 select /*+ streamtable(o) */ c.id,c.name,c.age,o.orderno,o.price from orders o join customers  c on c.id=o.cid where  ... //通过hint指定大表o为流式表
409 
410 left outer join
411 ----------------------------
412 select c.id,c.name,c.age,o.orderno,o.price from customers c left outer join orders o on c.id=o.cid;
413 
414 right outer join
415 select c.id ,c.name,c.age,o.orderno,o.price from customers c full outer join orders o on c.id=o.cid;  //示例为full outer join，right outer join只需替换关键字
416 
417 左半连接，select和where子句不能引用到右边的表字段
418 左表的记录在右表中一旦找到对应的记录，右侧表即停止扫描
419 -----------------------------------------------------
420 hive不支持右半连接    right semi join xxxx
421 --------------------------------------------------------
422 
423 笛卡尔积m*n
424 ----------------------------------------------------------
425 select c.id,c.name,o.orderno from customers c join orders o;
426 map 连接，一张小表，通过mapper的时候，将小表完全载入内存中
427 select /*+ mapjoin(c) */ c.id ,c.name,o.orderno from customers c join orders o;
428 
429 
430 order by  全排序,对所有数据通过一个reduce进行排序
431     select * from orders order by cid asc,price desc; ----全局排序
432 sort by  局部排序 每个reduce进行排序(局部排序)
433 select  * from orders sort by cid asc,price desc; ----局部排序
434 distribute by 等价于自定义分区函数
435 select * from orders distribute by cid sort by price desc;  局部排序
436 
437 cluster by 排序 =====distribute by ... sort by.............
438 
439 分桶采样
440 select * from orders tablesample(bucket 3 out of 10 on number );
441 按照数据块百分比采样，100块，抽取10块，如果总共1块，没有采样。
442 select* from orders tablesample(0.1 percent);
443 
444 
445 union all 联合操作 ,字段的类型和个数需要匹配。
446 ------------------------------------------------------------------
447 select id,name from customers union all select id ,orderno from orders;
448 
449 -------view视图---降低查询的复杂度   --创建视图
450 create view view_name as select...
451 create view view1 as select c.id,c.name,c.age,o.id,o.orderno,o.price from customers c left outer join orders o on c.id=o.cid;
452 通过视图直接查询
453 select * from view1 where price>200;
454 
455 使用like方式创建视图
456 create view view2 like view;
457 
458 删除视图
459 drop view if exists v2;
460 
461 
462 
463 hive 索引  创建索引，deferred rebuild 该选项时，索引为空白状态，需要rebuild才能够初始化。
464 -----------------------------------------------------------------
465 create index idx_customers_id on table customers(id) as 'org.apache.hadoop.hive.ql.index.compact.CompactIndexHandler' with deferred 
466 rebuild idxproperties ('creator'='me') in table customers_index comment 'this is a comment';
467 
468 alter index idx_customers_id on customers rebuild;  --------------重建索引
469 
470 删除索引
471 drop index idx_customers_id on customers;
472 
473 ----------桶表------------------------------------bucket
474 分区是路径，是目录，是文件逻辑隔离，有效降低查询量
475 
476 桶表  是文件,
477 创建桶表
478 create table ... clustered by (field_name) into n buckets;
479 create table  orderitems (id int,itemname string,oid int ) clustered by (oid) into 3 buckets row format delimited fields terminated by '\t' lines terminated by '\n' stored as textfile;
480 
481 
482 
483 
484 
485 
486 
487 
488 
489 
490 
491 hive在什么情况下可以避免mr操作
492 ----------------------------------------------
493 不是mr的作业就是本地模式
494 1.全表扫描，没有where字句
495    select * from test2
496 2.where 子句作用只有分区字段。也不需要mr
497 3.设置hive.exec.mode.local.auto=true
498   该属性hive会尽量使用local模式查询
499 4.其余所有的查询都会转换成mr
500 
501 
502 group by分组查询
503 ------------------------------------------------
504 select count(*),province from test2 group by province;
505 select count(*) as c,province from test2 group by province having c>3;//having组内过滤
506 
507 hive的join操作，只支持等值连接
508 --------------------------------------------------
509 
510 
511 回顾2018.10.3
512 hive
513 ------------------------------------------------------
514 1.内部表
515       数据生命周期
516 2.外部表
517       删除外部表时，并没有删除数据，只删掉了schema（rdbms）
518 3.分区表
519       表目录的子目录
520       create table xx(...) partitioned by (...)
521       load data local inpath... into table xxx partition(....)
522 4.bucket表
523 数据文件
524 
525 调优
526 ------------------------------------------------------
527 1. explain
528    解释执行计划
529    explain select sum(age) from test2;
530 2.启用limit调优，避免全表扫描，使用抽样机制
531    select * from xxx limit 1,2
532    hive.limit.optimize.enable=true
533 3.join 
534    使用map端连接(/*+ mapjoin(table) */)或查询暗示(/*+ streamtable(table) */)
535 连接查询表的大小是从左至右依次增长
536 4.设置本地模式，在单台机器上处理所有任务。
537    适用于小数据情况
538 5.并行执行job
539 如果job之间没有依赖关系，可以并发执行，缩短执行时间
540 set hive.exec.parallel=true
541 6.严格模式
542        set hive.mapred.mode=strict   //不推荐，早期使用
543        set hive.strict.checks.large.query=false  //默认false,设为true时会禁用以下操作
544                                                  //1.不指定limit的orderby
545                                                  //2.对enquiry表不指定分区进行查询
546                                                  //3.对分区表不指定分区进行查询，和数据量无关，只是个 查询模式
547        set hive.strict.checks.type.safety=true;//严格类型的安全检查，不允许以下操作
548                                                 1.bigint和string之间比较
549                                                 2.bigint和double之间比较
550        
551        set hive.strict.checks.cartesian.product=true//不允许笛卡尔积连接
552 7.调整map,reduce个数
553 set hive.exec.reducers.bytes.per.reducer=256000000//每个reduce task的字节数
554 set hive.exec.reducers.max=1009                   //reduce task个数的最大值，mapreduce.job.reduces属性为负数时，会使用该属性
555 
556 8.jvm重用
557 使同一个jvm在一个job(map*,reduce* )中执行多次,避免启动jvm的开销
558 set mapreduce.job.ubertask.enable=true;//是否启用uber，jvm重用
559 set mapreduce.job.ubertask.maxmaps=9  //uber，降低
560 set mapreduce.job.ubertask.maxreduces=1 //uber
561 
562 set hive.exec.reducers.bytes.per.reducer=100000000
563 set hive.exec.reducers.max=5
564 set mapreduce.job.reduces=3
565 
566 9.索引
567    使用index
568 10.动态分区严格模式
569    set hive.exec.dynamic.partition.mode=strict  //动态分区严格模式
570    set hive.max.dynamic.partitions=300000       //设置最大分区
571    set hive.max.dynamic.partitions.pernode=1000 //设置每个节点的最大分区数
572 11.推测执行，让mapreduce多个实例并发执行
573   set mapreduce.map.speculative=true   //map推测
574   set mapreduce.reduce.speculative=true //reduce推测
575 12.多个分组优化
576 //若多个group by 操作使用的是一个公共的字段。则这些groupby 可以生成一个mr
577 hive.multigroupby.singlereducer=true； //默认true
578 13.虚拟列
579 hive.exec.rowoffset=false //是否使用虚拟列
580 select INPUT__FILE__NAME,BLOCK__OFFSET__INSIDE__FILE from test2;//虚拟列名使用双下划线
581 
582 
583 
584 压缩
585 ------------------------------------------------------------------------
586 1.查看压缩的编解码器
587  set io.compression.codecs
588 2.set hive.exec.compress.intermediate=true
589 set mapred.compress.map.output=
590 set mapred.map.output.compression.codec=
591 3.修改map输出结构的压缩，默认defaultcodec
592 4.设置最终的job输出结果是否为压缩
593 set hive.exec.compress.output=true
594 5.使用sequencefile 作为存储格式
595 create table t_seq(...)stored as sequencefile //使用序列文件存储
596 insert into t_seq select * from test1;  //复制test1数据到t_seq
597 
598 6.控制sequencefile中map端输出文件的压缩类型,使用block压缩
599 set mapred.map.output.compression.codec=org.apache.hadoop.io.compress.SnappyCodec;
600 set hive.exec.compress.intermediate=true;
601 set hive.exec.compress.output=true;
602 set mapred.output.compression.codec=org.apache.hadoop.io.compress.GzipCodec;
603 set mapred.output.compression.type=BLOCK;
604 7.对分区进行归档
605 ------------------------------------------------------------------------
606 1.启用hive分区归档
607 set hive.archive.enabled=true;  //是否允许归档操作
608 2.对表的指定分区进行归档(只能对内部分区表进行归档，外部表不可以)
609 alter table test2 archive partition (province='hebei',city='baoding');
610 3.操作
611 create table test3 as select id ,name,age from test2;
612 注意：运行时缺少HadoopArchives.class类，查看日志/tmp/ubuntu/hive/hive.log 可见
613   复制${hadoop_home}/share/hadoop/tools/lib/hadoop-archives-xxx.jar hive/auxlib/
614   复制${hadoop_home}/share/hadoop/tools/lib/hadoop-archives-xxx.jar hive/lib/
615 函数（udf:user）
616 ------------------------
617 1.procedure+function
618 2.函数操作 show functions
619         desc(ribe) function case; //查看函数的帮助
620         desc  function extended case;//查看函数的扩展帮助
621         desc  function concat;     
622   abs()绝对值
623   
624 3.udf
625 输入一行或者多行时，输出单行。
626 4.udaf：user define aggregate function 用户自定义聚集函数
627 一行或者多行的n个列作为输入，输出一个值
628 5.UDTF表生成函数
629   n个输入,多行作为输入或者多行作为输入
630   select explode(array(1,2,3));//
631 6.自定义函数
632 1、创建一个类 
633 2.将函数导出jar包
634 eclipse --export---jar
635 2.通过hive命令将jar添加到hive的类路径
636 add jar /home/ubuntu/desktop/todate.jar
637 3.注册函数
638 create temporary function to_date as ‘com.it18zhang.myhive210.func.todate‘;
639 4.调用函数
640 desc function to_date;
641 自定义表生成函数
642 1.创建一个类UDTF
643 2.编译，打包，导出
644 mvn package -DskipTests
645 3.复制jar到ubuntu
646 4.使用hive的add jar命令，添加jar到classpath
647 5.创建临时函数
648 create temporary function forx as ‘x.x.x.xxx.forudtf‘
649 6.调用函数 
650 select forx(1,5);
651 
652 
653 
654 
655 
656 
657 
658 
659 
660 
661 
662 
663 
664 
665 
666 
667 
668  
669 
670 
671 
672 
673 
674 
675 
676 
677 
678 
679 
680
以上是关于hive-it十八掌的主要内容，如果未能解决你的问题，请参考以下文章