sh cca175-problem-07-step-01-flume.sh
Posted
tags:
篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了sh cca175-problem-07-step-01-flume.sh相关的知识,希望对你有一定的参考价值。
# Import the `orders` table from the MySQL retail_db database into HDFS at
# /user/cloudera/problem7/prework, stored as Avro data files.
# -m 1 requests a single map task for the import.
# NOTE(review): --password exposes the password on the command line; Sqoop
# also accepts -P (prompt) or --password-file — prefer those outside a
# throwaway training VM.
sqoop import \
  --table orders \
  --connect "jdbc:mysql://quickstart.cloudera:3306/retail_db" \
  --username retail_dba \
  --password cloudera \
  -m 1 \
  --target-dir /user/cloudera/problem7/prework \
  --as-avrodatafile
# Copy the imported files from HDFS into a local working directory.
# The steps are chained with && so nothing is downloaded into the wrong
# directory if mkdir or cd fails; mkdir -p makes the step safe to re-run.
# The HDFS glob is single-quoted so the local shell passes it through
# unchanged and `hadoop fs` resolves it against HDFS.
mkdir -p flume-avro &&
  cd flume-avro &&
  hadoop fs -get '/user/cloudera/problem7/prework/*' .

# Open an editor to create the Flume agent configuration file f.config
# in the current directory.
gedit f.config
# Contents of the Flume configuration file f.config (created in the
# flume-avro working directory, where `gedit f.config` was run).
# Agent name = step1 (must match the --name passed to `flume-ng agent`).

# Name the source, channel and sink
step1.sources = avro-source
step1.channels = jdbc-channel
step1.sinks = file-sink

# Source configuration: Avro source bound to localhost:11112
step1.sources.avro-source.type = avro
step1.sources.avro-source.port = 11112
step1.sources.avro-source.bind = localhost

# Describe the sink: HDFS sink writing under /user/cloudera/problem7/sink,
# with an .avro file suffix and the avro_event serializer using snappy
step1.sinks.file-sink.type = hdfs
step1.sinks.file-sink.hdfs.path = /user/cloudera/problem7/sink
step1.sinks.file-sink.hdfs.fileType = DataStream
step1.sinks.file-sink.hdfs.fileSuffix = .avro
step1.sinks.file-sink.serializer = avro_event
step1.sinks.file-sink.serializer.compressionCodec = snappy

# Describe the type of channel -- use a memory channel if the jdbc channel
# does not work
step1.channels.jdbc-channel.type = jdbc

# Bind the source and sink to the channel
# (a source takes the plural `channels`, a sink the singular `channel`)
step1.sources.avro-source.channels = jdbc-channel
step1.sinks.file-sink.channel = jdbc-channel
# Run the Flume agent named step1, reading f.config from the current
# directory. The agent stays in the foreground, so leave it running and
# issue the client command below from a second terminal.
flume-ng agent --name step1 --conf . --conf-file f.config

# Run the Flume Avro client against the agent's Avro source
# (localhost:11112, as configured in f.config).
# Set AVRO_FILE to the path of the avro file to send first, e.g.
#   AVRO_FILE=/path/to/your/file.avro
# The :? expansion aborts with a message if AVRO_FILE is unset or empty.
flume-ng avro-client -H localhost -p 11112 \
  -F "${AVRO_FILE:?set AVRO_FILE to the path of your avro file}"
以上是关于sh cca175-problem-07-step-01-flume.sh的主要内容,如果未能解决你的问题,请参考以下文章
sh cca175-problem-03-partitioning.sh
sh cca175-problem-06-import.sh
sh cca175-problem-5-step-7-validation.sh
sh cca175-problem-02-sqoop-and-hdfs.sh
sh cca175-problem-03-import-all-tables.sh
sh cca175-problem-03-create-hive-table.sh