FLUME INSTALLATION:
==================
Download the latest Flume tarball from https://flume.apache.org/download.html :
Update the HADOOP_CLASSPATH in $HADOOP_HOME/hadoop-env.sh with flume-ng-core-1.5.0.jar file:
root@centos6:~ #grep -i HADOOP_CLASSPATH /home/bigdata/hadoop-1.2.1/conf/hadoop-env.sh
# export HADOOP_CLASSPATH=
export HADOOP_CLASSPATH=/home/bigdata/apache-hive-0.13.1-bin/lib/*:/home/bigdata/apache-flume-1.5.0-bin/lib/flume-ng-core-1.5.0.jar:/home/bigdata/hadoop-1.2.1/hadoop-core-1.2.1.jar:/home/bigdata/hadoop-1.2.1/HadoopProj
root@centos6:~ #
Update /etc/profile with:
export FLUME_HOME=/home/bigdata/apache-flume-1.5.0-bin
export PATH=$HIVE_HOME/bin:$HADOOP_HOME/bin:$HBASE_HOME/bin:$FLUME_HOME/bin:$JAVA_HOME/bin:$PATH
SETUP CONFIGURATION FILE:
========================
Create a configuration file in $FLUME_HOME/conf
The following conf file listens for file events; other event (source) types include network sources etc. For the full list of source types, visit https://cwiki.apache.org/confluence/display/FLUME/Getting+Started :
root@centos6:~/apache-flume-1.5.0-bin #pwd
/home/bigdata/apache-flume-1.5.0-bin
root@centos6:~/apache-flume-1.5.0-bin #cat conf/flume-conf.conf
agent1.sources = s1
agent1.channels = c1
agent1.sinks = k1
# Define source and type of event (in this case it is exec)
agent1.sources.s1.type = exec
agent1.sources.s1.command = tail -f /tmp/esplog.log
# Define channel and type (in this case channel is stored in memory, other types are: file, database etc)
agent1.channels.c1.type = memory
agent1.channels.c1.capacity = 1000
agent1.channels.c1.transactionCapacity = 100
# Define sink and type (in this case it is hdfs, i.e an event type result is stored in hdfs)
# Default fileType outputs Writable (LongWritable) contents.
agent1.sinks.k1.type = hdfs
agent1.sinks.k1.hdfs.path = hdfs://<IP>:<Port>/user/flume/esplog.log
agent1.sinks.k1.hdfs.fileType = DataStream
# Bind source and sink to a channel
agent1.sources.s1.channels = c1
agent1.sinks.k1.channel = c1
root@centos6:~/apache-flume-1.5.0-bin #
RUN THE FLUME:
==============
root@centos6:~/apache-flume-1.5.0-bin #bin/flume-ng agent --conf ./conf --conf-file conf/flume-conf.conf --name agent1 -Dflume.root.logger=INFO,console
We can also use the debug option: -Dflume.root.logger=DEBUG,console
Each time a new entry is added to /tmp/esplog.log, Flume generates a file in hdfs://<IP>:<Port>/user/flume/esplog.log/
Example:
root@centos6:~ #hadoop dfs -cat /user/flume/esplog.log/FlumeData.1406475505975
Warning: $HADOOP_HOME is deprecated.
spi messages
spi messages
conn messages
conn messages
SA messages
ISKAMP messages
conn5007 messages
conn5008 messages
root@centos6:~ #
ANALYZE FLUME DATA WITH hive:
=============================
hive> create table flumedata (col1 string, col2 string)
> row format delimited fields terminated by ' ';
OK
Time taken: 0.935 seconds
hive> load data inpath '/user/flume/esplog.log/FlumeData.1406475505975' into table flumedata;
hive> select * from flumedata;
OK
flumedata.col1 flumedata.col2
spi messages
spi messages
conn messages
conn messages
SA messages
ISKAMP messages
conn5007 messages
conn5008 messages
Time taken: 0.521 seconds, Fetched: 8 row(s)
hive>
No comments:
Post a Comment