hadoop通过map/reduce统计tomcat日志统计请求类型

  • hadoop通过map/reduce统计tomcat日志统计请求类型已关闭评论
  • 16 views
  • A+
所属分类:hadoop

Hadoop 案例7-----日志分析:分析非结构化文件

1、需求:

根据tomcat日志统计各url的访问情况,日志样例如下,
 结果为:访问方式、URL、访问量
127.0.0.1 - - [03/Jul/2014:23:36:38 +0800] "GET /course/detail/3.htm HTTP/1.0" 200 38435 0.038
182.131.89.195 - - [03/Jul/2014:23:37:43 +0800] "GET / HTTP/1.0" 301 - 0.000
127.0.0.1 - - [03/Jul/2014:23:38:27 +0800] "POST /service/notes/addViewTimes_23.htm HTTP/1.0" 200 2 0.003
127.0.0.1 - - [03/Jul/2014:23:39:03 +0800] "GET /html/notes/20140617/779.html HTTP/1.0" 200 69539 0.046
127.0.0.1 - - [03/Jul/2014:23:43:00 +0800] "GET /html/notes/20140318/24.html HTTP/1.0" 200 67171 0.049
127.0.0.1 - - [03/Jul/2014:23:43:59 +0800] "POST /service/notes/addViewTimes_779.htm HTTP/1.0" 200 1 0.003
127.0.0.1 - - [03/Jul/2014:23:45:51 +0800] "GET / HTTP/1.0" 200 70044 0.060
127.0.0.1 - - [03/Jul/2014:23:46:17 +0800] "GET /course/list/73.htm HTTP/1.0" 200 12125 0.010
127.0.0.1 - - [03/Jul/2014:23:46:58 +0800] "GET /html/notes/20140609/542.html HTTP/1.0" 200 94971 0.077
127.0.0.1 - - [03/Jul/2014:23:48:31 +0800] "POST /service/notes/addViewTimes_24.htm HTTP/1.0" 200 2 0.003
127.0.0.1 - - [03/Jul/2014:23:48:34 +0800] "POST /service/notes/addViewTimes_542.htm HTTP/1.0" 200 2 0.003
127.0.0.1 - - [03/Jul/2014:23:49:31 +0800] "GET /notes/index-top-3.htm HTTP/1.0" 200 53494 0.041
127.0.0.1 - - [03/Jul/2014:23:50:55 +0800] "GET /html/notes/20140609/544.html HTTP/1.0" 200 183694 0.076
127.0.0.1 - - [03/Jul/2014:23:53:32 +0800] "POST /service/notes/addViewTimes_544.htm HTTP/1.0" 200 2 0.004
127.0.0.1 - - [03/Jul/2014:23:54:53 +0800] "GET /html/notes/20140620/900.html HTTP/1.0" 200 151770 0.054
127.0.0.1 - - [03/Jul/2014:23:57:42 +0800] "GET /html/notes/20140620/872.html HTTP/1.0" 200 52373 0.034
127.0.0.1 - - [03/Jul/2014:23:58:17 +0800] "POST /service/notes/addViewTimes_900.htm HTTP/1.0" 200 2 0.003
127.0.0.1 - - [03/Jul/2014:23:58:51 +0800] "GET / HTTP/1.0" 200 70044 0.057

2、编写代码

map

package com.hadoop.twenty;
import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

/**
 * Mapper that turns one access-log line into a {@code ("<METHOD> <URL>", 1)}
 * pair so that a summing reducer can count hits per request.
 *
 * <p>Only GET and POST requests are emitted; every other line is skipped.
 */
public class LogMap extends Mapper<LongWritable, Text, Text, IntWritable> {

    /** Constant count of one, written for every recognised request. */
    private final IntWritable counter = new IntWritable(1);

    /** Output key reused across calls instead of allocating a Text per record. */
    private final Text requestKey = new Text();

    /**
     * Emits {@code (request, 1)} for a log line that contains a GET or POST request.
     *
     * @param key     input key supplied by the framework (not used)
     * @param value   one line of the access log
     * @param context sink for the output pair
     * @throws IOException          if writing the output pair fails
     * @throws InterruptedException if writing the output pair is interrupted
     */
    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        String result = handleLog(value.toString());
        if (result != null && result.length() > 0) {
            requestKey.set(result);
            context.write(requestKey, counter);
        }
    }

    /**
     * Extracts {@code "<METHOD> <URL>"} from an access-log line.
     *
     * <p>The request is expected in the first double-quoted field of the line,
     * e.g. {@code "GET /course/detail/3.htm HTTP/1.0"}. The trailing protocol
     * token is removed whatever its version, so HTTP/1.1 lines are handled
     * as well as HTTP/1.0 lines.
     *
     * @param line one line of the access log
     * @return {@code null} for a null or empty line; an empty string when the
     *         line holds no quoted GET/POST request; otherwise the method and
     *         URL separated by a single space as they appear in the log
     */
    private String handleLog(String line) {
        if (line == null || line.length() == 0) {
            return null;
        }

        // Locate the quoted request field: "<METHOD> <URL> <PROTOCOL>".
        int open = line.indexOf('"');
        int close = (open < 0) ? -1 : line.indexOf('"', open + 1);
        if (close < 0) {
            return "";
        }
        String request = line.substring(open + 1, close).trim();

        // Test the method at the start of the request field only, so a URL
        // that merely contains "GET" or "POST" cannot be mistaken for it.
        if (!request.startsWith("GET ") && !request.startsWith("POST ")) {
            return "";
        }

        // Drop the protocol suffix regardless of its version number.
        int protocol = request.lastIndexOf(" HTTP/");
        String methodAndUrl = (protocol < 0) ? request : request.substring(0, protocol);
        return methodAndUrl.trim();
    }

}

 

reduce

 

package com.hadoop.twenty;

import java.io.IOException;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

/**
 * Reducer that adds up all counts received for one key and writes the key
 * together with that total.
 */
public class LogReduce extends Reducer<Text, IntWritable, Text, IntWritable> {

    /**
     * Sums the values grouped under {@code key} and emits {@code (key, sum)}.
     *
     * @param key     the grouped key
     * @param values  the counts collected for {@code key}
     * @param context sink for the output pair
     * @throws IOException          if writing the output pair fails
     * @throws InterruptedException if writing the output pair is interrupted
     */
    @Override
    protected void reduce(Text key, Iterable<IntWritable> values, Context context)
            throws IOException, InterruptedException {
        int total = 0;
        for (IntWritable count : values) {
            total = total + count.get();
        }

        IntWritable result = new IntWritable(total);
        context.write(key, result);
    }

}

 

main

 

package com.hadoop.twenty;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

/**
 * Driver that configures and submits the "log-job" MapReduce job, using
 * {@link LogMap} as mapper and {@link LogReduce} as reducer.
 */
public class JobMain {

    /**
     * Runs the job and exits with 0 on success, 1 on job failure, or 2 when
     * the command line is incomplete.
     *
     * <p>An existing output path is deleted recursively before the job starts.
     *
     * @param args {@code args[0]} is the input path, {@code args[1]} the output path
     * @throws Exception if job setup, filesystem access, or job execution fails
     */
    public static void main(String[] args) throws Exception {
        // Fail with a usage hint instead of an ArrayIndexOutOfBoundsException.
        if (args.length < 2) {
            System.err.println("Usage: JobMain <input path> <output path>");
            System.exit(2);
        }

        Configuration conf = new Configuration();
        Job job = new Job(conf, "log-job");
        job.setJarByClass(JobMain.class);

        job.setMapperClass(LogMap.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);

        // The reducer only sums counts and its input and output types match,
        // so it can also pre-aggregate map output as a combiner.
        job.setCombinerClass(LogReduce.class);

        job.setReducerClass(LogReduce.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        FileInputFormat.addInputPath(job, new Path(args[0]));

        // Resolve the filesystem from the output path itself so that a path
        // with an explicit scheme is checked on the filesystem it names.
        Path output = new Path(args[1]);
        FileSystem fs = output.getFileSystem(conf);
        if (fs.exists(output)) {
            fs.delete(output, true);
        }
        FileOutputFormat.setOutputPath(job, output);

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }

}

3、执行

a、准备数据,打jar包

b、数据上传到hdfs
 [root@x00 ~]# sudo -u hdfs hadoop fs -mkdir /user/data/twenty
 [root@x00 ~]# sudo -u hdfs hadoop fs -put /opt/test/hd/twenty.txt /user/data/twenty
 [root@x00 ~]# sudo -u hdfs hadoop fs -ls /user/data/twenty
 Found 2 items
 -rw-r--r-- 3 hdfs hadoop 501886 2014-07-04 10:21 /user/data/twenty/twenty.txt

c、执行
 sudo -u hdfs hadoop jar rn.jar com.hadoop.twenty.JobMain /user/data/twenty/ /user/data/twenty/output

d、查看结果
 [root@x00 ~]# sudo -u hdfs hadoop fs -ls /user/data/twenty/output
 Found 3 items
 -rw-r--r-- 3 hdfs hadoop 0 2014-07-04 10:29 /user/data/twenty/output/_SUCCESS
 drwxr-xr-x - hdfs hadoop 0 2014-07-04 10:29 /user/data/twenty/output/_logs
 -rw-r--r-- 3 hdfs hadoop 48053 2014-07-04 10:29 /user/data/twenty/output/part-r-00000
 [root@x00 ~]# sudo -u hdfs hadoop fs -cat /user/data/twenty/output/part*
  • 安卓客户端下载
  • 微信扫一扫
  • weinxin
  • 微信公众号
  • 微信公众号扫一扫
  • weinxin
avatar