Partitioner


使用自定義Partitioner來處理手機上網日誌信息

為什麼要使用分區?

  1.根據業務需要,產生多個輸出文件(每個分區由一個reduce任務處理,並輸出一個文件)
  2.多個reduce任務並行運行,提高整體job的運行效率

  1 package partitioner;
  2 
  3 import java.io.DataInput;
  4 import java.io.DataOutput;
  5 import java.io.IOException;
  6 
  7 import org.apache.hadoop.conf.Configuration;
  8 import org.apache.hadoop.fs.Path;
  9 import org.apache.hadoop.io.LongWritable;
 10 import org.apache.hadoop.io.Text;
 11 import org.apache.hadoop.io.Writable;
 12 import org.apache.hadoop.mapreduce.Job;
 13 import org.apache.hadoop.mapreduce.Mapper;
 14 import org.apache.hadoop.mapreduce.Partitioner;
 15 import org.apache.hadoop.mapreduce.Reducer;
 16 import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
 17 import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
 18 import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
 19 import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
 20 import org.apache.hadoop.mapreduce.lib.partition.HashPartitioner;
 21 /**
 22  * 分區必須打包jar運行  23  *
 24  */
 25 public class KpiApp {
 26     static final String INPUT_PATH = "hdfs://chaoren:9000/wlan";//wlan是個文件夾,日志文件放在/wlan目錄下
 27     static final String OUT_PATH = "hdfs://chaoren:9000/out";
 28 
 29     public static void main(String[] args) throws Exception {
 30         final Job job = new Job(new Configuration(),
 31                 KpiApp.class.getSimpleName());
 32         
 33         //打包運行
 34         job.setJarByClass(KpiApp.class);  35         
 36         // 1.1 指定輸入文件路徑
 37         FileInputFormat.setInputPaths(job, INPUT_PATH);
 38         // 指定哪個類用來格式化輸入文件
 39         job.setInputFormatClass(TextInputFormat.class);
 40 
 41         // 1.2指定自定義的Mapper類
 42         job.setMapperClass(MyMapper.class);
 43         // 指定輸出<k2,v2>的類型
 44         job.setMapOutputKeyClass(Text.class);
 45         job.setMapOutputValueClass(KpiWritable.class);
 46 
 47         // 1.3 指定分區類
 48         job.setPartitionerClass(KpiPartitioner.class);
 49         job.setNumReduceTasks(2);//分成兩個區
 50 
 51         // 1.4 TODO 排序、分區
 52 
 53         // 1.5 TODO (可選)歸約
 54 
 55         // 2.2 指定自定義的reduce類
 56         job.setReducerClass(MyReducer.class);
 57         // 指定輸出<k3,v3>的類型
 58         job.setOutputKeyClass(Text.class);
 59         job.setOutputValueClass(KpiWritable.class);
 60 
 61         // 2.3 指定輸出到哪里
 62         FileOutputFormat.setOutputPath(job, new Path(OUT_PATH));
 63         // 設定輸出文件的格式化類
 64         job.setOutputFormatClass(TextOutputFormat.class);
 65 
 66         // 把代碼提交給JobTracker執行
 67         job.waitForCompletion(true);
 68     }
 69 
 70     static class KpiPartitioner extends HashPartitioner<Text, KpiWritable>{
 71 
 72         @Override
 73         public int getPartition(Text key, KpiWritable value, int numReduceTasks) {
 74             return (key.toString().length() == 11) ? 0 : 1;//0代表的是手機號碼 1代表非手機號碼
 75         }
 76         
 77     }
 78     
 79     static class MyMapper extends Mapper<LongWritable, Text, Text, KpiWritable> {
 80         protected void map(
 81                 LongWritable key,
 82                 Text value,
 83                 org.apache.hadoop.mapreduce.Mapper<LongWritable, Text, Text, KpiWritable>.Context context)
 84                 throws IOException, InterruptedException {
 85             final String[] splited = value.toString().split("\t");
 86             final String msisdn = splited[1];
 87             final Text k2 = new Text(msisdn);
 88             final KpiWritable v2 = new KpiWritable(splited[6], splited[7],
 89                     splited[8], splited[9]);
 90             context.write(k2, v2);
 91         };
 92     }
 93 
 94     static class MyReducer extends
 95             Reducer<Text, KpiWritable, Text, KpiWritable> {
 96         /**
 97          * @param k2
 98          *            表示整個文件中不同的手機號碼
 99          * @param v2s
100          *            表示該手機號在不同時段的流量的集合
101          */
102         protected void reduce(
103                 Text k2,
104                 java.lang.Iterable<KpiWritable> v2s,
105                 org.apache.hadoop.mapreduce.Reducer<Text, KpiWritable, Text, KpiWritable>.Context context)
106                 throws IOException, InterruptedException {
107             long upPackNum = 0L;
108             long downPackNum = 0L;
109             long upPayLoad = 0L;
110             long downPayLoad = 0L;
111 
112             for (KpiWritable kpiWritable : v2s) {
113                 upPackNum += kpiWritable.upPackNum;
114                 downPackNum += kpiWritable.downPackNum;
115                 upPayLoad += kpiWritable.upPayLoad;
116                 downPayLoad += kpiWritable.downPayLoad;
117             }
118 
119             final KpiWritable v3 = new KpiWritable(upPackNum + "", downPackNum
120                     + "", upPayLoad + "", downPayLoad + "");
121             context.write(k2, v3);
122         };
123     }
124 }
125 
126 class KpiWritable implements Writable {
127     long upPackNum;
128     long downPackNum;
129     long upPayLoad;
130     long downPayLoad;
131 
132     public KpiWritable() {
133     }
134 
135     public KpiWritable(String upPackNum, String downPackNum, String upPayLoad,
136             String downPayLoad) {
137         this.upPackNum = Long.parseLong(upPackNum);
138         this.downPackNum = Long.parseLong(downPackNum);
139         this.upPayLoad = Long.parseLong(upPayLoad);
140         this.downPayLoad = Long.parseLong(downPayLoad);
141     }
142 
143     public void readFields(DataInput in) throws IOException {
144         this.upPackNum = in.readLong();
145         this.downPackNum = in.readLong();
146         this.upPayLoad = in.readLong();
147         this.downPayLoad = in.readLong();
148     }
149 
150     public void write(DataOutput out) throws IOException {
151         out.writeLong(upPackNum);
152         out.writeLong(downPackNum);
153         out.writeLong(upPayLoad);
154         out.writeLong(downPayLoad);
155     }
156 
157     @Override
158     public String toString() {
159         return upPackNum + "\t" + downPackNum + "\t" + upPayLoad + "\t"
160                 + downPayLoad;
161     }
162 }

 將上面代碼打包導出為jar,複製到Linux中,然後在命令行下用 hadoop jar 命令執行並查看結果(輸出目錄下會有 part-r-00000 和 part-r-00001 兩個文件,分別對應手機號碼和非手機號碼),也可以在chaoren:50030中查看到作業的相關情況

 

 


免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯繫本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM