I. Installing the Hadoop Plugin
1. Required environment
A Hadoop 2.0 pseudo-distributed platform, up and running.
Required archives:
eclipse-jee-luna-SR2-linux-gtk-x86_64.tar.gz
The Eclipse package for Linux; it extracts to a directory named eclipse.
hadoop2x-eclipse-plugin-master.zip
The Hadoop plugin to be installed into Eclipse; it extracts to a directory named hadoop2x-eclipse-plugin-master.
As shown in the figure, place both archives in the same directory and extract them there.
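The extraction itself is plain tar/unzip; a minimal sketch, assuming both archives sit in /home/xiaow/hadoop2.0 (the directory used throughout this article):

cd /home/xiaow/hadoop2.0
tar -zxvf eclipse-jee-luna-SR2-linux-gtk-x86_64.tar.gz
unzip hadoop2x-eclipse-plugin-master.zip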

2. Building the plugin jar
To compile the plugin source in hadoop2x-eclipse-plugin-master, the ant build tool must be installed first.
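A minimal install sketch, assuming a Debian/Ubuntu system (on CentOS, use yum instead):

sudo apt-get update
sudo apt-get install -y ant
ant -version   # verify the installation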

Then run the build command. Pay attention to the directory it is run from (shown in the next screenshot; typically hadoop2x-eclipse-plugin-master/src/contrib/eclipse-plugin, where the plugin's build.xml lives), otherwise the command will fail:

ant jar -Dversion=2.6.0 -Declipse.home='/home/xiaow/hadoop2.0/eclipse' -Dhadoop.home='/home/xiaow/hadoop2.0/hadoop-2.6.0'

Here -Dversion=2.6.0 is the Hadoop version number, -Declipse.home is the path to the eclipse package unpacked earlier, and -Dhadoop.home is the path to the Hadoop installation.

After a short wait, ant should report BUILD SUCCESSFUL.
The resulting jar, hadoop-eclipse-plugin-2.6.0.jar, ends up in /home/xiaow/hadoop2.0/hadoop2x-eclipse-plugin-master/build/contrib/eclipse-plugin; copy it into /home/xiaow/hadoop2.0/eclipse/plugins with:
cp -r /home/xiaow/hadoop2.0/hadoop2x-eclipse-plugin-master/build/contrib/eclipse-plugin/hadoop-eclipse-plugin-2.6.0.jar /home/xiaow/hadoop2.0/eclipse/plugins/
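An optional sanity check that the plugin actually landed in the plugins directory:

ls /home/xiaow/hadoop2.0/eclipse/plugins/ | grep hadoop
# expected: hadoop-eclipse-plugin-2.6.0.jar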



II. Configuring Eclipse
Next, start Eclipse.

The DFS Locations entry must appear in the Project Explorer; if it does not, one of the earlier steps probably went wrong, or try restarting Eclipse a few times.

Then follow the screenshots below (in outline: open Window → Preferences → Hadoop Map/Reduce and point it at the Hadoop installation directory, switch to the Map/Reduce perspective, and define a new Hadoop location whose hosts and ports match your core-site.xml):


With that, the Eclipse environment is set up.
III. The wordcount Program
Create the project (a Map/Reduce project):

Enter the following code:
package wordcount;

import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.mapreduce.lib.reduce.IntSumReducer;
import org.apache.hadoop.util.GenericOptionsParser;

public class wordcount {

    // Custom mapper, extending org.apache.hadoop.mapreduce.Mapper.
    // Input: byte offset / line of text; output: word / count of 1.
    public static class WordCountMap extends Mapper<LongWritable, Text, Text, IntWritable> {

        private final IntWritable one = new IntWritable(1);
        private Text word = new Text();

        @Override
        public void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            String line = value.toString();
            System.out.println(line);
            // StringTokenizer splits on whitespace (spaces, tabs, newlines).
            // To split on one specific delimiter such as a tab, use
            // line.split("\t") instead; mixing the two up (e.g. tokenizing a
            // space-separated file by tab) is why counts can come out wildly wrong.
            StringTokenizer token = new StringTokenizer(line);
            while (token.hasMoreTokens()) {
                word.set(token.nextToken());
                context.write(word, one);
            }
        }
    }

    // Custom reducer, extending org.apache.hadoop.mapreduce.Reducer.
    // Sums the counts emitted for each word.
    public static class WordCountReduce extends Reducer<Text, IntWritable, Text, IntWritable> {

        @Override
        public void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            System.out.println(key);
            int sum = 0;
            for (IntWritable val : values) {
                sum += val.get();
            }
            context.write(key, new IntWritable(sum));
        }
    }

    // Driver code: configures the job and hands it to the ResourceManager.
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "word count");
        // Run from a packaged jar
        job.setJarByClass(wordcount.class);
        // Where is the input data?
        FileInputFormat.addInputPath(job, new Path(args[0]));
        // Which mapper processes the input?
        job.setMapperClass(WordCountMap.class);
        // Map output types (only needed if they differ from the job output types)
        // job.setMapOutputKeyClass(Text.class);
        // job.setMapOutputValueClass(IntWritable.class);
        // Pre-aggregate map output locally before the shuffle
        job.setCombinerClass(IntSumReducer.class);
        // Which reducer processes the map output?
        job.setReducerClass(WordCountReduce.class);
        // Job output types
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        // job.setInputFormatClass(TextInputFormat.class);
        // job.setOutputFormatClass(TextOutputFormat.class);
        // Where does the output go? The directory must not exist yet.
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        // Submit to YARN and block until the job finishes
        System.exit(job.waitForCompletion(true) ? 0 : 1);
        /*
        // Alternative: use GenericOptionsParser and allow multiple input paths
        String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
        if (otherArgs.length < 2) {
            System.out.println("Usage: wordcount <in> [<in>...] <out>");
            System.exit(2);
        }
        for (int i = 0; i < otherArgs.length - 1; i++) {
            FileInputFormat.addInputPath(job, new Path(otherArgs[i]));
        }
        FileOutputFormat.setOutputPath(job, new Path(otherArgs[otherArgs.length - 1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
        */
    }
}
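Besides launching from Eclipse, the job can be submitted from the command line once the project is exported as a jar; a sketch, assuming the jar was exported as wordcount.jar (the jar name and HDFS paths here are examples, not from the original):

hadoop jar wordcount.jar wordcount.wordcount /user/xiaow/input /user/xiaow/output

wordcount.wordcount is the fully qualified main class (package wordcount, class wordcount).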



Import the prepared text file (the input whose words we want to count).
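It can be uploaded through the DFS Locations tree in Eclipse, or from the shell; a sketch, assuming the local file is /home/xiaow/words.txt and the HDFS input directory is /user/xiaow/input (both names are examples):

hdfs dfs -mkdir -p /user/xiaow/input
hdfs dfs -put /home/xiaow/words.txt /user/xiaow/input/
hdfs dfs -ls /user/xiaow/input    # confirm the upload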


The directory structure looks like this:

Run the MapReduce program (it takes the input path and the output path as its two arguments; see the sketch below):
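In Eclipse, the arguments go under Run As → Run Configurations → Arguments. The URIs below assume fs.defaultFS is hdfs://localhost:9000; adjust host, port, and paths to your own core-site.xml:

hdfs://localhost:9000/user/xiaow/input hdfs://localhost:9000/user/xiaow/output

Note that the output directory must not exist before the run. Once the job finishes, inspect the result:

hdfs dfs -cat /user/xiaow/output/part-r-00000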

OK, all done!
