1、下載hadoop-2.6.0.tar.gz包到本地
我解壓后放入D:\hadoop-2.6.0-cdh5.9.3\
https://github.com/steveloughran/winutils.git
選擇相應的版本
下載hadoop.dll、winutils.exe文件放入D:\hadoop-2.6.0-cdh5.9.3\hadoop-2.6.0\bin
配置環境變量
HADOOP_HOME=D:\hadoop-2.6.0-cdh5.9.3\hadoop-2.6.0
HADOOP_BIN_PATH=%HADOOP_HOME%\bin
HADOOP_PREFIX=D:\hadoop-2.6.0-cdh5.9.3\hadoop-2.6.0
PATH后增加;%HADOOP_HOME%\bin
基於官方 WordCount 修改:增加了「如果輸出目錄已存在則先刪除」的邏輯,並在啟動時打印 JVM/系統參數以便排查本地環境問題。
1 import org.apache.hadoop.conf.Configuration; 2 import org.apache.hadoop.fs.FileSystem; 3 import org.apache.hadoop.fs.Path; 4 import org.apache.hadoop.io.IntWritable; 5 import org.apache.hadoop.io.Text; 6 import org.apache.hadoop.mapreduce.Job; 7 import org.apache.hadoop.mapreduce.Mapper; 8 import org.apache.hadoop.mapreduce.Reducer; 9 import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; 10 import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; 11 import org.apache.hadoop.util.GenericOptionsParser; 12 13 import java.io.IOException; 14 import java.util.Properties; 15 import java.util.StringTokenizer; 16 17 public class WordCount { 18 19 public static class TokenizerMapper 20 extends Mapper<Object, Text, Text, IntWritable> { 21 22 private final static IntWritable one = new IntWritable(1); 23 private Text word = new Text(); 24 25 public void map(Object key, Text value, Context context) throws IOException, InterruptedException { 26 StringTokenizer itr = new StringTokenizer(value.toString()); 27 while (itr.hasMoreTokens()) { 28 word.set(itr.nextToken()); 29 context.write(word, one); 30 } 31 } 32 } 33 34 public static class IntSumReducer extends Reducer<Text, IntWritable, Text, IntWritable> { 35 private IntWritable result = new IntWritable(); 36 37 public void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException { 38 int sum = 0; 39 for (IntWritable val : values) { 40 sum += val.get(); 41 } 42 result.set(sum); 43 context.write(key, result); 44 } 45 } 46 47 48 /** 49 * 刪除指定目錄 50 * 51 * @param conf 52 * @param dirPath 53 * @throws IOException 54 */ 55 private static void deleteDir(Configuration conf, String dirPath) throws IOException { 56 FileSystem fs = FileSystem.get(conf); 57 Path targetPath = new Path(dirPath); 58 if (fs.exists(targetPath)) { 59 boolean delResult = fs.delete(targetPath, true); 60 if (delResult) { 61 System.out.println(targetPath + " has been deleted sucessfullly."); 62 } else { 63 
System.out.println(targetPath + " deletion failed."); 64 } 65 } 66 67 } 68 69 public static void main(String[] args) throws Exception { 70 Properties props = System.getProperties(); //系統屬性 71 System.out.println("Java的運行環境版本:" + props.getProperty("java.version")); 72 System.out.println("Java的運行環境供應商:" + props.getProperty("java.vendor")); 73 System.out.println("Java供應商的URL:" + props.getProperty("java.vendor.url")); 74 System.out.println("Java的安裝路徑:" + props.getProperty("java.home")); 75 System.out.println("Java的虛擬機規范版本:" + props.getProperty("java.vm.specification.version")); 76 System.out.println("Java的虛擬機規范供應商:" + props.getProperty("java.vm.specification.vendor")); 77 System.out.println("Java的虛擬機規范名稱:" + props.getProperty("java.vm.specification.name")); 78 System.out.println("Java的虛擬機實現版本:" + props.getProperty("java.vm.version")); 79 System.out.println("Java的虛擬機實現供應商:" + props.getProperty("java.vm.vendor")); 80 System.out.println("Java的虛擬機實現名稱:" + props.getProperty("java.vm.name")); 81 System.out.println("Java運行時環境規范版本:" + props.getProperty("java.specification.version")); 82 System.out.println("Java運行時環境規范供應商:" + props.getProperty("java.specification.vender")); 83 System.out.println("Java運行時環境規范名稱:" + props.getProperty("java.specification.name")); 84 System.out.println("Java的類格式版本號:" + props.getProperty("java.class.version")); 85 String jars = props.getProperty("java.class.path"); 86 String[] split = jars.split(";", -1); 87 for (String jar : split) { 88 System.out.println("Java的類路徑jar: " + jar); 89 } 90 //System.out.println("Java的類路徑:" + props.getProperty("java.class.path")); 91 String paths = props.getProperty("java.library.path"); 92 String[] pathsSplit = paths.split(";", -1); 93 for (String path : pathsSplit) { 94 System.out.println("加載庫時搜索的路徑列表:" + path); 95 } 96 //System.out.println("加載庫時搜索的路徑列表:" + props.getProperty("java.library.path")); 97 System.out.println("默認的臨時文件路徑:" + props.getProperty("java.io.tmpdir")); 98 System.out.println("一個或多個擴展目錄的路徑:" + 
props.getProperty("java.ext.dirs")); 99 System.out.println("操作系統的名稱:" + props.getProperty("os.name")); 100 System.out.println("操作系統的構架:" + props.getProperty("os.arch")); 101 System.out.println("操作系統的版本:" + props.getProperty("os.version")); 102 System.out.println("文件分隔符:" + props.getProperty("file.separator")); //在 unix 系統中是"/" 103 System.out.println("路徑分隔符:" + props.getProperty("path.separator")); //在 unix 系統中是":" 104 System.out.println("行分隔符:" + props.getProperty("line.separator")); //在 unix 系統中是"/n" 105 System.out.println("用戶的賬戶名稱:" + props.getProperty("user.name")); 106 System.out.println("用戶的主目錄:" + props.getProperty("user.home")); 107 System.out.println("用戶的當前工作目錄:" + props.getProperty("user.dir")); 108 Configuration conf = new Configuration(); 109 String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs(); 110 if (otherArgs.length < 2) { 111 System.err.println("Usage: wordcount <in> [<in>...] <out>"); 112 System.exit(2); 113 } 114 115 //先刪除output目錄 116 deleteDir(conf, otherArgs[otherArgs.length - 1]); 117 118 Job job = Job.getInstance(conf, "word count"); 119 job.setJarByClass(WordCount.class); 120 job.setMapperClass(TokenizerMapper.class); 121 job.setCombinerClass(IntSumReducer.class); 122 job.setReducerClass(IntSumReducer.class); 123 job.setOutputKeyClass(Text.class); 124 job.setOutputValueClass(IntWritable.class); 125 for (int i = 0; i < otherArgs.length - 1; ++i) { 126 FileInputFormat.addInputPath(job, new Path(otherArgs[i])); 127 } 128 FileOutputFormat.setOutputPath(job, 129 new Path(otherArgs[otherArgs.length - 1])); 130 System.exit(job.waitForCompletion(true) ? 0 : 1); 131 } 132 }
pom依賴
1 <dependency> 2 <groupId>org.apache.hadoop</groupId> 3 <artifactId>hadoop-client</artifactId> 4 <version>2.6.0</version> 5 </dependency> 6 <dependency> 7 <groupId>org.apache.hadoop</groupId> 8 <artifactId>hadoop-common</artifactId> 9 <version>2.6.0</version> 10 </dependency> 11 <dependency> 12 <groupId>org.apache.hadoop</groupId> 13 <artifactId>hadoop-mapreduce-client-jobclient</artifactId> 14 <version>2.6.0</version> 15 </dependency>
添加本地依賴



集群信息
hdfs-site.xml
core-site.xml
放入resources 文件
給出輸入輸出參數運行即可
確保本地配置文件(hdfs-site.xml、core-site.xml)和 Maven 依賴都被正確加載。
