1. Download the hadoop-2.6.0.tar.gz package and extract it locally. I extracted mine to D:\hadoop-2.6.0-cdh5.9.3\.
2. Get the Windows native binaries from https://github.com/steveloughran/winutils.git, pick the directory matching your Hadoop version, and copy hadoop.dll and winutils.exe into D:\hadoop-2.6.0-cdh5.9.3\hadoop-2.6.0\bin (a quick sanity check is sketched below).
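To verify the two files landed where Hadoop expects them, a minimal sketch (the bin path is the one from my setup above; adjust to yours):

import java.io.File;

public class WinutilsCheck {
    public static void main(String[] args) {
        // Path from the setup above; change it to your own install location.
        String bin = "D:\\hadoop-2.6.0-cdh5.9.3\\hadoop-2.6.0\\bin";
        for (String name : new String[]{"winutils.exe", "hadoop.dll"}) {
            File f = new File(bin, name);
            System.out.println(f.getAbsolutePath() + (f.exists() ? " found" : " MISSING"));
        }
    }
}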
3. Configure the environment variables (a programmatic fallback is sketched after this list):

HADOOP_HOME=D:\hadoop-2.6.0-cdh5.9.3\hadoop-2.6.0
HADOOP_BIN_PATH=%HADOOP_HOME%\bin
HADOOP_PREFIX=D:\hadoop-2.6.0-cdh5.9.3\hadoop-2.6.0

and append ;%HADOOP_HOME%\bin to PATH.
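If you cannot (or prefer not to) change system-wide environment variables, Hadoop's Windows shell wrapper also honors the hadoop.home.dir JVM system property, so a few lines like the following at the top of main() are a workable fallback (a sketch; the path is my install location):

// Hadoop's Shell class checks the hadoop.home.dir system property before
// falling back to the HADOOP_HOME environment variable.
if (System.getenv("HADOOP_HOME") == null) {
    System.setProperty("hadoop.home.dir", "D:\\hadoop-2.6.0-cdh5.9.3\\hadoop-2.6.0");
}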
4. The driver below is based on the official WordCount example, with two additions: it deletes the output directory if it already exists, and it prints the JVM system properties at startup.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;

import java.io.IOException;
import java.util.Properties;
import java.util.StringTokenizer;

public class WordCount {

    public static class TokenizerMapper
            extends Mapper<Object, Text, Text, IntWritable> {

        private final static IntWritable one = new IntWritable(1);
        private Text word = new Text();

        public void map(Object key, Text value, Context context) throws IOException, InterruptedException {
            StringTokenizer itr = new StringTokenizer(value.toString());
            while (itr.hasMoreTokens()) {
                word.set(itr.nextToken());
                context.write(word, one);
            }
        }
    }

    public static class IntSumReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
        private IntWritable result = new IntWritable();

        public void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
            int sum = 0;
            for (IntWritable val : values) {
                sum += val.get();
            }
            result.set(sum);
            context.write(key, result);
        }
    }

    /**
     * Recursively delete the given directory if it exists.
     *
     * @param conf    job configuration (determines which FileSystem is targeted)
     * @param dirPath directory to delete
     * @throws IOException on filesystem errors
     */
    private static void deleteDir(Configuration conf, String dirPath) throws IOException {
        FileSystem fs = FileSystem.get(conf);
        Path targetPath = new Path(dirPath);
        if (fs.exists(targetPath)) {
            boolean delResult = fs.delete(targetPath, true);
            if (delResult) {
                System.out.println(targetPath + " has been deleted successfully.");
            } else {
                System.out.println(targetPath + " deletion failed.");
            }
        }
    }

    public static void main(String[] args) throws Exception {
        Properties props = System.getProperties(); // JVM system properties
        System.out.println("Java version: " + props.getProperty("java.version"));
        System.out.println("Java vendor: " + props.getProperty("java.vendor"));
        System.out.println("Java vendor URL: " + props.getProperty("java.vendor.url"));
        System.out.println("Java home: " + props.getProperty("java.home"));
        System.out.println("JVM specification version: " + props.getProperty("java.vm.specification.version"));
        System.out.println("JVM specification vendor: " + props.getProperty("java.vm.specification.vendor"));
        System.out.println("JVM specification name: " + props.getProperty("java.vm.specification.name"));
        System.out.println("JVM implementation version: " + props.getProperty("java.vm.version"));
        System.out.println("JVM implementation vendor: " + props.getProperty("java.vm.vendor"));
        System.out.println("JVM implementation name: " + props.getProperty("java.vm.name"));
        System.out.println("JRE specification version: " + props.getProperty("java.specification.version"));
        System.out.println("JRE specification vendor: " + props.getProperty("java.specification.vendor"));
        System.out.println("JRE specification name: " + props.getProperty("java.specification.name"));
        System.out.println("Class format version: " + props.getProperty("java.class.version"));
        // ';' separates classpath and library-path entries on Windows.
        String jars = props.getProperty("java.class.path");
        String[] split = jars.split(";", -1);
        for (String jar : split) {
            System.out.println("Classpath entry: " + jar);
        }
        String paths = props.getProperty("java.library.path");
        String[] pathsSplit = paths.split(";", -1);
        for (String path : pathsSplit) {
            System.out.println("Native library search path: " + path);
        }
        System.out.println("Default temp directory: " + props.getProperty("java.io.tmpdir"));
        System.out.println("Extension directories: " + props.getProperty("java.ext.dirs"));
        System.out.println("OS name: " + props.getProperty("os.name"));
        System.out.println("OS architecture: " + props.getProperty("os.arch"));
        System.out.println("OS version: " + props.getProperty("os.version"));
        System.out.println("File separator: " + props.getProperty("file.separator")); // "/" on Unix
        System.out.println("Path separator: " + props.getProperty("path.separator")); // ":" on Unix
        System.out.println("Line separator: " + props.getProperty("line.separator")); // "\n" on Unix
        System.out.println("User name: " + props.getProperty("user.name"));
        System.out.println("User home directory: " + props.getProperty("user.home"));
        System.out.println("Working directory: " + props.getProperty("user.dir"));

        Configuration conf = new Configuration();
        String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
        if (otherArgs.length < 2) {
            System.err.println("Usage: wordcount <in> [<in>...] <out>");
            System.exit(2);
        }

        // Delete the output directory first so the job does not fail on an existing path.
        deleteDir(conf, otherArgs[otherArgs.length - 1]);

        Job job = Job.getInstance(conf, "word count");
        job.setJarByClass(WordCount.class);
        job.setMapperClass(TokenizerMapper.class);
        job.setCombinerClass(IntSumReducer.class);
        job.setReducerClass(IntSumReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        for (int i = 0; i < otherArgs.length - 1; ++i) {
            FileInputFormat.addInputPath(job, new Path(otherArgs[i]));
        }
        FileOutputFormat.setOutputPath(job,
                new Path(otherArgs[otherArgs.length - 1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
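Two notes on the driver: FileOutputFormat refuses to run a job whose output path already exists (it throws FileAlreadyExistsException), which is why deleteDir clears it first; and because FileSystem.get(conf) resolves against fs.defaultFS, the deletion targets HDFS rather than the local disk once the cluster's core-site.xml is on the classpath (step 6 below).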
5. Maven dependencies (pom.xml):

<dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-client</artifactId>
    <version>2.6.0</version>
</dependency>
<dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-common</artifactId>
    <version>2.6.0</version>
</dependency>
<dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-mapreduce-client-jobclient</artifactId>
    <version>2.6.0</version>
</dependency>
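Note the version mismatch: the local install is the CDH build (2.6.0-cdh5.9.3) while the pom pulls the stock Apache 2.6.0 artifacts, which are generally compatible for a simple job like this. If you want an exact match, Cloudera publishes artifacts versioned 2.6.0-cdh5.9.3, which requires adding the Cloudera repository (https://repository.cloudera.com/artifactory/cloudera-repos/) to the pom.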
6. Add the cluster configuration as local resources: take the cluster's hdfs-site.xml and core-site.xml and put them in the project's resources directory (src/main/resources in a Maven layout) so that they end up on the classpath. A quick way to confirm they loaded is sketched below.
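To check that Configuration actually picked the cluster files up from the classpath, a minimal sketch (fs.defaultFS falls back to the built-in default when core-site.xml is missing):

Configuration conf = new Configuration();
// Prints the cluster NameNode URI if core-site.xml was loaded from the
// classpath; prints the built-in default "file:///" if it was not.
System.out.println("fs.defaultFS = " + conf.get("fs.defaultFS"));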
7. Run the job with the input and output paths as program arguments, making sure both the local resource files and the Maven dependencies are on the runtime classpath.
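For example, hypothetical program arguments (one or more inputs first, the output directory last, which deleteDir clears on reruns):

/user/me/input /user/me/output

With the cluster's core-site.xml on the classpath, plain paths like these resolve against fs.defaultFS, i.e. against HDFS rather than the local filesystem.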