idea+Windows+hadoop本地調試


1、下載hadoop-2.6.0.tar.gz包到本地

解壓後放入 D:\hadoop-2.6.0-cdh5.9.3\

https://github.com/steveloughran/winutils.git

選擇相應的版本

下載hadoop.dll、winutils.exe文件放入D:\hadoop-2.6.0-cdh5.9.3\hadoop-2.6.0\bin

配置環境變量

HADOOP_HOME=D:\hadoop-2.6.0-cdh5.9.3\hadoop-2.6.0

HADOOP_BIN_PATH=%HADOOP_HOME%\bin

HADOOP_PREFIX=D:\hadoop-2.6.0-cdh5.9.3\hadoop-2.6.0

在 PATH 變量末尾追加 ;%HADOOP_HOME%\bin

基於官方 WordCount 修改,增加了兩處:如果輸出目錄已存在就先刪除的邏輯,以及啟動時打印系統參數

  1 import org.apache.hadoop.conf.Configuration;
  2 import org.apache.hadoop.fs.FileSystem;
  3 import org.apache.hadoop.fs.Path;
  4 import org.apache.hadoop.io.IntWritable;
  5 import org.apache.hadoop.io.Text;
  6 import org.apache.hadoop.mapreduce.Job;
  7 import org.apache.hadoop.mapreduce.Mapper;
  8 import org.apache.hadoop.mapreduce.Reducer;
  9 import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
 10 import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
 11 import org.apache.hadoop.util.GenericOptionsParser;
 12 
 13 import java.io.IOException;
 14 import java.util.Properties;
 15 import java.util.StringTokenizer;
 16 
 17 public class WordCount {
 18 
 19     public static class TokenizerMapper
 20             extends Mapper<Object, Text, Text, IntWritable> {
 21 
 22         private final static IntWritable one = new IntWritable(1);
 23         private Text word = new Text();
 24 
 25         public void map(Object key, Text value, Context context) throws IOException, InterruptedException {
 26             StringTokenizer itr = new StringTokenizer(value.toString());
 27             while (itr.hasMoreTokens()) {
 28                 word.set(itr.nextToken());
 29                 context.write(word, one);
 30             }
 31         }
 32     }
 33 
 34     public static class IntSumReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
 35         private IntWritable result = new IntWritable();
 36 
 37         public void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
 38             int sum = 0;
 39             for (IntWritable val : values) {
 40                 sum += val.get();
 41             }
 42             result.set(sum);
 43             context.write(key, result);
 44         }
 45     }
 46 
 47 
 48     /**
 49      * 刪除指定目錄
 50      *
 51      * @param conf
 52      * @param dirPath
 53      * @throws IOException
 54      */
 55     private static void deleteDir(Configuration conf, String dirPath) throws IOException {
 56         FileSystem fs = FileSystem.get(conf);
 57         Path targetPath = new Path(dirPath);
 58         if (fs.exists(targetPath)) {
 59             boolean delResult = fs.delete(targetPath, true);
 60             if (delResult) {
 61                 System.out.println(targetPath + " has been deleted sucessfullly.");
 62             } else {
 63                 System.out.println(targetPath + " deletion failed.");
 64             }
 65         }
 66 
 67     }
 68 
 69     public static void main(String[] args) throws Exception {
 70         Properties props = System.getProperties(); //系統屬性
 71         System.out.println("Java的運行環境版本:" + props.getProperty("java.version"));
 72         System.out.println("Java的運行環境供應商:" + props.getProperty("java.vendor"));
 73         System.out.println("Java供應商的URL:" + props.getProperty("java.vendor.url"));
 74         System.out.println("Java的安裝路徑:" + props.getProperty("java.home"));
 75         System.out.println("Java的虛擬機規范版本:" + props.getProperty("java.vm.specification.version"));
 76         System.out.println("Java的虛擬機規范供應商:" + props.getProperty("java.vm.specification.vendor"));
 77         System.out.println("Java的虛擬機規范名稱:" + props.getProperty("java.vm.specification.name"));
 78         System.out.println("Java的虛擬機實現版本:" + props.getProperty("java.vm.version"));
 79         System.out.println("Java的虛擬機實現供應商:" + props.getProperty("java.vm.vendor"));
 80         System.out.println("Java的虛擬機實現名稱:" + props.getProperty("java.vm.name"));
 81         System.out.println("Java運行時環境規范版本:" + props.getProperty("java.specification.version"));
 82         System.out.println("Java運行時環境規范供應商:" + props.getProperty("java.specification.vender"));
 83         System.out.println("Java運行時環境規范名稱:" + props.getProperty("java.specification.name"));
 84         System.out.println("Java的類格式版本號:" + props.getProperty("java.class.version"));
 85         String jars = props.getProperty("java.class.path");
 86         String[] split = jars.split(";", -1);
 87         for (String jar : split) {
 88             System.out.println("Java的類路徑jar: " + jar);
 89         }
 90         //System.out.println("Java的類路徑:" + props.getProperty("java.class.path"));
 91         String paths = props.getProperty("java.library.path");
 92         String[] pathsSplit = paths.split(";", -1);
 93         for (String path : pathsSplit) {
 94             System.out.println("加載庫時搜索的路徑列表:" + path);
 95         }
 96         //System.out.println("加載庫時搜索的路徑列表:" + props.getProperty("java.library.path"));
 97         System.out.println("默認的臨時文件路徑:" + props.getProperty("java.io.tmpdir"));
 98         System.out.println("一個或多個擴展目錄的路徑:" + props.getProperty("java.ext.dirs"));
 99         System.out.println("操作系統的名稱:" + props.getProperty("os.name"));
100         System.out.println("操作系統的構架:" + props.getProperty("os.arch"));
101         System.out.println("操作系統的版本:" + props.getProperty("os.version"));
102         System.out.println("文件分隔符:" + props.getProperty("file.separator"));   //在 unix 系統中是"/"
103         System.out.println("路徑分隔符:" + props.getProperty("path.separator"));   //在 unix 系統中是":"
104         System.out.println("行分隔符:" + props.getProperty("line.separator"));   //在 unix 系統中是"/n"
105         System.out.println("用戶的賬戶名稱:" + props.getProperty("user.name"));
106         System.out.println("用戶的主目錄:" + props.getProperty("user.home"));
107         System.out.println("用戶的當前工作目錄:" + props.getProperty("user.dir"));
108         Configuration conf = new Configuration();
109         String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
110         if (otherArgs.length < 2) {
111             System.err.println("Usage: wordcount <in> [<in>...] <out>");
112             System.exit(2);
113         }
114 
115         //先刪除output目錄
116         deleteDir(conf, otherArgs[otherArgs.length - 1]);
117 
118         Job job = Job.getInstance(conf, "word count");
119         job.setJarByClass(WordCount.class);
120         job.setMapperClass(TokenizerMapper.class);
121         job.setCombinerClass(IntSumReducer.class);
122         job.setReducerClass(IntSumReducer.class);
123         job.setOutputKeyClass(Text.class);
124         job.setOutputValueClass(IntWritable.class);
125         for (int i = 0; i < otherArgs.length - 1; ++i) {
126             FileInputFormat.addInputPath(job, new Path(otherArgs[i]));
127         }
128         FileOutputFormat.setOutputPath(job,
129                 new Path(otherArgs[otherArgs.length - 1]));
130         System.exit(job.waitForCompletion(true) ? 0 : 1);
131     }
132 }
View Code

 pom依賴

 1         <dependency>
 2             <groupId>org.apache.hadoop</groupId>
 3             <artifactId>hadoop-client</artifactId>
 4             <version>2.6.0</version>
 5         </dependency>
 6         <dependency>
 7             <groupId>org.apache.hadoop</groupId>
 8             <artifactId>hadoop-common</artifactId>
 9             <version>2.6.0</version>
10         </dependency>
11         <dependency>
12             <groupId>org.apache.hadoop</groupId>
13             <artifactId>hadoop-mapreduce-client-jobclient</artifactId>
14             <version>2.6.0</version>
15         </dependency>
View Code

添加本地依賴

 

集群信息

hdfs-site.xml

core-site.xml

將以上兩個文件放入 resources 目錄

 

給出輸入輸出參數運行即可

確保本地文件和maven依賴都被加載到

 


免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯系本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM