Hadoop 中的Hello world 代碼如下:
package com.wordcount;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

import java.io.IOException;

/**
 * Classic Hadoop MapReduce "Hello world": counts word occurrences in a text file.
 *
 * @program: hadoop_demo
 * @author: Mr.Walloce
 * @create: 2018/11/03 15:04
 **/
public class WordCount extends Configured implements Tool {

    /**
     * Mapper. Type parameters {@code <LongWritable, Text, Text, IntWritable>} are the
     * input and output key-value types: input key is the byte offset of the line in
     * the file, input value is the line itself; output is (word, 1).
     */
    static class MyMap extends Mapper<LongWritable, Text, Text, IntWritable> {
        // Reused output key — avoids allocating a new Text per emitted word.
        private final Text outKey = new Text();

        // Every word is emitted with an initial count of 1.
        private final IntWritable one = new IntWritable(1);

        /**
         * Tokenizes one input line and emits (word, 1) for each non-empty token.
         *
         * @param key     byte offset of this line within the input split
         * @param value   the line of text
         * @param context MapReduce context used to emit output pairs
         * @throws IOException          on I/O failure while writing output
         * @throws InterruptedException if the task is interrupted
         */
        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            System.out.println("map階段開始執行,,,");
            // Split on single spaces; skip the empty tokens that consecutive
            // spaces produce so blanks are not counted as words.
            // (Original also computed an unused `long index = key.get()` and
            // wrapped the loop in an always-true `words.length > 0` check.)
            for (String word : value.toString().split(" ")) {
                if (!word.isEmpty()) {
                    outKey.set(word);
                    context.write(outKey, one);
                }
            }
            System.out.println("map階段結束。。。");
        }
    }

    /**
     * Reducer. Type parameters {@code <Text, IntWritable, Text, IntWritable>} are the
     * input and output key-value types: sums all partial counts for one word.
     */
    static class MyReduce extends Reducer<Text, IntWritable, Text, IntWritable> {
        // Reused output value holding the final count for the current key.
        private final IntWritable result = new IntWritable();

        /**
         * Sums the counts of a single word and writes (word, total).
         *
         * @param key     the word
         * @param values  partial counts produced by mappers/combiners
         * @param context MapReduce context used to emit output pairs
         * @throws IOException          on I/O failure while writing output
         * @throws InterruptedException if the task is interrupted
         */
        @Override
        protected void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            System.out.println("Reduce階段開始執行...");
            int sum = 0;
            for (IntWritable value : values) {
                sum += value.get();
            }
            result.set(sum);
            System.out.println("單詞" + key.toString() + ": " + result.get());
            context.write(key, result);
            System.out.println("Reduce階段結束。。。");
        }
    }

    /**
     * Map-side combiner: same aggregation logic as {@link MyReduce}, run locally on
     * each map task's output at spill time to shrink the data shuffled to reducers.
     * Kept as a separate class so its console markers distinguish it from the
     * reduce phase in the logs.
     */
    static class MyCombiner extends Reducer<Text, IntWritable, Text, IntWritable> {
        // Reused output value holding the locally aggregated count.
        private final IntWritable result = new IntWritable();

        @Override
        protected void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            System.out.println("Combiner階段開始...");
            int sum = 0;
            for (IntWritable value : values) {
                sum += value.get();
            }
            result.set(sum);
            context.write(key, result);
            System.out.println("Combiner階段結束。。。");
        }
    }

    /**
     * Configures and submits the job.
     *
     * @param args args[0] = input path, args[1] = output path
     * @return 0 on success, 1 on failure
     */
    @Override
    public int run(String[] args) throws Exception {

        // Standard Hadoop job boilerplate.
        Configuration conf = this.getConf();
        Job job = Job.getInstance(conf, this.getClass().getSimpleName());

        // Required when running on a cluster so the job jar can be located;
        // optional for local runs, but best always set.
        job.setJarByClass(WordCount.class);

        // Input and output paths.
        Path inPath = new Path(args[0]);
        FileInputFormat.addInputPath(job, inPath);
        Path outPath = new Path(args[1]);
        FileOutputFormat.setOutputPath(job, outPath);

        // The output path must not already exist — delete any leftover from a
        // previous run before submitting.
        FileSystem fs = outPath.getFileSystem(conf);
        if (fs.exists(outPath)) {
            fs.delete(outPath, true);
        }

        // Map phase settings.
        job.setMapperClass(MyMap.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);

        // Shuffle phase: local pre-aggregation on the map side.
        job.setCombinerClass(MyCombiner.class);

        // Reduce phase settings.
        job.setReducerClass(MyReduce.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        // Block until the job finishes; 0 means success, 1 means failure.
        return job.waitForCompletion(true) ? 0 : 1;
    }

    public static void main(String[] args) {
        Configuration conf = new Configuration();

        // Fall back to the hard-coded demo paths only when fewer than two CLI
        // arguments were supplied — the original overwrote args unconditionally,
        // making real command-line arguments impossible to use.
        if (args.length < 2) {
            args = new String[]{
                    "hdfs://walloce.one:8020/walloce/data/test.txt",
                    "hdfs://walloce.one:8020/walloce/output/" + Math.random()
            };
        }

        try {
            // Propagate the job status as the process exit code (the original
            // discarded ToolRunner's return value and always exited 0).
            System.exit(ToolRunner.run(conf, new WordCount(), args));
        } catch (Exception e) {
            e.printStackTrace();
            System.exit(1);
        }
    }
}
在集群模式下運行時拋出的異常日誌如下:
18/11/22 15:06:00 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable 18/11/22 15:06:02 INFO client.RMProxy: Connecting to ResourceManager at walloce.one/192.168.206.143:8032 18/11/22 15:06:03 WARN mapreduce.JobSubmitter: No job jar file set. User classes may not be found. See Job or Job#setJar(String). 18/11/22 15:06:03 INFO input.FileInputFormat: Total input paths to process : 1 18/11/22 15:06:03 INFO mapreduce.JobSubmitter: number of splits:1 18/11/22 15:06:04 INFO mapreduce.JobSubmitter: Submitting tokens for job: job_1542897380554_0004 18/11/22 15:06:04 INFO mapred.YARNRunner: Job jar is not present. Not adding any jar to the list of resources. 18/11/22 15:06:04 INFO impl.YarnClientImpl: Submitted application application_1542897380554_0004 18/11/22 15:06:04 INFO mapreduce.Job: The url to track the job: http://walloce.one:8088/proxy/application_1542897380554_0004/ 18/11/22 15:06:04 INFO mapreduce.Job: Running job: job_1542897380554_0004 18/11/22 15:06:08 INFO mapreduce.Job: Job job_1542897380554_0004 running in uber mode : false 18/11/22 15:06:08 INFO mapreduce.Job: map 0% reduce 0% 18/11/22 15:06:08 INFO mapreduce.Job: Job job_1542897380554_0004 failed with state FAILED due to: Application application_1542897380554_0004 failed 2 times due to AM Container for appattempt_1542897380554_0004_000002 exited with exitCode: 1 due to: Exception from container-launch: ExitCodeException exitCode=1: /bin/bash: line 0: fg: no job control ExitCodeException exitCode=1: /bin/bash: line 0: fg: no job control at org.apache.hadoop.util.Shell.runCommand(Shell.java:538) at org.apache.hadoop.util.Shell.run(Shell.java:455) at org.apache.hadoop.util.Shell$ShellCommandExecutor.execute(Shell.java:702) at org.apache.hadoop.yarn.server.nodemanager.DefaultContainerExecutor.launchContainer(DefaultContainerExecutor.java:195) at 
org.apache.hadoop.yarn.server.nodemanager.containermanager.launcher.ContainerLaunch.call(ContainerLaunch.java:300) at org.apache.hadoop.yarn.server.nodemanager.containermanager.launcher.ContainerLaunch.call(ContainerLaunch.java:81) at java.util.concurrent.FutureTask.run(FutureTask.java:262) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615) at java.lang.Thread.run(Thread.java:745) Container exited with a non-zero exit code 1 .Failing this attempt.. Failing the application.
檢查配置文件 yarn-site.xml(註:mapreduce.framework.name 按慣例一般配置在 mapred-site.xml 中,請以實際集群配置為準):
1 <!-- 指定計算模型在yarn上 --> 2 <property> 3 <name>mapreduce.framework.name</name> 4 <value>yarn</value> 5 </property>
由於是在本地運行,所以計算模型要指定在本地:
1 <!-- 指定計算模型在本地運行 --> 2 <property> 3 <name>mapreduce.framework.name</name> 4 <value>local</value> 5 </property>
配置文件修改後,程序即可運行成功。
運行結果:
1 "C:\Program Files\Java\jdk1.8.0_162\bin\java.exe" "-javaagent:C:\Program Files\JetBrains\IntelliJ IDEA Community Edition 2018.1.6\lib\idea_rt.jar=13319:C:\Program Files\JetBrains\IntelliJ IDEA Community Edition 2018.1.6\bin" -Dfile.encoding=UTF-8 -classpath "C:\Program Files\Java\jdk1.8.0_162\jre\lib\charsets.jar;C:\Program Files\Java\jdk1.8.0_162\jre\lib\deploy.jar;C:\Program Files\Java\jdk1.8.0_162\jre\lib\ext\access-bridge-64.jar;C:\Program Files\Java\jdk1.8.0_162\jre\lib\ext\cldrdata.jar;C:\Program Files\Java\jdk1.8.0_162\jre\lib\ext\dnsns.jar;C:\Program Files\Java\jdk1.8.0_162\jre\lib\ext\jaccess.jar;C:\Program Files\Java\jdk1.8.0_162\jre\lib\ext\jfxrt.jar;C:\Program Files\Java\jdk1.8.0_162\jre\lib\ext\localedata.jar;C:\Program Files\Java\jdk1.8.0_162\jre\lib\ext\nashorn.jar;C:\Program Files\Java\jdk1.8.0_162\jre\lib\ext\sunec.jar;C:\Program Files\Java\jdk1.8.0_162\jre\lib\ext\sunjce_provider.jar;C:\Program Files\Java\jdk1.8.0_162\jre\lib\ext\sunmscapi.jar;C:\Program Files\Java\jdk1.8.0_162\jre\lib\ext\sunpkcs11.jar;C:\Program Files\Java\jdk1.8.0_162\jre\lib\ext\zipfs.jar;C:\Program Files\Java\jdk1.8.0_162\jre\lib\javaws.jar;C:\Program Files\Java\jdk1.8.0_162\jre\lib\jce.jar;C:\Program Files\Java\jdk1.8.0_162\jre\lib\jfr.jar;C:\Program Files\Java\jdk1.8.0_162\jre\lib\jfxswt.jar;C:\Program Files\Java\jdk1.8.0_162\jre\lib\jsse.jar;C:\Program Files\Java\jdk1.8.0_162\jre\lib\management-agent.jar;C:\Program Files\Java\jdk1.8.0_162\jre\lib\plugin.jar;C:\Program Files\Java\jdk1.8.0_162\jre\lib\resources.jar;C:\Program Files\Java\jdk1.8.0_162\jre\lib\rt.jar;E:\IdeaWorkspace\hadoop_demo\target\classes;E:\repository3\org\apache\hadoop\hadoop-common\2.5.0\hadoop-common-2.5.0.jar;E:\repository3\org\apache\hadoop\hadoop-annotations\2.5.0\hadoop-annotations-2.5.0.jar;C:\Program 
Files\Java\jdk1.8.0_162\lib\tools.jar;E:\repository3\com\google\guava\guava\11.0.2\guava-11.0.2.jar;E:\repository3\commons-cli\commons-cli\1.2\commons-cli-1.2.jar;E:\repository3\org\apache\commons\commons-math3\3.1.1\commons-math3-3.1.1.jar;E:\repository3\xmlenc\xmlenc\0.52\xmlenc-0.52.jar;E:\repository3\commons-httpclient\commons-httpclient\3.1\commons-httpclient-3.1.jar;E:\repository3\commons-codec\commons-codec\1.4\commons-codec-1.4.jar;E:\repository3\commons-io\commons-io\2.4\commons-io-2.4.jar;E:\repository3\commons-net\commons-net\3.1\commons-net-3.1.jar;E:\repository3\commons-collections\commons-collections\3.2.1\commons-collections-3.2.1.jar;E:\repository3\javax\servlet\servlet-api\2.5\servlet-api-2.5.jar;E:\repository3\org\mortbay\jetty\jetty\6.1.26\jetty-6.1.26.jar;E:\repository3\org\mortbay\jetty\jetty-util\6.1.26\jetty-util-6.1.26.jar;E:\repository3\com\sun\jersey\jersey-core\1.9\jersey-core-1.9.jar;E:\repository3\com\sun\jersey\jersey-json\1.9\jersey-json-1.9.jar;E:\repository3\org\codehaus\jettison\jettison\1.1\jettison-1.1.jar;E:\repository3\com\sun\xml\bind\jaxb-impl\2.2.3-1\jaxb-impl-2.2.3-1.jar;E:\repository3\javax\xml\bind\jaxb-api\2.2.2\jaxb-api-2.2.2.jar;E:\repository3\javax\xml\stream\stax-api\1.0-2\stax-api-1.0-2.jar;E:\repository3\javax\activation\activation\1.1\activation-1.1.jar;E:\repository3\org\codehaus\jackson\jackson-jaxrs\1.8.3\jackson-jaxrs-1.8.3.jar;E:\repository3\org\codehaus\jackson\jackson-xc\1.8.3\jackson-xc-1.8.3.jar;E:\repository3\com\sun\jersey\jersey-server\1.9\jersey-server-1.9.jar;E:\repository3\asm\asm\3.1\asm-3.1.jar;E:\repository3\tomcat\jasper-compiler\5.5.23\jasper-compiler-5.5.23.jar;E:\repository3\tomcat\jasper-runtime\5.5.23\jasper-runtime-5.5.23.jar;E:\repository3\javax\servlet\jsp\jsp-api\2.1\jsp-api-2.1.jar;E:\repository3\commons-el\commons-el\1.0\commons-el-1.0.jar;E:\repository3\commons-logging\commons-logging\1.1.3\commons-logging-1.1.3.jar;E:\repository3\log4j\log4j\1.2.17\log4j-1.2.17.jar;E:\repository3\net
\java\dev\jets3t\jets3t\0.9.0\jets3t-0.9.0.jar;E:\repository3\org\apache\httpcomponents\httpclient\4.1.2\httpclient-4.1.2.jar;E:\repository3\org\apache\httpcomponents\httpcore\4.1.2\httpcore-4.1.2.jar;E:\repository3\com\jamesmurty\utils\java-xmlbuilder\0.4\java-xmlbuilder-0.4.jar;E:\repository3\commons-lang\commons-lang\2.6\commons-lang-2.6.jar;E:\repository3\commons-configuration\commons-configuration\1.6\commons-configuration-1.6.jar;E:\repository3\commons-digester\commons-digester\1.8\commons-digester-1.8.jar;E:\repository3\commons-beanutils\commons-beanutils\1.7.0\commons-beanutils-1.7.0.jar;E:\repository3\commons-beanutils\commons-beanutils-core\1.8.0\commons-beanutils-core-1.8.0.jar;E:\repository3\org\slf4j\slf4j-api\1.7.5\slf4j-api-1.7.5.jar;E:\repository3\org\slf4j\slf4j-log4j12\1.7.5\slf4j-log4j12-1.7.5.jar;E:\repository3\org\codehaus\jackson\jackson-core-asl\1.9.13\jackson-core-asl-1.9.13.jar;E:\repository3\org\codehaus\jackson\jackson-mapper-asl\1.9.13\jackson-mapper-asl-1.9.13.jar;E:\repository3\org\apache\avro\avro\1.7.4\avro-1.7.4.jar;E:\repository3\com\thoughtworks\paranamer\paranamer\2.3\paranamer-2.3.jar;E:\repository3\org\xerial\snappy\snappy-java\1.0.4.1\snappy-java-1.0.4.1.jar;E:\repository3\com\google\protobuf\protobuf-java\2.5.0\protobuf-java-2.5.0.jar;E:\repository3\org\apache\hadoop\hadoop-auth\2.5.0\hadoop-auth-2.5.0.jar;E:\repository3\org\apache\directory\server\apacheds-kerberos-codec\2.0.0-M15\apacheds-kerberos-codec-2.0.0-M15.jar;E:\repository3\org\apache\directory\server\apacheds-i18n\2.0.0-M15\apacheds-i18n-2.0.0-M15.jar;E:\repository3\org\apache\directory\api\api-asn1-api\1.0.0-M20\api-asn1-api-1.0.0-M20.jar;E:\repository3\org\apache\directory\api\api-util\1.0.0-M20\api-util-1.0.0-M20.jar;E:\repository3\com\jcraft\jsch\0.1.42\jsch-0.1.42.jar;E:\repository3\com\google\code\findbugs\jsr305\1.3.9\jsr305-1.3.9.jar;E:\repository3\org\apache\commons\commons-compress\1.4.1\commons-compress-1.4.1.jar;E:\repository3\org\tukaani\xz\1.0\xz-1.0.j
ar;E:\repository3\org\apache\hadoop\hadoop-hdfs\2.5.0\hadoop-hdfs-2.5.0.jar;E:\repository3\commons-daemon\commons-daemon\1.0.13\commons-daemon-1.0.13.jar;E:\repository3\io\netty\netty\3.6.2.Final\netty-3.6.2.Final.jar;E:\repository3\org\apache\hadoop\hadoop-client\2.5.0\hadoop-client-2.5.0.jar;E:\repository3\org\apache\hadoop\hadoop-mapreduce-client-app\2.5.0\hadoop-mapreduce-client-app-2.5.0.jar;E:\repository3\org\apache\hadoop\hadoop-mapreduce-client-common\2.5.0\hadoop-mapreduce-client-common-2.5.0.jar;E:\repository3\org\apache\hadoop\hadoop-yarn-client\2.5.0\hadoop-yarn-client-2.5.0.jar;E:\repository3\com\sun\jersey\jersey-client\1.9\jersey-client-1.9.jar;E:\repository3\org\apache\hadoop\hadoop-yarn-server-common\2.5.0\hadoop-yarn-server-common-2.5.0.jar;E:\repository3\org\apache\hadoop\hadoop-mapreduce-client-shuffle\2.5.0\hadoop-mapreduce-client-shuffle-2.5.0.jar;E:\repository3\org\fusesource\leveldbjni\leveldbjni-all\1.8\leveldbjni-all-1.8.jar;E:\repository3\org\apache\hadoop\hadoop-yarn-api\2.5.0\hadoop-yarn-api-2.5.0.jar;E:\repository3\org\apache\hadoop\hadoop-mapreduce-client-core\2.5.0\hadoop-mapreduce-client-core-2.5.0.jar;E:\repository3\org\apache\hadoop\hadoop-yarn-common\2.5.0\hadoop-yarn-common-2.5.0.jar;E:\repository3\org\apache\hadoop\hadoop-mapreduce-client-jobclient\2.5.0\hadoop-mapreduce-client-jobclient-2.5.0.jar;E:\repository3\org\apache\hadoop\hadoop-mapreduce-examples\2.5.0\hadoop-mapreduce-examples-2.5.0.jar;E:\repository3\org\apache\zookeeper\zookeeper\3.4.5\zookeeper-3.4.5.jar;E:\repository3\jline\jline\0.9.94\jline-0.9.94.jar;E:\repository3\org\jboss\netty\netty\3.2.2.Final\netty-3.2.2.Final.jar" com.wordcount.WordCount 2 18/11/22 15:37:44 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable 3 18/11/22 15:37:47 INFO Configuration.deprecation: session.id is deprecated. 
Instead, use dfs.metrics.session-id 4 18/11/22 15:37:47 INFO jvm.JvmMetrics: Initializing JVM Metrics with processName=JobTracker, sessionId= 5 18/11/22 15:37:49 WARN mapreduce.JobSubmitter: No job jar file set. User classes may not be found. See Job or Job#setJar(String). 6 18/11/22 15:37:49 INFO input.FileInputFormat: Total input paths to process : 1 7 18/11/22 15:37:50 INFO mapreduce.JobSubmitter: number of splits:1 8 18/11/22 15:37:50 INFO mapreduce.JobSubmitter: Submitting tokens for job: job_local857633983_0001 9 18/11/22 15:37:50 WARN conf.Configuration: file:/opt/module/hadoop-2.5.0/data/tmp/mapred/staging/YinYichang857633983/.staging/job_local857633983_0001/job.xml:an attempt to override final parameter: mapreduce.job.end-notification.max.retry.interval; Ignoring. 10 18/11/22 15:37:50 WARN conf.Configuration: file:/opt/module/hadoop-2.5.0/data/tmp/mapred/staging/YinYichang857633983/.staging/job_local857633983_0001/job.xml:an attempt to override final parameter: mapreduce.job.end-notification.max.attempts; Ignoring. 11 18/11/22 15:37:51 WARN conf.Configuration: file:/opt/module/hadoop-2.5.0/data/tmp/mapred/local/localRunner/YinYichang/job_local857633983_0001/job_local857633983_0001.xml:an attempt to override final parameter: mapreduce.job.end-notification.max.retry.interval; Ignoring. 12 18/11/22 15:37:51 WARN conf.Configuration: file:/opt/module/hadoop-2.5.0/data/tmp/mapred/local/localRunner/YinYichang/job_local857633983_0001/job_local857633983_0001.xml:an attempt to override final parameter: mapreduce.job.end-notification.max.attempts; Ignoring. 
13 18/11/22 15:37:51 INFO mapreduce.Job: The url to track the job: http://localhost:8080/ 14 18/11/22 15:37:51 INFO mapreduce.Job: Running job: job_local857633983_0001 15 18/11/22 15:37:51 INFO mapred.LocalJobRunner: OutputCommitter set in config null 16 18/11/22 15:37:51 INFO mapred.LocalJobRunner: OutputCommitter is org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter 17 18/11/22 15:37:51 INFO mapred.LocalJobRunner: Waiting for map tasks 18 18/11/22 15:37:51 INFO mapred.LocalJobRunner: Starting task: attempt_local857633983_0001_m_000000_0 19 18/11/22 15:37:51 INFO util.ProcfsBasedProcessTree: ProcfsBasedProcessTree currently is supported only on Linux. 20 18/11/22 15:37:51 INFO mapred.Task: Using ResourceCalculatorProcessTree : org.apache.hadoop.yarn.util.WindowsBasedProcessTree@3a8763db 21 18/11/22 15:37:51 INFO mapred.MapTask: Processing split: hdfs://walloce.one:8020/walloce/data/test.txt:0+173 22 18/11/22 15:37:51 INFO mapred.MapTask: Map output collector class = org.apache.hadoop.mapred.MapTask$MapOutputBuffer 23 18/11/22 15:37:51 INFO mapred.MapTask: (EQUATOR) 0 kvi 26214396(104857584) 24 18/11/22 15:37:51 INFO mapred.MapTask: mapreduce.task.io.sort.mb: 100 25 18/11/22 15:37:51 INFO mapred.MapTask: soft limit at 83886080 26 18/11/22 15:37:51 INFO mapred.MapTask: bufstart = 0; bufvoid = 104857600 27 18/11/22 15:37:51 INFO mapred.MapTask: kvstart = 26214396; length = 6553600 28 map階段開始執行,,, 29 map階段結束。。。 30 map階段開始執行,,, 31 map階段結束。。。 32 map階段開始執行,,, 33 map階段結束。。。 34 map階段開始執行,,, 35 map階段結束。。。 36 map階段開始執行,,, 37 map階段結束。。。 38 map階段開始執行,,, 39 map階段結束。。。 40 18/11/22 15:37:52 INFO mapred.LocalJobRunner: 41 18/11/22 15:37:52 INFO mapreduce.Job: Job job_local857633983_0001 running in uber mode : false 42 18/11/22 15:37:52 INFO mapreduce.Job: map 0% reduce 0% 43 18/11/22 15:37:52 INFO mapred.MapTask: Starting flush of map output 44 18/11/22 15:37:52 INFO mapred.MapTask: Spilling map output 45 18/11/22 15:37:52 INFO mapred.MapTask: bufstart = 0; bufend = 321; 
bufvoid = 104857600 46 18/11/22 15:37:52 INFO mapred.MapTask: kvstart = 26214396(104857584); kvend = 26214252(104857008); length = 145/6553600 47 Combiner階段開始... 48 Combiner階段結束。。。 49 Combiner階段開始... 50 Combiner階段結束。。。 51 Combiner階段開始... 52 Combiner階段結束。。。 53 Combiner階段開始... 54 Combiner階段結束。。。 55 Combiner階段開始... 56 Combiner階段結束。。。 57 Combiner階段開始... 58 Combiner階段結束。。。 59 Combiner階段開始... 60 Combiner階段結束。。。 61 Combiner階段開始... 62 Combiner階段結束。。。 63 Combiner階段開始... 64 Combiner階段結束。。。 65 18/11/22 15:37:52 INFO mapred.MapTask: Finished spill 0 66 18/11/22 15:37:52 INFO mapred.Task: Task:attempt_local857633983_0001_m_000000_0 is done. And is in the process of committing 67 18/11/22 15:37:52 INFO mapred.LocalJobRunner: map 68 18/11/22 15:37:52 INFO mapred.Task: Task 'attempt_local857633983_0001_m_000000_0' done. 69 18/11/22 15:37:52 INFO mapred.LocalJobRunner: Finishing task: attempt_local857633983_0001_m_000000_0 70 18/11/22 15:37:52 INFO mapred.LocalJobRunner: map task executor complete. 71 18/11/22 15:37:52 INFO mapred.LocalJobRunner: Waiting for reduce tasks 72 18/11/22 15:37:52 INFO mapred.LocalJobRunner: Starting task: attempt_local857633983_0001_r_000000_0 73 18/11/22 15:37:52 INFO util.ProcfsBasedProcessTree: ProcfsBasedProcessTree currently is supported only on Linux. 
74 18/11/22 15:37:52 INFO mapred.Task: Using ResourceCalculatorProcessTree : org.apache.hadoop.yarn.util.WindowsBasedProcessTree@32eb8b1d 75 18/11/22 15:37:52 INFO mapred.ReduceTask: Using ShuffleConsumerPlugin: org.apache.hadoop.mapreduce.task.reduce.Shuffle@5b831a05 76 18/11/22 15:37:52 INFO reduce.MergeManagerImpl: MergerManager: memoryLimit=1291845632, maxSingleShuffleLimit=322961408, mergeThreshold=852618176, ioSortFactor=10, memToMemMergeOutputsThreshold=10 77 18/11/22 15:37:52 INFO reduce.EventFetcher: attempt_local857633983_0001_r_000000_0 Thread started: EventFetcher for fetching Map Completion Events 78 18/11/22 15:37:52 INFO reduce.LocalFetcher: localfetcher#1 about to shuffle output of map attempt_local857633983_0001_m_000000_0 decomp: 101 len: 105 to MEMORY 79 18/11/22 15:37:52 INFO reduce.InMemoryMapOutput: Read 101 bytes from map-output for attempt_local857633983_0001_m_000000_0 80 18/11/22 15:37:52 INFO reduce.MergeManagerImpl: closeInMemoryFile -> map-output of size: 101, inMemoryMapOutputs.size() -> 1, commitMemory -> 0, usedMemory ->101 81 18/11/22 15:37:52 INFO reduce.EventFetcher: EventFetcher is interrupted.. Returning 82 18/11/22 15:37:52 INFO mapred.LocalJobRunner: 1 / 1 copied. 
83 18/11/22 15:37:52 INFO reduce.MergeManagerImpl: finalMerge called with 1 in-memory map-outputs and 0 on-disk map-outputs 84 18/11/22 15:37:52 INFO mapred.Merger: Merging 1 sorted segments 85 18/11/22 15:37:52 INFO mapred.Merger: Down to the last merge-pass, with 1 segments left of total size: 97 bytes 86 18/11/22 15:37:52 INFO reduce.MergeManagerImpl: Merged 1 segments, 101 bytes to disk to satisfy reduce memory limit 87 18/11/22 15:37:52 INFO reduce.MergeManagerImpl: Merging 1 files, 105 bytes from disk 88 18/11/22 15:37:52 INFO reduce.MergeManagerImpl: Merging 0 segments, 0 bytes from memory into reduce 89 18/11/22 15:37:52 INFO mapred.Merger: Merging 1 sorted segments 90 18/11/22 15:37:52 INFO mapred.Merger: Down to the last merge-pass, with 1 segments left of total size: 97 bytes 91 18/11/22 15:37:52 INFO mapred.LocalJobRunner: 1 / 1 copied. 92 18/11/22 15:37:52 INFO Configuration.deprecation: mapred.skip.on is deprecated. Instead, use mapreduce.job.skiprecords 93 Reduce階段開始執行... 94 單詞I: 5 95 Reduce階段結束。。。 96 Reduce階段開始執行... 97 單詞am: 5 98 Reduce階段結束。。。 99 Reduce階段開始執行... 100 單詞an: 5 101 Reduce階段結束。。。 102 Reduce階段開始執行... 103 單詞for: 5 104 Reduce階段結束。。。 105 Reduce階段開始執行... 106 單詞hello: 1 107 Reduce階段結束。。。 108 Reduce階段開始執行... 109 單詞linux: 5 110 Reduce階段結束。。。 111 Reduce階段開始執行... 112 單詞new: 5 113 Reduce階段結束。。。 114 Reduce階段開始執行... 115 單詞programer: 5 116 Reduce階段結束。。。 117 Reduce階段開始執行... 118 單詞world!: 1 119 Reduce階段結束。。。 120 18/11/22 15:37:52 INFO mapred.Task: Task:attempt_local857633983_0001_r_000000_0 is done. And is in the process of committing 121 18/11/22 15:37:52 INFO mapred.LocalJobRunner: 1 / 1 copied. 
122 18/11/22 15:37:52 INFO mapred.Task: Task attempt_local857633983_0001_r_000000_0 is allowed to commit now 123 18/11/22 15:37:52 INFO output.FileOutputCommitter: Saved output of task 'attempt_local857633983_0001_r_000000_0' to hdfs://walloce.one:8020/walloce/output/_temporary/0/task_local857633983_0001_r_000000 124 18/11/22 15:37:52 INFO mapred.LocalJobRunner: reduce > reduce 125 18/11/22 15:37:52 INFO mapred.Task: Task 'attempt_local857633983_0001_r_000000_0' done. 126 18/11/22 15:37:52 INFO mapred.LocalJobRunner: Finishing task: attempt_local857633983_0001_r_000000_0 127 18/11/22 15:37:52 INFO mapred.LocalJobRunner: reduce task executor complete. 128 18/11/22 15:37:53 INFO mapreduce.Job: map 100% reduce 100% 129 18/11/22 15:37:53 INFO mapreduce.Job: Job job_local857633983_0001 completed successfully 130 18/11/22 15:37:53 INFO mapreduce.Job: Counters: 38 131 File System Counters 132 FILE: Number of bytes read=576 133 FILE: Number of bytes written=490527 134 FILE: Number of read operations=0 135 FILE: Number of large read operations=0 136 FILE: Number of write operations=0 137 HDFS: Number of bytes read=346 138 HDFS: Number of bytes written=63 139 HDFS: Number of read operations=15 140 HDFS: Number of large read operations=0 141 HDFS: Number of write operations=6 142 Map-Reduce Framework 143 Map input records=6 144 Map output records=37 145 Map output bytes=321 146 Map output materialized bytes=105 147 Input split bytes=110 148 Combine input records=37 149 Combine output records=9 150 Reduce input groups=9 151 Reduce shuffle bytes=105 152 Reduce input records=9 153 Reduce output records=9 154 Spilled Records=18 155 Shuffled Maps =1 156 Failed Shuffles=0 157 Merged Map outputs=1 158 GC time elapsed (ms)=3 159 CPU time spent (ms)=0 160 Physical memory (bytes) snapshot=0 161 Virtual memory (bytes) snapshot=0 162 Total committed heap usage (bytes)=372244480 163 Shuffle Errors 164 BAD_ID=0 165 CONNECTION=0 166 IO_ERROR=0 167 WRONG_LENGTH=0 168 WRONG_MAP=0 169 
WRONG_REDUCE=0 170 File Input Format Counters 171 Bytes Read=173 172 File Output Format Counters 173 Bytes Written=63 174 175 Process finished with exit code 0
由wordcount運行結果可以看出,MapReduce的執行順序:
1、Map階段開始
因為map階段的輸入數據是以行為單位,key 是該行在文件中的字節偏移量(並非行序號),輸入有多少行,map 方法就執行多少次。
2、Combiner階段開始
Map 端在溢寫(Spill)輸出時會執行 Combiner,對本 map 任務各分區內相同 key 的數據先做一次本地小聚合,以減少 Shuffle 到 Reduce 端的數據量;有多少個不同的 key,Combiner 的 reduce 方法就執行多少次。
3、Reduce階段開始
Combiner階段結束后,Reduce階段需要將所有分區的所有數據進行聚合,得出最終的結果。
記: 心酸的學習歷程!