This problem cost me nearly two days of trial and error, so I am sharing the solution here to save others from the same pitfall. I had previously only built Hadoop clusters on a LAN and accessed them from within that LAN, so I had never run into this issue.
A Hadoop cluster built on Alibaba Cloud (Aliyun) has to communicate over the internal network, i.e. the hosts mapping must use the private LAN IP addresses.
If the hosts mapping uses the public IP addresses, the cluster will not start: the NameNode and SecondaryNameNode fail to come up. With the hosts file mapped to the internal IPs, the cluster starts normally, but a client such as Eclipse then
fails, because it is told to reach the DataNodes through their Aliyun-internal IP addresses, which are of course unreachable from outside the cloud. This had me stuck for a long time with no lead.
The fix, it turned out, is to set dfs.client.use.datanode.hostname to true in hdfs-site.xml, so that the client addresses DataNodes by hostname instead of by IP; it then no longer tries to connect through the internal IPs.
The first clues came from the logs:
I. Check the logs
1. less hadoop-hadoop-namenode-master.log
2. less hadoop-hadoop-secondarynamenode-master.log
II. Fix cluster access
1. Check the hosts mapping file (/etc/hosts)
If the entries currently map the hostnames to the public IPs, replace them with the internal (private) IPs, for example:
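Below is a sketch of what the corrected file should look like; the hostnames and 172.16.x.x addresses are placeholders, use your own instances' private IPs from the Aliyun console:
# /etc/hosts on every cluster node -- internal IPs, not the public ones
172.16.0.10 master
172.16.0.11 slave1
172.16.0.12 slave2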
Then build the Hadoop cluster as usual.
2. core-site.xml
<!-- File system schema (URI) used by Hadoop, i.e. the address of HDFS's master daemon (the NameNode) -->
<property>
<name>fs.defaultFS</name>
<value>hdfs://master:9000</value>
</property>
<!-- Directory where Hadoop stores the files it generates at runtime -->
<property>
<name>hadoop.tmp.dir</name>
<value>/home/hadoop/BigData/hadoop-2.7.3/data</value>
</property>
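A quick way to check that the NameNode address configured above is reachable from the machine you will test from (assuming a Hadoop client is installed there and master resolves in its hosts file) is to list the HDFS root explicitly:
hadoop fs -ls hdfs://master:9000/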
3. hadoop-env.sh: set the export JAVA_HOME value
export JAVA_HOME=/home/hadoop/BigData/jdk1.8
4. hdfs-site.xml. Note: add a dfs.client.use.datanode.hostname property
<!-- HTTP address of the SecondaryNameNode -->
<property>
<name>dfs.namenode.secondary.http-address</name>
<value>master:50090</value>
</property>
<!-- Number of HDFS replicas -->
<property>
<name>dfs.replication</name>
<value>1</value>
</property>
<property>
<name>dfs.permissions</name>
<value>false</value>
</property>
<!-- Needed when a cluster built on Aliyun's internal network is accessed through the public IP -->
<property>
<name>dfs.client.use.datanode.hostname</name>
<value>true</value>
<description>only needs to be configured on clients</description>
</property>
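If you would rather not ship an hdfs-site.xml with the client, the same switch can also be set programmatically on the client's Configuration. The sketch below is a minimal standalone read test, assuming hdfs://master:9000 matches fs.defaultFS above and that /test.txt is just a placeholder for any small file already on the cluster:
import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class HdfsClientTest {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Ask the HDFS client to contact DataNodes by hostname instead of
        // the (cloud-internal) IP addresses returned by the NameNode.
        conf.set("dfs.client.use.datanode.hostname", "true");
        // fs.defaultFS from core-site.xml; "master" must resolve to the
        // public IP in the local machine's hosts file.
        FileSystem fs = FileSystem.get(URI.create("hdfs://master:9000"), conf);
        // Placeholder path -- read any small text file that exists on the cluster.
        try (BufferedReader in = new BufferedReader(
                new InputStreamReader(fs.open(new Path("/test.txt"))))) {
            String line;
            while ((line = in.readLine()) != null) {
                System.out.println(line);
            }
        }
        fs.close();
    }
}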
5. mapred-site.xml
<!-- Run MapReduce on YARN -->
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
<!-- JobHistory server address -->
<property>
<name>mapreduce.jobhistory.address</name>
<value>master:10020</value>
</property>
<!-- JobHistory web UI address -->
<property>
<name>mapreduce.jobhistory.webapp.address</name>
<value>master:19888</value>
</property>
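Note that start-all.sh does not start the JobHistory server; if you want the master:10020 / master:19888 addresses above to be live, start it separately on master:
mr-jobhistory-daemon.sh start historyserver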
6. yarn-site.xml
<!-- Address (hostname) of YARN's master daemon, the ResourceManager -->
<property>
<name>yarn.resourcemanager.hostname</name>
<value>master</value>
</property>
<!-- How reducers fetch map output (the shuffle service) -->
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
7. Format with hadoop namenode -format, then start the cluster with start-all.sh
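After start-all.sh finishes, jps on each node is a quick way to confirm that the daemons which previously refused to start are now up (assuming the NameNode, SecondaryNameNode and ResourceManager all run on master):
# on master -- expect NameNode, SecondaryNameNode and ResourceManager
jps
# on each slave -- expect DataNode and NodeManager
jps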
8. Write a word-count job in the local IDE to test access to the cluster
import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;

public class WordCount {

    // Mapper: split each line into tokens and emit (word, 1).
    public static class TokenizerMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
        private final static IntWritable one = new IntWritable(1);
        private Text word = new Text();

        @Override
        protected void map(LongWritable key, Text value,
                Mapper<LongWritable, Text, Text, IntWritable>.Context context)
                throws IOException, InterruptedException {
            StringTokenizer itr = new StringTokenizer(value.toString());
            while (itr.hasMoreTokens()) {
                word.set(itr.nextToken());
                context.write(word, one);
            }
        }
    }

    // Reducer (also used as combiner): sum the counts for each word.
    public static class WordCountReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
        private IntWritable result = new IntWritable();

        @Override
        protected void reduce(Text key, Iterable<IntWritable> values,
                Reducer<Text, IntWritable, Text, IntWritable>.Context context)
                throws IOException, InterruptedException {
            int sum = 0;
            for (IntWritable item : values) {
                sum += item.get();
            }
            result.set(sum);
            context.write(key, result);
        }
    }

    public static void main(String[] args)
            throws IOException, ClassNotFoundException, InterruptedException {
        Configuration conf = new Configuration();
        String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
        if (otherArgs.length < 2) {
            System.err.println("Usage: wordcount <in> [<in>....] <out>");
            System.exit(2);
        }
        Job job = Job.getInstance(conf, "word count");
        job.setJarByClass(WordCount.class);
        job.setMapperClass(TokenizerMapper.class);
        job.setCombinerClass(WordCountReducer.class);
        job.setReducerClass(WordCountReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        // All arguments except the last one are input paths.
        for (int i = 0; i < otherArgs.length - 1; i++) {
            FileInputFormat.addInputPath(job, new Path(otherArgs[i]));
        }
        // Delete the output directory if it already exists.
        FileSystem fs = FileSystem.get(conf);
        Path output = new Path(otherArgs[otherArgs.length - 1]);
        if (fs.exists(output)) {
            fs.delete(output, true);
            System.out.println("output directory existed! deleted!");
        }
        FileOutputFormat.setOutputPath(job, output);
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
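To run this from the IDE against the cluster, put core-site.xml and the client-side hdfs-site.xml (with dfs.client.use.datanode.hostname set to true) on the project classpath, so that FileSystem.get(conf) points at the cluster, and pass HDFS paths as program arguments. The paths below are placeholders; the input directory must already contain some text files, e.g. uploaded with hadoop fs -put:
hdfs://master:9000/input hdfs://master:9000/output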