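通過 Java API 連接 Hadoop 集群時,如果集群支持 HA 方式,那麼可以通過如下方式設置,讓客戶端自動切換到活動的 master 節點(active NameNode)上。其中,ClusterName 是可以任意指定的,跟集群配置無關;dfs.ha.namenodes.ClusterName 的值(即各 master 的 ID,例如 nn1、nn2)也可以任意指定名稱,有幾個 master 就寫幾個,後面再通過 dfs.namenode.rpc-address.ClusterName.<ID> 為每個 ID 添加相應的 master 節點地址即可。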
/**
 * Logical name of the HA nameservice as seen by this client. It is only used
 * as a key that ties the settings in the static initializer together.
 */
private static String ClusterName = "nsstargate";

/** File-system URI; it addresses the logical nameservice instead of a single NameNode host. */
private static final String HADOOP_URL = "hdfs://" + ClusterName;

/** Shared HDFS client configuration, populated once by the static initializer below. */
public static Configuration conf;

static {
    conf = new Configuration();
    conf.set("fs.defaultFS", HADOOP_URL);
    conf.set("dfs.nameservices", ClusterName);

    // IDs of the NameNodes behind the nameservice; every ID listed here needs
    // a matching dfs.namenode.rpc-address.<nameservice>.<id> entry.
    conf.set("dfs.ha.namenodes." + ClusterName, "nn1,nn2");
    conf.set("dfs.namenode.rpc-address." + ClusterName + ".nn1", "172.16.50.24:8020");
    conf.set("dfs.namenode.rpc-address." + ClusterName + ".nn2", "172.16.50.21:8020");

    // Failover proxy provider class the client is configured to use for this nameservice.
    conf.set("dfs.client.failover.proxy.provider." + ClusterName,
            "org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider");
}
上傳文件到HDFS的代碼如下,至於讀取等其他操作,可以參考網絡上其他文章。
/** * 上傳文件到HDFS上去 */ private static void uploadToHdfs() throws IOException { String localSrc = "E:\\test\\article01.txt"; String dst = "/user/test/article04.txt"; FileSystem fs = FileSystem.get(URI.create(HADOOP_URL), conf); long start = new Date().getTime(); /* InputStream in = new FileInputStream(localSrc); InputStreamReader isr = new InputStreamReader(in, "GBK"); OutputStream out = fs.create(new Path(HADOOP_URL+dst), true); IOUtils.copy(isr, out, "UTF8");*/ //該方法更快 FSDataOutputStream outputStream=fs.create(new Path(dst)); String fileContent = FileUtils.readFileToString(new File(localSrc), "GBK"); outputStream.write(fileContent.getBytes()); outputStream.close(); long end = new Date().getTime(); System.out.println("use:"+(end-start)); }