package com.zhen.hdfs; import java.io.IOException; import java.io.OutputStream; import java.net.URI; import java.net.URISyntaxException; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; /** * @author FengZhen * @date 2018年8月12日 * */ public class FileSystemStatusAPI { /** * 文件元數據:FileStatus * 任何文件系統的一個重要特征都是提供其目錄結構瀏覽和檢索它所在文件和目錄相關信息的功能。 * FileStatus類封裝了文件系統中文件和目錄的元數據,包括文件長度、塊大小、復本、修改時間、所有者以及權限信息 */ private static FileSystem fileSystem; public static void main(String[] args) { //setUp(); //fileStatusForFile(); //tearDown(); //globbing(); pathFilter(); } public static void setUp() { String uri = "/user/hdfs/MapReduce/data/test_status"; Configuration configuration = new Configuration(); try { fileSystem = FileSystem.get(new URI(uri), configuration); OutputStream outputStream = fileSystem.create(new Path(uri)); outputStream.write("content".getBytes("UTF-8")); outputStream.close(); } catch (IOException e) { e.printStackTrace(); } catch (URISyntaxException e) { e.printStackTrace(); } } public static void tearDown() { if (fileSystem != null) { try { fileSystem.close(); } catch (IOException e) { e.printStackTrace(); } } } /** * path=/user/hdfs/MapReduce/data/test_status isDir=false length=7 modificationTime=1534080334126 replication=3 blockSize=134217728 owner=FengZhen group=hdfs permissions=rw-r--r-- */ public static void fileStatusForFile() { Path file = new Path("/user/hdfs/MapReduce/data/test_status"); try { FileStatus fileStatus = fileSystem.getFileStatus(file); String path = fileStatus.getPath().toUri().getPath(); System.out.println("path="+path); Boolean isDir = fileStatus.isDirectory(); System.out.println("isDir="+isDir); long length = fileStatus.getLen(); System.out.println("length="+length); long modificationTime = fileStatus.getModificationTime(); System.out.println("modificationTime="+modificationTime); int replication = fileStatus.getReplication(); System.out.println("replication="+replication); long blockSize = fileStatus.getBlockSize(); System.out.println("blockSize="+blockSize); String owner = fileStatus.getOwner(); System.out.println("owner="+owner); String group = fileStatus.getGroup(); System.out.println("group="+group); String permissions = fileStatus.getPermission().toString(); System.out.println("permissions="+permissions); } catch (IOException e) { e.printStackTrace(); } } /** * 文件模式 * 在單個操作中處理一批文件是一個很常見的需求。 * 在一個表達式中使用通配符來匹配多個文件是比較方便的,無需列舉每個文件和目錄來指定輸入,該操作稱為"通配"(globbing)。 * Hadoop為執行通配提供了兩個FileSystem方法 * public FileStatus[] globStatus(Path pathPattern) throws IOException { return new Globber(this, pathPattern, DEFAULT_FILTER).glob(); } public FileStatus[] globStatus(Path pathPattern, PathFilter filter) throws IOException { return new Globber(this, pathPattern, filter).glob(); } globStatus方法返回與其路徑匹配於指定模式的所有文件的FileStatus對象數組,並按路徑排序。 PathFilter命令作為可選項可以進一步對匹配結果進行限制 */ public static void globbing() { String uri = "/user/hdfs/MapReduce/data"; Configuration configuration = new Configuration(); try { fileSystem = FileSystem.get(new URI(uri), configuration); // /2018/08/12 /2017/08/11 FileStatus[] fileStatus = fileSystem.globStatus(new Path("/user/hdfs/MapReduce/data/*/*/{11,12}")); // 1./user/hdfs/MapReduce/data/201[78](201[7-8] 、 201[^01234569]) hdfs://fz/user/hdfs/MapReduce/data/2017 hdfs://fz/user/hdfs/MapReduce/data/2018 // 2./user/hdfs/MapReduce/data/*/*/11 hdfs://fz/user/hdfs/MapReduce/data/2017/08/11 // 3./user/hdfs/MapReduce/data/*/*/{11,12} hdfs://fz/user/hdfs/MapReduce/data/2017/08/11 hdfs://fz/user/hdfs/MapReduce/data/2018/08/12 for (FileStatus fileStatus2 : fileStatus) { System.out.println(fileStatus2.getPath().toString()); } fileSystem.close(); } catch (Exception e) { e.printStackTrace(); } } /** * PathFilter * 通配符模式並不總能夠精確地描述我們想要訪問的字符集。比如,使用通配格式排除一個特定文件就不太可能。 * FileSystem中的listStatus和globStatus方法提供了可選的pathFilter對象,以編程方式控制通配符 */ public static void pathFilter() { String uri = "/user/hdfs/MapReduce/data"; Configuration configuration = new Configuration(); try { fileSystem = FileSystem.get(new URI(uri), configuration); // /2018/08/12 /2017/08/11 新增一個/2017/08/12 FileStatus[] fileStatus = fileSystem.globStatus(new Path("/user/hdfs/MapReduce/data/201[78]/*/*"), new RegexExcludePathFilter("^.*/2017/08/11$")); //FileStatus[] fileStatus = fileSystem.globStatus(new Path("/user/hdfs/MapReduce/data/2017/*/*"), new RegexExcludePathFilter("/user/hdfs/MapReduce/data/2017/08/11")); for (FileStatus fileStatus2 : fileStatus) { System.out.println(fileStatus2.getPath().toString()); } fileSystem.close(); } catch (Exception e) { e.printStackTrace(); } } }