查找HDFS有哪些小文件以及統計數量
hdfs 導出fsimage文件
# Fetch the fsimage from the NameNode and save it under the local directory /data
# (the conversion step below reads it from there as /data/fsimage_<id>).
hdfs dfsadmin -fetchImage /data
將fsimage轉換為可讀的文本數據(以逗號分隔的CSV)
# Dump the fetched fsimage as comma-delimited text using the Offline Image Viewer.
hdfs oiv \
  --processor Delimited \
  -delimiter ',' \
  --inputFile /data/fsimage_0000000000930647029 \
  --outputFile /data/fsimage.csv
刪除數據第一行標題
# Remove the header row from the CSV in place, but only when line 1 really is
# the header (it starts with the first column name, "Path,"). If the dump was
# produced without a header, an unconditional `1d` would silently drop a data row.
sed -i '1{/^Path,/d;}' /data/fsimage.csv
在MySQL中建立用於存放fsimage數據的表
-- Staging table for the `hdfs oiv -p Delimited` dump: one row per fsimage entry.
-- Column order must match the field order of /data/fsimage.csv, because the
-- file is loaded without an explicit column list.
CREATE TABLE IF NOT EXISTS `images_info` (
    `Path`               TEXT,          -- full HDFS path; VARCHAR(100) would truncate longer paths
    `Replication`        INT,
    `ModificationTime`   VARCHAR(100),  -- kept as text, loaded verbatim from the CSV
    `AccessTime`         VARCHAR(100),  -- kept as text, loaded verbatim from the CSV
    `PreferredBlockSize` BIGINT,
    `BlocksCount`        INT,
    `FileSize`           BIGINT,        -- presumably bytes; confirm against the oiv output
    `NSQUOTA`            VARCHAR(100),
    `DSQUOTA`            VARCHAR(100),
    `Permission`         VARCHAR(100),
    `UserName`           VARCHAR(100),
    `GroupName`          VARCHAR(100)
) CHARSET = utf8;
加載數據到mysql
-- Bulk-load the fsimage dump into `images_info`.
-- FIELDS TERMINATED BY must match the "," delimiter the CSV was generated with;
-- without it LOAD DATA splits fields on TAB and the whole line lands in the
-- first column.
LOAD DATA LOCAL INFILE '/data/fsimage.csv'
INTO TABLE images_info
FIELDS TERMINATED BY ',';
統計小文件的數量
-- Count the small files (FileSize below 500) recorded in the fsimage dump.
-- Directory entries are dumped with FileSize 0 and a Permission string that
-- starts with 'd'; they are excluded so directories are not reported as small files.
SELECT
    COUNT(Path),
    COUNT(FileSize)
FROM images_info
WHERE FileSize < 500
  AND Permission NOT LIKE 'd%';