使用 client.walk() 可以遍歷 HDFS 的文件和目錄;加上參數 status=True 後,會同時返回每個路徑的基本屬性(status 字典),例如 type 欄位指示它是 FILE 還是 DIRECTORY,另外還有訪問時間(accessTime)和修改時間(modificationTime)。
# -*- coding: utf-8 -*-
from hdfs import *
import os
from hdfs.ext.avro import AvroReader, AvroWriter
def main(url="http://192.168.56.101:50070", path="/home"):
    """Walk an HDFS tree over WebHDFS and print every file with its status.

    Args:
        url: WebHDFS endpoint passed to ``Client``. Defaults to the address
            this script originally hard-coded.
        path: HDFS directory the walk starts from.

    ``client.walk`` is called with ``status=True``. Per the sample output
    kept in this file, the directory and each file entry are then
    ``(name, status_dict)`` tuples rather than plain strings, and they are
    printed in that form.
    """
    client = Client(url)
    # The sub-directory list is not needed here; the underscore name also
    # avoids shadowing the builtin ``dir``.
    for root, _dirs, entries in client.walk(path, status=True):
        for entry in entries:
            # NOTE: with status=True both values are tuples, so a full path
            # would have to be built from root[0] and entry[0].
            # The parent directory is printed once per file, before the file.
            print(root)
            print(entry)
# Module-level entry point: the walk starts as soon as this file is run
# (there is no ``__main__`` guard, so importing the file triggers it too).
main()
--返回結果:
('/home/test', {u'group': u'supergroup', u'permission': u'755', u'blockSize': 0, u'accessTime': 0, u'pathSuffix': u'', u'modificationTime': 1530603160696L, u'replication': 0, u'length': 0, u'childrenNum': 210, u'owner': u'apuser', u'storagePolicy': 0, u'type': u'DIRECTORY', u'fileId': 16752})
(u'ods_gps.sh20180224095355.sh', {u'group': u'supergroup', u'permission': u'644', u'blockSize': 134217728, u'accessTime': 1530008211494L, u'pathSuffix': u'ods_gps.sh20180224095355.sh', u'modificationTime': 1528799458770L, u'replication': 3, u'length': 1366, u'childrenNum': 0, u'owner': u'apuser', u'storagePolicy': 0, u'type': u'FILE', u'fileId': 30176})
('/home/test', {u'group': u'supergroup', u'permission': u'755', u'blockSize': 0, u'accessTime': 0, u'pathSuffix': u'', u'modificationTime': 1530603160696L, u'replication': 0, u'length': 0, u'childrenNum': 210, u'owner': u'apuser', u'storagePolicy': 0, u'type': u'DIRECTORY', u'fileId': 16752})
(u'risk_platform20180330114834.sh', {u'group': u'supergroup', u'permission': u'644', u'blockSize': 134217728, u'accessTime': 1528799458774L, u'pathSuffix': u'risk_platform20180330114834.sh', u'modificationTime': 1528799458784L, u'replication': 3, u'length': 859, u'childrenNum': 0, u'owner': u'apuser', u'storagePolicy': 0, u'type': u'FILE', u'fileId': 30177})