from hdfs.client import Client

# Thin convenience wrappers around the hdfscli WebHDFS client.
# API reference: https://hdfscli.readthedocs.io/en/latest/api.html


def read_hdfs_file(client, filename):
    """Read an HDFS text file and return its lines, stripped, as a list.

    :param client: an ``hdfs.client.Client`` instance.
    :param filename: HDFS path of the file to read.
    :return: list of str, one entry per line, whitespace stripped.
    """
    lines = []
    with client.read(filename, encoding='utf-8', delimiter='\n') as reader:
        for line in reader:
            lines.append(line.strip())
    return lines


def mkdirs(client, hdfs_path):
    """Create a directory (including missing parents) on HDFS."""
    client.makedirs(hdfs_path)


def delete_hdfs_file(client, hdfs_path):
    """Delete a file from HDFS (non-recursive by default)."""
    client.delete(hdfs_path)


def put_to_hdfs(client, local_path, hdfs_path):
    """Upload a local file to HDFS.

    ``cleanup=True`` removes any partially uploaded files if an error occurs.
    """
    client.upload(hdfs_path, local_path, cleanup=True)


def get_from_hdfs(client, hdfs_path, local_path):
    """Download a file from HDFS to the local filesystem.

    Fails rather than overwriting an existing local file (``overwrite=False``).
    """
    client.download(hdfs_path, local_path, overwrite=False)


def append_to_hdfs(client, hdfs_path, data):
    """Append UTF-8 text data to an existing HDFS file."""
    client.write(hdfs_path, data, overwrite=False, append=True, encoding='utf-8')


def write_to_hdfs(client, hdfs_path, data):
    """Write UTF-8 text data to an HDFS file, replacing any existing content."""
    client.write(hdfs_path, data, overwrite=True, append=False, encoding='utf-8')


def move_or_rename(client, hdfs_src_path, hdfs_dst_path):
    """Move or rename a file or directory on HDFS."""
    client.rename(hdfs_src_path, hdfs_dst_path)


def chown(client, hdfs_path, owner):
    """Change the owner of an HDFS path.

    Added because the original script called ``chown(...)`` without ever
    defining it, which raised ``NameError`` at runtime.
    """
    client.set_owner(hdfs_path, owner=owner)


def list(client, hdfs_path):
    """Return the entry names of an HDFS directory.

    NOTE: shadows the builtin ``list``; the name is kept only for backward
    compatibility with existing callers — prefer a name like ``list_dir``
    in new code.
    """
    return client.list(hdfs_path, status=False)


if __name__ == "__main__":
    # Demo / ad-hoc usage; guarded so importing this module has no side effects.
    # client = Client(url, root=None, proxy=None, timeout=None, session=None)
    # NOTE(review): timeout is in seconds, so 10000 is ~2.8 hours — confirm
    # whether 10 was intended.  session=False is falsy, so the client simply
    # builds its own requests.Session (same as passing None).
    client = Client("http://120.78.186.82:50070/", root="/", timeout=10000, session=False)
    # client = InsecureClient("http://120.78.186.82:50070", user='ann')

    # move_or_rename(client, '/input/2.csv', '/input/emp.csv')
    # read_hdfs_file(client, '/input/emp.csv')
    put_to_hdfs(client, 'D:\\bbb.txt', '/file')
    # append_to_hdfs(client, '/input/emp.csv', '我愛你' + '\n')
    # write_to_hdfs(client, '/emp.csv', "sadfafdadsf")
    # mkdirs(client, '/input/python')
    # print(list(client, '/'))
    # chown(client, '/input/1.csv', 'root')