目前Python有兩個庫可以操作HBase:hbase-thrift 和 happybase。
happybase使用起來比較簡單方便,因此重點學習該庫,hbase-thrift只做簡要介紹。
(一)hbase-thrift
1、使用前先添加庫和依賴庫:
# Install the Thrift runtime, the hbase-thrift client and the other packages
# listed by the article. Run one command per line.
pip install thrift
pip install hbase-thrift
pip install google-cloud
pip install google-cloud-vision
pip install kazoo
2、連接數據庫的配置信息:
# Start the HBase Thrift server on the Linux host first:
#   /opt/cloudera/parcels/CDH/lib/hbase/bin/hbase-daemon.sh --config /opt/cloudera/parcels/CDH/lib/hbase/conf foreground_start thrift --infoport 9096 -p 9091
# Then run this Python script to connect to that server.

# TTransport and TBinaryProtocol are imported explicitly so the script does
# not depend on whatever names the wildcard import below happens to expose.
from thrift.protocol import TBinaryProtocol
from thrift.transport import TSocket
from thrift.transport import TTransport

from hbase import Hbase
from hbase.ttypes import *  # noqa: F401,F403 -- kept from the original snippet

host = "xxx.xxx.xxx.xxx"  # placeholder: replace with the Thrift server address
port = 9091               # matches the "-p 9091" option of the start command
framed = False            # True selects TFramedTransport, False TBufferedTransport

# Build the client stack: socket -> transport -> binary protocol -> client.
socket = TSocket.TSocket(host, port)
if framed:
    transport = TTransport.TFramedTransport(socket)
else:
    transport = TTransport.TBufferedTransport(socket)
protocol = TBinaryProtocol.TBinaryProtocol(transport)
client = Hbase.Client(protocol)
3、操作數據庫
print("Thrift2 Demo")
print("This demo assumes you have a table called \"example\" with a column family called \"family1\"")

# Open the connection. The try/finally guarantees the transport is closed
# again even when the request below raises.
transport.open()
try:
    # Fetch the names of all tables.
    tableNames = client.getTableNames()
    print('tableNames:', tableNames)
finally:
    # Close the connection.
    transport.close()

#################################################
# Output recorded in the original article:
# Thrift2 Demo
# This demo assumes you have a table called "example" with a column family called "family1"
# tableNames: ['lrx_hbase_test', 'lrx_hbase_test2', 'lrx_hbase_test3', 'lrx_test']
(二)happybase
# pip install thrift
# pip install happybase
# Start the HBase Thrift server on the Linux host first:
#   /opt/cloudera/parcels/CDH/lib/hbase/bin/hbase-daemon.sh --config /opt/cloudera/parcels/CDH/lib/hbase/conf foreground_start thrift --infoport 9096 -p 9091 &
# Then run this Python script to connect to that server.
import happybase

from conf import setting

# Create the connection from the keyword arguments stored in setting.HBASE.
# NOTE(review): the original comment said a `size` argument sets the number of
# pooled connections; in happybase that argument belongs to ConnectionPool,
# not Connection -- verify against the contents of setting.HBASE.
connection = happybase.Connection(**setting.HBASE)
try:
    # Open the transport (returns nothing).
    connection.open()

    # Create a table (returns nothing).
    # connection.create_table('lrx_test',
    #                         {
    #                             'data': dict()
    #                         })

    # Get a table object through the connection; returns a
    # happybase.table.Table (its repr shows the table name as bytes).
    table0 = connection.table('lrx_test')
    print('表對象為:')
    print(table0)  # <happybase.table.Table name=b'lrx_test'>

    # Build a table instance directly; also a happybase.table.Table (its repr
    # shows the table name as str).
    # NOTE(review): the happybase documentation recommends Connection.table()
    # over instantiating Table directly; kept because the article demonstrates
    # both ways.
    table = happybase.Table('lrx_test', connection)
    print('表實例為:')
    print(table)  # <happybase.table.Table name='lrx_test'>

    # Insert data (returns nothing): for i in 0..4 write value '<i>' into
    # column data:<i> of row<i>, then write 'value1' into data:5 of row5.
    for i in range(5):
        table.put('row%s' % i, {'data:%s' % i: '%s' % i})
    table.put('row5', {'data:5': 'value1'})

    # Fetch the stored cell values of one column; returns a list.
    content = table.cells('row1', 'data:1')
    print(content)  # recorded in the article: [b'value1', b'value1']

    # Read a counter column; returns the current value of the cell.
    # content2 = table.counter_get('row2', 'data:2')
    # print(content2)  # 0

    # Get a scanner; returns a generator of (row key, data) pairs.
    scanner = table.scan()
    for row_key, row_data in scanner:
        print(row_key, row_data)
    ###########################################
    # Output recorded in the original article:
    # b'row0' {b'data:0': b'0'}
    # b'row1' {b'data:1': b'value1'}
    # b'row2' {b'data:2': b'2'}
    # b'row3' {b'data:3': b'3'}
    # b'row4' {b'data:4': b'4'}
    # NOTE(review): b'value1' for row1 and the missing row5 do not match the
    # inserts above; presumably recorded from an earlier run -- verify.
    print(scanner)  # <generator object Table.scan at 0x000001E17CCDAF10>

    # Fetch a single row; returns a dict with bytes keys and values, decoded
    # here into a str -> str dict.
    info = table.row('row2')
    info1 = {key.decode(): value.decode() for key, value in info.items()}
    print(info1)

    # List the table names. A separate variable is used so the `table` object
    # above is not overwritten by a list.
    table_names = connection.tables()
    print(table_names)
finally:
    # Close the transport (returns nothing), even if a step above failed.
    connection.close()