HBase Python API
HBase通過thrift機制可以實現多語言編程,信息通過端口傳遞,因此Python是個不錯的選擇
吐槽
博主在Mac上配置HBase,奈何ZooKeeper一直報錯,結果Ubuntu虛擬機上10min解決……但是虛擬機里沒有IDE寫Java代碼還是不方便,因此用Mac主機連接虛擬機的想法應運而生,這樣又可以愉快地使用主機的IDE了~
一、服務端啟動HBase Thrift RPC
HBase的啟動方式有很多,這里不再贅述,Ubuntu啟動HBase之后,啟動thrift
hbase-daemon.sh start thrift
默認的服務端口是9090
二、客戶端安裝依賴包
sudo pip install thrift
sudo pip install hbase-thrift
三、編寫客戶端代碼
# coding=utf-8
from thrift.transport import TSocket
from thrift.transport.TTransport import TBufferedTransport
from thrift.protocol import TBinaryProtocol
from hbase import Hbase
from hbase.ttypes import ColumnDescriptor
from hbase.ttypes import Mutation
class HBaseClient(object):
    """Small convenience wrapper around the HBase Thrift ``Hbase.Client``.

    Each instance owns one Thrift connection, opened in ``__init__`` and
    closed when the object is finalized.
    """

    def __init__(self, ip, port=9090):
        """Open a connection to the HBase Thrift server.

        :param ip: host name or IP address of the Thrift server
        :param port: port of the Thrift server (defaults to 9090)
        """
        # Buffered transport layered on a plain socket to the server.
        self.__transport = TBufferedTransport(TSocket.TSocket(ip, port))
        # Binary wire protocol running over that transport.
        protocol = TBinaryProtocol.TBinaryProtocol(self.__transport)
        # Generated Thrift client that issues the actual RPCs.
        self.__client = Hbase.Client(protocol)
        self.__transport.open()

    def __del__(self):
        """Close the transport, if one was ever created."""
        # __init__ may have raised before the attribute was assigned, so
        # look it up defensively (using the name-mangled attribute name).
        transport = getattr(self, "_HBaseClient__transport", None)
        if transport is not None:
            transport.close()

    def get_tables(self):
        """Return the table names reported by the server.

        :return: the result of the ``getTableNames`` RPC
        """
        return self.__client.getTableNames()

    def create_table(self, table, *columns):
        """Create a table with the given column families.

        :param table: table name
        :param columns: column family names, one positional argument each
        """
        # Build a concrete list: on Python 3 ``map`` would yield a lazy
        # iterator instead of the list the Thrift call is given on Python 2.
        column_families = [ColumnDescriptor(name) for name in columns]
        self.__client.createTable(table, column_families)

    def put(self, table, row, columns):
        """Write one or more cells of a single row.

        :param table: table name
        :param row: row key
        :param columns: dict mapping column name to the value to store
        """
        # A comprehension replaces ``lambda (k, v): ...``, which is a
        # syntax error on Python 3 (tuple parameters were removed).
        mutations = [
            Mutation(column=name, value=value)
            for name, value in columns.items()
        ]
        self.__client.mutateRow(table, row, mutations)

    def delete(self, table, row, column):
        """Delete a column of a row via the ``deleteAll`` RPC.

        :param table: table name
        :param row: row key
        :param column: column name to delete
        """
        self.__client.deleteAll(table, row, column)

    def scan(self, table, start_row="", columns=None):
        """Iterate over the rows of a table, starting at ``start_row``.

        This is a generator: the scanner is opened on the first iteration
        and closed once the generator finishes or is closed/collected.

        :param table: table name
        :param start_row: row key to start scanning from
        :param columns: columns to fetch, passed through to ``scannerOpen``
        :return: yields one dict per row, mapping column name to cell value
        """
        scanner = self.__client.scannerOpen(table, start_row, columns)
        try:
            while True:
                rows = self.__client.scannerGet(scanner)
                # An empty result signals that the scan is exhausted.
                if not rows:
                    break
                yield {
                    name: cell.value
                    for name, cell in rows[0].columns.items()
                }
        finally:
            # Release the server-side scanner even if the caller stops early
            # or an RPC raises part-way through the scan.
            self.__client.scannerClose(scanner)
if __name__ == '__main__':
    # Demo: write three rows to the "student" table, delete one cell,
    # then print every row returned by a full scan.
    client = HBaseClient("10.211.55.7")
    # Presumably needed once, before the first run, so that the table
    # exists -- uncomment to create it:
    # client.create_table('student', 'name', 'course')
    client.put("student", "1",
               {"name:": "Jack",
                "course:art": "88",
                "course:math": "12"})
    client.put("student", "2",
               {"name:": "Tom", "course:art": "90",
                "course:math": "100"})
    client.put("student", "3",
               {"name:": "Jerry"})
    client.delete('student', '1', 'course:math')
    for v in client.scan('student'):
        # Call form of print: identical output on Python 2 for a single
        # argument, and valid syntax on Python 3.
        print(v)
四、測試結果
{'course:art': '88', 'name:': 'Jack'}
{'course:art': '90', 'name:': 'Tom', 'course:math': '100'}
{'name:': 'Jerry'}
五、小結
有了Python接口后,編寫簡單任務腳本變得非常方便,這大大得益於RPC機制,很好地解耦了Client和Server,方便開發人員合作。