參考:
https://blog.csdn.net/a6822342/article/details/80713652 #簡單
https://www.zhihu.com/question/269333988 #複雜點的
方法一:
ali-bigdata-gateway-guard-1
# Environment setup for the PyHive examples that follow.
# NOTE(review): presumably run on the host named on the line above - confirm.
# Leave whichever virtualenv is currently active.
deactivate
# Open a root login shell; the remaining commands run inside it.
sudo -i
# Activate the venv_bi virtualenv so pip installs into it.
. /mnt/disk1/data/venv_bi/bin/activate
# Install PyHive plus the Thrift/SASL packages
# (presumably the transport dependencies PyHive needs for Hive - confirm).
pip install pyhive
pip install thrift
pip install sasl
pip install thrift_sasl
# Create h.py; presumably it holds the Python snippet that follows.
vim h.py
from contextlib import closing

from pyhive import hive

# Connect to Hive and print the list of databases.
conn = hive.Connection(host='127.0.0.1', port=10000, username='feng.hong', database='test_db')
# closing() guarantees cursor.close() and then conn.close() run even if the
# query raises, so the Hive session is not left open.
with closing(conn), closing(conn.cursor()) as cursor:
    cursor.execute('show databases')
    print(cursor.fetchall())
或使用select查詢
from contextlib import closing

from pyhive import hive

# Same connection as the previous example, but running a SELECT and printing
# the first ten rows of the users table.
conn = hive.Connection(host='127.0.0.1', port=10000, username='feng.hong', database='test_db')
# closing() guarantees cursor.close() and then conn.close() run even if the
# query raises, so the Hive session is not left open.
with closing(conn), closing(conn.cursor()) as cursor:
    cursor.execute('select * from users limit 10')
    print(cursor.fetchall())
#較長的查詢語句可用三個引號(""")包起來,這樣語句中間可以直接換行
#設置隊列可以新起一個execute,因為是在同一個connect中所以不會失效.
from contextlib import closing

from pyhive import hive

# Query Hive on a specific YARN queue: the queue is set with its own
# execute() call, and the query is then run on the same cursor.
conn = hive.Connection(host='10.52.5.190', port=10000, username='feng.hong', database='default')
# closing() guarantees cursor.close() and then conn.close() run even if a
# statement raises.
with closing(conn), closing(conn.cursor()) as cursor:
    # Select the MapReduce queue before running the query.
    cursor.execute('SET mapreduce.job.queuename=data_bi')
    # Triple quotes allow the SQL text to span lines if desired; it is kept
    # exactly as in the original note here.
    cursor.execute("""SELECT a.dt, a.city_id, a.city_name, a.product_id, a.driver_id, a.phone_number FROM oride_dw.dim_oride_driver_base a where a.dt=DATE_SUB(from_unixtime(unix_timestamp(),'yyyy-MM-dd'),6) limit 10""")
    print(cursor.fetchall())
方法二:通過os.system,具體見博客python發郵件
# NOTE(review): only `os` is used by this snippet; the remaining imports are
# presumably needed by the mail-sending code mentioned in the heading - confirm.
import csv
import os
import smtplib
from email.mime.application import MIMEApplication
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText

import numpy as np
import pandas as pd
import pymysql
import sqlalchemy

# Run the query through the `hive -e` command line and redirect its standard
# output to /tmp/1.txt. Adjacent string literals are concatenated by Python,
# so the command passed to the shell is exactly the original one-line string.
hive_command = (
    "hive -e \"SET mapreduce.job.queuename=data_bi;"
    "SELECT a.dt,a.city_id,a.city_name,a.product_id,a.driver_id,a.phone_number "
    "FROM oride_dw.dim_oride_driver_base a "
    "where a.dt >= DATE_SUB(current_date(),2) and a.dt<= DATE_SUB(current_date(),1) "
    "limit 10\" >/tmp/1.txt"
)
# NOTE(review): the exit status returned by os.system() is not checked, so a
# failed query still leaves an (empty or partial) /tmp/1.txt behind.
os.system(hive_command)
方法三: 沒跑出來,待研究
註:下面的 """ 是多行字串(讓 SQL 可以跨行書寫),不是注釋
import pandas as pd
import sqlalchemy as sa

# Load the query result into a pandas DataFrame through PyHive's SQLAlchemy
# dialect (the `hive://` URL scheme).
sql = """SELECT a.dt, a.city_id, a.city_name, a.product_id, a.driver_id, a.phone_number
FROM oride_dw.dim_oride_driver_base a
where a.dt=DATE_SUB(from_unixtime(unix_timestamp(),'yyyy-MM-dd'),6) limit 10"""

# NOTE(review): the original URL carried no username and no queue setting,
# unlike the pyhive example earlier in these notes. Both are added here for
# consistency; whether this resolves the reported failure is unconfirmed.
engine = sa.create_engine(
    'hive://feng.hong@10.52.5.190:10000/opay_dw',
    # Session configuration passed to the Hive connection; same queue as the
    # `SET mapreduce.job.queuename=data_bi` used by the other methods.
    connect_args={'configuration': {'mapreduce.job.queuename': 'data_bi'}},
)

# Keep and print the result: a bare pd.read_sql(...) expression shows nothing
# when the file is run as a script.
df = pd.read_sql(sql, engine)
print(df)