Python讀取Excel批量自動創建Hive數據表SQL


如需使用,請自行下載使用說明和Excel文件。

百度網盤鏈接地址:https://pan.baidu.com/s/1RNqJwAF8T-KLGQUFvvyZwg/812l

import xlrd


# Open the workbook that describes the tables to create.
# NOTE(review): xlrd 2.0+ reportedly reads only legacy .xls files — confirm
# the installed xlrd version can open this .xlsx workbook.
data = xlrd.open_workbook("Create.xlsx")
# Use the first sheet of the workbook.
table = data.sheet_by_index(0)

# Working lists shared by the functions below. getInFo() clears and refills
# them for each table; Create_table() reads them.
tabNameList = []  # table name(s)
tabColList = []  # column names
tabTypeList = []  # column types
tabColCommList = []  # column comments
tabCommList = []  # table comment
tabParColList = []  # partition fields
tabParTypeList = []  # partition field types
tabParCommList = []  # partition field comments
index = [1]  # row boundaries between tables; starts at row 1 (row 0 is skipped by getIndex)
result = []  # final result: the generated SQL statements

def getIndex():
    """Append to ``index`` the sheet row at which each table's rows end.

    Reads the table-name column (column 0) of the module-level ``table``,
    skipping row 0. A boundary is recorded after the last data row and after
    every row whose name differs from the name on the following row. The
    recorded value is the sheet row number of the row *after* the boundary,
    so consecutive entries of ``index`` can be used as slice bounds.
    """
    names = table.col_values(0)[1:]  # table names, header row excluded
    last_pos = len(names) - 1
    for pos, current in enumerate(names):
        # ``pos`` is relative to the sliced list: +1 for the skipped row,
        # +1 more because the bound is exclusive.
        if pos == last_pos or current != names[pos + 1]:
            index.append(pos + 2)


def Create_table():
    """Build one Hive ``CREATE TABLE`` statement from the module-level lists.

    Reads ``tabNameList``, ``tabColList``, ``tabTypeList``,
    ``tabColCommList``, ``tabCommList`` and the ``tabPar*`` lists, appends the
    generated statement to ``result`` and prints ``result``.

    The statement is always terminated with ``;`` — previously the
    terminator was only emitted for partitioned tables, so statements for
    non-partitioned tables ran together in the output.
    """
    # str.format() stringifies each value, so numeric spreadsheet cells do
    # not raise TypeError the way ``+`` concatenation did.
    columns = [
        "{} {} COMMENT '{}'".format(col_name, col_type, col_comment)
        for col_name, col_type, col_comment in zip(
            tabColList, tabTypeList, tabColCommList
        )
    ]

    creat = "\nCREATE TABLE IF NOT EXISTS {} ( \n".format(tabNameList[0])
    creat += ",\n".join(columns)
    creat += "\n) COMMENT '{}'\n".format(tabCommList[0])

    if tabParColList:
        # zip() stops at the shortest list, i.e. at the number of partition
        # fields, matching the original index-based loop.
        partitions = [
            "{} {} COMMENT '{}'".format(par_name, par_type, par_comment)
            for par_name, par_type, par_comment in zip(
                tabParColList, tabParTypeList, tabParCommList
            )
        ]
        creat += "PARTITIONED BY (\n" + ",\n".join(partitions) + ")"

    # A "STORED AS PARQUET" clause or compression properties could be added
    # before the terminator.
    creat += ";\n"

    result.append(creat)
    print(result)

def getInFo():
    """Generate one CREATE TABLE statement per table described in the sheet.

    Calls ``getIndex()`` to fill ``index`` with row boundaries, then for each
    consecutive pair ``(left, right)`` refills the module-level ``tab*`` lists
    from sheet rows ``left`` up to (not including) ``right`` and calls
    ``Create_table()``.

    Sheet columns read: 0 table name, 1 column name, 2 column type,
    3 column comment, 4 table comment, 5 partition field, 6 partition field
    type, 7 partition field comment. Empty column types and empty partition
    field types default to ``"string"``.
    """
    getIndex()
    for left, right in zip(index, index[1:]):
        # The lists are shared with Create_table(), so they are emptied in
        # place rather than rebound.
        for shared in (
            tabNameList,
            tabColList,
            tabTypeList,
            tabColCommList,
            tabCommList,
            tabParColList,
            tabParTypeList,
            tabParCommList,
        ):
            shared.clear()

        # Table name(s): distinct values, in order of first appearance.
        for tab_name in table.col_values(0)[left:right]:
            if tab_name not in tabNameList:
                tabNameList.append(tab_name)

        # Column names, types and comments: one entry per row.
        tabColList.extend(table.col_values(1)[left:right])
        tabTypeList.extend(
            col_type if col_type != "" else "string"
            for col_type in table.col_values(2)[left:right]
        )
        tabColCommList.extend(table.col_values(3)[left:right])

        # Table comment: the value on the table's first row (may be empty).
        tabCommList.extend(table.col_values(4)[left:right][:1])

        # Partition fields: keep name, type and comment from the SAME row.
        # Previously names skipped blank rows while types and comments did
        # not, so the three lists went out of step whenever a partition
        # field was not on the table's leading rows.
        partition_rows = zip(
            table.col_values(5)[left:right],
            table.col_values(6)[left:right],
            table.col_values(7)[left:right],
        )
        for par_name, par_type, par_comment in partition_rows:
            if par_name == "":
                continue
            tabParColList.append(par_name)
            tabParTypeList.append(par_type if par_type != "" else "string")
            tabParCommList.append(par_comment)

        Create_table()

# Build every CREATE TABLE statement into the module-level ``result`` list.
getInFo()


# Output file settings.
fname = "建表.sql"


# Open the output file once (append mode, as before) and let the context
# manager close it. The previous loop reopened the file for every statement,
# closed only the last handle, and raised NameError when ``result`` was empty.
with open(fname, "a", encoding="utf-8") as outFile:
    for item in result:
        print(item)
        outFile.write(item)

 


免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯繫本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM