1、關聯規則挖掘算法
關聯規則挖掘算法可以實現從兩種經典算法Apriori或FP-Growth中任意選取算法,輸出各個頻繁項集和強關聯規則。輸入文件由本地導入,可自行設置最小支持度計數和最小置信度參數值。
2、 Apriori算法設計思想
Apriori算法本質上使用一種稱作逐層搜索的迭代方法,使用候選項集找頻繁項集,其特點在於每找一次頻繁項集就需要掃描一次數據庫。
3、FP-growth算法設計思想
FP-growth算法將數據集存儲在一個稱作FP樹的特定結構中,只需要遍歷數據集2次,就能夠完成頻繁模式發現。其發現頻繁項集的過程分為兩個階段:第一個階段是構建FP樹,第二個階段是從FP樹中挖掘頻繁項集。
4、用戶界面
1)點擊“導入文件”按鈕讀取文件,如圖4-1所示:
圖4-1 關聯規則挖掘系統導入文件
2)選擇Apriori算法,單擊按鈕,讀取文件並運行,運行結果如圖4-2所示:

圖4-2 關聯規則挖掘系統的Apriori算法實現
3)在2)的基礎上,單擊“清屏”按鈕,修改最小支持度和最小置信度,單擊“Apriori算法”再次運行,運行結果如圖4-3所示:

圖4-3 關聯規則挖掘系統的Apriori算法修改參數實現
4)選擇FP-Growth算法,單擊按鈕,讀取文件並運行,運行結果如圖4-4所示:
圖4-4 關聯規則挖掘系統的FP-Growth算法實現
5)在4)的基礎上,單擊“清屏”按鈕,如圖4-5修改最小支持度和最小置信度,單擊“FP-Growth算法”再次運行,運行結果如圖4-6所示:

圖4-5關聯規則挖掘系統的清屏
圖4-6關聯規則挖掘系統的FP-Growth算法修改參數實現
6)點擊“退出”,實現控制台清空和系統退出,如圖4-7所示:
圖4-7 關聯規則挖掘系統的退出
5、實驗源碼
編譯環境為Spyder,所用語言及版本為python3.7,GUI環境為tkinter。
1)主運行界面 GUI.py
# -*- coding: utf-8 -*-
import sys
import fp
import tkinter as tk
from tkinter import filedialog
from tkinter import scrolledtext
class GUI(object):
    """Tkinter front end for mining association rules with Apriori or FP-Growth.

    The user picks a transaction file (one transaction per line, items
    separated by commas), sets the minimum support count and the minimum
    confidence, and runs either algorithm by selecting its radio button.
    Creating an instance builds the widgets and enters the Tk main loop, so
    ``GUI()`` only returns once the window has been closed.
    """

    def __init__(self):
        """Build every widget and start the Tk main loop."""
        # Main window.
        self.window = tk.Tk()
        self.window.title('關聯規則挖掘系統')
        self.window.geometry('1150x550')

        # "Import file" button.
        self.botton1 = tk.Button(self.window, text='導入文件', bg='green', fg='white',
                                 font=('楷體', 12, 'bold'), width=8, height=1,
                                 command=self.openfile)
        self.botton1.place(x=70, y=60)

        # Parameter labels.  place() returns None, so the widget is stored
        # first and positioned in a separate statement.
        self.label2 = tk.Label(self.window, text='最小支持數', bg='light blue', fg='white',
                               font=('楷體', 16, 'bold'), width=10, height=1)
        self.label2.place(x=10, y=160)
        self.label3 = tk.Label(self.window, text='最小置信度', bg='light blue', fg='white',
                               font=('楷體', 16, 'bold'), width=10, height=1)
        self.label3.place(x=10, y=220)

        # Heading and text box showing the content of the imported file.
        self.label4 = tk.Label(self.window, text='導入文件內容如下',
                               font=('楷體', 16, 'bold'), width=16, height=1)
        self.label4.place(x=260, y=20)
        self.text1 = scrolledtext.ScrolledText(self.window, height=28, width=23,
                                               font=('楷體', 13))
        self.text1.place(x=250, y=60)
        # A left click inside the box clears both text boxes.
        self.text1.bind("<Button-1>", self.clear)

        # Heading and text box showing frequent itemsets and strong rules.
        self.label5 = tk.Label(self.window, text='頻繁項集和強關聯規則',
                               font=('楷體', 16, 'bold'), width=20, height=1)
        self.label5.place(x=700, y=20)
        self.text2 = scrolledtext.ScrolledText(self.window, height=28, width=60,
                                               font=('楷體', 10))
        self.text2.place(x=550, y=60)
        self.text2.bind("<Button-1>", self.clear)

        # Entry showing the path of the imported file.
        self.var0 = tk.StringVar()
        self.entry1 = tk.Entry(self.window, show=None, width='25',
                               font=('Arial', 10), textvariable=self.var0)
        self.entry1.place(x=10, y=100)

        # Minimum support count, editable by the user (defaults to 3).
        self.var1 = tk.StringVar()
        self.var1.set('3')
        self.entry2 = tk.Entry(self.window, show=None, width='3',
                               font=('Arial', 16), textvariable=self.var1)
        self.entry2.place(x=180, y=160)

        # Minimum confidence, editable by the user (defaults to 0.7).
        self.var2 = tk.StringVar()
        self.var2.set('0.7')
        self.entry3 = tk.Entry(self.window, show=None, width='3',
                               font=('Arial', 16), textvariable=self.var2)
        self.entry3.place(x=180, y=220)

        # Algorithm selection: choosing a radio button runs that algorithm.
        self.btnlist = tk.IntVar()
        self.radiobtn1 = tk.Radiobutton(self.window, variable=self.btnlist, value=0,
                                        text='Apriori算法', font=('bold'),
                                        command=self.runApriori)
        self.radiobtn1.place(x=30, y=290)
        self.radiobtn2 = tk.Radiobutton(self.window, variable=self.btnlist, value=1,
                                        text='FP-Growth算法', font=('bold'),
                                        command=self.runFPGrowth)
        self.radiobtn2.place(x=30, y=330)
        self.btnlist.set(0)

        # "Clear" button.
        self.btn2 = tk.Button(self.window, bg='green', fg='white', text='清屏',
                              font=('楷體', 12, 'bold'), width=6, height=1)
        self.btn2.place(x=80, y=390)
        self.btn2.bind("<Button-1>", self.clear)

        # "Exit" button.
        self.btn3 = tk.Button(self.window, bg='green', fg='white', text='退出',
                              font=('楷體', 12, 'bold'), width=6, height=1)
        self.btn3.place(x=80, y=450)
        self.btn3.bind("<Button-1>", self.close)

        # Blocks until the window is closed.
        self.window.mainloop()

    def clear(self, event):
        """Empty both text boxes.

        The file path and the two parameter entries are deliberately kept so
        the user can re-run immediately with the same file.
        """
        self.text1.delete("1.0", tk.END)
        self.text2.delete("1.0", tk.END)

    def close(self, event):
        """Ask for confirmation, then destroy the window and exit."""
        # ``import tkinter`` does not load the messagebox submodule, so it is
        # imported explicitly here instead of relying on ``tk.messagebox``.
        from tkinter import messagebox

        if messagebox.askokcancel('詢問', '確定退出系統嗎?'):
            # Destroy the window first; nothing placed after the exit call
            # would ever run.
            self.window.destroy()
            sys.exit()

    def __del__(self):
        """Restore the interpreter's original stdout and stderr."""
        sys.stdout = sys.__stdout__
        sys.stderr = sys.__stderr__

    def getDataSupport(self):
        """Return the minimum support count typed by the user, as a float.

        Raises:
            ValueError: if the entry does not hold a number.
        """
        return float(self.var1.get())

    def getDataConfidence(self):
        """Return the minimum confidence typed by the user, as a float.

        Raises:
            ValueError: if the entry does not hold a number.
        """
        return float(self.var2.get())

    def openfile(self):
        """Let the user choose a csv/txt file and show its path in the entry."""
        nameFile = filedialog.askopenfilename(
            title='打開文件', filetypes=[('csv', '*.csv'), ('txt', '*.txt')])
        # Replace the previous path instead of inserting at the cursor, and
        # keep the old path when the dialog is cancelled (empty result).
        if nameFile:
            self.var0.set(nameFile)

    def getnamefile(self):
        """Return the file path currently shown in the path entry."""
        return self.var0.get()

    @staticmethod
    def _parse_transactions(data):
        """Split raw file text into a list of transactions (lists of items).

        Items are separated by commas.  Surrounding whitespace is stripped,
        and empty fields and blank lines are dropped so that a trailing
        newline does not create a bogus transaction holding the item ''.
        """
        transactions = []
        for line in data.splitlines():
            items = [field.strip() for field in line.split(",")]
            items = [item for item in items if item]
            if items:
                transactions.append(items)
        return transactions

    def loadDataSet(self):
        """Read the selected file, echo it in the left box, return transactions.

        Returns:
            A list of transactions, each a list of item strings.

        Raises:
            OSError: if the path in the entry cannot be opened.
        """
        nameFile = self.getnamefile()
        # utf-8-sig also accepts plain UTF-8 and strips a leading BOM, which
        # would otherwise become part of the first item.
        with open(nameFile, "r", encoding='utf-8-sig') as myfile:
            data = myfile.read()
        self.text1.insert("0.0", data)
        self.text1.see("end")
        return self._parse_transactions(data)

    def _show_rules(self, rules):
        """Append one line per (antecedent, consequent, confidence) rule."""
        for antecedent, consequent, confidence in rules:
            line = (str(antecedent) + '-->' + str(consequent)
                    + '置信度:' + str(confidence) + '\n')
            self.text2.insert('insert', line)

    def runApriori(self):
        """Run Apriori on the selected file and print itemsets and rules."""
        dataSet = self.loadDataSet()
        minSupport = self.getDataSupport()
        minConf = self.getDataConfidence()
        L, suppData = self.apriori(dataSet, minSupport)
        rules = self.generateRules(L, suppData, minConf)

        self.text2.insert(
            'insert',
            '#######################Apriori算法##########################\n')
        self.text2.insert('insert', '\n頻繁項集:\n')
        # Convert explicitly so the widget receives a plain string.
        self.text2.insert('insert', str(L))
        self.text2.insert('insert', '\n\n強關聯規則:\n')
        self._show_rules(rules)

    def runFPGrowth(self):
        """Run FP-Growth on the selected file and print tree, itemsets, rules."""
        dataSet = self.loadDataSet()
        frozenDataSet = fp.transfer2FrozenDataSet(dataSet)
        minSupport = self.getDataSupport()
        minConf = self.getDataConfidence()

        self.text2.insert(
            'insert',
            '#######################FP_Growth算法########################\n')
        self.text2.insert('insert', '\nFP樹:\n')

        fptree, headPointTable = fp.createFPTree(frozenDataSet, minSupport)
        frequentPatterns = {}
        rules = []
        # createFPTree returns (None, None) when no item reaches the minimum
        # support; in that case there is no tree to show and nothing to mine.
        if fptree is not None:
            # disp() records the nodes that display() then renders as text.
            fptree.disp()
            self.text2.insert('insert', fptree.display())
            fp.mineFPTree(headPointTable, set(), frequentPatterns, minSupport)
            fp.rulesGenerator(frequentPatterns, minConf, rules)

        self.text2.insert('insert', '\n頻繁項集:\n')
        self.text2.insert('insert', str(frequentPatterns))
        self.text2.insert('insert', '\n\n強關聯規則:\n')
        self._show_rules(rules)

    def createC1(self, dataSet):
        """Return the sorted list of all 1-item candidate sets as frozensets."""
        items = {item for transaction in dataSet for item in transaction}
        return [frozenset([item]) for item in sorted(items)]

    def scanD(self, D, Ck, minSupport):
        """Count the candidates of ``Ck`` over the transactions ``D``.

        Args:
            D: list of transactions, each a set of items.
            Ck: list of candidate itemsets (frozensets).
            minSupport: minimum absolute support count.

        Returns:
            ``(retList, supportData)``: the candidates whose count reaches
            ``minSupport`` (most recently first-seen candidate first), and a
            dict mapping every candidate seen at least once to its count.
        """
        ssCnt = {}
        for tid in D:
            for can in Ck:
                if can.issubset(tid):
                    ssCnt[can] = ssCnt.get(can, 0) + 1
        retList = [key for key, support in ssCnt.items() if support >= minSupport]
        retList.reverse()
        return retList, dict(ssCnt)

    def aprioriGen(self, Lk, k):
        """Join (k-1)-itemsets sharing their first k-2 sorted items.

        Args:
            Lk: list of frozensets, all of size k-1.
            k: size of the candidates to create.

        Returns:
            The list of k-item candidate frozensets.
        """
        # Sort BEFORE slicing: set iteration order is arbitrary, so slicing
        # the unsorted items makes the join miss or duplicate candidates.
        prefixes = [sorted(itemset)[:k - 2] for itemset in Lk]
        retList = []
        for i in range(len(Lk)):
            for j in range(i + 1, len(Lk)):
                if prefixes[i] == prefixes[j]:
                    retList.append(Lk[i] | Lk[j])
        return retList

    def apriori(self, dataSet, minSupport):
        """Find all frequent itemsets by level-wise search.

        Args:
            dataSet: list of transactions (iterables of items).
            minSupport: minimum absolute support count.

        Returns:
            ``(L, supportData)`` where ``L[i]`` lists the frequent itemsets
            of size ``i + 1`` (the last level is empty) and ``supportData``
            maps counted itemsets to their support count.
        """
        C1 = self.createC1(dataSet)
        D = list(map(set, dataSet))
        L1, supportData = self.scanD(D, C1, minSupport)
        L = [L1]
        k = 2
        while L[k - 2]:
            Ck = self.aprioriGen(L[k - 2], k)
            # One scan of the data set per level turns Ck into Lk.
            Lk, supK = self.scanD(D, Ck, minSupport)
            supportData.update(supK)
            L.append(Lk)
            k += 1
        return L, supportData

    def generateRules(self, L, supportData, minConf):
        """Derive every rule whose confidence reaches ``minConf``.

        Args:
            L: frequent itemsets per level, as returned by ``apriori``.
            supportData: support count of every frequent itemset.
            minConf: minimum confidence.

        Returns:
            A list of ``(antecedent, consequent, confidence)`` tuples.
        """
        bigRuleList = []
        for i in range(1, len(L)):
            for freqSet in L[i]:
                H1 = [frozenset([item]) for item in freqSet]
                # Single-item consequents are tested for every itemset size;
                # skipping them for larger itemsets loses rules such as
                # {a, b} --> {c}.
                kept = self.calcConf(freqSet, H1, supportData, bigRuleList, minConf)
                if len(kept) > 1:
                    self.rulesFromConseq(freqSet, kept, supportData, bigRuleList, minConf)
        return bigRuleList

    def calcConf(self, freqSet, H, supportData, brl, minConf):
        """Test each consequent in ``H`` and record the rules that pass.

        Appends ``(freqSet - conseq, conseq, confidence)`` to ``brl`` for
        every consequent whose confidence reaches ``minConf``.

        Returns:
            The consequents that passed.
        """
        prunedH = []
        for conseq in H:
            conf = supportData[freqSet] / supportData[freqSet - conseq]
            if conf >= minConf:
                brl.append((freqSet - conseq, conseq, conf))
                prunedH.append(conseq)
        return prunedH

    def rulesFromConseq(self, freqSet, H, supportData, brl, minConf):
        """Grow the consequents in ``H`` by one item and test them, recursively.

        ``H`` holds consequents of equal size that already passed the
        confidence test; only merges of passing consequents can pass.
        """
        if not H:
            return
        m = len(H[0])
        if len(freqSet) > (m + 1):
            Hmp1 = self.aprioriGen(H, m + 1)
            Hmp1 = self.calcConf(freqSet, Hmp1, supportData, brl, minConf)
            if len(Hmp1) > 1:
                self.rulesFromConseq(freqSet, Hmp1, supportData, brl, minConf)
# Open the window only when this file is executed as a script.
if __name__ == "__main__":
    GUI()
2)導入的fp.py
# -*- coding: utf-8 -*-
"""
Created on Tue Dec 24 10:48:56 2019
@author: 29493
"""
#import GUI
def transfer2FrozenDataSet(dataSet):
    """Convert a list of transactions into a ``{frozenset(items): count}`` dict.

    Transactions holding the same set of items share one key; the value is
    the number of times that item set occurs, so repeated transactions keep
    contributing to the support counts.

    Args:
        dataSet: iterable of transactions, each an iterable of hashable items.

    Returns:
        A dict mapping each distinct item set to its number of occurrences.
    """
    frozenDataSet = {}
    for elem in dataSet:
        key = frozenset(elem)
        # Accumulate instead of assigning 1, which under-counted duplicates.
        frozenDataSet[key] = frozenDataSet.get(key, 0) + 1
    return frozenDataSet
# Module-level buffers filled by TreeNode.disp(): node names, node counts and
# depths, in the order the nodes were visited.
res1 = []
res2 = []
res3 = []


class TreeNode:
    """One node of an FP-tree."""

    def __init__(self, nodeName, count, nodeParent):
        self.nodeName = nodeName
        self.count = count
        self.nodeParent = nodeParent
        # Next node carrying the same item name (header-table chain).
        self.nextSimilarItem = None
        # Child nodes keyed by item name.
        self.children = {}

    def increaseC(self, count):
        """Add ``count`` to this node's counter."""
        self.count += count

    def disp(self, ind=1):
        """Record this subtree depth-first into ``res1``/``res2``/``res3``.

        Args:
            ind: depth recorded for this node; children get ``ind + 1``.
        """
        res1.append(self.nodeName)
        res2.append(self.count)
        res3.append(ind)
        for child in self.children.values():
            child.disp(ind + 1)

    def display(self):
        """Return the subtree rooted at this node as indented text.

        The shared buffers are emptied and refilled from this node first, so
        the result never contains nodes left over from an earlier ``disp()``
        call or from a previously built tree.
        """
        del res1[:]
        del res2[:]
        del res3[:]
        self.disp()
        return ''.join(
            ' ' * depth + str(name) + ' ' + str(count) + '\n'
            for name, count, depth in zip(res1, res2, res3)
        )
def createFPTree(frozenDataSet, minSupport):
    """Build an FP-tree and its header table from counted transactions.

    Args:
        frozenDataSet: dict mapping ``frozenset`` of items to its count.
        minSupport: minimum absolute support count for an item to be kept.

    Returns:
        ``(fptree, headPointTable)``.  ``headPointTable`` maps each frequent
        item to ``[support, first_node]``.  ``(None, None)`` is returned when
        no item reaches ``minSupport``.
    """
    # First scan: total support of every item.
    itemCounts = {}
    for items, count in frozenDataSet.items():
        for item in items:
            itemCounts[item] = itemCounts.get(item, 0) + count

    # Keep the frequent items only; the second slot will hold the head of
    # the chain of tree nodes carrying that item.
    headPointTable = {
        item: [total, None]
        for item, total in itemCounts.items()
        if total >= minSupport
    }
    if not headPointTable:
        return None, None

    fptree = TreeNode("null", 1, None)
    # Second scan: insert the frequent items of every record.
    for items, count in frozenDataSet.items():
        orderedFrequentItems = [item for item in items if item in headPointTable]
        if orderedFrequentItems:
            # Descending support, ties broken by the item's string form, so
            # that equally frequent items are ordered the same way in every
            # record instead of following each set's iteration order.
            orderedFrequentItems.sort(
                key=lambda item: (-headPointTable[item][0], str(item)))
            updateFPTree(fptree, orderedFrequentItems, headPointTable, count)
    return fptree, headPointTable
def updateFPTree(fptree, orderedFrequentItems, headPointTable, count):
    """Insert one ordered record into the tree below ``fptree``.

    Walks down from ``fptree`` following ``orderedFrequentItems``: an
    existing child has its counter increased by ``count``; a missing child is
    created and appended to that item's chain in ``headPointTable``.

    The walk is iterative, so long records neither copy the remaining item
    list at every level nor run into the recursion limit.  An empty item
    list leaves the tree unchanged.

    Args:
        fptree: node under which the record is inserted.
        orderedFrequentItems: items of the record, in insertion order.
        headPointTable: dict mapping item to ``[support, first_node]``.
        count: number of occurrences of this record.
    """
    node = fptree
    for item in orderedFrequentItems:
        child = node.children.get(item)
        if child is not None:
            child.increaseC(count)
        else:
            child = TreeNode(item, count, node)
            node.children[item] = child
            # Only a newly created node is linked into the header chain.
            head = headPointTable[item]
            if head[1] is None:
                head[1] = child
            else:
                updateHeadPointTable(head[1], child)
        node = child
def updateHeadPointTable(headPointBeginNode, targetNode):
    """Append ``targetNode`` at the end of the chain starting at ``headPointBeginNode``.

    The chain is followed through the ``nextSimilarItem`` attribute.
    """
    tail = headPointBeginNode
    while tail.nextSimilarItem is not None:
        tail = tail.nextSimilarItem
    tail.nextSimilarItem = targetNode
def mineFPTree(headPointTable, prefix, frequentPatterns, minSupport):
    """Recursively collect frequent patterns into ``frequentPatterns``.

    For every item of ``headPointTable``, taken in ascending order of
    support, the pattern ``prefix + item`` is stored with the item's support.
    The item's prefix paths are then turned into a conditional FP-tree, which
    is mined in turn with the extended prefix.

    Args:
        headPointTable: dict mapping item to ``[support, first_node]``.
        prefix: set of items already fixed for this branch.
        frequentPatterns: output dict, ``frozenset`` pattern -> support.
        minSupport: minimum absolute support count.
    """
    ascending = sorted(headPointTable, key=lambda name: headPointTable[name][0])
    for headPointItem in ascending:
        newPrefix = prefix | {headPointItem}
        frequentPatterns[frozenset(newPrefix)] = headPointTable[headPointItem][0]

        prefixPath = getPrefixPath(headPointTable, headPointItem)
        if not prefixPath:
            continue
        _, conditionalHeadPointTable = createFPTree(prefixPath, minSupport)
        if conditionalHeadPointTable is not None:
            mineFPTree(conditionalHeadPointTable, newPrefix, frequentPatterns, minSupport)
def getPrefixPath(headPointTable, headPointItem):
    """Collect the prefix paths of every tree node carrying ``headPointItem``.

    Args:
        headPointTable: dict mapping item to ``[support, first_node]``.
        headPointItem: item whose node chain is followed.

    Returns:
        A dict mapping the ``frozenset`` of ancestor item names of each node
        to that node's count.  Nodes without ancestors below the root
        contribute nothing.
    """
    prefixPath = {}
    node = headPointTable[headPointItem][1]
    while node is not None:
        prefixs = ascendTree(node)
        if prefixs:
            key = frozenset(prefixs)
            # Sum rather than overwrite: two nodes whose paths hold the same
            # items must both contribute their count.
            prefixPath[key] = prefixPath.get(key, 0) + node.count
        node = node.nextSimilarItem
    return prefixPath
def ascendTree(treeNode):
    """Return the names of ``treeNode``'s ancestors, nearest first.

    The climb stops at a node without parent or at a parent named 'null';
    neither ``treeNode`` itself nor that stopping parent is included.
    """
    prefixs = []
    parent = treeNode.nodeParent
    while parent is not None and parent.nodeName != 'null':
        prefixs.append(parent.nodeName)
        parent = parent.nodeParent
    return prefixs
def rulesGenerator(frequentPatterns, minConf, rules):
    """Append to ``rules`` the rules derived from every multi-item pattern.

    Args:
        frequentPatterns: dict mapping ``frozenset`` pattern to its support.
        minConf: minimum confidence.
        rules: output list, extended in place by ``getRules``.
    """
    multiItemPatterns = (pattern for pattern in frequentPatterns if len(pattern) > 1)
    for frequentset in multiItemPatterns:
        getRules(frequentset, frequentset, rules, frequentPatterns, minConf)
def removeStr(set, str):
    """Return a ``frozenset`` of the elements of ``set`` that differ from ``str``."""
    return frozenset(elem for elem in set if elem != str)
def getRules(frequentset, currentset, rules, frequentPatterns, minConf):
    """Append to ``rules`` the rules of ``frequentset`` with antecedents inside ``currentset``.

    Each antecedent is ``currentset`` minus one element.  A rule
    ``(antecedent, frequentset - antecedent, confidence)`` is appended when
    its confidence reaches ``minConf`` and the same antecedent/consequent
    pair is not listed yet.  Antecedents that pass and still hold at least
    two items are shrunk further by recursion.

    Args:
        frequentset: the frequent pattern the rules are derived from.
        currentset: candidate antecedent items for this recursion level.
        rules: output list of ``(antecedent, consequent, confidence)``.
        frequentPatterns: dict mapping ``frozenset`` pattern to its support.
        minConf: minimum confidence.
    """
    for dropped in currentset:
        subSet = frozenset(item for item in currentset if item != dropped)
        confidence = frequentPatterns[frequentset] / frequentPatterns[subSet]
        if confidence < minConf:
            continue
        consequent = frequentset - subSet
        alreadyListed = any(
            rule[0] == subSet and rule[1] == consequent for rule in rules)
        if not alreadyListed:
            rules.append((subSet, consequent, confidence))
        if len(subSet) >= 2:
            getRules(frequentset, subSet, rules, frequentPatterns, minConf)

