资讯 小学 初中 高中 语言 会计职称 学历提升 法考 计算机考试 医护考试 建工考试 教育百科
栏目分类:
子分类:
返回
空麓网用户登录
快速导航关闭
当前搜索
当前分类
子分类
实用工具
空麓网 > 计算机考试 > 软件开发 > 后端开发 > Python

完整正确的fpgrowth代码-python

Python 更新时间: 发布时间: 计算机考试归档 最新发布

完整正确的fpgrowth代码-python

完整正确的fpgrowth代码-python

网上流传的 fpgrowth 代码大多有错误,同一份数据多次运行得到的结果并不唯一;这里给出一份结果确定、正确的 fpgrowth 代码。

# coding:utf-8
"""FP-growth frequent-itemset mining plus association-rule generation.

Typical use::

    initSet = createInitSet(transactions)          # list of item lists
    tree, header = createFPtree(initSet, minSup)   # minSup is an absolute count
    freqItems = []
    mineFPtree(tree, header, minSup, set(), freqItems)
    suppData = calSuppData(header, freqItems, len(transactions))
    rules = generateRules([frozenset(s) for s in freqItems], suppData, minConf)

The code runs unchanged on Python 2.7 and Python 3.
"""


def _itemOrderKey(item):
    """Return a deterministic tie-break key for *item*.

    Items accepted by ``int()`` are ordered numerically; every other item is
    ordered by its ``repr`` and placed after the numeric ones.  The leading
    tag keeps the two groups from ever being compared with each other.
    """
    try:
        return (0, int(item), repr(item))
    except (TypeError, ValueError):
        return (1, 0, repr(item))


class treeNode(object):
    """One node of an FP-tree."""

    def __init__(self, nameValue, numOccur, parentNode):
        self.name = nameValue        # item stored at this node
        self.count = numOccur        # accumulated count of paths through the node
        self.nodeLink = None         # next node carrying the same item, if any
        self.parent = parentNode     # parent node; None for the root
        self.children = {}           # item -> child treeNode

    def inc(self, numOccur):
        """Add *numOccur* to this node's count."""
        self.count += numOccur

    def disp(self, ind=1):
        """Print the subtree rooted here, indenting two spaces per level."""
        print('%s %s %s %s' % ('  ' * ind, self.name, ' ', self.count))
        for child in self.children.values():
            child.disp(ind + 1)


def updateHeader(nodeToTest, targetNode):
    """Append *targetNode* to the end of the node-link chain at *nodeToTest*."""
    while nodeToTest.nodeLink is not None:
        nodeToTest = nodeToTest.nodeLink
    nodeToTest.nodeLink = targetNode


def updateFPtree(items, inTree, headerTable, count):
    """Insert the ordered item list *items* below *inTree*, adding *count*.

    Existing nodes along the path are incremented; missing nodes are created
    and linked into ``headerTable[item][1]``.  The walk is iterative so that
    very long transactions cannot exhaust the recursion limit.
    """
    node = inTree
    for item in items:
        child = node.children.get(item)
        if child is not None:
            child.inc(count)
        else:
            # Start a new branch and register it in the header's link chain.
            child = treeNode(item, count, node)
            node.children[item] = child
            if headerTable[item][1] is None:
                headerTable[item][1] = child
            else:
                updateHeader(headerTable[item][1], child)
        node = child


def createFPtree(dataSet, minSup=1):
    """Build an FP-tree from ``{transaction: count}``.

    Args:
        dataSet: dict mapping an iterable of items (e.g. a frozenset) to the
            number of times that transaction occurs.
        minSup: minimum absolute count an item needs to be kept.

    Returns:
        ``(root, headerTable)`` where ``headerTable[item]`` is
        ``[count, first_node]``, or ``(None, None)`` when no item reaches
        *minSup*.
    """
    totals = {}
    for trans, transCount in dataSet.items():
        for item in trans:
            totals[item] = totals.get(item, 0) + transCount

    # Build a fresh dict instead of deleting from the one being iterated.
    headerTable = {}
    for item, total in totals.items():
        if int(total) >= minSup:
            headerTable[item] = [total, None]  # element: [count, node]
    if not headerTable:
        return None, None

    def rank(item):
        # Global count first, then a deterministic tie-break that works for
        # any item type, so every path uses the same item order.
        return (headerTable[item][0], _itemOrderKey(item))

    retTree = treeNode('Null Set', 1, None)
    for tranSet, count in dataSet.items():
        # Keep only frequent items, most frequent first.
        kept = set(item for item in tranSet if item in headerTable)
        if kept:
            orderedItem = sorted(kept, key=rank, reverse=True)
            updateFPtree(orderedItem, retTree, headerTable, count)
    return retTree, headerTable


def ascendFPtree(leafNode, prefixPath):
    """Append the item names from *leafNode* up to (excluding) the root."""
    node = leafNode
    while node.parent is not None:
        prefixPath.append(node.name)
        node = node.parent


def findPrefixPath(basePat, myHeaderTab):
    """Return the conditional pattern base of *basePat*.

    The result maps ``frozenset(ancestor items)`` to the count of the
    *basePat* node below them.  Nodes that hang directly off the root have no
    ancestors and are not included.
    """
    condPats = {}
    node = myHeaderTab[basePat][1]  # first node carrying basePat
    while node is not None:
        prefixPath = []
        ascendFPtree(node, prefixPath)  # leaf-to-root order, node itself first
        if len(prefixPath) > 1:
            condPats[frozenset(prefixPath[1:])] = node.count
        node = node.nodeLink
    return condPats


def mineFPtree(inTree, headerTable, minSup, preFix, freqItemList):
    """Recursively mine the tree, appending frequent itemsets to *freqItemList*.

    Args:
        inTree: tree root (unused here; kept for interface compatibility).
        headerTable: header table from :func:`createFPtree`, or None.
        minSup: minimum absolute count.
        preFix: set of items already fixed on this recursion path.
        freqItemList: output list; each entry is a ``set`` of items.
    """
    if headerTable is None:
        # createFPtree found nothing frequent, so there is nothing to mine.
        return
    # Sort on the count only (plus tie-break); never compare node objects.
    bigL = sorted(headerTable,
                  key=lambda item: (headerTable[item][0], _itemOrderKey(item)))
    for basePat in bigL:
        newFreqSet = preFix.copy()
        newFreqSet.add(basePat)
        freqItemList.append(newFreqSet)
        condPattBases = findPrefixPath(basePat, headerTable)
        myCondTree, myHead = createFPtree(condPattBases, minSup)
        if myHead is not None:
            mineFPtree(myCondTree, myHead, minSup, newFreqSet, freqItemList)


def loadSimpDat():
    """Return a small sample transaction list."""
    simDat = [['r', 'z', 'h', 'j', 'p'],
              ['z', 'y', 'x', 'w', 'v', 'u', 't', 's'],
              ['z'],
              ['r', 'x', 'n', 'o', 's'],
              ['y', 'r', 'x', 'z', 'q', 't', 'p'],
              ['y', 'z', 'x', 'e', 'q', 's', 't', 'm']]
    return simDat


def createInitSet(dataSet):
    """Collapse a list of transactions into ``{frozenset(items): occurrences}``."""
    retDict = {}
    for trans in dataSet:
        key = frozenset(trans)
        retDict[key] = retDict.get(key, 0) + 1
    return retDict


def calSuppData(headerTable, freqItemList, total):
    """Compute the relative support of every itemset in *freqItemList*.

    Args:
        headerTable: header table of the full FP-tree.
        freqItemList: iterable of itemsets (sets of items).
        total: number of transactions; supports are ``count / total``.

    Returns:
        dict mapping ``frozenset(itemset)`` to its support as a float.
    """
    def rank(item):
        # Same ordering as the tree: the smallest rank is the deepest item.
        return (headerTable[item][0], _itemOrderKey(item))

    suppData = {}
    total = float(total)
    for itemSet in freqItemList:
        ordered = sorted(itemSet, key=rank)
        if len(ordered) == 1:
            # The header already holds the exact count.  Prefix paths would
            # miss nodes that are direct children of the root.
            count = headerTable[ordered[0]][0]
        else:
            # Every other item ranks higher than ordered[0], so it must be an
            # ancestor of the ordered[0] node on any path containing them all.
            others = frozenset(ordered[1:])
            base = findPrefixPath(ordered[0], headerTable)
            count = sum(cnt for path, cnt in base.items()
                        if others.issubset(path))
        suppData[frozenset(ordered)] = count / total
    return suppData


def aprioriGen(Lk, k):
    """Join the (k-1)-itemsets in *Lk* whose first k-2 sorted items agree.

    Returns a list with the union of each matching pair.
    """
    # Sort BEFORE slicing, otherwise the prefix depends on set iteration order.
    prefixes = [sorted(itemSet, key=_itemOrderKey)[:k - 2] for itemSet in Lk]
    retList = []
    for i in range(len(Lk)):
        for j in range(i + 1, len(Lk)):
            if prefixes[i] == prefixes[j]:
                retList.append(Lk[i] | Lk[j])
    return retList


def calcConf(freqSet, H, supportData, br1, minConf=0.7):
    """Evaluate the rules ``freqSet - conseq --> conseq`` for each conseq in *H*.

    Rules whose confidence reaches *minConf* are printed and appended to
    *br1* as ``(antecedent, consequent, confidence)``.

    Returns:
        The consequents from *H* that produced a rule.
    """
    prunedH = []
    for conseq in H:
        antecedent = freqSet - conseq
        if antecedent:
            antecedentSupport = supportData[antecedent]
        else:
            # The empty itemset is in every transaction, so its support is 1
            # even when the caller did not add it to supportData.
            antecedentSupport = supportData.get(antecedent, 1.0)
        if antecedentSupport != 0:
            conf = supportData[freqSet] / antecedentSupport
            if conf >= minConf:
                print("{0} --> {1} conf:{2}".format(antecedent, conseq, conf))
                br1.append((antecedent, conseq, conf))
                prunedH.append(conseq)
    return prunedH


def rulesFromConseq(freqSet, H, supportData, br1, minConf=0.7):
    """Generate rules for *freqSet* with consequents one item larger than *H*'s.

    Recurses on the surviving consequents while they can still grow.
    """
    if not H:
        return
    m = len(H[0])
    if len(freqSet) > m + 1:
        Hmp1 = aprioriGen(H, m + 1)
        Hmp1 = calcConf(freqSet, Hmp1, supportData, br1, minConf)
        if len(Hmp1) > 1:
            rulesFromConseq(freqSet, Hmp1, supportData, br1, minConf)


def generateRules(freqItemList, supportData, minConf=0.7):
    """Generate association rules from frequent itemsets.

    Args:
        freqItemList: iterable of frozensets.
        supportData: dict mapping frozenset to support, e.g. from
            :func:`calSuppData`.
        minConf: minimum confidence for a rule to be reported.

    Returns:
        List of ``(antecedent, consequent, confidence)`` tuples.  A single-item
        set yields a rule with an empty antecedent when its support reaches
        *minConf*.
    """
    bigRuleList = []
    for freqSet in freqItemList:
        H1 = [frozenset([item]) for item in freqSet]
        # Always test the single-item consequents first; rulesFromConseq only
        # looks at consequents LARGER than the ones it is given.
        H1 = calcConf(freqSet, H1, supportData, bigRuleList, minConf)
        if len(freqSet) > 1 and len(H1) > 1:
            rulesFromConseq(freqSet, H1, supportData, bigRuleList, minConf)
    return bigRuleList

main 函数如下:
注意:处理后的数据集(即下面的 parsedDat)应当是一个二级(嵌套)列表,每个子列表是一条事务,例如
l=[[a,b,c],[d,c,e,g],[a,e,c,e]] 这样就可以了

import time

import data_process
import fpgrowth

# ---- Simple-data demo: uncomment to try the built-in sample first ----
# simDat = fpgrowth.loadSimpDat()
# initSet = fpgrowth.createInitSet(simDat)
# myFPtree, myHeaderTab = fpgrowth.createFPtree(initSet, 3)
# myFPtree.disp()
# print(fpgrowth.findPrefixPath('z', myHeaderTab))
# print(fpgrowth.findPrefixPath('r', myHeaderTab))
# print(fpgrowth.findPrefixPath('t', myHeaderTab))
# freqItems = []
# fpgrowth.mineFPtree(myFPtree, myHeaderTab, 3, set([]), freqItems)
# for x in freqItems:
#     print(x)

# ---- kosarak data ----
start = time.time()
n = 11  # minimum support, as an absolute transaction count

# Alternative loader: read kosarak.dat directly instead of using data_process.
# with open(r"kosarak.dat", "rb") as f:
#     parsedDat = [line.split() for line in f.readlines()]
parsedDat = data_process.get_data()  # expected: a list of item lists

initSet = fpgrowth.createInitSet(parsedDat)
myFPtree, myHeaderTab = fpgrowth.createFPtree(initSet, n)
freqItems = []
# createFPtree returns (None, None) when no item reaches the minimum support.
if myHeaderTab is not None:
    fpgrowth.mineFPtree(myFPtree, myHeaderTab, n, set([]), freqItems)
print("{0} sec".format(time.time() - start))

# Compute support values of freqItems.
suppData = fpgrowth.calSuppData(myHeaderTab, freqItems, len(parsedDat))
# The empty itemset is the antecedent of the rules built from single items.
suppData[frozenset([])] = 1.0
for x, v in suppData.items():
    print("{0} {1}".format(x, v))

minConf = 0.8
freqItems = [frozenset(x) for x in freqItems]
fpgrowth.generateRules(freqItems, suppData, minConf)
转载请注明:文章转载自 http://www.konglu.com/
本文地址:http://www.konglu.com/it/1095867.html
免责声明:

我们致力于保护作者版权,注重分享,被刊用文章【完整正确的fpgrowth代码-python】因无法核实真实出处,未能及时与作者取得联系,或有版权异议的,请联系管理员,我们会立即处理,本文部分文字与图片资源来自于网络,转载此文是出于传递更多信息之目的,若有来源标注错误或侵犯了您的合法权益,请立即通知我们,情况属实,我们会第一时间予以删除,并同时向您表示歉意,谢谢!

我们一直用心在做
关于我们 文章归档 网站地图 联系我们

版权所有 (c)2021-2023 成都空麓科技有限公司

ICP备案号:蜀ICP备2023000828号-2