營銷渠道客戶轉化分析(歸因分析模型)


1.背景及問題

某IT產品銷售公司擁有一定數量的小型企業客戶。這些客戶在做出購買決策之前,會接觸到該公司的多個營銷渠道;如何在不同渠道之間分配投入,以實現營銷效益的最大化,便成為許多公司市場營銷部門亟需解決的問題。

即:找出轉化率最高的渠道路徑或方式


2.思路步驟

  • 線性模型分析

  • 馬爾科夫鏈分析

  • 可視化馬爾科夫鏈

轉換率計算

  • 第一次點擊 用戶訪問路徑上的第一個觸點獲取所有貢獻值

  • 最後一次點擊 用戶購買之前最後一個觸點獲取所有貢獻值

  • 線性模型分析 用戶訪問路徑上的所有觸點平分貢獻值

  • 馬爾科夫鏈 馬爾科夫鏈的轉移矩陣 -> 每個觸點的移除效應-> 觸點貢獻值


3.數據集介紹

Id: 某IT產品銷售公司的客戶,客戶類型是小公司

Segment: 客戶的畫像

Channel:客戶生命周期中觸及過的渠道; DM(直郵),EM(電子郵件), PHONE(電話)和 WEB(產品銷售官網瀏覽記錄)

Date: 客戶觸及渠道的日期,觸及時間長度為1年

Pur_flag: 等於1表示該客戶在接觸完相應渠道後,完成了IT產品的購買


4.代碼及具體步驟

導入模塊

import numpy as np
import pandas as pd
import networkx as nx
from pprint import pprint
import os
import matplotlib.pyplot as plt
# Switch to the project folder so the relative CSV path below resolves.
os.chdir(r'C:/Users/pc/Desktop/數據分析項目/客戶轉化分析/')
# Load the data, parsing the fourth column (index 3) as dates.

df = pd.read_csv(r'./ChannelAttribute.csv', parse_dates=[3])

# Preview the first rows of the loaded frame.
df.head()
id segment channel date pur_flag
0 20398764672 Tier 2 DM 2018-03-19 0
1 20408399343 Tier 2 WEB 2017-09-27 0
2 20438922645 Tier 2 WEB 2017-11-15 0
3 20225918468 Tier 2 DM 2017-05-24 0
4 20278581048 Tier 3 DM 2018-04-23 0

創建路徑數據

def create_path_with_value(data, element):
    """Build the touch path of one customer together with its outcome flags.

    Args:
        data: DataFrame with at least the columns ``id``, ``date``,
            ``channel`` and ``pur_flag``.
        element: The customer id whose rows are turned into a path.

    Returns:
        A list ``[path, conv, conv_null]`` where ``path`` is
        ``['start', <channels ordered by date>, 'conversion' | 'null']``,
        ``conv`` is 1 when the customer purchased (0 otherwise) and
        ``conv_null`` is its complement.
    """
    df2 = data.loc[data['id'] == element, :].sort_values(['id', 'date'], ascending=[False, True])

    # A customer counts as converted when any of their rows carries
    # pur_flag == 1.  Comparing ``unique()`` to 1 inside ``if`` raised
    # ValueError as soon as one customer had both 0 and 1 flags.
    converted = bool((df2['pur_flag'] == 1).any())

    path = ['start', *df2['channel'].tolist()]
    path.append('conversion' if converted else 'null')

    conv = 1 if converted else 0
    conv_null = 1 - conv
    return [path, conv, conv_null]


# Build one [path, conv, conv_null] record per distinct customer id.
path_records = [create_path_with_value(df, customer_id) for customer_id in df['id'].unique()]
final_path = [record[0] for record in path_records]
conv = [record[1] for record in path_records]
conv_null = [record[2] for record in path_records]

# Path data: one row per customer holding the path and its outcome flags.
path_data = pd.DataFrame({'path': final_path, 'conv': conv, 'conv_nulls': conv_null})
path_data.head(10)
path conv conv_nulls
0 [start, WEB, EM, DM, null] 0 1
1 [start, EM, WEB, DM, null] 0 1
2 [start, WEB, EM, DM, null] 0 1
3 [start, DM, EM, WEB, null] 0 1
4 [start, EM, WEB, DM, null] 0 1
5 [start, PHONE, EM, DM, WEB, null] 0 1
6 [start, PHONE, WEB, DM, null] 0 1
7 [start, DM, PHONE, WEB, null] 0 1
8 [start, WEB, EM, DM, conversion] 1 0
9 [start, PHONE, WEB, DM, null] 0 1

歸因分析(最後一次點擊、第一次點擊和線性模型)

def create_last_click_stats_pair(data):
    """Pair each path's last touch with that path's conversion flag.

    The last touch is the element right before the terminal state, i.e.
    ``path[-2]``.  Returns a DataFrame with the columns ``touch`` and
    ``Last_Conv``, one row per path in ``data``.
    """
    last_touches = [path[-2] for path in data['path']]
    conversions = data['conv'].tolist()
    return pd.DataFrame({'touch': last_touches, 'Last_Conv': conversions})


def create_first_order_states_pair(data):
    """Pair each path's first touch with that path's conversion flag.

    The first touch is the element right after ``'start'``, i.e.
    ``path[1]``.  Returns a DataFrame with the columns ``touch`` and
    ``First_Conv``, one row per path in ``data``.
    """
    rows = list(zip(data['path'], data['conv']))
    return pd.DataFrame({
        'touch': [path[1] for path, _ in rows],
        'First_Conv': [flag for _, flag in rows],
    })


def create_linear_click_stats_pair(data):
    """Split each path's conversion value evenly across all of its touches.

    The touches of a path are every element between the leading ``'start'``
    and the trailing terminal state (``path[1:-1]``).  Each touch receives
    ``conv / number_of_touches``.

    The earlier version hard-coded path lengths 3 to 6; any longer path
    fell into the ``else`` branch and gave the whole credit to the first
    touch only.  Paths of any length are now handled uniformly, and a path
    without touches contributes no rows.

    Args:
        data: DataFrame with a ``path`` column (sequence of states) and a
            ``conv`` column (conversion value of the path).

    Returns:
        DataFrame with the columns ``touch`` and ``Linear_Conv``, one row
        per touch of every path.
    """
    touches, credits = [], []
    for path, conv in zip(data['path'], data['conv']):
        channels = list(path[1:-1])
        if not channels:
            continue
        share = conv / len(channels)
        touches.extend(channels)
        credits.extend([share] * len(channels))
    return pd.DataFrame({'touch': touches, 'Linear_Conv': credits})
# Total credit per channel under each heuristic attribution model.
last_touch = (
    create_last_click_stats_pair(path_data)
    .groupby('touch')['Last_Conv']
    .sum()
    .reset_index()
)
linear_touch = (
    create_linear_click_stats_pair(path_data)
    .groupby('touch')['Linear_Conv']
    .sum()
    .reset_index()
)
first_touch = (
    create_first_order_states_pair(path_data)
    .groupby('touch')['First_Conv']
    .sum()
    .reset_index()
)

# Turn the totals into shares: each column is divided by its own sum.
lst = last_touch.set_index('touch').apply(lambda col: col / col.sum())
li = linear_touch.set_index('touch').apply(lambda col: col / col.sum())
fst = first_touch.set_index('touch').apply(lambda col: col / col.sum())

# Side-by-side comparison of the three models, indexed by channel.
dfs = [fst, lst, li]
dfs[0].join(dfs[1:])
First_Conv Last_Conv Linear_Conv
touch
DM 0.341152 0.744850 0.504964
EM 0.278233 0.097915 0.198478
PHONE 0.094440 0.014768 0.050488
WEB 0.286175 0.142467 0.246070

**歸因模型(首次點擊、最後一次點擊、線性)分析結論: 三種模型下 DM(直郵)的貢獻占比均最高,相比於其他渠道是轉化率較優的渠道**

馬爾科夫鏈

# 手動計算狀態轉移矩陣
def split_states(data):
    """Break every path into its consecutive (origin, destination) pairs.

    Returns a list with one entry per row of ``data``; each entry is
    ``[state_pairs, values]`` where ``state_pairs`` lists the adjacent
    state tuples of the path and ``values`` holds a ``1`` for each pair.
    """
    result = []
    for path in data['path']:
        transitions = list(zip(path[:-1], path[1:]))
        result.append([transitions, [1] * len(transitions)])
    return result
# Split every path into transition pairs and preview the first three entries.
temps = split_states(path_data)
temps[0:3]
[[[('start', 'WEB'), ('WEB', 'EM'), ('EM', 'DM'), ('DM', 'null')],
  [1, 1, 1, 1]],
 [[('start', 'EM'), ('EM', 'WEB'), ('WEB', 'DM'), ('DM', 'null')],
  [1, 1, 1, 1]],
 [[('start', 'WEB'), ('WEB', 'EM'), ('EM', 'DM'), ('DM', 'null')],
  [1, 1, 1, 1]]]
def transition_maxtrix(data):
    """Compute the transition-probability table from split path data.

    Args:
        data: List of ``[state_pairs, values]`` entries as produced by
            ``split_states``; ``state_pairs`` holds ``(origin, destination)``
            tuples and ``values`` the count attached to each pair.

    Returns:
        DataFrame indexed by destination state (``end``) with one column per
        origin state (``start``).  Each column is normalised to sum to 1 and
        every cell is formatted as a string with two decimals (e.g.
        ``'0.25'``), matching the previous output format.
    """
    starts, ends, counts = [], [], []
    for entry in data:
        for (origin, destination), count in zip(entry[0], entry[1]):
            starts.append(origin)
            ends.append(destination)
            counts.append(count)
    pair_df = pd.DataFrame({'start': starts, 'end': ends, 'values': counts})

    # crosstab sums the counts per (end, start) cell itself, so no separate
    # groupby / tuple-expansion step is needed.  The string alias 'sum'
    # avoids the warning pandas emits for ``aggfunc=np.sum``.
    table = pd.crosstab(pair_df['end'], pair_df['start'], values=pair_df['values'],
                        aggfunc='sum', normalize='columns')

    # Column-wise Series.map replaces the deprecated DataFrame.applymap and
    # works on old and new pandas alike.
    return table.apply(lambda column: column.map("{:3.2f}".format))
# Output the state-transition matrix; the transpose puts origin states on
# the rows and destination states on the columns.
tmp = transition_maxtrix(temps)
tmp1 = tmp.transpose()
tmp1
end DM EM PHONE WEB conversion null
start
DM 0.00 0.09 0.01 0.12 0.21 0.57
EM 0.43 0.00 0.02 0.41 0.04 0.09
PHONE 0.18 0.11 0.00 0.69 0.01 0.01
WEB 0.58 0.22 0.03 0.00 0.05 0.12
start 0.14 0.29 0.32 0.25 0.00 0.00

計算渠道的移除效應

def channel_remove(data, channel_removed):
    """Aggregate transition counts and mask one channel as ``'unknown'``.

    ``data`` is a list of ``[state_pairs, values]`` entries.  The counts are
    summed per state pair and the pair is expanded into ``start`` / ``end``
    columns.  Returns ``[masked, untouched]``: ``masked`` has every
    occurrence of ``channel_removed`` in ``start`` and ``end`` replaced by
    ``'unknown'``; ``untouched`` is a copy taken before the replacement.
    """
    flat = [
        (pair, weight)
        for entry in data
        for pair, weight in zip(entry[0], entry[1])
    ]
    temp_df = pd.DataFrame({
        'state_pairs': [pair for pair, _ in flat],
        'values': [weight for _, weight in flat],
    })

    grp_df = temp_df.groupby('state_pairs')['values'].sum().reset_index()
    grp_df[['start', 'end']] = grp_df['state_pairs'].apply(pd.Series)

    # Snapshot the counts before the channel is masked out.
    untouched = grp_df.copy()
    for side in ('start', 'end'):
        grp_df[side] = grp_df[side].replace(channel_removed, 'unknown')
    return [grp_df, untouched]


# Keep only the paths that ended in a conversion.

path_data_pur = path_data[path_data['conv']==1]
temps = split_states(path_data_pur)

# Collects one estimated conversion count per channel (filled by the loop below).
conversion =[]
columns = ['start', 'end', 'values_x', 'values_y', 'perct']

# All channels present in the raw data.
channels_list = list(df['channel'].unique())

# Self-transition rows for 'start', 'conversion' and 'null'; 'conversion' and
# 'null' get probability 1, 'start' gets 0.
df_dummy1 = pd.DataFrame({'start': ['start', 'conversion', 'null'],
                          'end': ['start', 'conversion', 'null'],
                          'values_x': [0, 0, 0],
                          'values_y': [0, 0, 0],
                          'perct': [0, 1, 1]})
dy_dummy = pd.DataFrame(df_dummy1, columns=columns)


# The same three self-transitions as bare (start, end) keys.
df_dummy2 = pd.DataFrame({'start': ['start', 'conversion', 'null'],
                         'end': ['start', 'conversion', 'null']})

# 逐個計算移除單個渠道后的總轉化數

# For each channel, estimate the conversion count that remains once that
# channel is removed from the transition table.
for chnl in channels_list:
    # channel_remove returns [masked counts, untouched counts]; a single
    # call provides both frames.
    df_remove, df_noremove = channel_remove(temps, chnl)

    # Total outgoing count per origin state, merged back so each row holds
    # its own count (values_x) next to its origin's total (values_y).
    df_temp = df_remove.groupby('start')['values'].sum().reset_index()
    df_temp = pd.merge(df_remove, df_temp, on='start', how='left')
    df_temp['perct'] = df_temp['values_x'] / df_temp['values_y']

    df_temp = pd.DataFrame(df_temp, columns=columns)
    df_temp = pd.concat([df_temp, dy_dummy], axis=0)

    # Left-join onto the untouched (start, end) keys: rows that involve the
    # removed channel find no match in the masked table and come back as NaN.
    df_ini = pd.DataFrame(df_noremove, columns=['start', 'end'])
    df_temp2 = pd.concat([df_ini, df_dummy2], axis=0)
    df_temp = pd.merge(df_temp2, df_temp, on=['start', 'end'], how='left')

    # Fill the NaN left by the join with 0.  Assigning the result back is
    # required: a chained ``df_temp[col].fillna(0, inplace=True)`` does not
    # modify df_temp under pandas Copy-on-Write.
    fill_cols = ['values_x', 'values_y', 'perct']
    df_temp[fill_cols] = df_temp[fill_cols].fillna(0)

    # Transition-probability table (rows: origin, columns: destination).
    df_trans1 = pd.crosstab(df_temp['start'], df_temp['end'],
                            values=df_temp['perct'], aggfunc='sum')
    # Fill every missing cell instead of naming the channels explicitly, so
    # other channel sets work as well.
    df_trans1 = df_trans1.fillna(0)
    df_trans_mat = df_trans1.to_numpy()

    # Raw transition counts in the same layout; the 'start' row holds the
    # counts leaving the start state.
    inist_n1 = pd.crosstab(df_temp['start'], df_temp['end'],
                           values=df_temp['values_x'], aggfunc='sum')
    inist_n1 = inist_n1.fillna(0)
    inist_mat = inist_n1.loc['start'].to_numpy()

    # Start-state counts multiplied by the transition table.
    mat = inist_mat @ df_trans_mat

    # Read the estimate from the 'conversion' column, located by name
    # rather than by a fixed position.
    conversion.append(mat[df_trans1.columns.get_loc('conversion')])


# Compute the removal effect of each individual channel.
chnl_conversion = pd.DataFrame({'channel': channels_list, 'conv': conversion})

# `chnl` still holds the last channel iterated by the loop above.
df_remove = channel_remove(temps, chnl)[0]
df_noremove = channel_remove(temps, chnl)[1]
# NOTE(review): this sums the counts of every transition in df_remove, not
# only the transitions into 'conversion' — confirm this is the intended baseline.
tot_conv = df_remove['values'].sum()

# Relative drop from the baseline once the channel is removed.
chnl_conversion['impact'] = (tot_conv-chnl_conversion['conv'])/tot_conv

tot_impact = chnl_conversion['impact'].sum()

# Each channel's share of the summed impact.
chnl_conversion['convet_rate']= chnl_conversion['impact']/tot_impact

chnl_conversion

channel conv impact convet_rate
0 DM 1003.039274 0.961468 0.265241
1 WEB 2724.928034 0.895320 0.246993
2 EM 2856.954317 0.890248 0.245594
3 PHONE 3179.825240 0.877845 0.242172

馬爾科夫鏈模型分析結論: 相比於其他渠道,DM(直郵)的轉換貢獻值最高,是轉化率較優的渠道




可視化馬爾科夫鏈

# Node names of the Markov chain: the start state, the four channels and the two terminal states.
states = ['start', 'DM', 'EM', 'PHONE', 'WEB', 'conversion', 'null']

def _get_markov_edges(Q):
    """Flatten the table ``Q`` into a ``{(row_label, column_label): value}`` dict.

    Entries are produced column by column, walking the row labels within
    each column.
    """
    return {
        (row_label, col_label): Q.loc[row_label, col_label]
        for col_label in Q.columns
        for row_label in Q.index
    }
# Edge weights keyed by (origin, destination), taken from the transposed transition matrix.
edges_wts = _get_markov_edges(tmp1)
edges_wts
{('DM', 'DM'): '0.00',
 ('EM', 'DM'): '0.43',
 ('PHONE', 'DM'): '0.18',
 ('WEB', 'DM'): '0.58',
 ('start', 'DM'): '0.14',
 ('DM', 'EM'): '0.09',
 ('EM', 'EM'): '0.00',
 ('PHONE', 'EM'): '0.11',
 ('WEB', 'EM'): '0.22',
 ('start', 'EM'): '0.29',
 ('DM', 'PHONE'): '0.01',
 ('EM', 'PHONE'): '0.02',
 ('PHONE', 'PHONE'): '0.00',
 ('WEB', 'PHONE'): '0.03',
 ('start', 'PHONE'): '0.32',
 ('DM', 'WEB'): '0.12',
 ('EM', 'WEB'): '0.41',
 ('PHONE', 'WEB'): '0.69',
 ('WEB', 'WEB'): '0.00',
 ('start', 'WEB'): '0.25',
 ('DM', 'conversion'): '0.21',
 ('EM', 'conversion'): '0.04',
 ('PHONE', 'conversion'): '0.01',
 ('WEB', 'conversion'): '0.05',
 ('start', 'conversion'): '0.00',
 ('DM', 'null'): '0.57',
 ('EM', 'null'): '0.09',
 ('PHONE', 'null'): '0.01',
 ('WEB', 'null'): '0.12',
 ('start', 'null'): '0.00'}
# Drop the edges whose transition probability is zero (formatted as '0.00').
zero_edges = [edge for edge, prob in edges_wts.items() if prob == '0.00']
for edge in zero_edges:
    del edges_wts[edge]

pprint(edges_wts)
{('DM', 'EM'): '0.09',
 ('DM', 'PHONE'): '0.01',
 ('DM', 'WEB'): '0.12',
 ('DM', 'conversion'): '0.21',
 ('DM', 'null'): '0.57',
 ('EM', 'DM'): '0.43',
 ('EM', 'PHONE'): '0.02',
 ('EM', 'WEB'): '0.41',
 ('EM', 'conversion'): '0.04',
 ('EM', 'null'): '0.09',
 ('PHONE', 'DM'): '0.18',
 ('PHONE', 'EM'): '0.11',
 ('PHONE', 'WEB'): '0.69',
 ('PHONE', 'conversion'): '0.01',
 ('PHONE', 'null'): '0.01',
 ('WEB', 'DM'): '0.58',
 ('WEB', 'EM'): '0.22',
 ('WEB', 'PHONE'): '0.03',
 ('WEB', 'conversion'): '0.05',
 ('WEB', 'null'): '0.12',
 ('start', 'DM'): '0.14',
 ('start', 'EM'): '0.29',
 ('start', 'PHONE'): '0.32',
 ('start', 'WEB'): '0.25'}
# 用networkx繪制馬爾科夫鏈

# Make the bundled Graphviz binaries discoverable for the layout call below.
os.environ["PATH"] += os.pathsep + './graphviz-2.38/release/bin/'

G = nx.MultiDiGraph()

# Add one node per state.
G.add_nodes_from(states)
# The f prefix is required; without it the placeholder text was printed
# literally instead of the node list.
print(f'Nodes:\n{G.nodes()}\n')

# Each edge carries its transition probability as both weight and label.
for k, v in edges_wts.items():
    tmp_origin, tmp_destination = k[0], k[1]
    G.add_edge(tmp_origin, tmp_destination, weight=v, label=v)
print('Edges:')
pprint(G.edges(data=True))

pos = nx.drawing.nx_pydot.graphviz_layout(G, prog='dot')
nx.draw_networkx(G, pos)

# Build the edge labels from the 'label' attribute and draw them.
edge_labels = {(n1, n2): d['label'] for n1, n2, d in G.edges(data=True)}
nx.draw_networkx_edge_labels(G, pos, edge_labels=edge_labels)
nx.drawing.nx_pydot.write_dot(G, 'customer_markov.dot')
Nodes:
{G.nodes()}

Edges:
OutMultiEdgeDataView([('DM', 'WEB', {'label': '0.12', 'weight': '0.12'}), ('DM', 'EM', {'label': '0.09', 'weight': '0.09'}), ('DM', 'null', {'label': '0.57', 'weight': '0.57'}), ('DM', 'PHONE', {'label': '0.01', 'weight': '0.01'}), ('DM', 'conversion', {'label': '0.21', 'weight': '0.21'}), ('start', 'EM', {'label': '0.29', 'weight': '0.29'}), ('start', 'DM', {'label': '0.14', 'weight': '0.14'}), ('start', 'PHONE', {'label': '0.32', 'weight': '0.32'}), ('start', 'WEB', {'label': '0.25', 'weight': '0.25'}), ('EM', 'DM', {'label': '0.43', 'weight': '0.43'}), ('EM', 'WEB', {'label': '0.41', 'weight': '0.41'}), ('EM', 'null', {'label': '0.09', 'weight': '0.09'}), ('EM', 'PHONE', {'label': '0.02', 'weight': '0.02'}), ('EM', 'conversion', {'label': '0.04', 'weight': '0.04'}), ('WEB', 'DM', {'label': '0.58', 'weight': '0.58'}), ('WEB', 'EM', {'label': '0.22', 'weight': '0.22'}), ('WEB', 'null', {'label': '0.12', 'weight': '0.12'}), ('WEB', 'PHONE', {'label': '0.03', 'weight': '0.03'}), ('WEB', 'conversion', {'label': '0.05', 'weight': '0.05'}), ('PHONE', 'WEB', {'label': '0.69', 'weight': '0.69'}), ('PHONE', 'DM', {'label': '0.18', 'weight': '0.18'}), ('PHONE', 'null', {'label': '0.01', 'weight': '0.01'}), ('PHONE', 'EM', {'label': '0.11', 'weight': '0.11'}), ('PHONE', 'conversion', {'label': '0.01', 'weight': '0.01'})])


免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯繫本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM