營銷渠道客戶轉化分析（歸因分析模型）

本文轉載自查看原文 2020-03-12 17:46 1212 Data Analysis

1.背景及問題

某IT產品銷售公司擁有一定數量的小型企業客戶。這些客戶在做出購買決定之前，會接觸到該公司的多個營銷渠道。如何在不同渠道之間分配投入，以實現營銷效益的最大化，便成為許多公司市場營銷部門亟需解決的問題。

即：找出轉化率最高的渠道路徑或方式

2.思路步驟

線性模型分析
馬爾科夫鏈分析
可視化馬爾科夫鏈

轉化率計算

第一次點擊：用戶訪問路徑上的第一個觸點獲取所有貢獻值
最後一次點擊：用戶購買之前的最後一個觸點獲取所有貢獻值
線性模型分析：用戶訪問路徑上的所有觸點平分貢獻值
馬爾科夫鏈：馬爾科夫鏈的轉移矩陣 -> 每個觸點的移除效應 -> 觸點貢獻值

3.數據集介紹

Id：某IT產品銷售公司的客戶，客戶類型是小公司

Segment：客戶的畫像

Channel：客戶生命周期中觸及過的渠道，包括 DM（直郵）、EM（電子郵件）、PHONE（電話）和 WEB（產品銷售官網瀏覽記錄）

Date: 客戶觸及渠道的日期，觸及時間長度為1年

Pur_flag：等於 1 表示該客戶在接觸完相應渠道後，完成了IT產品的購買

4.代碼及具體步驟

導入模塊

import numpy as np
import pandas as pd
import networkx as nx
from pprint import pprint
import os
import matplotlib.pyplot as plt

# Switch to the project folder so the relative CSV path below resolves
# (machine-specific absolute path).
os.chdir(r'C:/Users/pc/Desktop/數據分析項目/客戶轉化分析/')

# Load the data, parsing the fourth column (index 3) as dates

df = pd.read_csv(r'./ChannelAttribute.csv', parse_dates=[3])

# Preview the first rows
df.head()

	id	segment	channel	date
0	20398764672	Tier 2	DM	2018-03-19
1	20408399343	Tier 2	WEB	2017-09-27
2	20438922645	Tier 2	WEB	2017-11-15
3	20225918468	Tier 2	DM	2017-05-24
4	20278581048	Tier 3	DM	2018-04-23

創建路徑數據

def create_path_with_value(data, element):
    """Build the ordered touch-point path of one customer.

    Parameters
    ----------
    data : pandas.DataFrame
        Touch records; the columns ``id``, ``date``, ``channel`` and
        ``pur_flag`` are read.
    element
        Customer id whose rows are selected from ``data``.

    Returns
    -------
    list
        ``[path, conv, conv_null]``. ``path`` is ``'start'``, followed by the
        customer's channels ordered by ascending ``date``, followed by
        ``'conversion'`` or ``'null'``. ``conv`` and ``conv_null`` are
        complementary 0/1 flags for the two outcomes.
    """
    touches = data.loc[data['id'] == element, :].sort_values(
        ['id', 'date'], ascending=[False, True])

    path = ['start']
    path.extend(touches['channel'].tolist())

    # A customer counts as converted when any of its rows carries pur_flag == 1.
    # Reducing with .any() yields a single boolean even when the flag differs
    # between rows or when no row matched the id; testing the truth value of
    # ``unique() == 1`` is ambiguous/raises in those cases.
    converted = bool((touches['pur_flag'] == 1).any())

    if converted:
        path.append('conversion')
        conv, conv_null = 1, 0
    else:
        path.append('null')
        conv, conv_null = 0, 1
    return [path, conv, conv_null]


# Build one path per customer. Grouping once hands each customer only its own
# rows, instead of re-scanning the whole frame for every id. sort=False keeps
# the customers in order of first appearance, like df['id'].unique() did.
# Rows with a missing id are skipped by groupby.
final_path, conv, conv_null = [], [], []
for element, customer_rows in df.groupby('id', sort=False):
    rst = create_path_with_value(customer_rows, element)
    final_path.append(rst[0])
    conv.append(rst[1])
    conv_null.append(rst[2])

# Path data: one row per customer
path_data = pd.DataFrame({'path': final_path, 'conv': conv, 'conv_nulls': conv_null})
path_data.head(10)

	path	conv	conv_nulls
0	[start, WEB, EM, DM, null]	0	1
1	[start, EM, WEB, DM, null]	0	1
2	[start, WEB, EM, DM, null]	0	1
3	[start, DM, EM, WEB, null]	0	1
4	[start, EM, WEB, DM, null]	0	1
5	[start, PHONE, EM, DM, WEB, null]	0	1
6	[start, PHONE, WEB, DM, null]	0	1
7	[start, DM, PHONE, WEB, null]	0	1
8	[start, WEB, EM, DM, conversion]	1	0
9	[start, PHONE, WEB, DM, null]	0	1

歸因分析（最後一次點擊、第一次點擊和線性模型）

def create_last_click_stats_pair(data):
    """Pair every path's conversion flag with its last touch point.

    ``data`` must provide a ``path`` column (sequences whose final element is
    the outcome marker) and a ``conv`` column. The touch point is the element
    just before the outcome marker, ``path[-2]``.

    Returns a DataFrame with the columns ``touch`` and ``Last_Conv``.
    """
    last_touches = [steps[-2] for steps in data['path']]
    credits = list(data['conv'])
    return pd.DataFrame({'touch': last_touches, 'Last_Conv': credits})


def create_first_order_states_pair(data):
    """Pair every path's conversion flag with its first touch point.

    ``data`` must provide a ``path`` column (sequences whose element 0 is the
    start marker) and a ``conv`` column. The touch point is ``path[1]``.

    Returns a DataFrame with the columns ``touch`` and ``First_Conv``.
    """
    first_touches = [steps[1] for steps in data['path']]
    credits = list(data['conv'])
    return pd.DataFrame({'touch': first_touches, 'First_Conv': credits})


def create_linear_click_stats_pair(data):
    """Split every path's conversion flag evenly across its touch points.

    Parameters
    ----------
    data : pandas.DataFrame
        Needs a ``path`` column (sequences of the form
        ``[start marker, touch..., outcome marker]``) and a ``conv`` column.

    Returns
    -------
    pandas.DataFrame
        One row per touch with the columns ``touch`` and ``Linear_Conv``,
        where ``Linear_Conv`` is ``conv / number_of_touches`` of the path.

    Notes
    -----
    The share is derived from the actual path length, so paths with more than
    four touches are split evenly as well (they used to credit only the first
    touch). A path without any touch contributes no rows.
    """
    temp_path, temp_conv = [], []
    for path, conv in zip(data['path'], data['conv']):
        # Everything between the start marker and the outcome marker is a touch.
        touches = path[1:-1]
        if not touches:
            continue
        share = conv / len(touches)
        temp_path.extend(touches)
        temp_conv.extend([share] * len(touches))
    return pd.DataFrame({'touch': temp_path, 'Linear_Conv': temp_conv})

# Total credit per touch point under each heuristic model
last_touch = create_last_click_stats_pair(path_data).groupby('touch', as_index=False)['Last_Conv'].sum()
linear_touch = create_linear_click_stats_pair(path_data).groupby('touch', as_index=False)['Linear_Conv'].sum()
first_touch = create_first_order_states_pair(path_data).groupby('touch', as_index=False)['First_Conv'].sum()

# Turn the totals into shares: every column is divided by its own sum
lst = last_touch.set_index('touch')
lst = lst / lst.sum()
li = linear_touch.set_index('touch')
li = li / li.sum()
fst = first_touch.set_index('touch')
fst = fst / fst.sum()

# Side-by-side comparison, joined on the shared 'touch' index
dfs = [fst, lst, li]
fst.join([lst, li])

	First_Conv	Last_Conv	Linear_Conv
touch
DM	0.341152	0.744850	0.504964
EM	0.278233	0.097915	0.198478
PHONE	0.094440	0.014768	0.050488
WEB	0.286175	0.142467	0.246070

**啟發式歸因模型（第一次點擊、最後一次點擊、線性模型）分析結論：在三種模型下，DM（直郵）的貢獻佔比均為最高，是轉化率較優的渠道**

馬爾科夫鏈

# 手動計算狀態轉移矩陣
def split_states(data):
    """Break every path into its consecutive state transitions.

    ``data`` must provide a ``path`` column. For each path the result holds
    one ``[state_pairs, values]`` entry: ``state_pairs`` lists the
    ``(current, next)`` tuples in path order and ``values`` carries a count
    of 1 for each pair.
    """
    def _transitions(path):
        pairs = list(zip(path, path[1:]))
        return [pairs, [1] * len(pairs)]

    return [_transitions(path) for path in data['path']]

# Transition pairs for every customer path in path_data
temps = split_states(path_data)
# Peek at the first three entries
temps[0:3]

[[[('start', 'WEB'), ('WEB', 'EM'), ('EM', 'DM'), ('DM', 'null')],
  [1, 1, 1, 1]],
 [[('start', 'EM'), ('EM', 'WEB'), ('WEB', 'DM'), ('DM', 'null')],
  [1, 1, 1, 1]],
 [[('start', 'WEB'), ('WEB', 'EM'), ('EM', 'DM'), ('DM', 'null')],
  [1, 1, 1, 1]]]

def transition_maxtrix(data):
    """Estimate first-order Markov transition probabilities from paths.

    Parameters
    ----------
    data : list
        Entries of the form ``[state_pairs, values]`` as produced by
        ``split_states``: ``state_pairs`` is a list of ``(start, end)``
        tuples and ``values`` the matching counts.

    Returns
    -------
    pandas.DataFrame
        Rows are ``end`` states, columns are ``start`` states. Every column is
        normalised to sum to 1 and each cell is a string formatted with
        ``"{:3.2f}"``. Transpose the table to read it as "from row to column".
    """
    # Flatten straight into start/end/count columns; this avoids expanding a
    # tuple column with a per-row ``apply(pd.Series)``.
    edges = pd.DataFrame(
        [(start, end, count)
         for entry in data
         for (start, end), count in zip(entry[0], entry[1])],
        columns=['start', 'end', 'values'],
    )
    grp_df = edges.groupby(['start', 'end'])['values'].sum().reset_index()

    # The string aggregator and a column-wise Series.map behave the same on
    # old and new pandas, unlike ``aggfunc=np.sum`` and ``DataFrame.applymap``.
    table = pd.crosstab(grp_df['end'], grp_df['start'], values=grp_df['values'],
                        aggfunc='sum', normalize='columns')
    return table.apply(lambda col: col.map("{:3.2f}".format))

# Build the state transition matrix; the transpose puts the start state on the
# rows and the end state on the columns
tmp = transition_maxtrix(temps)
tmp1 = tmp.transpose()

tmp1

end	DM	EM	PHONE	WEB	conversion	null
start
DM	0.00	0.09	0.01	0.12	0.21	0.57
EM	0.43	0.00	0.02	0.41	0.04	0.09
PHONE	0.18	0.11	0.00	0.69	0.01	0.01
WEB	0.58	0.22	0.03	0.00	0.05	0.12
start	0.14	0.29	0.32	0.25	0.00	0.00

計算渠道的移除效應

def channel_remove(data,channel_removed):
    """Aggregate transition counts and mask one channel as ``'unknown'``.

    ``data`` holds ``[state_pairs, values]`` entries (see ``split_states``).
    The counts are summed per ``(start, end)`` pair and the pair is split
    into ``start`` and ``end`` columns.

    Returns ``[masked, original]``: two DataFrames with the columns
    ``state_pairs``, ``values``, ``start`` and ``end``. In ``masked`` every
    ``start`` / ``end`` equal to ``channel_removed`` is replaced by
    ``'unknown'`` (the ``state_pairs`` column is left as is); ``original`` is
    the untouched copy.
    """
    flattened = [(pair, weight)
                 for entry in data
                 for pair, weight in zip(entry[0], entry[1])]
    temp_df = pd.DataFrame({
        'state_pairs': [pair for pair, _ in flattened],
        'values': [weight for _, weight in flattened],
    })

    grp_df = temp_df.groupby('state_pairs')['values'].sum().reset_index()
    grp_df[['start', 'end']] = grp_df['state_pairs'].apply(pd.Series)

    untouched = grp_df.copy()
    for column in ('start', 'end'):
        grp_df[column] = grp_df[column].replace(channel_removed, 'unknown')
    return [grp_df, untouched]


# Keep only the paths that ended in a conversion

path_data_pur = path_data.loc[path_data['conv'] == 1]
temps = split_states(path_data_pur)

# Collects one estimated conversion count per removed channel
conversion = []
columns = ['start', 'end', 'values_x', 'values_y', 'perct']

# All channels
channels_list = df['channel'].unique().tolist()

# Self-transitions for the start/conversion/null states, used to pad the edge
# tables; 'conversion' and 'null' keep a self-transition weight of 1
df_dummy2 = pd.DataFrame({'start': ['start', 'conversion', 'null'],
                          'end': ['start', 'conversion', 'null']})

df_dummy1 = df_dummy2.assign(values_x=0, values_y=0, perct=[0, 1, 1])
dy_dummy = pd.DataFrame(df_dummy1, columns=columns)

# Estimate the conversion count that remains after removing each channel in turn

for chnl in channels_list:
    # One call returns both tables: the edges with `chnl` renamed to
    # 'unknown' and the untouched copy.
    df_remove, df_noremove = channel_remove(temps, chnl)

    # Transition share of every edge among all edges leaving the same state
    df_temp = df_remove.groupby('start')['values'].sum().reset_index()
    df_temp = pd.merge(df_remove, df_temp, on='start', how='left')
    df_temp['perct'] = df_temp['values_x']/df_temp['values_y']

    df_temp = pd.DataFrame(df_temp, columns=columns)

    df_temp = pd.concat([df_temp, dy_dummy], axis=0)

    # Re-attach the shares to the original (start, end) grid; edges touching
    # the removed channel no longer match and come back as NaN.
    df_ini = pd.DataFrame(df_noremove, columns=['start', 'end'])

    df_temp2 = pd.concat([df_ini, df_dummy2], axis=0)

    df_temp = pd.merge(df_temp2, df_temp, on=['start', 'end'], how='left')

    # Fill the NaN left by the left join with 0. Plain assignment is used
    # because inplace fillna on a selected column is not reliable under
    # pandas copy-on-write.
    fill_cols = ['values_x', 'values_y', 'perct']
    df_temp[fill_cols] = df_temp[fill_cols].fillna(0)

    # Zero-fill every state column rather than a hard-coded channel list
    df_trans1 = pd.crosstab(df_temp['start'], df_temp['end'], values=df_temp['perct'], aggfunc='sum')
    df_trans1 = df_trans1.fillna(0)

    # Convert to a numpy matrix
    df_trans_mat = np.matrix(df_trans1)

    inist_n1 = pd.crosstab(df_temp['start'], df_temp['end'], values=df_temp['values_x'], aggfunc='sum')
    inist_n1 = inist_n1.fillna(0)

    # Initial vector: the counts leaving the 'start' state
    inist_mat = np.matrix(inist_n1.loc['start'])

    # Matrix product
    # NOTE(review): this pushes the start counts through a single transition
    # only -- confirm that this one-step approximation is intended.
    mat = inist_mat*df_trans_mat

    # Read the estimated conversion count from the 'conversion' column
    conv_col = df_trans1.columns.get_loc('conversion')
    conversion.append(mat[0, conv_col])


# Removal effect of each single channel
chnl_conversion = pd.DataFrame({'channel': channels_list, 'conv': conversion})

# Baseline used for the impact: the sum of all transition counts in `temps`.
# Renaming a channel never changes that sum, so it is taken directly from
# `temps` instead of re-running channel_remove with the loop variable that
# leaked out of the loop above.
# NOTE(review): this is a count of transitions, not of converted customers --
# confirm it is the intended baseline.
tot_conv = sum(sum(entry[1]) for entry in temps)

chnl_conversion['impact'] = (tot_conv-chnl_conversion['conv'])/tot_conv

tot_impact = chnl_conversion['impact'].sum()

# Share of each channel in the summed impact
chnl_conversion['convet_rate']= chnl_conversion['impact']/tot_impact

chnl_conversion

	channel	conv	impact	convet_rate
0	DM	1003.039274	0.961468	0.265241
1	WEB	2724.928034	0.895320	0.246993
2	EM	2856.954317	0.890248	0.245594
3	PHONE	3179.825240	0.877845	0.242172

馬爾科夫鏈模型分析結論：相比於其他渠道，DM（直郵）的轉化貢獻值最高，是轉化率較優的渠道

可視化馬爾科夫鏈

# Node labels of the Markov chain: the start marker, the four channels and the two outcomes
states = ['start', 'DM', 'EM', 'PHONE', 'WEB', 'conversion', 'null']

def _get_markov_edges(Q):
    edges = {}
    for col in Q.columns:
        for idx in Q.index:
            edges[(idx,col)] = Q.loc[idx,col]
    return edges

# Edge weights keyed by (start state, end state), taken from the transposed transition table
edges_wts = _get_markov_edges(tmp1)
edges_wts

{('DM', 'DM'): '0.00',
 ('EM', 'DM'): '0.43',
 ('PHONE', 'DM'): '0.18',
 ('WEB', 'DM'): '0.58',
 ('start', 'DM'): '0.14',
 ('DM', 'EM'): '0.09',
 ('EM', 'EM'): '0.00',
 ('PHONE', 'EM'): '0.11',
 ('WEB', 'EM'): '0.22',
 ('start', 'EM'): '0.29',
 ('DM', 'PHONE'): '0.01',
 ('EM', 'PHONE'): '0.02',
 ('PHONE', 'PHONE'): '0.00',
 ('WEB', 'PHONE'): '0.03',
 ('start', 'PHONE'): '0.32',
 ('DM', 'WEB'): '0.12',
 ('EM', 'WEB'): '0.41',
 ('PHONE', 'WEB'): '0.69',
 ('WEB', 'WEB'): '0.00',
 ('start', 'WEB'): '0.25',
 ('DM', 'conversion'): '0.21',
 ('EM', 'conversion'): '0.04',
 ('PHONE', 'conversion'): '0.01',
 ('WEB', 'conversion'): '0.05',
 ('start', 'conversion'): '0.00',
 ('DM', 'null'): '0.57',
 ('EM', 'null'): '0.09',
 ('PHONE', 'null'): '0.01',
 ('WEB', 'null'): '0.12',
 ('start', 'null'): '0.00'}

# Drop the edges whose formatted transition probability is zero
zero_edges = [edge for edge, prob in edges_wts.items() if prob == '0.00']
for edge in zero_edges:
    edges_wts.pop(edge)

pprint(edges_wts)

{('DM', 'EM'): '0.09',
 ('DM', 'PHONE'): '0.01',
 ('DM', 'WEB'): '0.12',
 ('DM', 'conversion'): '0.21',
 ('DM', 'null'): '0.57',
 ('EM', 'DM'): '0.43',
 ('EM', 'PHONE'): '0.02',
 ('EM', 'WEB'): '0.41',
 ('EM', 'conversion'): '0.04',
 ('EM', 'null'): '0.09',
 ('PHONE', 'DM'): '0.18',
 ('PHONE', 'EM'): '0.11',
 ('PHONE', 'WEB'): '0.69',
 ('PHONE', 'conversion'): '0.01',
 ('PHONE', 'null'): '0.01',
 ('WEB', 'DM'): '0.58',
 ('WEB', 'EM'): '0.22',
 ('WEB', 'PHONE'): '0.03',
 ('WEB', 'conversion'): '0.05',
 ('WEB', 'null'): '0.12',
 ('start', 'DM'): '0.14',
 ('start', 'EM'): '0.29',
 ('start', 'PHONE'): '0.32',
 ('start', 'WEB'): '0.25'}

# Draw the Markov chain with networkx

# Put the bundled Graphviz binaries on PATH for the layout call below
os.environ["PATH"] += os.pathsep + './graphviz-2.38/release/bin/'

G = nx.MultiDiGraph()

# Add one node per state
G.add_nodes_from(states)
# Format the node list into the message; the plain string literal printed the
# placeholder text '{G.nodes()}' verbatim.
print('Nodes:\n{}\n'.format(G.nodes()))

# Edges carry the transition probability as both weight and label
for k, v in edges_wts.items():
    tmp_origin, tmp_destination = k[0], k[1]
    G.add_edge(tmp_origin, tmp_destination, weight=v, label=v)
print('Edges:')
pprint(G.edges(data=True))

pos = nx.drawing.nx_pydot.graphviz_layout(G, prog='dot')
nx.draw_networkx(G, pos)

# Build the edge labels
edge_labels = {(n1,n2):d['label'] for n1,n2,d in G.edges(data=True)}
nx.draw_networkx_edge_labels(G , pos, edge_labels=edge_labels)
nx.drawing.nx_pydot.write_dot(G, 'customer_markov.dot')

Nodes:
{G.nodes()}

Edges:
OutMultiEdgeDataView([('DM', 'WEB', {'label': '0.12', 'weight': '0.12'}), ('DM', 'EM', {'label': '0.09', 'weight': '0.09'}), ('DM', 'null', {'label': '0.57', 'weight': '0.57'}), ('DM', 'PHONE', {'label': '0.01', 'weight': '0.01'}), ('DM', 'conversion', {'label': '0.21', 'weight': '0.21'}), ('start', 'EM', {'label': '0.29', 'weight': '0.29'}), ('start', 'DM', {'label': '0.14', 'weight': '0.14'}), ('start', 'PHONE', {'label': '0.32', 'weight': '0.32'}), ('start', 'WEB', {'label': '0.25', 'weight': '0.25'}), ('EM', 'DM', {'label': '0.43', 'weight': '0.43'}), ('EM', 'WEB', {'label': '0.41', 'weight': '0.41'}), ('EM', 'null', {'label': '0.09', 'weight': '0.09'}), ('EM', 'PHONE', {'label': '0.02', 'weight': '0.02'}), ('EM', 'conversion', {'label': '0.04', 'weight': '0.04'}), ('WEB', 'DM', {'label': '0.58', 'weight': '0.58'}), ('WEB', 'EM', {'label': '0.22', 'weight': '0.22'}), ('WEB', 'null', {'label': '0.12', 'weight': '0.12'}), ('WEB', 'PHONE', {'label': '0.03', 'weight': '0.03'}), ('WEB', 'conversion', {'label': '0.05', 'weight': '0.05'}), ('PHONE', 'WEB', {'label': '0.69', 'weight': '0.69'}), ('PHONE', 'DM', {'label': '0.18', 'weight': '0.18'}), ('PHONE', 'null', {'label': '0.01', 'weight': '0.01'}), ('PHONE', 'EM', {'label': '0.11', 'weight': '0.11'}), ('PHONE', 'conversion', {'label': '0.01', 'weight': '0.01'})])

免責聲明！

本站轉載的文章為個人學習借鑒使用，本站對版權不負任何法律責任。如果侵犯了您的隱私權益，請聯系本站郵箱yoyou2525@163.com刪除。

猜您在找 渠道歸因分析模型解析分析模型-SCP分析模型 6大常用數據分析模型詳解，做分析不再沒思路1、RFM模型2、帕累托分析3、購物籃分析4、波士頓矩陣5、轉化分析6、杜邦分析法用戶分析模型 1.漏斗分析模型 TTPPRC —— 商業分析模型阿里AIPL營銷模型客戶養成到轉化行業分析-常用的分析模型時間序列分析模型——ARIMA模型【Coursera】因子分析模型