數據分析之兩種用戶分群方法(RFM和聚類)


由於沒有現成的數據,本文先自行生成了一批商品訂單數據,再基於這些數據分別構建了 RFM 模型和聚類模型。

1.數據的生成

數據庫表操作

 1 use my_work;
 2 
 3 -- 創建商品訂單表
 4 CREATE table goods_orders_ful(
 5     user_id varchar(100),  -- 用戶id
 6     order_id varchar(100), -- 訂單id
 7     is_paid bool, -- 用戶是否實際支付,1支付;0未支付
 8     amount double, -- 訂單金額
 9     created_date date, -- 訂單生成日期 yyyy-mm-dd
10     created_time timestamp, -- 訂單生成時間 yyyy-mm-dd hh:mm:ss
11     business_type varchar(10), -- 業務類型
12     region_name varchar(10), -- 所屬區域:如 東部地區
13     order_source_name varchar(10), -- 訂單渠道:Web、H5、App 
14     is_done bool -- 訂單是否完成
15     );
16                                             
17 -- 創建用戶訂單行為中間表                 
18 drop table if exists user_info_frm_01;
19 CREATE table user_info_frm_01
20 as
21 select gof.user_id,
22        sum(gof.amount) all_of_money,
23        max(gof.created_date) latest_date, 
24        count(gof.order_id) all_of_orders
25 FROM goods_orders_ful gof 
26 where gof.is_paid = 1
27 and gof.is_done = 1
28 and gof.created_date >= '2020-01-01'
29 and gof.created_date < '2020-07-01'
30 group by gof.user_id;
31 
32 SELECT count(*) from user_info_frm_01 uif;
33 SELECT * from user_info_frm_01 uif limit 10
34 
35 -- 創建行為指標均值表
36 create table if not exists user_info_frm_02
37 as
38 select avg(uif.all_of_money) all_of_money_avg,
39        avg(datediff('2020-07-22', uif.latest_date)) latest_days_avg,
40        avg(uif.all_of_orders) orders_avg
41 from user_info_frm_01 uif;
42 
43 SELECT * from user_info_frm_02;
44 -- 消費均值1107.10,最小天數均值86.9,訂單數量均值2.1
45 
46 -- 將用戶進行rfm一級打標
47 create table if not exists user_info_frm_03
48 as
49 SELECT uif.user_id,
50        case when uif.all_of_money >= 1107.10
51             then ''
52             else ''
53             end money,
54        case when datediff('2020-07-22', uif.latest_date) >= 86.9
55             then ''
56             else ''
57             end recency,
58        case when uif.all_of_orders >= 2.1
59             then ''
60             else ''
61             end frequency
62 from user_info_frm_01 uif;
63 
64 -- 將用戶進行二級打標
65 create table if not exists user_info_frm_04
66 as
67 select uif.user_id,
68        uif.recency,
69        uif.frequency,
70        uif.money,
71        case when uif.recency = '' and uif.frequency = '' and uif.money = ''
72             then '重要價值用戶'
73             when uif.recency = '' and uif.frequency = '' and uif.money = ''
74             then '重要保持用戶'
75             when uif.recency = '' and uif.frequency = '' and uif.money = ''
76             then '重要發展用戶'
77             when uif.recency = '' and uif.frequency = '' and uif.money = ''
78             then '重要挽留用戶'
79             when uif.recency = '' and uif.frequency = '' and uif.money = ''
80             then '一般價值用戶'
81             when uif.recency = '' and uif.frequency = '' and uif.money = ''
82             then '一般保持用戶'
83             when uif.recency = '' and uif.frequency = '' and uif.money = ''
84             then '一般發展用戶'
85             when uif.recency = '' and uif.frequency = '' and uif.money = ''
86             then '一般挽留用戶'
87             else NULL 
88             end type
89             
90 from user_info_frm_03 uif;

Python 程序生成數據

 

 1 # _*_ coding: utf-8 _*_ #
 2 # @Time     :2020/7/25 7:30 下午
 3 # @Author   :Zhx
 4 
 5 
 6 import pymysql
 7 import uuid
 8 import random
 9 import time
10 
11 
12 class CreateData(object):
13 
14     def __init__(self):
15         pass
16 
17     @staticmethod
18     def create():
19         user_id_ = random.randint(1, 5000)
20         order_id_ = uuid.uuid1()
21         is_paid_ = random.choice([1, 0, 1, 1, 1, 1, 1, 1, 1, 1])
22         amount_ = random.uniform(10, 1000)
23         a1 = (2020, 1, 1, 0, 0, 0, 0, 0, 0)
24         a2 = (2020, 6, 31, 23, 59, 59, 0, 0, 0)
25 
26         start = time.mktime(a1)  # 生成開始時間戳
27         end = time.mktime(a2)  # 生成結束時間戳
28 
29         # 隨機生成10個日期字符串
30         t = random.randint(start, end)  # 在開始和結束時間戳中隨機取出一個
31         date_tuple = time.localtime(t)  # 將時間戳生成時間元組
32         created_date_ = time.strftime("%Y-%m-%d", date_tuple)  # 將時間元組轉成格式化字符串
33         created_time_ = time.strftime("%Y-%m-%d %H:%M:%S", date_tuple)
34         business_type_ = random.randint(0, 20)
35         region_name_ = random.choice(['', '西', '', ''])
36         order_source_name_ = random.choice(['Web', 'app', 'H5'])
37         is_done_ = is_paid_
38         return user_id_, order_id_, is_paid_, amount_, created_date_, created_time_, \
39             business_type_, region_name_, order_source_name_, is_done_
40 
41 
42 if __name__ == '__main__':
43     database = 'my_work'
44     table = 'goods_orders_ful'
45     counts = 10000
46     create_data = CreateData()
47     con = pymysql.connect(database=database, host='localhost',
48                           user='root', port=3306, password='199498zhx@')
49     cur = con.cursor()
50     for i in range(counts):
51         user_id, order_id, is_paid, amount, created_date, created_time, \
52              business_type, region_name, order_source_name, is_done = create_data.create()
53         sql = """insert into %s.%s values('%s', '%s', %d, %f, '%s', '%s', '%s', '%s', '%s', %d)""" % \
54               (database, table, user_id, order_id, is_paid, amount, created_date, created_time, business_type,
55                region_name, order_source_name, is_done)
56         try:
57             cur.execute(sql)
58             print(i, i % 1000)
59             con.commit()
60         except Exception as e:
61             print(e)
62             con.rollback()
63     con.close()
64     cur.close()

 

源數據字段有:

  user_id varchar(100),  -- 用戶id

  order_id varchar(100), -- 訂單id

  is_paid bool, -- 用戶是否實際支付,1支付;0未支付

  amount double, -- 訂單金額

  created_date date, -- 訂單生成日期 yyyy-mm-dd

  created_time timestamp, -- 訂單生成時間 yyyy-mm-dd hh:mm:ss

  business_type varchar(10), -- 業務類型

  region_name varchar(10), -- 所屬區域:如 東部地區

  order_source_name varchar(10), -- 訂單渠道:Web、H5、App 

  is_done bool -- 訂單是否完成

 

RFM 模型最終表數據

 

 

 

最終的可視化分析使用jupyter完成

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 


免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯繫本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM