课程目标

掌握电商核心指标计算、用户价值RFM分析、销售漏斗分析。

核心技能

  • 电商核心KPI计算(GMV、转化率、客单价等)
  • RFM用户价值分层
  • 销售漏斗分析

课件内容

1. 电商核心指标计算

python# 安装依赖:pip install pandas numpy matplotlib seaborn
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime, timedelta

class EcommerceMetrics:
    def __init__(self, order_data, user_data=None, product_data=None):
        """
        初始化电商指标计算器
        
        参数:
        order_data: 订单数据
        user_data: 用户数据(可选)
        product_data: 商品数据(可选)
        """
        self.order_data = order_data.copy()
        self.user_data = user_data.copy() if user_data is not None else None
        self.product_data = product_data.copy() if product_data is not None else None
        
        # 预处理:确保日期格式
        if 'order_date' in self.order_data.columns:
            self.order_data['order_date'] = pd.to_datetime(self.order_data['order_date'])
        
        # 计算订单金额
        if 'quantity' in self.order_data.columns and 'unit_price' in self.order_data.columns:
            self.order_data['order_amount'] = self.order_data['quantity'] * self.order_data['unit_price']
    
    def calculate_basic_metrics(self, start_date=None, end_date=None):
        """计算基础电商指标"""
        # 过滤时间范围
        if start_date and end_date:
            mask = (self.order_data['order_date'] >= pd.to_datetime(start_date)) & \
                   (self.order_data['order_date'] <= pd.to_datetime(end_date))
            filtered_data = self.order_data[mask]
        else:
            filtered_data = self.order_data
        
        # 确保有订单金额列
        if 'order_amount' not in filtered_data.columns:
            if 'quantity' in filtered_data.columns and 'unit_price' in filtered_data.columns:
                filtered_data['order_amount'] = filtered_data['quantity'] * filtered_data['unit_price']
            else:
                raise ValueError("订单数据需要包含quantity和unit_price列来计算金额")
        
        metrics = {}
        
        # 1. GMV(总交易额)
        metrics['gmv'] = filtered_data['order_amount'].sum()
        
        # 2. 订单数量
        metrics['order_count'] = len(filtered_data)
        
        # 3. 用户数量(去重)
        if 'user_id' in filtered_data.columns:
            metrics['user_count'] = filtered_data['user_id'].nunique()
        else:
            metrics['user_count'] = None
        
        # 4. 客单价(平均订单金额)
        metrics['avg_order_value'] = metrics['gmv'] / metrics['order_count'] if metrics['order_count'] > 0 else 0
        
        # 5. 人均订单数
        if metrics['user_count']:
            metrics['orders_per_user'] = metrics['order_count'] / metrics['user_count']
        else:
            metrics['orders_per_user'] = None
        
        # 6. 转化率(如果有访问数据)
        # 这里假设有访问数据,实际中需要结合UV数据
        
        # 7. 退货率(如果有退货状态)
        if 'order_status' in filtered_data.columns:
            returned_orders = filtered_data[filtered_data['order_status'].str.contains('退货|退款', na=False)]
            metrics['return_rate'] = len(returned_orders) / metrics['order_count'] if metrics['order_count'] > 0 else 0
        else:
            metrics['return_rate'] = None
        
        # 8. 热门商品
        if 'product_id' in filtered_data.columns:
            product_stats = filtered_data.groupby('product_id').agg({
                'order_amount': 'sum',
                'quantity': 'sum',
                'order_id': 'count'
            }).rename(columns={
                'order_amount': 'sales_amount',
                'quantity': 'sales_quantity',
                'order_id': 'order_count'
            }).sort_values('sales_amount', ascending=False)
            metrics['top_products'] = product_stats.head(10)
        
        # 9. 销售趋势
        if 'order_date' in filtered_data.columns:
            daily_sales = filtered_data.groupby(filtered_data['order_date'].dt.date).agg({
                'order_amount': 'sum',
                'order_id': 'count'
            }).rename(columns={
                'order_amount': 'daily_gmv',
                'order_id': 'daily_orders'
            })
            metrics['daily_trend'] = daily_sales
        
        return metrics
    
    def calculate_rfm_metrics(self, analysis_date=None):
        """计算RFM指标"""
        if analysis_date is None:
            analysis_date = self.order_data['order_date'].max()
        else:
            analysis_date = pd.to_datetime(analysis_date)
        
        # 确保有user_id和order_amount
        required_cols = ['user_id', 'order_date', 'order_amount']
        for col in required_cols:
            if col not in self.order_data.columns:
                raise ValueError(f"订单数据需要包含{col}列来计算RFM")
        
        # 按用户聚合
        user_stats = self.order_data.groupby('user_id').agg({
            'order_date': lambda x: (analysis_date - x.max()).days,  # R: 最近购买天数
            'order_id': 'count',  # F: 购买频率
            'order_amount': 'sum'  # M: 购买金额
        }).rename(columns={
            'order_date': 'recency',
            'order_id': 'frequency',
            'order_amount': 'monetary'
        }).reset_index()
        
        # RFM打分(1-5分,5分最好)
        # R值:最近购买天数越小越好
        user_stats['R_score'] = pd.qcut(user_stats['recency'], 5, labels=[5, 4, 3, 2, 1])
        
        # F值:购买频率越高越好
        user_stats['F_score'] = pd.qcut(user_stats['frequency'], 5, labels=[1, 2, 3, 4, 5])
        
        # M值:购买金额越高越好
        user_stats['M_score'] = pd.qcut(user_stats['monetary'], 5, labels=[1, 2, 3, 4, 5])
        
        # 转换为数值型
        user_stats['R_score'] = user_stats['R_score'].astype(int)
        user_stats['F_score'] = user_stats['F_score'].astype(int)
        user_stats['M_score'] = user_stats['M_score'].astype(int)
        
        # 计算RFM总分
        user_stats['RFM_score'] = user_stats['R_score'] + user_stats['F_score'] + user_stats['M_score']
        
        # RFM分段
        def segment_rfm(row):
            if row['R_score'] >= 4 and row['F_score'] >= 4 and row['M_score'] >= 4:
                return '重要价值客户'
            elif row['R_score'] >= 4 and row['F_score'] < 4 and row['M_score'] >= 4:
                return '重要发展客户'
            elif row['R_score'] < 4 and row['F_score'] >= 4 and row['M_score'] >= 4:
                return '重要保持客户'
            elif row['R_score'] < 4 and row['F_score'] < 4 and row['M_score'] >= 4:
                return '重要挽留客户'
            elif row['R_score'] >= 4 and row['F_score'] >= 4 and row['M_score'] < 4:
                return '一般价值客户'
            elif row['R_score'] >= 4 and row['F_score'] < 4 and row['M_score'] < 4:
                return '一般发展客户'
            elif row['R_score'] < 4 and row['F_score'] >= 4 and row['M_score'] < 4:
                return '一般保持客户'
            else:
                return '一般挽留客户'
        
        user_stats['RFM_segment'] = user_stats.apply(segment_rfm, axis=1)
        
        return user_stats
    
    def analyze_sales_funnel(self, funnel_stages=None):
        """分析销售漏斗"""
        if funnel_stages is None:
            # 默认漏斗阶段
            funnel_stages = {
                '浏览商品': 10000,  # 假设数据
                '加入购物车': 5000,
                '生成订单': 3000,
                '完成支付': 2500,
                '确认收货': 2300
            }
        
        funnel_data = pd.DataFrame({
            'stage': list(funnel_stages.keys()),
            'count': list(funnel_stages.values())
        })
        
        # 计算转化率
        funnel_data['conversion_rate'] = (funnel_data['count'] / funnel_data['count'].iloc[0] * 100).round(2)
        funnel_data['stage_conversion'] = (funnel_data['count'].shift(-1) / funnel_data['count'] * 100).round(2)
        funnel_data.loc[len(funnel_data)-1, 'stage_conversion'] = 100  # 最后阶段
        
        return funnel_data
    
    def visualize_metrics(self, metrics):
        """可视化指标"""
        fig, axes = plt.subplots(2, 2, figsize=(15, 10))
        
        # 1. 销售趋势图
        if 'daily_trend' in metrics:
            ax1 = axes[0, 0]
            metrics['daily_trend']['daily_gmv'].plot(ax=ax1, color='blue', marker='o')
            ax1.set_title('每日GMV趋势', fontsize=12, fontweight='bold')
            ax1.set_xlabel('日期')
            ax1.set_ylabel('GMV')
            ax1.grid(True, alpha=0.3)
        
        # 2. 热门商品
        if 'top_products' in metrics:
            ax2 = axes[0, 1]
            metrics['top_products'].head(5)['sales_amount'].plot(
                kind='bar', ax=ax2, color='green'
            )
            ax2.set_title('Top 5商品销售额', fontsize=12, fontweight='bold')
            ax2.set_xlabel('商品ID')
            ax2.set_ylabel('销售额')
            ax2.tick_params(axis='x', rotation=45)
        
        # 3. RFM分布
        if hasattr(self, 'rfm_data'):
            ax3 = axes[1, 0]
            segment_counts = self.rfm_data['RFM_segment'].value_counts()
            segment_counts.plot(kind='pie', ax=ax3, autopct='%1.1f%%')
            ax3.set_title('RFM用户分群分布', fontsize=12, fontweight='bold')
            ax3.set_ylabel('')
        
        # 4. 漏斗图
        if hasattr(self, 'funnel_data'):
            ax4 = axes[1, 1]
            stages = self.funnel_data['stage']
            rates = self.funnel_data['conversion_rate']
            
            ax4.barh(stages, rates, color='orange')
            ax4.set_title('销售漏斗转化率', fontsize=12, fontweight='bold')
            ax4.set_xlabel('转化率 (%)')
            
            # 添加数值标签
            for i, v in enumerate(rates):
                ax4.text(v + 1, i, f'{v}%', va='center')
        
        plt.tight_layout()
        plt.show()
    
    def generate_report(self, start_date=None, end_date=None):
        """生成完整分析报告"""
        print("="*60)
        print("电商数据分析报告")
        print("="*60)
        
        # 计算基础指标
        print("\n1. 核心业务指标:")
        print("-"*40)
        metrics = self.calculate_basic_metrics(start_date, end_date)
        
        print(f"📊 GMV(总交易额): ¥{metrics['gmv']:,.2f}")
        print(f"📦 订单数量: {metrics['order_count']:,}")
        print(f"👥 用户数量: {metrics['user_count']:,}")
        print(f"💰 客单价: ¥{metrics['avg_order_value']:,.2f}")
        
        if metrics['orders_per_user']:
            print(f"🛒 人均订单数: {metrics['orders_per_user']:.2f}")
        
        if metrics['return_rate']:
            print(f"↩️  退货率: {metrics['return_rate']:.2%}")
        
        # RFM分析
        print("\n2. RFM用户价值分析:")
        print("-"*40)
        rfm_data = self.calculate_rfm_metrics()
        self.rfm_data = rfm_data
        
        segment_dist = rfm_data['RFM_segment'].value_counts()
        print("用户分群分布:")
        for segment, count in segment_dist.items():
            percentage = (count / len(rfm_data) * 100)
            print(f"  {segment}: {count}人 ({percentage:.1f}%)")
        
        # 销售漏斗
        print("\n3. 销售漏斗分析:")
        print("-"*40)
        funnel_data = self.analyze_sales_funnel()
        self.funnel_data = funnel_data
        
        for _, row in funnel_data.iterrows():
            print(f"  {row['stage']}: {row['count']}人 | "
                  f"总体转化率: {row['conversion_rate']}% | "
                  f"阶段转化率: {row['stage_conversion']}%")
        
        # 可视化
        print("\n4. 数据可视化生成中...")
        self.visualize_metrics(metrics)
        
        return {
            'basic_metrics': metrics,
            'rfm_data': rfm_data,
            'funnel_data': funnel_data
        }

# 案例数据脚本
def create_ecommerce_sample_data():
    """生成电商分析示例数据"""
    np.random.seed(42)
    
    # 生成更丰富的订单数据
    n_orders = 1000
    user_ids = np.random.choice(range(1001, 1101), n_orders)
    product_ids = np.random.choice(['P001', 'P002', 'P003', 'P004', 'P005', 
                                   'P006', 'P007', 'P008', 'P009', 'P010'], n_orders)
    
    order_data = pd.DataFrame({
        'order_id': range(10001, 10001 + n_orders),
        'user_id': user_ids,
        'order_date': pd.date_range('2024-01-01', periods=n_orders, freq='H'),
        'product_id': product_ids,
        'quantity': np.random.randint(1, 5, n_orders),
        'unit_price': np.random.uniform(50, 2000, n_orders).round(2),
        'order_status': np.random.choice(['已完成', '已发货', '待付款', '已取消', '已退货'], 
                                        n_orders, p=[0.7, 0.15, 0.05, 0.05, 0.05])
    })
    
    # 打乱日期
    order_data['order_date'] = order_data['order_date'].sample(frac=1).reset_index(drop=True)
    
    # 用户数据
    user_data = pd.DataFrame({
        'user_id': range(1001, 1101),
        'registration_date': pd.date_range('2023-06-01', periods=100),
        'user_level': np.random.choice(['青铜', '白银', '黄金', '铂金', '钻石'], 100, 
                                      p=[0.3, 0.25, 0.2, 0.15, 0.1]),
        'city': np.random.choice(['北京', '上海', '广州', '深圳', '杭州', '成都', '武汉', '南京'], 100)
    })
    
    # 商品数据
    product_data = pd.DataFrame({
        'product_id': [f'P{str(i).zfill(3)}' for i in range(1, 11)],
        'product_name': ['智能手机', '笔记本电脑', '无线耳机', '智能手表', '平板电脑',
                        '数码相机', '游戏主机', '显示器', '键盘鼠标', '移动硬盘'],
        'category': ['电子产品', '电子产品', '配件', '电子产品', '电子产品',
                    '电子产品', '电子产品', '配件', '配件', '配件'],
        'brand': ['品牌A', '品牌B', '品牌C', '品牌D', '品牌E',
                 '品牌F', '品牌G', '品牌H', '品牌I', '品牌J'],
        'cost_price': [1999, 5999, 299, 1299, 2999, 3999, 3499, 899, 199, 499],
        'market_price': [2499, 6999, 399, 1599, 3499, 4999, 3999, 1299, 299, 699]
    })
    
    # 保存数据
    order_data.to_csv('ecommerce_orders.csv', index=False, encoding='utf-8')
    user_data.to_csv('ecommerce_users.csv', index=False, encoding='utf-8')
    product_data.to_csv('ecommerce_products.csv', index=False, encoding='utf-8')
    
    print("✅ 电商示例数据已生成:")
    print(f"  订单数据: {order_data.shape}")
    print(f"  用户数据: {user_data.shape}")
    print(f"  商品数据: {product_data.shape}")
    
    return order_data, user_data, product_data

# 运行示例
if __name__ == "__main__":
    print("="*60)
    print("Day 5: 电商核心指标计算与RFM分析")
    print("="*60)
    
    # 1. 生成示例数据
    print("\n步骤1: 生成电商示例数据...")
    order_df, user_df, product_df = create_ecommerce_sample_data()
    
    # 2. 初始化分析器
    print("\n步骤2: 初始化电商指标分析器...")
    analyzer = EcommerceMetrics(order_df, user_df, product_df)
    
    # 3. 生成分析报告
    print("\n步骤3: 生成完整分析报告...")
    report = analyzer.generate_report(
        start_date='2024-01-01',
        end_date='2024-03-31'
    )
    
    # 4. 保存RFM结果
    print("\n步骤4: 保存分析结果...")
    report['rfm_data'].to_csv('rfm_analysis_result.csv', index=False, encoding='utf-8')
    report['funnel_data'].to_csv('funnel_analysis_result.csv', index=False, encoding='utf-8')
    
    print("\n✅ 分析完成!结果已保存到:")
    print("  - rfm_analysis_result.csv")
    print("  - funnel_analysis_result.csv")

安装要求

bash# 基础依赖
pip install pandas==1.5.3 numpy==1.24.3 matplotlib==3.7.1 seaborn==0.12.2

# 可选:更美观的图表
pip install plotly==5.15.0