{"id":4078,"date":"2025-10-18T11:58:36","date_gmt":"2025-10-18T03:58:36","guid":{"rendered":"http:\/\/viplao.com\/?p=4078"},"modified":"2025-10-18T11:58:41","modified_gmt":"2025-10-18T03:58:41","slug":"%e3%80%90%e8%bf%90%e8%90%a5%e6%95%b0%e6%8d%ae%e5%88%86%e6%9e%90-%e8%bf%9b%e9%98%b6%e7%af%87%e3%80%91%e7%94%a8%e6%88%b7%e8%a1%8c%e4%b8%ba%e5%88%86%e6%9e%90","status":"publish","type":"post","link":"http:\/\/viplao.com\/index.php\/2025\/10\/18\/%e3%80%90%e8%bf%90%e8%90%a5%e6%95%b0%e6%8d%ae%e5%88%86%e6%9e%90-%e8%bf%9b%e9%98%b6%e7%af%87%e3%80%91%e7%94%a8%e6%88%b7%e8%a1%8c%e4%b8%ba%e5%88%86%e6%9e%90\/","title":{"rendered":"\u3010\u8fd0\u8425\u6570\u636e\u5206\u6790-\u8fdb\u9636\u7bc7\u3011\u7528\u6237\u884c\u4e3a\u5206\u6790"},"content":{"rendered":"\n<div id=\"ez-toc-container\" class=\"ez-toc-v2_0_71 counter-hierarchy ez-toc-counter ez-toc-grey ez-toc-container-direction\">\n<div class=\"ez-toc-title-container\">\n<p class=\"ez-toc-title\" style=\"cursor:inherit\">\u6587\u7ae0\u76ee\u5f55<\/p>\n<span class=\"ez-toc-title-toggle\"><a href=\"#\" class=\"ez-toc-pull-right ez-toc-btn ez-toc-btn-xs ez-toc-btn-default ez-toc-toggle\" aria-label=\"Toggle Table of Content\"><span class=\"ez-toc-js-icon-con\"><span class=\"\"><span class=\"eztoc-hide\" style=\"display:none;\">Toggle<\/span><span class=\"ez-toc-icon-toggle-span\"><svg style=\"fill: #999;color:#999\" xmlns=\"http:\/\/www.w3.org\/2000\/svg\" class=\"list-377408\" width=\"20px\" height=\"20px\" viewBox=\"0 0 24 24\" fill=\"none\"><path d=\"M6 6H4v2h2V6zm14 0H8v2h12V6zM4 11h2v2H4v-2zm16 0H8v2h12v-2zM4 16h2v2H4v-2zm16 0H8v2h12v-2z\" fill=\"currentColor\"><\/path><\/svg><svg style=\"fill: #999;color:#999\" class=\"arrow-unsorted-368013\" xmlns=\"http:\/\/www.w3.org\/2000\/svg\" width=\"10px\" height=\"10px\" viewBox=\"0 0 24 24\" version=\"1.2\" baseProfile=\"tiny\"><path d=\"M18.2 9.3l-6.2-6.3-6.2 6.3c-.2.2-.3.4-.3.7s.1.5.3.7c.2.2.4.3.7.3h11c.3 0 .5-.1.7-.3.2-.2.3-.5.3-.7s-.1-.5-.3-.7zM5.8 14.7l6.2 6.3 
6.2-6.3c.2-.2.3-.5.3-.7s-.1-.5-.3-.7c-.2-.2-.4-.3-.7-.3h-11c-.3 0-.5.1-.7.3-.2.2-.3.5-.3.7s.1.5.3.7z\"\/><\/svg><\/span><\/span><\/span><\/a><\/span><\/div>\n<nav><ul class='ez-toc-list ez-toc-list-level-1 eztoc-toggle-hide-by-default' ><li class='ez-toc-page-1 ez-toc-heading-level-4'><a class=\"ez-toc-link ez-toc-heading-1\" href=\"http:\/\/viplao.com\/index.php\/2025\/10\/18\/%e3%80%90%e8%bf%90%e8%90%a5%e6%95%b0%e6%8d%ae%e5%88%86%e6%9e%90-%e8%bf%9b%e9%98%b6%e7%af%87%e3%80%91%e7%94%a8%e6%88%b7%e8%a1%8c%e4%b8%ba%e5%88%86%e6%9e%90\/#51_%E7%94%A8%E6%88%B7%E7%94%BB%E5%83%8F%E6%9E%84%E5%BB%BA%E5%9F%BA%E7%A1%80\" title=\"5.1 \u7528\u6237\u753b\u50cf\u6784\u5efa\u57fa\u7840\">5.1 \u7528\u6237\u753b\u50cf\u6784\u5efa\u57fa\u7840<\/a><\/li><li class='ez-toc-page-1 ez-toc-heading-level-4'><a class=\"ez-toc-link ez-toc-heading-2\" href=\"http:\/\/viplao.com\/index.php\/2025\/10\/18\/%e3%80%90%e8%bf%90%e8%90%a5%e6%95%b0%e6%8d%ae%e5%88%86%e6%9e%90-%e8%bf%9b%e9%98%b6%e7%af%87%e3%80%91%e7%94%a8%e6%88%b7%e8%a1%8c%e4%b8%ba%e5%88%86%e6%9e%90\/#52_RFM%E6%A8%A1%E5%9E%8B%EF%BC%88%E9%87%8D%E7%82%B9%EF%BC%89\" title=\"5.2 RFM\u6a21\u578b\uff08\u91cd\u70b9\uff09\">5.2 RFM\u6a21\u578b\uff08\u91cd\u70b9\uff09<\/a><\/li><li class='ez-toc-page-1 ez-toc-heading-level-4'><a class=\"ez-toc-link ez-toc-heading-3\" href=\"http:\/\/viplao.com\/index.php\/2025\/10\/18\/%e3%80%90%e8%bf%90%e8%90%a5%e6%95%b0%e6%8d%ae%e5%88%86%e6%9e%90-%e8%bf%9b%e9%98%b6%e7%af%87%e3%80%91%e7%94%a8%e6%88%b7%e8%a1%8c%e4%b8%ba%e5%88%86%e6%9e%90\/#53_%E7%94%A8%E6%88%B7%E7%94%9F%E5%91%BD%E5%91%A8%E6%9C%9F%E5%88%86%E6%9E%90\" title=\"5.3 \u7528\u6237\u751f\u547d\u5468\u671f\u5206\u6790\">5.3 \u7528\u6237\u751f\u547d\u5468\u671f\u5206\u6790<\/a><\/li><li class='ez-toc-page-1 ez-toc-heading-level-4'><a class=\"ez-toc-link ez-toc-heading-4\" 
href=\"http:\/\/viplao.com\/index.php\/2025\/10\/18\/%e3%80%90%e8%bf%90%e8%90%a5%e6%95%b0%e6%8d%ae%e5%88%86%e6%9e%90-%e8%bf%9b%e9%98%b6%e7%af%87%e3%80%91%e7%94%a8%e6%88%b7%e8%a1%8c%e4%b8%ba%e5%88%86%e6%9e%90\/#54_%E7%94%A8%E6%88%B7%E4%BB%B7%E5%80%BC%E4%B8%8E%E6%B5%81%E5%A4%B1%E9%A2%84%E6%B5%8B%EF%BC%88%E5%85%A5%E9%97%A8%EF%BC%89\" title=\"5.4 \u7528\u6237\u4ef7\u503c\u4e0e\u6d41\u5931\u9884\u6d4b\uff08\u5165\u95e8\uff09\">5.4 \u7528\u6237\u4ef7\u503c\u4e0e\u6d41\u5931\u9884\u6d4b\uff08\u5165\u95e8\uff09<\/a><\/li><\/ul><\/nav><\/div>\n<h4 class=\"wp-block-heading\"><span class=\"ez-toc-section\" id=\"51_%E7%94%A8%E6%88%B7%E7%94%BB%E5%83%8F%E6%9E%84%E5%BB%BA%E5%9F%BA%E7%A1%80\"><\/span><strong>5.1 \u7528\u6237\u753b\u50cf\u6784\u5efa\u57fa\u7840<\/strong><span class=\"ez-toc-section-end\"><\/span><\/h4>\n\n\n\n<p><strong>\u3010\u7406\u8bba\u8bb2\u89e3\u3011<\/strong><\/p>\n\n\n\n<p>\u7528\u6237\u753b\u50cf\uff0c\u5c31\u50cf\u7ed9\u6bcf\u4e2a\u7528\u6237\u753b\u4e00\u5f20\u201c\u6570\u5b57\u8096\u50cf\u201d\uff0c\u4e0a\u9762\u8bb0\u5f55\u7740\u4ed6\u4eec\u7684\u5404\u79cd\u4fe1\u606f\u548c\u884c\u4e3a\u7279\u5f81\u3002\u901a\u8fc7\u7528\u6237\u753b\u50cf\uff0c\u6211\u4eec\u53ef\u4ee5\u66f4\u597d\u5730\u7406\u89e3\u7528\u6237\u662f\u8c01\u3001\u4ed6\u4eec\u559c\u6b22\u4ec0\u4e48\u3001\u6709\u4ec0\u4e48\u4e60\u60ef\uff0c\u4ece\u800c\u8fdb\u884c\u66f4\u7cbe\u51c6\u7684\u8fd0\u8425\u548c\u8425\u9500\u3002<\/p>\n\n\n\n<p><strong>\u6838\u5fc3\u8981\u7d20\uff1a<\/strong><\/p>\n\n\n\n<ol>\n<li><strong>\u7528\u6237\u57fa\u672c\u4fe1\u606f\uff1a<\/strong>&nbsp;\u6027\u522b\u3001\u5e74\u9f84\u3001\u5730\u57df\u3001\u804c\u4e1a\u7b49\u3002<\/li>\n\n\n\n<li><strong>\u8d2d\u4e70\u884c\u4e3a\uff1a<\/strong>&nbsp;\u8d2d\u4e70\u5546\u54c1\u54c1\u7c7b\u3001\u54c1\u724c\u504f\u597d\u3001\u4ef7\u683c\u654f\u611f\u5ea6\u3001\u8d2d\u4e70\u9891\u7387\u3001\u6d88\u8d39\u91d1\u989d\u7b49\u3002<\/li>\n\n\n\n<li><strong>\u6d4f\u89c8\u884c\u4e3a\uff1a<\/strong>&nbsp;\u6d4f\u89c8\u65f6\u957f\u300
1\u6d4f\u89c8\u5546\u54c1\u7c7b\u578b\u3001\u641c\u7d22\u5173\u952e\u8bcd\u3001\u70b9\u51fb\u8def\u5f84\u7b49\u3002<\/li>\n\n\n\n<li><strong>\u4e92\u52a8\u884c\u4e3a\uff1a<\/strong>&nbsp;\u8bc4\u8bba\u3001\u70b9\u8d5e\u3001\u5206\u4eab\u3001\u6536\u85cf\u3001\u53c2\u4e0e\u6d3b\u52a8\u7b49\u3002<\/li>\n<\/ol>\n\n\n\n<p><strong>\u6784\u5efa\u6d41\u7a0b\uff1a<\/strong><\/p>\n\n\n\n<ul>\n<li><strong>\u6570\u636e\u6536\u96c6\uff1a<\/strong>&nbsp;\u4ece\u8ba2\u5355\u3001\u6d4f\u89c8\u65e5\u5fd7\u3001\u7528\u6237\u6ce8\u518c\u4fe1\u606f\u7b49\u6765\u6e90\u83b7\u53d6\u539f\u59cb\u6570\u636e\u3002<\/li>\n\n\n\n<li><strong>\u6570\u636e\u6e05\u6d17\u4e0e\u6574\u5408\uff1a<\/strong>&nbsp;\u5c06\u4e0d\u540c\u6765\u6e90\u7684\u6570\u636e\u7edf\u4e00\u5230\u7528\u6237ID\uff0c\u5904\u7406\u7f3a\u5931\u503c\u3001\u5f02\u5e38\u503c\u3002<\/li>\n\n\n\n<li><strong>\u7279\u5f81\u5de5\u7a0b\uff1a<\/strong>&nbsp;\u4ece\u539f\u59cb\u6570\u636e\u4e2d\u63d0\u53d6\u6709\u4ef7\u503c\u7684\u7279\u5f81\uff08\u5982\u201c\u9996\u6b21\u8d2d\u4e70\u65e5\u671f\u201d\u3001\u201c\u5e73\u5747\u5ba2\u5355\u4ef7\u201d\u3001\u201c\u6700\u5e38\u6d4f\u89c8\u54c1\u7c7b\u201d\uff09\u3002<\/li>\n\n\n\n<li><strong>\u6807\u7b7e\u5316\uff1a<\/strong>&nbsp;\u5c06\u7279\u5f81\u8f6c\u5316\u4e3a\u6613\u4e8e\u7406\u89e3\u548c\u4f7f\u7528\u7684\u6807\u7b7e\uff08\u5982\u201c\u9ad8\u6d88\u8d39\u7528\u6237\u201d\u3001\u201c\u65f6\u5c1a\u8fbe\u4eba\u201d\u3001\u201c\u7535\u5b50\u4ea7\u54c1\u7231\u597d\u8005\u201d\uff09\u3002<\/li>\n<\/ul>\n\n\n\n<p><strong>\u3010\u81ea\u52a8\u751f\u6210\u6570\u636e\u96c6\u4e0e\u4ee3\u7801\u5b9e\u4f8b\u3011<\/strong><\/p>\n\n\n\n<p>\u6211\u4eec\u5c06\u751f\u6210\u4e00\u4e2a\u5305\u542b\u7528\u6237\u6ce8\u518c\u4fe1\u606f\u3001\u6d4f\u89c8\u884c\u4e3a\u548c\u8d2d\u4e70\u884c\u4e3a\u7684\u6a21\u62df\u6570\u636e\u96c6\u3002<\/p>\n\n\n\n<p>python<\/p>\n\n\n\n<pre class=\"wp-block-code\"><code>import pandas as pd\nimport numpy as np\nfrom datetime import datetime, timedelta\n\n# --- 
\u6570\u636e\u96c6\u751f\u6210 ---\nnp.random.seed(42) # \u4fdd\u8bc1\u6bcf\u6b21\u751f\u6210\u6570\u636e\u4e00\u81f4\n\ndef generate_user_profile_data(num_users=1000, start_date='2022-01-01', end_date='2023-12-31'):\n    \"\"\"\n    \u751f\u6210\u6a21\u62df\u7684\u7528\u6237\u753b\u50cf\u76f8\u5173\u6570\u636e\n    \u5305\u62ec\u7528\u6237\u57fa\u672c\u4fe1\u606f\u3001\u6d4f\u89c8\u884c\u4e3a\u3001\u8d2d\u4e70\u884c\u4e3a\n    \"\"\"\n    users = &#91;]\n    for i in range(num_users):\n        user_id = f'U{i:04d}'\n        gender = np.random.choice(&#91;'Male', 'Female', 'Unknown'], p=&#91;0.45, 0.45, 0.1])\n        age = np.random.randint(18, 60) if gender != 'Unknown' else np.random.randint(18, 80)\n        city = np.random.choice(&#91;'Beijing', 'Shanghai', 'Guangzhou', 'Shenzhen', 'Chengdu', 'Hangzhou', 'Other'])\n        reg_date = pd.to_datetime(start_date) + timedelta(days=np.random.randint(0, (pd.to_datetime(end_date) - pd.to_datetime(start_date)).days))\n        users.append(&#91;user_id, gender, age, city, reg_date])\n    df_users = pd.DataFrame(users, columns=&#91;'user_id', 'gender', 'age', 'city', 'registration_date'])\n\n    # \u6a21\u62df\u5546\u54c1\u6570\u636e\n    products = {\n        'P101': {'category': 'Electronics', 'brand': 'TechCo', 'price_range': (100, 1000)},\n        'P102': {'category': 'Apparel', 'brand': 'FashionX', 'price_range': (50, 500)},\n        'P103': {'category': 'Home', 'brand': 'LifeStyle', 'price_range': (20, 300)},\n        'P104': {'category': 'Books', 'brand': 'ReadNow', 'price_range': (10, 150)},\n        'P105': {'category': 'Sports', 'brand': 'ActiveFit', 'price_range': (80, 800)},\n    }\n    product_ids = list(products.keys())\n\n    # \u6a21\u62df\u6d4f\u89c8\u884c\u4e3a\n    browsing_data = &#91;]\n    for _ in range(num_users * 5): # \u5e73\u5747\u6bcf\u4e2a\u7528\u62375\u6b21\u6d4f\u89c8\n        user_id = np.random.choice(df_users&#91;'user_id'])\n        product_id = np.random.choice(product_ids)\n        
browse_time = pd.to_datetime(start_date) + timedelta(seconds=np.random.randint(0, (pd.to_datetime(end_date) - pd.to_datetime(start_date)).total_seconds()))\n        browsing_data.append(&#91;user_id, product_id, browse_time])\n    df_browsing = pd.DataFrame(browsing_data, columns=&#91;'user_id', 'product_id', 'browse_time'])\n    df_browsing&#91;'category'] = df_browsing&#91;'product_id'].map(lambda x: products&#91;x]&#91;'category'])\n\n    # \u6a21\u62df\u8d2d\u4e70\u884c\u4e3a\n    order_data = &#91;]\n    for _ in range(num_users * 2): # \u5e73\u5747\u6bcf\u4e2a\u7528\u62372\u6b21\u8d2d\u4e70\n        user_id = np.random.choice(df_users&#91;'user_id'], p=df_users&#91;'user_id'].map(lambda x: 0.8 if int(x&#91;1:]) &lt; num_users * 0.2 else 0.2).values \/ sum(df_users&#91;'user_id'].map(lambda x: 0.8 if int(x&#91;1:]) &lt; num_users * 0.2 else 0.2).values)) # \u6a21\u62df\u90e8\u5206\u7528\u6237\u8d2d\u4e70\u66f4\u591a\n        product_id = np.random.choice(product_ids)\n        quantity = np.random.randint(1, 4)\n        price_range = products&#91;product_id]&#91;'price_range']\n        price = round(np.random.uniform(price_range&#91;0], price_range&#91;1]), 2)\n        order_time = pd.to_datetime(start_date) + timedelta(seconds=np.random.randint(0, (pd.to_datetime(end_date) - pd.to_datetime(start_date)).total_seconds()))\n        order_data.append(&#91;f'ORD{_ + 1:05d}', user_id, product_id, quantity, price, order_time])\n    df_orders = pd.DataFrame(order_data, columns=&#91;'order_id', 'user_id', 'product_id', 'quantity', 'price', 'order_time'])\n    df_orders&#91;'total_amount'] = df_orders&#91;'quantity'] * df_orders&#91;'price']\n    df_orders&#91;'category'] = df_orders&#91;'product_id'].map(lambda x: products&#91;x]&#91;'category'])\n    df_orders&#91;'brand'] = df_orders&#91;'product_id'].map(lambda x: products&#91;x]&#91;'brand'])\n\n    return df_users, df_browsing, df_orders\n\ndf_users, df_browsing, df_orders = 
generate_user_profile_data(num_users=1000)\n\nprint(\"--- \u7528\u6237\u57fa\u672c\u4fe1\u606f (df_users) ---\")\nprint(df_users.head())\nprint(\"\\n--- \u7528\u6237\u6d4f\u89c8\u884c\u4e3a (df_browsing) ---\")\nprint(df_browsing.head())\nprint(\"\\n--- \u7528\u6237\u8d2d\u4e70\u884c\u4e3a (df_orders) ---\")\nprint(df_orders.head())\n\n# --- \u7528\u6237\u753b\u50cf\u6784\u5efa (\u7279\u5f81\u5de5\u7a0b\u4e0e\u6807\u7b7e\u5316) ---\nprint(\"\\n--- \u7528\u6237\u753b\u50cf\u6784\u5efa ---\")\n\n# 1. \u5408\u5e76\u7528\u6237\u57fa\u672c\u4fe1\u606f\u548c\u8d2d\u4e70\u884c\u4e3a\nuser_purchase_summary = df_orders.groupby('user_id').agg(\n    total_orders=('order_id', 'nunique'),\n    total_spent=('total_amount', 'sum'),\n    avg_order_value=('total_amount', 'mean'),\n    first_purchase_date=('order_time', 'min'),\n    last_purchase_date=('order_time', 'max')\n).reset_index()\n\ndf_user_profile = pd.merge(df_users, user_purchase_summary, on='user_id', how='left')\n\n# 2. \u4ece\u6d4f\u89c8\u884c\u4e3a\u4e2d\u63d0\u53d6\u7279\u5f81\n# \u6700\u5e38\u6d4f\u89c8\u7684\u5546\u54c1\u7c7b\u522b\nmost_viewed_category = df_browsing.groupby('user_id')&#91;'category'].agg(lambda x: x.mode()&#91;0] if not x.empty else None).reset_index(name='most_viewed_category')\ndf_user_profile = pd.merge(df_user_profile, most_viewed_category, on='user_id', how='left')\n\n# 3. \u4ece\u8d2d\u4e70\u884c\u4e3a\u4e2d\u63d0\u53d6\u7279\u5f81\n# \u6700\u5e38\u8d2d\u4e70\u7684\u5546\u54c1\u7c7b\u522b\nmost_bought_category = df_orders.groupby('user_id')&#91;'category'].agg(lambda x: x.mode()&#91;0] if not x.empty else None).reset_index(name='most_bought_category')\ndf_user_profile = pd.merge(df_user_profile, most_bought_category, on='user_id', how='left')\n\n# 4. 
\u884d\u751f\u6807\u7b7e (\u4f8b\u5982\uff1a\u6d88\u8d39\u7b49\u7ea7\u3001\u6d3b\u8dc3\u5ea6)\n# \u586b\u5145\u672a\u8d2d\u4e70\u7528\u6237\u7684\u6d88\u8d39\u6570\u636e\u4e3a0\uff0c\u65b9\u4fbf\u540e\u7eed\u6253\u6807\u7b7e\ndf_user_profile&#91;'total_spent'] = df_user_profile&#91;'total_spent'].fillna(0)\ndf_user_profile&#91;'total_orders'] = df_user_profile&#91;'total_orders'].fillna(0)\n\n# \u6d88\u8d39\u7b49\u7ea7\u6807\u7b7e\ndf_user_profile&#91;'spending_level'] = pd.qcut(\n    df_user_profile&#91;df_user_profile&#91;'total_spent'] &gt; 0]&#91;'total_spent'],\n    q=3,\n    labels=&#91;'\u4f4e\u6d88\u8d39', '\u4e2d\u6d88\u8d39', '\u9ad8\u6d88\u8d39']\n).astype(object) # \u8f6c\u6362\u4e3aobject\u7c7b\u578b\uff0c\u5426\u5219\u672a\u8d2d\u4e70\u7528\u6237\u4f1a\u662fNaN\ndf_user_profile&#91;'spending_level'] = df_user_profile&#91;'spending_level'].fillna('\u672a\u6d88\u8d39')\n\n\n# \u6d3b\u8dc3\u5ea6\u6807\u7b7e (\u7b80\u5355\u793a\u4f8b\uff1a\u6839\u636e\u8ba2\u5355\u6570)\ndf_user_profile&#91;'activity_level'] = df_user_profile&#91;'total_orders'].apply(\n    lambda x: '\u9ad8\u6d3b\u8dc3' if x &gt;= 5 else ('\u4e2d\u6d3b\u8dc3' if x &gt;= 2 else ('\u4f4e\u6d3b\u8dc3' if x &gt; 0 else '\u4e0d\u6d3b\u8dc3'))\n)\n\nprint(\"\\n--- \u6700\u7ec8\u7528\u6237\u753b\u50cf (\u90e8\u5206\u7279\u5f81) ---\")\nprint(df_user_profile&#91;&#91;'user_id', 'gender', 'age', 'city', 'total_spent', 'total_orders', 'most_bought_category', 'spending_level', 'activity_level']].head(10))\n\nprint(\"\\n--- \u7528\u6237\u753b\u50cf\u7edf\u8ba1\u6982\u89c8 ---\")\nprint(\"\u6027\u522b\u5206\u5e03:\\n\", df_user_profile&#91;'gender'].value_counts(normalize=True))\nprint(\"\\n\u57ce\u5e02\u5206\u5e03 (Top 5):\\n\", df_user_profile&#91;'city'].value_counts().head())\nprint(\"\\n\u6d88\u8d39\u7b49\u7ea7\u5206\u5e03:\\n\", df_user_profile&#91;'spending_level'].value_counts())\nprint(\"\\n\u6d3b\u8dc3\u5ea6\u5206\u5e03:\\n\", df_user_profile&#91;'activity_level'].value_counts())\n\n# 
\u53ef\u89c6\u5316\u7528\u6237\u753b\u50cf\u7279\u5f81\nimport matplotlib.pyplot as plt\nimport seaborn as sns\nplt.rcParams&#91;'font.sans-serif'] = &#91;'SimHei']\nplt.rcParams&#91;'axes.unicode_minus'] = False\n\nfig, axes = plt.subplots(1, 3, figsize=(18, 5))\n\nsns.countplot(x='gender', data=df_user_profile, ax=axes&#91;0])\naxes&#91;0].set_title('\u7528\u6237\u6027\u522b\u5206\u5e03')\n\nsns.histplot(df_user_profile&#91;'age'], bins=10, kde=True, ax=axes&#91;1])\naxes&#91;1].set_title('\u7528\u6237\u5e74\u9f84\u5206\u5e03')\n\nsns.countplot(x='spending_level', data=df_user_profile, order=&#91;'\u672a\u6d88\u8d39', '\u4f4e\u6d88\u8d39', '\u4e2d\u6d88\u8d39', '\u9ad8\u6d88\u8d39'], ax=axes&#91;2])\naxes&#91;2].set_title('\u7528\u6237\u6d88\u8d39\u7b49\u7ea7\u5206\u5e03')\n\nplt.tight_layout()\nplt.show()\n\n# \u4ea4\u53c9\u5206\u6790\u793a\u4f8b\uff1a\u4e0d\u540c\u6027\u522b\u7528\u6237\u7684\u6d88\u8d39\u7b49\u7ea7\ngender_spending = pd.crosstab(df_user_profile&#91;'gender'], df_user_profile&#91;'spending_level'], normalize='index')\nprint(\"\\n\u4e0d\u540c\u6027\u522b\u7528\u6237\u7684\u6d88\u8d39\u7b49\u7ea7\u5206\u5e03:\\n\", gender_spending)\ngender_spending.plot(kind='bar', stacked=True, figsize=(8, 
5))\nplt.title('\u4e0d\u540c\u6027\u522b\u7528\u6237\u7684\u6d88\u8d39\u7b49\u7ea7')\nplt.ylabel('\u6bd4\u4f8b')\nplt.show()<\/code><\/pre>\n\n\n\n<p><strong>\u3010\u4e92\u52a8\u95ee\u7b54\u3011<\/strong><\/p>\n\n\n\n<ul>\n<li>\u7528\u6237\u753b\u50cf\u7684\u201c\u6807\u7b7e\u5316\u201d\u6709\u4ec0\u4e48\u597d\u5904\uff1f\u5728\u5b9e\u9645\u8fd0\u8425\u4e2d\u5982\u4f55\u4f7f\u7528\u8fd9\u4e9b\u6807\u7b7e\uff1f<\/li>\n\n\n\n<li>\u9664\u4e86\u6211\u4eec\u751f\u6210\u7684\u8fd9\u4e9b\u7279\u5f81\uff0c\u4f60\u8fd8\u80fd\u60f3\u5230\u54ea\u4e9b\u53ef\u4ee5\u7528\u6765\u6784\u5efa\u7528\u6237\u753b\u50cf\u7684\u7279\u5f81\uff1f<\/li>\n\n\n\n<li><code>pd.qcut()<\/code>&nbsp;\u548c&nbsp;<code>pd.cut()<\/code>&nbsp;\u5728\u6570\u636e\u5206\u7bb1\u65f6\u6709\u4ec0\u4e48\u533a\u522b\uff1f<\/li>\n\n\n\n<li>\u5982\u4f55\u4ece\u7528\u6237\u6d4f\u89c8\u884c\u4e3a\u4e2d\u63d0\u53d6\u201c\u7528\u6237\u504f\u597d\u54c1\u7c7b\u201d\u6807\u7b7e\uff1f<\/li>\n\n\n\n<li>\u5982\u679c\u7528\u6237\u6570\u636e\u91cf\u975e\u5e38\u5927\uff0c\u5982\u4f55\u9ad8\u6548\u5730\u8fdb\u884c\u7279\u5f81\u5de5\u7a0b\uff1f<\/li>\n<\/ul>\n\n\n\n<h4 class=\"wp-block-heading\"><span class=\"ez-toc-section\" id=\"52_RFM%E6%A8%A1%E5%9E%8B%EF%BC%88%E9%87%8D%E7%82%B9%EF%BC%89\"><\/span><strong>5.2 RFM\u6a21\u578b\uff08\u91cd\u70b9\uff09<\/strong><span class=\"ez-toc-section-end\"><\/span><\/h4>\n\n\n\n<p><strong>\u3010\u7406\u8bba\u8bb2\u89e3\u3011<\/strong><\/p>\n\n\n\n<p>RFM\u6a21\u578b\uff08Recency, Frequency, Monetary\uff09\u662f\u7535\u5546\u9886\u57df\u6700\u7ecf\u5178\u3001\u6700\u5b9e\u7528\u7684\u7528\u6237\u4ef7\u503c\u5206\u6790\u6a21\u578b\u4e4b\u4e00\u3002\u5b83\u901a\u8fc7\u7528\u6237\u7684\u201c\u6700\u8fd1\u4e00\u6b21\u6d88\u8d39\u65f6\u95f4\u201d\u3001\u201c\u6d88\u8d39\u9891\u7387\u201d\u548c\u201c\u6d88\u8d39\u91d1\u989d\u201d\u4e09\u4e2a\u6307\u6807\u6765\u8861\u91cf\u7528\u6237\u4ef7\u503c\u3002<\/p>\n\n\n\n<ul>\n<li><strong>R (Recency &#8211; 
\u6700\u8fd1\u4e00\u6b21\u6d88\u8d39)\uff1a<\/strong>&nbsp;\u7528\u6237\u6700\u8fd1\u4e00\u6b21\u8d2d\u4e70\u8ddd\u79bb\u73b0\u5728\u7684\u65f6\u95f4\u3002R\u503c\u8d8a\u5c0f\uff08\u8d8a\u8fd1\uff09\uff0c\u7528\u6237\u4ef7\u503c\u8d8a\u9ad8\u3002<\/li>\n\n\n\n<li><strong>F (Frequency &#8211; \u6d88\u8d39\u9891\u7387)\uff1a<\/strong>&nbsp;\u7528\u6237\u5728\u4e00\u5b9a\u65f6\u95f4\u5185\u7684\u8d2d\u4e70\u6b21\u6570\u3002F\u503c\u8d8a\u5927\uff0c\u7528\u6237\u4ef7\u503c\u8d8a\u9ad8\u3002<\/li>\n\n\n\n<li><strong>M (Monetary &#8211; \u6d88\u8d39\u91d1\u989d)\uff1a<\/strong>&nbsp;\u7528\u6237\u5728\u4e00\u5b9a\u65f6\u95f4\u5185\u7684\u6d88\u8d39\u603b\u91d1\u989d\u3002M\u503c\u8d8a\u5927\uff0c\u7528\u6237\u4ef7\u503c\u8d8a\u9ad8\u3002<\/li>\n<\/ul>\n\n\n\n<p><strong>RFM\u8bc4\u5206\u4e0e\u7528\u6237\u5206\u7fa4\uff1a<\/strong><\/p>\n\n\n\n<p>\u901a\u5e38\u6211\u4eec\u4f1a\u5c06R\u3001F\u3001M\u4e09\u4e2a\u6307\u6807\u5206\u522b\u6253\u5206\uff08\u59821-5\u5206\uff09\uff0c\u7136\u540e\u7ec4\u5408\u5206\u6570\u5bf9\u7528\u6237\u8fdb\u884c\u5206\u7fa4\uff0c\u4f8b\u5982\uff1a<\/p>\n\n\n\n<ul>\n<li><strong>\u9ad8\u4ef7\u503c\u7528\u6237 
(\u91cd\u8981\u4ef7\u503c\u5ba2\u6237)\uff1a<\/strong>&nbsp;R\u9ad8F\u9ad8M\u9ad8\uff08\u6b64\u5904\u9ad8\u4f4e\u5747\u6307\u8bc4\u5206\uff0cR\u8bc4\u5206\u9ad8\u8868\u793a\u6700\u8fd1\u6d88\u8d39\u8fc7\uff09<\/li>\n\n\n\n<li><strong>\u91cd\u8981\u4fdd\u6301\u5ba2\u6237\uff1a<\/strong>&nbsp;R\u9ad8F\u4e2dM\u4e2d<\/li>\n\n\n\n<li><strong>\u91cd\u8981\u53d1\u5c55\u5ba2\u6237\uff1a<\/strong>&nbsp;R\u4e2dF\u9ad8M\u9ad8\uff08\u6700\u8fd1\u4e0d\u6d3b\u8dc3\u4f46\u6d88\u8d39\u9ad8\uff0c\u9700\u8981\u5524\u9192\uff09<\/li>\n\n\n\n<li><strong>\u6d41\u5931\u5ba2\u6237\uff1a<\/strong>&nbsp;R\u4f4eF\u4f4eM\u4f4e<\/li>\n<\/ul>\n\n\n\n<p><strong>\u3010\u81ea\u52a8\u751f\u6210\u6570\u636e\u96c6\u4e0e\u4ee3\u7801\u5b9e\u4f8b\u3011<\/strong><\/p>\n\n\n\n<p>\u6211\u4eec\u5c06\u57fa\u4e8e\u4e4b\u524d\u751f\u6210\u7684&nbsp;<code>df_orders<\/code>&nbsp;\u6570\u636e\uff0c\u6765\u8ba1\u7b97RFM\u6307\u6807\u5e76\u8fdb\u884c\u7528\u6237\u5206\u7fa4\u3002<\/p>\n\n\n\n<p>python<\/p>\n\n\n\n<pre class=\"wp-block-code\"><code>import pandas as pd\nimport numpy as np\nfrom datetime import datetime, timedelta\n\n# --- \u6570\u636e\u96c6\u751f\u6210 (\u590d\u7528\u4e4b\u524d\u7684\u8ba2\u5355\u6570\u636e) ---\nnp.random.seed(42)\ndef generate_rfm_orders_data(num_users=1000, start_date='2022-01-01', end_date='2023-12-31'):\n    products = {\n        'P101': {'category': 'Electronics', 'brand': 'TechCo', 'price_range': (100, 1000)},\n        'P102': {'category': 'Apparel', 'brand': 'FashionX', 'price_range': (50, 500)},\n        'P103': {'category': 'Home', 'brand': 'LifeStyle', 'price_range': (20, 300)},\n        'P104': {'category': 'Books', 'brand': 'ReadNow', 'price_range': (10, 150)},\n        'P105': {'category': 'Sports', 'brand': 'ActiveFit', 'price_range': (80, 800)},\n    }\n    product_ids = list(products.keys())\n\n    order_data = &#91;]\n    for _ in range(num_users * 3): # \u6a21\u62df\u66f4\u591a\u8d2d\u4e70\u884c\u4e3a\uff0c\u8ba9RFM\u66f4\u660e\u663e\n        user_id = f'U{np.random.randint(0, num_users):04d}'\n        product_id = np.random.choice(product_ids)\n        quantity = np.random.randint(1, 
4)\n        price_range = products&#91;product_id]&#91;'price_range']\n        price = round(np.random.uniform(price_range&#91;0], price_range&#91;1]), 2)\n        order_time = pd.to_datetime(start_date) + timedelta(seconds=np.random.randint(0, (pd.to_datetime(end_date) - pd.to_datetime(start_date)).total_seconds()))\n        order_data.append(&#91;f'ORD{_ + 1:05d}', user_id, product_id, quantity, price, order_time])\n    df_orders_rfm = pd.DataFrame(order_data, columns=&#91;'order_id', 'user_id', 'product_id', 'quantity', 'price', 'order_time'])\n    df_orders_rfm&#91;'total_amount'] = df_orders_rfm&#91;'quantity'] * df_orders_rfm&#91;'price']\n    return df_orders_rfm\n\ndf_orders_rfm = generate_rfm_orders_data(num_users=500, start_date='2022-01-01', end_date='2023-12-31')\nprint(\"--- RFM\u8ba2\u5355\u6570\u636e\u9884\u89c8 ---\")\nprint(df_orders_rfm.head())\nprint(\"\u6570\u636e\u65f6\u95f4\u8303\u56f4:\", df_orders_rfm&#91;'order_time'].min(), \"\u5230\", df_orders_rfm&#91;'order_time'].max())\n\n# --- RFM\u6a21\u578b\u8ba1\u7b97 ---\nprint(\"\\n--- RFM\u6a21\u578b\u8ba1\u7b97 ---\")\n\n# \u5b9a\u4e49\u5206\u6790\u7684\u622a\u6b62\u65e5\u671f (\u901a\u5e38\u662f\u6570\u636e\u96c6\u4e2d\u6700\u65b0\u8ba2\u5355\u7684\u65e5\u671f\uff0c\u6216\u4eca\u5929\u7684\u65e5\u671f)\nsnapshot_date = df_orders_rfm&#91;'order_time'].max() + timedelta(days=1)\nprint(f\"\u5206\u6790\u622a\u6b62\u65e5\u671f: {snapshot_date}\")\n\n# \u8ba1\u7b97R, F, M\nrfm = df_orders_rfm.groupby('user_id').agg(\n    recency=('order_time', lambda date: (snapshot_date - date.max()).days), # \u6700\u8fd1\u4e00\u6b21\u6d88\u8d39\u8ddd\u79bb\u622a\u6b62\u65e5\u671f\u7684\u5929\u6570\n    frequency=('order_id', 'nunique'),                                  # \u8d2d\u4e70\u6b21\u6570 (\u4e0d\u91cd\u590d\u8ba2\u5355ID)\n    monetary=('total_amount', 'sum')                                    # \u6d88\u8d39\u603b\u91d1\u989d\n).reset_index()\n\nprint(\"\\nRFM\u539f\u59cb\u6570\u636e\u9884\u89c8:\\n\", 
rfm.head())\n\n# --- RFM\u8bc4\u5206 (\u901a\u5e38\u91c7\u7528\u7b49\u9891\u5206\u7bb1\u6216\u4e1a\u52a1\u7ecf\u9a8c\u5206\u7bb1) ---\nprint(\"\\n--- RFM\u8bc4\u5206 ---\")\n\n# \u7b49\u9891\u5206\u7bb1 (\u5c06\u6570\u636e\u5206\u6210N\u7b49\u4efd\uff0c\u6bcf\u4efd\u7684\u7528\u6237\u6570\u91cf\u5927\u81f4\u76f8\u7b49)\n# R: \u8d8a\u5c0f\u8d8a\u597d\uff0c\u6240\u4ee5\u8bc4\u5206\u9ad8\u7684R\u503c\u53cd\u800c\u5c0f\nrfm&#91;'R_score'] = pd.qcut(rfm&#91;'recency'], q=5, labels=&#91;5, 4, 3, 2, 1], duplicates='drop')\n# F: \u8d8a\u5927\u8d8a\u597d\nrfm&#91;'F_score'] = pd.qcut(rfm&#91;'frequency'], q=5, labels=&#91;1, 2, 3, 4, 5], duplicates='drop')\n# M: \u8d8a\u5927\u8d8a\u597d\nrfm&#91;'M_score'] = pd.qcut(rfm&#91;'monetary'], q=5, labels=&#91;1, 2, 3, 4, 5], duplicates='drop')\n\n# \u7ec4\u5408RFM\u5206\u6570\nrfm&#91;'RFM_score'] = rfm&#91;'R_score'].astype(str) + rfm&#91;'F_score'].astype(str) + rfm&#91;'M_score'].astype(str)\nprint(\"\\nRFM\u8bc4\u5206\u540e\u7684\u6570\u636e\u9884\u89c8:\\n\", rfm.head())\n\n# --- \u7528\u6237\u5206\u7fa4 ---\nprint(\"\\n--- \u7528\u6237\u5206\u7fa4 ---\")\n\n# \u5b9a\u4e49\u7528\u6237\u5206\u7fa4\u89c4\u5219 (\u53ef\u4ee5\u6839\u636e\u4e1a\u52a1\u7ecf\u9a8c\u8c03\u6574)\ndef rfm_segment(row):\n    r, f, m = row&#91;'R_score'], row&#91;'F_score'], row&#91;'M_score']\n    if r &gt;= 4 and f &gt;= 4 and m &gt;= 4:\n        return '\u91cd\u8981\u4ef7\u503c\u5ba2\u6237' # \u9ad8R\u9ad8F\u9ad8M\n    elif r &gt;= 4 and f &gt;= 3 and m &gt;= 3:\n        return '\u91cd\u8981\u4fdd\u6301\u5ba2\u6237' # \u9ad8R\u4e2dF\u4e2dM\n    elif r &gt;= 3 and f &gt;= 4 and m &gt;= 4:\n        return '\u91cd\u8981\u53d1\u5c55\u5ba2\u6237' # \u4e2dR\u9ad8F\u9ad8M\n    elif r &gt;= 3 and f &gt;= 2 and m &gt;= 2:\n        return '\u4e00\u822c\u5ba2\u6237'\n    elif r &lt;= 2 and f &gt;= 3 and m &gt;= 3:\n        return '\u91cd\u8981\u633d\u7559\u5ba2\u6237' # \u4f4eR\u9ad8F\u9ad8M 
(\u8fc7\u53bb\u662f\u597d\u5ba2\u6237\uff0c\u73b0\u5728\u4e0d\u6d3b\u8dc3\u4e86)\n    elif r &lt;= 2 and f &lt;= 2 and m &lt;= 2:\n        return '\u6d41\u5931\u5ba2\u6237' # \u4f4eR\u4f4eF\u4f4eM\n    else:\n        return '\u6f5c\u5728\u5ba2\u6237' # \u5176\u4ed6\u60c5\u51b5\uff0c\u5f85\u89c2\u5bdf\n\nrfm&#91;'customer_segment'] = rfm.apply(rfm_segment, axis=1)\n\nprint(\"\\nRFM\u5206\u7fa4\u7ed3\u679c\u9884\u89c8:\\n\", rfm.head(10))\n\n# \u7edf\u8ba1\u5404\u7fa4\u7ec4\u7684\u7528\u6237\u6570\u91cf\nsegment_counts = rfm&#91;'customer_segment'].value_counts().sort_values(ascending=False)\nprint(\"\\n\u5404\u5ba2\u6237\u7fa4\u7ec4\u7528\u6237\u6570\u91cf:\\n\", segment_counts)\n\n# \u53ef\u89c6\u5316\u5206\u7fa4\u7ed3\u679c\nimport matplotlib.pyplot as plt\nimport seaborn as sns\nplt.rcParams&#91;'font.sans-serif'] = &#91;'SimHei']\nplt.rcParams&#91;'axes.unicode_minus'] = False\n\nplt.figure(figsize=(10, 6))\nsns.barplot(x=segment_counts.index, y=segment_counts.values, palette='viridis')\nplt.title('RFM\u5ba2\u6237\u5206\u7fa4\u6570\u91cf\u5206\u5e03')\nplt.xlabel('\u5ba2\u6237\u7fa4\u7ec4')\nplt.ylabel('\u7528\u6237\u6570\u91cf')\nplt.xticks(rotation=45, ha='right')\nplt.tight_layout()\nplt.show()\n\n# \u8fdb\u4e00\u6b65\u5206\u6790\u5404\u7fa4\u7ec4\u7684RFM\u5747\u503c\nsegment_rfm_means = rfm.groupby('customer_segment')&#91;&#91;'recency', 'frequency', 'monetary']].mean().sort_values(by='monetary', ascending=False)\nprint(\"\\n\u5404\u5ba2\u6237\u7fa4\u7ec4\u7684RFM\u5747\u503c:\\n\", segment_rfm_means)\n\n# \u3010\u8fd0\u8425\u7b56\u7565\u5efa\u8bae\u3011\nprint(\"\\n--- \u57fa\u4e8eRFM\u5206\u7fa4\u7684\u8fd0\u8425\u7b56\u7565\u5efa\u8bae ---\")\nprint(\"1. **\u91cd\u8981\u4ef7\u503c\u5ba2\u6237 (\u9ad8R\u9ad8F\u9ad8M):** \u7ed9\u4e88VIP\u5f85\u9047\uff0c\u4f18\u5148\u4f53\u9a8c\u65b0\u54c1\uff0c\u4e2a\u6027\u5316\u4e13\u5c5e\u670d\u52a1\uff0c\u7ef4\u6301\u9ad8\u5fe0\u8bda\u5ea6\u3002\")\nprint(\"2. 
**\u91cd\u8981\u4fdd\u6301\u5ba2\u6237 (\u9ad8R\u4e2dF\u4e2dM):** \u9f13\u52b1\u63d0\u5347\u6d88\u8d39\u9891\u6b21\u548c\u91d1\u989d\uff0c\u53ef\u63a8\u8350\u5173\u8054\u5546\u54c1\uff0c\u79ef\u5206\u5956\u52b1\u3002\")\nprint(\"3. **\u91cd\u8981\u53d1\u5c55\u5ba2\u6237 (\u4e2dR\u9ad8F\u9ad8M):** \u6700\u8fd1\u4e0d\u6d3b\u8dc3\u4f46\u5386\u53f2\u6d88\u8d39\u9ad8\uff0c\u9700\u8981\u5524\u9192\uff0c\u901a\u8fc7\u4e2a\u6027\u5316\u63a8\u8350\u3001\u9650\u65f6\u4f18\u60e0\u523a\u6fc0\u590d\u8d2d\u3002\")\nprint(\"4. **\u91cd\u8981\u633d\u7559\u5ba2\u6237 (\u4f4eR\u9ad8F\u9ad8M):** \u7d27\u6025\u633d\u7559\uff0c\u53d1\u9001\u633d\u7559\u90ae\u4ef6\/\u77ed\u4fe1\uff0c\u63d0\u4f9b\u9ad8\u6298\u6263\u4f18\u60e0\u5238\uff0c\u4e86\u89e3\u6d41\u5931\u539f\u56e0\u3002\")\nprint(\"5. **\u6d41\u5931\u5ba2\u6237 (\u4f4eR\u4f4eF\u4f4eM):** \u6210\u672c\u8f83\u9ad8\uff0c\u53ef\u5c1d\u8bd5\u5c11\u91cf\u4f18\u60e0\u5238\u5524\u9192\uff0c\u6216\u653e\u5f03\u3002\")<\/code><\/pre>\n\n\n\n<p><strong>\u3010\u4e92\u52a8\u95ee\u7b54\u3011<\/strong><\/p>\n\n\n\n<ul>\n<li>\u4e3a\u4ec0\u4e48RFM\u4e2d\u7684R\u503c\u662f\u8d8a\u5c0f\u8d8a\u597d\uff0c\u800cF\u548cM\u503c\u662f\u8d8a\u5927\u8d8a\u597d\uff1f<\/li>\n\n\n\n<li>RFM\u8bc4\u5206\u65f6\uff0c\u7b49\u9891\u5206\u7bb1\u548c\u7b49\u8ddd\u5206\u7bb1\u5404\u6709\u4ec0\u4e48\u4f18\u7f3a\u70b9\uff1f\u5728\u4ec0\u4e48\u60c5\u51b5\u4e0b\u9009\u62e9\u54ea\u79cd\uff1f<\/li>\n\n\n\n<li>\u9664\u4e86\u6211\u4eec\u5b9a\u4e49\u7684\u8fd9\u4e9b\u5ba2\u6237\u7fa4\u7ec4\uff0c\u4f60\u8fd8\u80fd\u60f3\u5230\u54ea\u4e9b\u6709\u610f\u4e49\u7684RFM\u7ec4\u5408\u7fa4\u7ec4\uff1f<\/li>\n\n\n\n<li>\u5982\u4f55\u4f7f\u7528RFM\u6a21\u578b\u6765\u8bc4\u4f30\u4e00\u6b21\u8425\u9500\u6d3b\u52a8\u7684\u6548\u679c\uff1f<\/li>\n\n\n\n<li>RFM\u6a21\u578b\u6709\u54ea\u4e9b\u5c40\u9650\u6027\uff1f\u5728\u5b9e\u9645\u5e94\u7528\u4e2d\u9700\u8981\u6ce8\u610f\u4ec0\u4e48\uff1f<\/li>\n<\/ul>\n\n\n\n<h4 class=\"wp-block-heading\"><span class=\"ez-toc-section\" 
id=\"53_%E7%94%A8%E6%88%B7%E7%94%9F%E5%91%BD%E5%91%A8%E6%9C%9F%E5%88%86%E6%9E%90\"><\/span><strong>5.3 \u7528\u6237\u751f\u547d\u5468\u671f\u5206\u6790<\/strong><span class=\"ez-toc-section-end\"><\/span><\/h4>\n\n\n\n<p><strong>\u3010\u7406\u8bba\u8bb2\u89e3\u3011<\/strong><\/p>\n\n\n\n<p>\u7528\u6237\u751f\u547d\u5468\u671f\uff08User Lifecycle\uff09\u63cf\u8ff0\u4e86\u7528\u6237\u4ece\u9996\u6b21\u63a5\u89e6\u4ea7\u54c1\u5230\u6700\u7ec8\u6d41\u5931\u7684\u6574\u4e2a\u8fc7\u7a0b\u3002\u7406\u89e3\u7528\u6237\u5728\u4e0d\u540c\u751f\u547d\u5468\u671f\u9636\u6bb5\u7684\u7279\u5f81\u548c\u9700\u6c42\uff0c\u6709\u52a9\u4e8e\u6211\u4eec\u5236\u5b9a\u9488\u5bf9\u6027\u7684\u8fd0\u8425\u7b56\u7565\uff0c\u63d0\u9ad8\u7528\u6237\u7559\u5b58\u548cLTV\uff08\u751f\u547d\u5468\u671f\u4ef7\u503c\uff09\u3002<\/p>\n\n\n\n<p><strong>\u4e3b\u8981\u9636\u6bb5\uff1a<\/strong><\/p>\n\n\n\n<ol>\n<li><strong>\u65b0\u7528\u6237 (New User)\uff1a<\/strong>&nbsp;\u9996\u6b21\u6ce8\u518c\u6216\u9996\u6b21\u8d2d\u4e70\u7684\u7528\u6237\u3002<\/li>\n\n\n\n<li><strong>\u6d3b\u8dc3\u7528\u6237 (Active User)\uff1a<\/strong>&nbsp;\u5728\u4e00\u5b9a\u65f6\u95f4\u5185\u6709\u6301\u7eed\u884c\u4e3a\uff08\u5982\u767b\u5f55\u3001\u6d4f\u89c8\u3001\u8d2d\u4e70\uff09\u7684\u7528\u6237\u3002<\/li>\n\n\n\n<li><strong>\u7559\u5b58\u7528\u6237 (Retained User)\uff1a<\/strong>&nbsp;\u5728\u9996\u6b21\u884c\u4e3a\u540e\uff0c\u518d\u6b21\u8fdb\u884c\u5173\u952e\u884c\u4e3a\u7684\u7528\u6237\u3002<\/li>\n\n\n\n<li><strong>\u6c89\u9ed8\u7528\u6237 (Dormant User)\uff1a<\/strong>&nbsp;\u4e00\u6bb5\u65f6\u95f4\u5185\u6ca1\u6709\u6d3b\u8dc3\u884c\u4e3a\uff0c\u4f46\u5c1a\u672a\u5b8c\u5168\u6d41\u5931\u7684\u7528\u6237\u3002<\/li>\n\n\n\n<li><strong>\u6d41\u5931\u7528\u6237 (Churned 
User)\uff1a<\/strong>&nbsp;\u957f\u671f\u6ca1\u6709\u6d3b\u8dc3\u884c\u4e3a\uff0c\u88ab\u8ba4\u4e3a\u5df2\u79bb\u5f00\u5e73\u53f0\u7684\u7528\u6237\u3002<\/li>\n<\/ol>\n\n\n\n<p><strong>\u6838\u5fc3\u5206\u6790\u6307\u6807\uff1a<\/strong><\/p>\n\n\n\n<ul>\n<li><strong>\u7528\u6237\u7559\u5b58\u7387\uff1a<\/strong>&nbsp;\u8861\u91cf\u7528\u6237\u5728\u4e00\u6bb5\u65f6\u95f4\u540e\u4ecd\u4fdd\u6301\u6d3b\u8dc3\u7684\u6bd4\u4f8b\u3002<\/li>\n\n\n\n<li><strong>\u7528\u6237\u8f6c\u5316\u6f0f\u6597\uff1a<\/strong>&nbsp;\u8ffd\u8e2a\u7528\u6237\u4ece\u67d0\u4e2a\u8d77\u59cb\u70b9\uff08\u5982\u8bbf\u95ee\uff09\u5230\u6700\u7ec8\u76ee\u6807\uff08\u5982\u8d2d\u4e70\uff09\u7684\u6bcf\u4e00\u6b65\u8f6c\u5316\u7387\u3002<\/li>\n<\/ul>\n\n\n\n<p><strong>\u3010\u81ea\u52a8\u751f\u6210\u6570\u636e\u96c6\u4e0e\u4ee3\u7801\u5b9e\u4f8b\u3011<\/strong><\/p>\n\n\n\n<p>\u6211\u4eec\u5c06\u751f\u6210\u4e00\u4e2a\u5305\u542b\u7528\u6237\u6ce8\u518c\u3001\u6d4f\u89c8\u3001\u52a0\u8d2d\u548c\u8d2d\u4e70\u884c\u4e3a\u7684\u6a21\u62df\u6570\u636e\u96c6\uff0c\u7528\u4e8e\u7528\u6237\u751f\u547d\u5468\u671f\u5206\u6790\u3002<\/p>\n\n\n\n<p>python<\/p>\n\n\n\n<pre class=\"wp-block-code\"><code>import pandas as pd\nimport numpy as np\nfrom datetime import datetime, timedelta\n\n# --- \u6570\u636e\u96c6\u751f\u6210 ---\nnp.random.seed(42)\n\ndef generate_user_lifecycle_data(num_users=1000, start_date='2023-01-01', end_date='2023-03-31'):\n    users = &#91;f'U{i:04d}' for i in range(num_users)]\n    products = {f'P{i:03d}': {'category': np.random.choice(&#91;'Electronics', 'Apparel', 'Home', 'Books', 'Sports']), 'price': round(np.random.uniform(20, 1000), 2)} for i in range(50)}\n    product_ids = list(products.keys())\n\n    data = &#91;]\n    current_date = pd.to_datetime(start_date)\n    while current_date &lt;= pd.to_datetime(end_date):\n        for user_id in np.random.choice(users, size=np.random.randint(50, 200), replace=False): # \u6bcf\u5929\u6d3b\u8dc3\u7528\u6237\n            # 
\u6a21\u62df\u6d4f\u89c8\u884c\u4e3a\n            for _ in range(np.random.randint(1, 5)):\n                product_id = np.random.choice(product_ids)\n                data.append(&#91;user_id, product_id, current_date + timedelta(minutes=np.random.randint(0, 1440)), 'browse'])\n            \n            # \u6a21\u62df\u52a0\u8d2d\u884c\u4e3a (\u90e8\u5206\u6d4f\u89c8\u7528\u6237\u4f1a\u52a0\u8d2d)\n            if np.random.rand() &lt; 0.3: # 30%\u7684\u6d4f\u89c8\u7528\u6237\u4f1a\u52a0\u8d2d\n                product_id = np.random.choice(product_ids)\n                data.append(&#91;user_id, product_id, current_date + timedelta(minutes=np.random.randint(0, 1440)), 'add_to_cart'])\n\n            # \u6a21\u62df\u8d2d\u4e70\u884c\u4e3a (\u90e8\u5206\u52a0\u8d2d\u7528\u6237\u4f1a\u8d2d\u4e70)\n            if np.random.rand() &lt; 0.15: # 15%\u7684\u52a0\u8d2d\u7528\u6237\u4f1a\u8d2d\u4e70\n                product_id = np.random.choice(product_ids)\n                quantity = np.random.randint(1, 3)\n                price = products&#91;product_id]&#91;'price']\n                total_amount = round(price * quantity, 2)\n                data.append(&#91;user_id, product_id, current_date + timedelta(minutes=np.random.randint(0, 1440)), 'purchase', quantity, price, total_amount])\n        current_date += timedelta(days=1)\n\n    df_lifecycle = pd.DataFrame(data, columns=&#91;'user_id', 'product_id', 'event_time', 'event_type', 'quantity', 'price', 'total_amount'])\n    df_lifecycle&#91;'event_time'] = pd.to_datetime(df_lifecycle&#91;'event_time'])\n    \n    # \u586b\u5145NaN\u503c\uff0c\u56e0\u4e3a\u53ea\u6709purchase\u4e8b\u4ef6\u624d\u6709quantity, price, total_amount\n    df_lifecycle&#91;&#91;'quantity', 'price', 'total_amount']] = df_lifecycle&#91;&#91;'quantity', 'price', 'total_amount']].fillna(0)\n    \n    return df_lifecycle\n\ndf_lifecycle = generate_user_lifecycle_data(num_users=1000, start_date='2023-01-01', end_date='2023-03-31')\nprint(\"--- 
\u7528\u6237\u751f\u547d\u5468\u671f\u6570\u636e\u9884\u89c8 ---\")\nprint(df_lifecycle.head())\nprint(\"\\n\u4e8b\u4ef6\u7c7b\u578b\u5206\u5e03:\\n\", df_lifecycle&#91;'event_type'].value_counts())\n\n# --- \u7528\u6237\u751f\u547d\u5468\u671f\u9636\u6bb5\u8bc6\u522b ---\nprint(\"\\n--- \u7528\u6237\u751f\u547d\u5468\u671f\u9636\u6bb5\u8bc6\u522b ---\")\n\n# 1. \u8bc6\u522b\u65b0\u7528\u6237\uff08\u9996\u6b21\u8d2d\u4e70\u7528\u6237\uff09\nfirst_purchase_time = df_lifecycle&#91;df_lifecycle&#91;'event_type'] == 'purchase'].groupby('user_id')&#91;'event_time'].min().reset_index(name='first_purchase_time')\ndf_lifecycle = pd.merge(df_lifecycle, first_purchase_time, on='user_id', how='left')\ndf_lifecycle&#91;'is_new_user'] = (df_lifecycle&#91;'event_type'] == 'purchase') &amp; (df_lifecycle&#91;'event_time'] == df_lifecycle&#91;'first_purchase_time'])\n\n# 2. \u6d3b\u8dc3\u7528\u6237\u3001\u6c89\u9ed8\u7528\u6237\u3001\u6d41\u5931\u7528\u6237\n# \u5b9a\u4e49\u6d3b\u8dc3\u5468\u671f\u548c\u6c89\u9ed8\u5468\u671f\nactive_threshold_days = 7   # 7\u5929\u5185\u6709\u884c\u4e3a\u7b97\u6d3b\u8dc3\ndormant_threshold_days = 30 # 30\u5929\u5185\u65e0\u884c\u4e3a\u7b97\u6c89\u9ed8\uff0c\u8d85\u8fc730\u5929\u7b97\u6d41\u5931\n\ncurrent_date_for_analysis = df_lifecycle&#91;'event_time'].max() # \u4ee5\u6570\u636e\u96c6\u4e2d\u6700\u65b0\u65e5\u671f\u4f5c\u4e3a\u5f53\u524d\u65e5\u671f\n\nuser_last_activity = df_lifecycle.groupby('user_id')&#91;'event_time'].max().reset_index(name='last_activity_time')\nuser_last_activity&#91;'days_since_last_activity'] = (current_date_for_analysis - user_last_activity&#91;'last_activity_time']).dt.days\n\ndef get_user_status(days_since_last_activity):\n    if days_since_last_activity &lt;= active_threshold_days:\n        return '\u6d3b\u8dc3\u7528\u6237'\n    elif days_since_last_activity &lt;= dormant_threshold_days:\n        return '\u6c89\u9ed8\u7528\u6237'\n    else:\n        return 
'\u6d41\u5931\u7528\u6237'\n\nuser_last_activity&#91;'user_status'] = user_last_activity&#91;'days_since_last_activity'].apply(get_user_status)\n\nprint(\"\\n\u7528\u6237\u6d3b\u8dc3\u72b6\u6001\u7edf\u8ba1:\\n\", user_last_activity&#91;'user_status'].value_counts())\n\n# --- \u7528\u6237\u7559\u5b58\u7387\u5206\u6790 (\u6309\u9996\u6b21\u8d2d\u4e70\u6708\u4efd) ---\nprint(\"\\n--- \u7528\u6237\u7559\u5b58\u7387\u5206\u6790 ---\")\n\n# 1. \u786e\u5b9a\u6bcf\u4e2a\u7528\u6237\u7684\u9996\u6b21\u8d2d\u4e70\u6708\u4efd\ndf_purchases = df_lifecycle&#91;df_lifecycle&#91;'event_type'] == 'purchase'].copy()\ndf_purchases&#91;'cohort_month'] = df_purchases.groupby('user_id')&#91;'event_time'].transform('min').dt.to_period('M')\ndf_purchases&#91;'purchase_month'] = df_purchases&#91;'event_time'].dt.to_period('M')\n\n# 2. \u8ba1\u7b97\u7528\u6237\u751f\u547d\u5468\u671f\u6708\u4efd\ndf_purchases&#91;'cohort_period'] = (df_purchases&#91;'purchase_month'] - df_purchases&#91;'cohort_month']).apply(lambda x: x.n)\n\n# 3. \u7edf\u8ba1\u6bcf\u4e2a\u540c\u671f\u7fa4\uff08cohort\uff09\u5728\u4e0d\u540c\u6708\u4efd\u7684\u7559\u5b58\u7528\u6237\u6570\ncohort_counts = df_purchases.groupby(&#91;'cohort_month', 'cohort_period'])&#91;'user_id'].nunique().reset_index()\ncohort_pivot = cohort_counts.pivot_table(index='cohort_month', columns='cohort_period', values='user_id')\n\n# 4. 
\u8ba1\u7b97\u7559\u5b58\u7387\ncohort_sizes = cohort_pivot.iloc&#91;:, 0] # \u6bcf\u4e2a\u540c\u671f\u7fa4\u7684\u521d\u59cb\u7528\u6237\u6570\nretention_matrix = cohort_pivot.divide(cohort_sizes, axis=0)\n\nprint(\"\\n\u540c\u671f\u7fa4\u7559\u5b58\u7387\u77e9\u9635:\\n\", retention_matrix)\n\n# \u53ef\u89c6\u5316\u7559\u5b58\u7387\u70ed\u529b\u56fe\nplt.figure(figsize=(10, 7))\nsns.heatmap(retention_matrix, annot=True, fmt=\".1%\", cmap='Blues', linewidths=.5)\nplt.title('\u7528\u6237\u7559\u5b58\u7387 (\u6309\u9996\u6b21\u8d2d\u4e70\u6708\u4efd)')\nplt.xlabel('\u751f\u547d\u5468\u671f\u6708\u4efd')\nplt.ylabel('\u9996\u6b21\u8d2d\u4e70\u6708\u4efd')\nplt.show()\n\n# --- \u7528\u6237\u8f6c\u5316\u6f0f\u6597\u5206\u6790 ---\nprint(\"\\n--- \u7528\u6237\u8f6c\u5316\u6f0f\u6597\u5206\u6790 ---\")\n\n# \u5b9a\u4e49\u6f0f\u6597\u6b65\u9aa4 (\u4f8b\u5982\uff1a\u6d4f\u89c8 -&gt; \u52a0\u8d2d -&gt; \u8d2d\u4e70)\nfunnel_steps = &#91;'browse', 'add_to_cart', 'purchase']\n\n# \u7edf\u8ba1\u6bcf\u4e2a\u6b65\u9aa4\u7684\u7528\u6237\u6570\nfunnel_data = {}\nfor step in funnel_steps:\n    if step == 'purchase':\n        # \u8d2d\u4e70\u4e8b\u4ef6\u4e2d\uff0c\u4e00\u4e2a\u7528\u6237\u53ef\u80fd\u6709\u591a\u7b14\u8ba2\u5355\uff0c\u8fd9\u91cc\u7edf\u8ba1\u72ec\u7acb\u7528\u6237\u6570\n        funnel_data&#91;step] = df_lifecycle&#91;df_lifecycle&#91;'event_type'] == step]&#91;'user_id'].nunique()\n    else:\n        # \u6d4f\u89c8\u548c\u52a0\u8d2d\u4e8b\u4ef6\uff0c\u7edf\u8ba1\u72ec\u7acb\u7528\u6237\u6570\n        funnel_data&#91;step] = df_lifecycle&#91;df_lifecycle&#91;'event_type'] == step]&#91;'user_id'].nunique()\n\ndf_funnel = pd.DataFrame.from_dict(funnel_data, orient='index', columns=&#91;'users'])\ndf_funnel&#91;'conversion_rate'] = (df_funnel&#91;'users'] \/ df_funnel&#91;'users'].shift(1)).fillna(1) * 100 # \u4e0e\u4e0a\u4e00\u6b65\u7684\u8f6c\u5316\u7387\ndf_funnel&#91;'total_conversion_rate'] = df_funnel&#91;'users'] \/ df_funnel&#91;'users'].iloc&#91;0] * 100 # 
\u76f8\u5bf9\u4e8e\u7b2c\u4e00\u6b65\u7684\u603b\u8f6c\u5316\u7387\n\nprint(\"\\n\u7528\u6237\u8f6c\u5316\u6f0f\u6597:\\n\", df_funnel)\n\n# \u6f0f\u6597\u53ef\u89c6\u5316\nfig, ax = plt.subplots(figsize=(8, 6))\nbars = ax.bar(df_funnel.index, df_funnel&#91;'users'], color='lightgreen')\nax.set_title('\u7528\u6237\u8f6c\u5316\u6f0f\u6597')\nax.set_xlabel('\u4e8b\u4ef6\u7c7b\u578b')\nax.set_ylabel('\u7528\u6237\u6570\u91cf')\nax.ticklabel_format(style='plain', axis='y') # \u53d6\u6d88\u79d1\u5b66\u8ba1\u6570\u6cd5\n\n# \u6dfb\u52a0\u8f6c\u5316\u7387\u6807\u7b7e\nfor i, bar in enumerate(bars):\n    if i &gt; 0:\n        total_rate = df_funnel&#91;'total_conversion_rate'].iloc&#91;i]\n        step_rate = df_funnel&#91;'conversion_rate'].iloc&#91;i]\n        ax.text(bar.get_x() + bar.get_width() \/ 2, bar.get_height(),\n                f'{bar.get_height()}\\n({step_rate:.1f}%)\\n({total_rate:.1f}% total)',\n                ha='center', va='bottom', fontsize=10)\n    else:\n        ax.text(bar.get_x() + bar.get_width() \/ 2, bar.get_height(),\n                f'{bar.get_height()}', ha='center', va='bottom', fontsize=10)\nplt.tight_layout()\nplt.show()\n\n# \u3010\u8fd0\u8425\u7b56\u7565\u5efa\u8bae\u3011\nprint(\"\\n--- \u57fa\u4e8e\u7528\u6237\u751f\u547d\u5468\u671f\u5206\u6790\u7684\u8fd0\u8425\u7b56\u7565\u5efa\u8bae ---\")\nprint(\"1. **\u65b0\u7528\u6237\u5f15\u5165:** \u5173\u6ce8\u65b0\u7528\u6237\u6ce8\u518c\/\u9996\u8d2d\u8f6c\u5316\u7387\uff0c\u4f18\u5316\u65b0\u624b\u5f15\u5bfc\u3001\u9996\u5355\u4f18\u60e0\u3002\")\nprint(\"2. **\u63d0\u5347\u6d3b\u8dc3\u5ea6:** \u5bf9\u6c89\u9ed8\u7528\u6237\u8fdb\u884c\u7cbe\u51c6\u8425\u9500\uff0c\u5982\u53d1\u9001\u4e2a\u6027\u5316\u63a8\u8350\u3001\u4f18\u60e0\u5238\u5524\u9192\u3002\")\nprint(\"3. **\u633d\u7559\u6d41\u5931\u7528\u6237:** \u5bf9\u6d41\u5931\u7528\u6237\u8fdb\u884c\u8c03\u67e5\uff0c\u4e86\u89e3\u6d41\u5931\u539f\u56e0\uff0c\u5c1d\u8bd5\u9ad8\u4ef7\u503c\u53ec\u56de\u6d3b\u52a8\u3002\")\nprint(\"4. 
**\u4f18\u5316\u8f6c\u5316\u6f0f\u6597:** \u9488\u5bf9\u8f6c\u5316\u7387\u4f4e\u7684\u73af\u8282\u8fdb\u884c\u5206\u6790\uff0c\u5982\u201c\u52a0\u8d2d\u5230\u8d2d\u4e70\u201d\u8f6c\u5316\u4f4e\uff0c\u53ef\u80fd\u662f\u7ed3\u7b97\u6d41\u7a0b\u590d\u6742\u3001\u8fd0\u8d39\u9ad8\u3001\u652f\u4ed8\u65b9\u5f0f\u5c11\u7b49\u95ee\u9898\u3002\")<\/code><\/pre>\n\n\n\n<p><strong>\u3010\u4e92\u52a8\u95ee\u7b54\u3011<\/strong><\/p>\n\n\n\n<ul>\n<li>\u7528\u6237\u751f\u547d\u5468\u671f\u5206\u6790\u5bf9\u7535\u5546\u8fd0\u8425\u6709\u4ec0\u4e48\u5b9e\u9645\u610f\u4e49\uff1f<\/li>\n\n\n\n<li>\u5728\u8ba1\u7b97\u7559\u5b58\u7387\u65f6\uff0c\u4e3a\u4ec0\u4e48\u9009\u62e9\u201c\u540c\u671f\u7fa4\uff08Cohort\uff09\u201d\u7684\u6982\u5ff5\uff1f<\/li>\n\n\n\n<li>\u5982\u4f55\u6839\u636e\u6f0f\u6597\u5206\u6790\u7684\u7ed3\u679c\uff0c\u5224\u65ad\u54ea\u4e2a\u73af\u8282\u6700\u9700\u8981\u4f18\u5316\uff1f<\/li>\n\n\n\n<li>\u9664\u4e86\u6211\u4eec\u5b9a\u4e49\u7684\u8fd9\u4e9b\u4e8b\u4ef6\u7c7b\u578b\uff0c\u4f60\u8fd8\u80fd\u60f3\u5230\u54ea\u4e9b\u53ef\u4ee5\u653e\u5165\u6f0f\u6597\u5206\u6790\u7684\u7535\u5546\u884c\u4e3a\uff1f<\/li>\n\n\n\n<li>\u5982\u4f55\u8bc6\u522b\u201c\u5047\u6027\u6d41\u5931\u201d\u7528\u6237\uff08\u4f8b\u5982\uff0c\u67d0\u4e2a\u7528\u6237\u53ea\u662f\u5b63\u8282\u6027\u8d2d\u4e70\uff09\uff1f<\/li>\n<\/ul>\n\n\n\n<h4 class=\"wp-block-heading\"><span class=\"ez-toc-section\" id=\"54_%E7%94%A8%E6%88%B7%E4%BB%B7%E5%80%BC%E4%B8%8E%E6%B5%81%E5%A4%B1%E9%A2%84%E6%B5%8B%EF%BC%88%E5%85%A5%E9%97%A8%EF%BC%89\"><\/span><strong>5.4 \u7528\u6237\u4ef7\u503c\u4e0e\u6d41\u5931\u9884\u6d4b\uff08\u5165\u95e8\uff09<\/strong><span 
class=\"ez-toc-section-end\"><\/span><\/h4>\n\n\n\n<p><strong>\u3010\u7406\u8bba\u8bb2\u89e3\u3011<\/strong><\/p>\n\n\n\n<p>\u5728\u7528\u6237\u751f\u547d\u5468\u671f\u5206\u6790\u7684\u57fa\u7840\u4e0a\uff0c\u6211\u4eec\u53ef\u4ee5\u8fdb\u4e00\u6b65\u5c1d\u8bd5\u9884\u6d4b\u7528\u6237\u7684\u672a\u6765\u884c\u4e3a\uff0c\u4f8b\u5982\u9884\u6d4b\u7528\u6237\u672a\u6765\u7684\u6d88\u8d39\u91d1\u989d\uff08\u7528\u6237\u4ef7\u503c\uff09\u6216\u9884\u6d4b\u54ea\u4e9b\u7528\u6237\u5373\u5c06\u6d41\u5931\uff08\u6d41\u5931\u9884\u6d4b\uff09\u3002\u8fd9\u9700\u8981\u5f15\u5165\u4e00\u4e9b\u7b80\u5355\u7684\u673a\u5668\u5b66\u4e60\u6982\u5ff5\u3002<\/p>\n\n\n\n<ul>\n<li><strong>\u7528\u6237\u4ef7\u503c\u9884\u6d4b\uff1a<\/strong>&nbsp;\u9884\u6d4b\u7528\u6237\u5728\u672a\u6765\u4e00\u6bb5\u65f6\u95f4\u5185\u53ef\u80fd\u4ea7\u751f\u7684\u6d88\u8d39\u91d1\u989d\u3002<\/li>\n\n\n\n<li><strong>\u7528\u6237\u6d41\u5931\u9884\u6d4b\uff1a<\/strong>&nbsp;\u8bc6\u522b\u54ea\u4e9b\u7528\u6237\u6709\u9ad8\u98ce\u9669\u5728\u672a\u6765\u4e00\u6bb5\u65f6\u95f4\u5185\u6d41\u5931\u3002<\/li>\n<\/ul>\n\n\n\n<p><strong>\u5e38\u7528\u65b9\u6cd5\uff1a<\/strong><\/p>\n\n\n\n<ul>\n<li><strong>\u7ebf\u6027\u56de\u5f52\uff1a<\/strong>&nbsp;\u9884\u6d4b\u8fde\u7eed\u6570\u503c\u578b\u76ee\u6807\uff08\u5982\u672a\u6765\u6d88\u8d39\u91d1\u989d\uff09\u3002<\/li>\n\n\n\n<li><strong>\u903b\u8f91\u56de\u5f52\uff1a<\/strong>&nbsp;\u9884\u6d4b\u4e8c\u5206\u7c7b\u76ee\u6807\uff08\u5982\u662f\u5426\u6d41\u5931\uff09\u3002<\/li>\n<\/ul>\n\n\n\n<p><strong>\u3010\u81ea\u52a8\u751f\u6210\u6570\u636e\u96c6\u4e0e\u4ee3\u7801\u5b9e\u4f8b\u3011<\/strong><\/p>\n\n\n\n<p>\u6211\u4eec\u5c06\u57fa\u4e8e\u4e4b\u524d\u7684RFM\u6570\u636e\u548c\u7528\u6237\u6d3b\u8dc3\u72b6\u6001\uff0c\u751f\u6210\u4e00\u4e9b\u989d\u5916\u7684\u7279\u5f81\uff0c\u5e76\u8fdb\u884c\u7b80\u5355\u7684\u9884\u6d4b\u6a21\u578b\u6784\u5efa\u3002<\/p>\n\n\n\n<p>python<\/p>\n\n\n\n<pre class=\"wp-block-code\"><code>import pandas as pd\nimport numpy 
as np\nfrom datetime import datetime, timedelta\nfrom sklearn.model_selection import train_test_split\nfrom sklearn.linear_model import LinearRegression, LogisticRegression\nfrom sklearn.metrics import mean_squared_error, accuracy_score, classification_report\nimport matplotlib.pyplot as plt\nimport seaborn as sns\n\n# --- \u6570\u636e\u96c6\u751f\u6210 (\u590d\u7528RFM\u6570\u636e\u548c\u7528\u6237\u6d3b\u8dc3\u72b6\u6001) ---\nnp.random.seed(42)\n\ndef generate_prediction_data(num_users=1000, start_date='2022-01-01', end_date='2023-12-31'):\n    # \u6a21\u62dfRFM\u6570\u636e\n    df_orders_rfm = generate_rfm_orders_data(num_users, start_date, end_date)\n    snapshot_date = df_orders_rfm&#91;'order_time'].max() + timedelta(days=1)\n    rfm = df_orders_rfm.groupby('user_id').agg(\n        recency=('order_time', lambda date: (snapshot_date - date.max()).days),\n        frequency=('order_id', 'nunique'),\n        monetary=('total_amount', 'sum')\n    ).reset_index()\n\n    # \u6a21\u62df\u7528\u6237\u6d3b\u8dc3\u72b6\u6001 (\u57fa\u4e8eRFM\u7684recency)\n    active_threshold_days = 30\n    rfm&#91;'is_churned'] = (rfm&#91;'recency'] &gt; active_threshold_days).astype(int) # \u8d85\u8fc730\u5929\u672a\u8d2d\u4e70\u89c6\u4e3a\u6d41\u5931\n\n    # \u6a21\u62df\u989d\u5916\u7279\u5f81\uff0c\u7528\u4e8e\u9884\u6d4b\n    rfm&#91;'avg_item_price'] = df_orders_rfm.groupby('user_id')&#91;'price'].mean().fillna(0).values\n    rfm&#91;'total_quantity'] = df_orders_rfm.groupby('user_id')&#91;'quantity'].sum().fillna(0).values\n    rfm&#91;'days_since_first_purchase'] = (snapshot_date - df_orders_rfm.groupby('user_id')&#91;'order_time'].min()).dt.days.fillna(0).values\n\n    # \u6a21\u62df\u672a\u6765\u6d88\u8d39 (\u4f5c\u4e3a\u4ef7\u503c\u9884\u6d4b\u7684\u76ee\u6807\u53d8\u91cf)\n    rfm&#91;'future_monetary'] = rfm&#91;'monetary'] * np.random.uniform(0.5, 1.5, size=len(rfm)) # \u7b80\u5355\u6a21\u62df\uff0c\u4e0e\u5f53\u524d\u6d88\u8d39\u76f8\u5173\n    
rfm.loc&#91;rfm&#91;'is_churned'] == 1, 'future_monetary'] = rfm.loc&#91;rfm&#91;'is_churned'] == 1, 'future_monetary'] * np.random.uniform(0, 0.2, size=rfm&#91;'is_churned'].sum()) # \u6d41\u5931\u7528\u6237\u672a\u6765\u6d88\u8d39\u4f4e\n\n    return rfm\n\ndf_rfm_prediction = generate_prediction_data(num_users=1000)\nprint(\"--- \u7528\u4e8e\u9884\u6d4b\u7684\u6570\u636e\u9884\u89c8 ---\")\nprint(df_rfm_prediction.head())\nprint(\"\\n\u6d41\u5931\u7528\u6237\u6bd4\u4f8b:\", df_rfm_prediction&#91;'is_churned'].mean())\n\n# --- \u7528\u6237\u4ef7\u503c\u9884\u6d4b (\u7b80\u5355\u7ebf\u6027\u56de\u5f52) ---\nprint(\"\\n--- \u7528\u6237\u4ef7\u503c\u9884\u6d4b (\u672a\u6765\u6d88\u8d39\u91d1\u989d) ---\")\n\n# \u9009\u62e9\u7279\u5f81 (X) \u548c\u76ee\u6807\u53d8\u91cf (y)\nfeatures_value = &#91;'recency', 'frequency', 'monetary', 'avg_item_price', 'total_quantity', 'days_since_first_purchase']\ntarget_value = 'future_monetary'\n\nX_value = df_rfm_prediction&#91;features_value]\ny_value = df_rfm_prediction&#91;target_value]\n\n# \u5212\u5206\u8bad\u7ec3\u96c6\u548c\u6d4b\u8bd5\u96c6\nX_train_value, X_test_value, y_train_value, y_test_value = train_test_split(X_value, y_value, test_size=0.2, random_state=42)\n\n# \u8bad\u7ec3\u7ebf\u6027\u56de\u5f52\u6a21\u578b\nlr_model = LinearRegression()\nlr_model.fit(X_train_value, y_train_value)\n\n# \u8fdb\u884c\u9884\u6d4b\ny_pred_value = lr_model.predict(X_test_value)\n\n# \u8bc4\u4f30\u6a21\u578b\nrmse = np.sqrt(mean_squared_error(y_test_value, y_pred_value))\nprint(f\"\\n\u7528\u6237\u4ef7\u503c\u9884\u6d4b - RMSE (\u5747\u65b9\u6839\u8bef\u5dee): {rmse:.2f}\")\n\n# \u53ef\u89c6\u5316\u9884\u6d4b\u7ed3\u679c (\u90e8\u5206\u6570\u636e)\nplt.figure(figsize=(10, 6))\nplt.scatter(y_test_value, y_pred_value, alpha=0.6)\nplt.plot(&#91;y_test_value.min(), y_test_value.max()], &#91;y_test_value.min(), y_test_value.max()], 'r--') # 
\u7ed8\u5236y=x\u7ebf\nplt.title('\u7528\u6237\u672a\u6765\u6d88\u8d39\u91d1\u989d\u9884\u6d4b\u7ed3\u679c')\nplt.xlabel('\u771f\u5b9e\u672a\u6765\u6d88\u8d39\u91d1\u989d')\nplt.ylabel('\u9884\u6d4b\u672a\u6765\u6d88\u8d39\u91d1\u989d')\nplt.show()\n\n# \u3010\u8fd0\u8425\u5efa\u8bae\u3011\nprint(\"\\n--- \u57fa\u4e8e\u7528\u6237\u4ef7\u503c\u9884\u6d4b\u7684\u8fd0\u8425\u7b56\u7565\u5efa\u8bae ---\")\nprint(\"1. **\u7cbe\u51c6\u8425\u9500:** \u5bf9\u9884\u6d4b\u672a\u6765\u4ef7\u503c\u9ad8\u7684\u7528\u6237\uff0c\u63d0\u4f9b\u4e13\u5c5e\u4f18\u60e0\u548c\u4e2a\u6027\u5316\u63a8\u8350\uff0c\u8fdb\u4e00\u6b65\u63d0\u5347LTV\u3002\")\nprint(\"2. **\u8d44\u6e90\u5206\u914d:** \u5c06\u8425\u9500\u9884\u7b97\u66f4\u591a\u5730\u6295\u5165\u5230\u9ad8\u4ef7\u503c\u6f5c\u529b\u7684\u7528\u6237\u4e0a\u3002\")\n\n\n# --- \u7528\u6237\u6d41\u5931\u9884\u6d4b (\u7b80\u5355\u903b\u8f91\u56de\u5f52) ---\nprint(\"\\n--- \u7528\u6237\u6d41\u5931\u9884\u6d4b ---\")\n\n# \u9009\u62e9\u7279\u5f81 (X) \u548c\u76ee\u6807\u53d8\u91cf (y)\nfeatures_churn = &#91;'recency', 'frequency', 'monetary', 'avg_item_price', 'total_quantity', 'days_since_first_purchase']\ntarget_churn = 'is_churned' # 0\u8868\u793a\u672a\u6d41\u5931\uff0c1\u8868\u793a\u6d41\u5931\n\nX_churn = df_rfm_prediction&#91;features_churn]\ny_churn = df_rfm_prediction&#91;target_churn]\n\n# \u5212\u5206\u8bad\u7ec3\u96c6\u548c\u6d4b\u8bd5\u96c6\nX_train_churn, X_test_churn, y_train_churn, y_test_churn = train_test_split(X_churn, y_churn, test_size=0.2, random_state=42, stratify=y_churn) # stratify\u4fdd\u6301\u76ee\u6807\u53d8\u91cf\u6bd4\u4f8b\n\n# \u8bad\u7ec3\u903b\u8f91\u56de\u5f52\u6a21\u578b\nlr_churn_model = LogisticRegression(solver='liblinear') # \u4f7f\u7528liblinear\u907f\u514d\u8b66\u544a\nlr_churn_model.fit(X_train_churn, y_train_churn)\n\n# \u8fdb\u884c\u9884\u6d4b\ny_pred_churn = lr_churn_model.predict(X_test_churn)\ny_pred_proba_churn = lr_churn_model.predict_proba(X_test_churn)&#91;:, 1] # 
\u9884\u6d4b\u6d41\u5931\u6982\u7387\n\n# \u8bc4\u4f30\u6a21\u578b\naccuracy = accuracy_score(y_test_churn, y_pred_churn)\nprint(f\"\\n\u7528\u6237\u6d41\u5931\u9884\u6d4b - \u51c6\u786e\u7387: {accuracy:.2f}\")\nprint(\"\\n\u5206\u7c7b\u62a5\u544a:\\n\", classification_report(y_test_churn, y_pred_churn))\n\n# \u67e5\u770b\u6d41\u5931\u6982\u7387\u6700\u9ad8\u7684\u5ba2\u6237\ndf_test_churn = X_test_churn.copy()\ndf_test_churn&#91;'actual_churn'] = y_test_churn\ndf_test_churn&#91;'predicted_churn_proba'] = y_pred_proba_churn\nprint(\"\\n\u9884\u6d4b\u6d41\u5931\u6982\u7387\u6700\u9ad8\u7684Top 10\u7528\u6237:\\n\", df_test_churn.sort_values(by='predicted_churn_proba', ascending=False).head(10))\n\n# \u3010\u8fd0\u8425\u5efa\u8bae\u3011\nprint(\"\\n--- \u57fa\u4e8e\u7528\u6237\u6d41\u5931\u9884\u6d4b\u7684\u8fd0\u8425\u7b56\u7565\u5efa\u8bae ---\")\nprint(\"1. **\u63d0\u524d\u9884\u8b66:** \u8bc6\u522b\u51fa\u9ad8\u6d41\u5931\u98ce\u9669\u7684\u7528\u6237\uff0c\u5728\u4ed6\u4eec\u771f\u6b63\u6d41\u5931\u524d\u91c7\u53d6\u5e72\u9884\u63aa\u65bd\u3002\")\nprint(\"2. **\u7cbe\u51c6\u633d\u7559:** \u5bf9\u9ad8\u98ce\u9669\u7528\u6237\u63d0\u4f9b\u4e2a\u6027\u5316\u7684\u633d\u7559\u4f18\u60e0\u3001\u4e13\u5c5e\u670d\u52a1\u6216\u95ee\u5377\u8c03\u67e5\uff0c\u4e86\u89e3\u5e76\u89e3\u51b3\u75db\u70b9\u3002\")\nprint(\"3. 
**\u8d44\u6e90\u4f18\u5316:** \u5c06\u633d\u7559\u8d44\u6e90\u96c6\u4e2d\u5728\u6700\u6709\u4ef7\u503c\u4e14\u6709\u6d41\u5931\u98ce\u9669\u7684\u7528\u6237\u4e0a\u3002\")<\/code><\/pre>\n\n\n\n<p><strong>\u3010\u4e92\u52a8\u95ee\u7b54\u3011<\/strong><\/p>\n\n\n\n<ul>\n<li>\u7ebf\u6027\u56de\u5f52\u548c\u903b\u8f91\u56de\u5f52\u5206\u522b\u9002\u7528\u4e8e\u4ec0\u4e48\u7c7b\u578b\u7684\u9884\u6d4b\u95ee\u9898\uff1f\u5b83\u4eec\u6709\u4ec0\u4e48\u533a\u522b\uff1f<\/li>\n\n\n\n<li>RMSE\u3001\u51c6\u786e\u7387\u3001\u7cbe\u786e\u7387\u3001\u53ec\u56de\u7387\u3001F1-score\u8fd9\u4e9b\u6a21\u578b\u8bc4\u4f30\u6307\u6807\u5206\u522b\u4ee3\u8868\u4ec0\u4e48\u542b\u4e49\uff1f\u5728\u6d41\u5931\u9884\u6d4b\u4e2d\uff0c\u54ea\u4e2a\u6307\u6807\u53ef\u80fd\u66f4\u91cd\u8981\uff1f<\/li>\n\n\n\n<li><code>train_test_split<\/code>&nbsp;\u7684&nbsp;<code>stratify<\/code>&nbsp;\u53c2\u6570\u6709\u4ec0\u4e48\u4f5c\u7528\uff1f\u5728\u4ec0\u4e48\u60c5\u51b5\u4e0b\u4f7f\u7528\u5b83\uff1f<\/li>\n\n\n\n<li>\u9664\u4e86\u6211\u4eec\u4f7f\u7528\u7684\u8fd9\u4e9b\u7279\u5f81\uff0c\u4f60\u8fd8\u80fd\u60f3\u5230\u54ea\u4e9b\u53ef\u4ee5\u7528\u6765\u9884\u6d4b\u7528\u6237\u4ef7\u503c\u6216\u6d41\u5931\u7684\u7279\u5f81\uff1f<\/li>\n\n\n\n<li>\u5728\u5b9e\u9645\u5e94\u7528\u4e2d\uff0c\u5982\u4f55\u5c06\u9884\u6d4b\u7ed3\u679c\u8f6c\u5316\u4e3a\u5177\u4f53\u7684\u8fd0\u8425\u884c\u52a8\uff1f<\/li>\n<\/ul>\n","protected":false},"excerpt":{"rendered":"<p>5.1 \u7528\u6237\u753b\u50cf\u6784\u5efa\u57fa\u7840 \u3010\u7406\u8bba\u8bb2\u89e3\u3011 \u7528\u6237\u753b\u50cf\uff0c\u5c31\u50cf\u7ed9\u6bcf\u4e2a\u7528\u6237\u753b\u4e00\u5f20\u201c\u6570\u5b57\u8096\u50cf\u201d\uff0c\u4e0a\u9762\u8bb0\u5f55\u7740\u4ed6\u4eec\u7684&hellip; <a href=\"http:\/\/viplao.com\/index.php\/2025\/10\/18\/%e3%80%90%e8%bf%90%e8%90%a5%e6%95%b0%e6%8d%ae%e5%88%86%e6%9e%90-%e8%bf%9b%e9%98%b6%e7%af%87%e3%80%91%e7%94%a8%e6%88%b7%e8%a1%8c%e4%b8%ba%e5%88%86%e6%9e%90\/\" class=\"more-link read-more\" 
rel=\"bookmark\">\u7ee7\u7eed\u9605\u8bfb <span class=\"screen-reader-text\">\u3010\u8fd0\u8425\u6570\u636e\u5206\u6790-\u8fdb\u9636\u7bc7\u3011\u7528\u6237\u884c\u4e3a\u5206\u6790<\/span><i class=\"fa fa-arrow-right\"><\/i><\/a><\/p>\n","protected":false},"author":1,"featured_media":0,"comment_status":"closed","ping_status":"open","sticky":false,"template":"","format":"standard","meta":[],"categories":[1],"tags":[28],"views":917,"_links":{"self":[{"href":"http:\/\/viplao.com\/index.php\/wp-json\/wp\/v2\/posts\/4078"}],"collection":[{"href":"http:\/\/viplao.com\/index.php\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"http:\/\/viplao.com\/index.php\/wp-json\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"http:\/\/viplao.com\/index.php\/wp-json\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"http:\/\/viplao.com\/index.php\/wp-json\/wp\/v2\/comments?post=4078"}],"version-history":[{"count":2,"href":"http:\/\/viplao.com\/index.php\/wp-json\/wp\/v2\/posts\/4078\/revisions"}],"predecessor-version":[{"id":4101,"href":"http:\/\/viplao.com\/index.php\/wp-json\/wp\/v2\/posts\/4078\/revisions\/4101"}],"wp:attachment":[{"href":"http:\/\/viplao.com\/index.php\/wp-json\/wp\/v2\/media?parent=4078"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"http:\/\/viplao.com\/index.php\/wp-json\/wp\/v2\/categories?post=4078"},{"taxonomy":"post_tag","embeddable":true,"href":"http:\/\/viplao.com\/index.php\/wp-json\/wp\/v2\/tags?post=4078"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}