{"id":3899,"date":"2025-08-20T11:05:19","date_gmt":"2025-08-20T03:05:19","guid":{"rendered":"http:\/\/viplao.com\/?p=3899"},"modified":"2025-09-13T23:17:52","modified_gmt":"2025-09-13T15:17:52","slug":"%e3%80%90python%e5%ae%9e%e8%b7%b5%e6%a1%88%e4%be%8b%e3%80%91%e7%94%b5%e5%95%86%e5%b9%b3%e5%8f%b0%e6%95%b0%e6%8d%ae%e5%88%86%e6%9e%90%e5%92%8c%e6%8c%96%e6%8e%98-%e7%94%a8%e6%88%b7%e8%a1%8c%e4%b8%ba-3","status":"publish","type":"post","link":"http:\/\/viplao.com\/index.php\/2025\/08\/20\/%e3%80%90python%e5%ae%9e%e8%b7%b5%e6%a1%88%e4%be%8b%e3%80%91%e7%94%b5%e5%95%86%e5%b9%b3%e5%8f%b0%e6%95%b0%e6%8d%ae%e5%88%86%e6%9e%90%e5%92%8c%e6%8c%96%e6%8e%98-%e7%94%a8%e6%88%b7%e8%a1%8c%e4%b8%ba-3\/","title":{"rendered":"\u3010Python\u5b9e\u8df5\u6848\u4f8b\u3011\u7535\u5546\u5e73\u53f0\u6570\u636e\u5206\u6790\u548c\u6316\u6398 &#8211; \u7528\u6237\u884c\u4e3a\u5206\u6790"},"content":{"rendered":"\n<p><\/p>\n\n\n\n<p>\u5f00\u53d1\u601d\u8def \u8be5\u811a\u672c\u5c06\u5305\u542b\u4ee5\u4e0b\u529f\u80fd\uff1a<\/p>\n\n\n\n<ol>\n<li>\u6a21\u62df\u751f\u6210\u5305\u542b\u7528\u6237\u8d2d\u4e70\u3001\u6d4f\u89c8\u3001\u52a0\u8d2d\u7b49\u884c\u4e3a\u7684\u7535\u5546\u6570\u636e\u3002<\/li>\n\n\n\n<li>\u8fdb\u884c\u6570\u636e\u9884\u5904\u7406\u548c\u7528\u6237\u884c\u4e3a\u7279\u5f81\u5de5\u7a0b\u3002<\/li>\n\n\n\n<li>\u4f7f\u7528K-Means\u805a\u7c7b\u7b97\u6cd5\u5bf9\u7528\u6237\u8fdb\u884c\u7ec6\u5206\u3002<\/li>\n\n\n\n<li>\u5206\u6790\u5e76\u53ef\u89c6\u5316\u5404\u4e2a\u7528\u6237\u7fa4\u4f53\u7684\u7279\u5f81\u3002<\/li>\n\n\n\n<li>\u751f\u6210\u4e00\u4efd\u5305\u542b\u5206\u6790\u8fc7\u7a0b\u3001\u7528\u6237\u7fa4\u4f53\u753b\u50cf\u548c\u8fd0\u8425\u5efa\u8bae\u7684\u7efc\u5408\u62a5\u544a\u3002<\/li>\n<\/ol>\n\n\n\n<p><\/p>\n\n\n\n<pre class=\"wp-block-code\"><code>import pandas as pd\nimport numpy as np\nfrom sklearn.cluster import KMeans\nfrom sklearn.preprocessing import StandardScaler, LabelEncoder\nfrom sklearn.decomposition import PCA\nfrom sklearn.metrics import silhouette_score\nimport matplotlib.pyplot as plt\nimport seaborn as sns\nfrom datetime import datetime, timedelta\nimport warnings\nwarnings.filterwarnings(\"ignore\")\n\n# --- \u914d\u7f6e ---\nNUM_USERS = 2000\nNUM_PRODUCTS = 500\nREPORT_PREFIX = '\u7535\u5546\u7528\u6237\u884c\u4e3a\u5206\u6790\u62a5\u544a'\nRANDOM_SEED = 42\n\n# --- \u6570\u636e\u751f\u6210 ---\n\ndef generate_sample_user_behavior_data(n_users, n_products):\n    \"\"\"\u751f\u6210\u6a21\u62df\u7684\u7528\u6237\u884c\u4e3a\u6570\u636e\"\"\"\n    print(\"--- \u6b63\u5728\u751f\u6210\u6a21\u62df\u7528\u6237\u884c\u4e3a\u6570\u636e ---\")\n    np.random.seed(RANDOM_SEED)\n    \n    data = &#91;]\n    \n    # \u6a21\u62df\u7528\u6237\u57fa\u7840\u4fe1\u606f\n    user_ids = &#91;f'user_{i}' for i in range(1, n_users + 1)]\n    user_info = &#91;]\n    for user_id in user_ids:\n        age_group = np.random.choice(&#91;'18-25', '26-35', '36-45', '46-55', '55+'], p=&#91;0.2, 0.3, 0.25, 0.15, 0.1])\n        gender = np.random.choice(&#91;'Male', 'Female'], p=&#91;0.5, 0.5])\n        location = np.random.choice(&#91;'Tier_1', 'Tier_2', 'Tier_3'], p=&#91;0.3, 0.4, 0.3])\n        user_info.append({'user_id': user_id, 'age_group': age_group, 'gender': gender, 'location': location})\n    df_users = pd.DataFrame(user_info)\n\n    # \u6a21\u62df\u7528\u6237\u884c\u4e3a (\u6d4f\u89c8\u3001\u52a0\u8d2d\u3001\u8d2d\u4e70)\n    # \u7b80\u5316\u6a21\u578b\uff1a\u6bcf\u4e2a\u7528\u6237\u6709\u4e0d\u540c\u6570\u91cf\u7684\u884c\u4e3a\u8bb0\u5f55\n    for user_id in user_ids:\n        user_age_group = df_users&#91;df_users&#91;'user_id'] == user_id]&#91;'age_group'].iloc&#91;0]\n        \n        # \u5047\u8bbe\u7528\u6237\u670910-100\u6b21\u884c\u4e3a\u8bb0\u5f55\n        n_actions = np.random.randint(10, 101)\n        \n        for _ in range(n_actions):\n            product_id = f'prod_{np.random.randint(1, n_products + 1)}'\n            action_type = np.random.choice(&#91;'view', 'cart', 'purchase'], p=&#91;0.6, 0.3, 0.1])\n            timestamp = datetime.now() - timedelta(days=np.random.randint(0, 365), seconds=np.random.randint(0, 86400))\n            # \u7b80\u5316\uff1a\u6bcf\u6b21\u884c\u4e3a\u7684\u91d1\u989d\u90fd\u968f\u673a\u751f\u6210\uff0c\u5b9e\u9645\u4e2d\u5e94\u5173\u8054\u5546\u54c1\n            amount = np.random.lognormal(6, 1.2) if action_type == 'purchase' else 0.0\n            \n            data.append({\n                'user_id': user_id,\n                'product_id': product_id,\n                'action_type': action_type,\n                'timestamp': timestamp,\n                'amount': round(amount, 2)\n            })\n            \n    df_actions = pd.DataFrame(data)\n    \n    # --- \u7528\u6237\u884c\u4e3a\u7279\u5f81\u5de5\u7a0b ---\n    print(\"\u6b63\u5728\u8fdb\u884c\u7528\u6237\u884c\u4e3a\u7279\u5f81\u5de5\u7a0b...\")\n    \n    # 1. \u57fa\u7840\u7edf\u8ba1\u7279\u5f81\n    user_stats = df_actions.groupby('user_id').agg(\n        total_actions=('action_type', 'count'),\n        total_purchases=('amount', lambda x: (x > 0).sum()),\n        total_spent=('amount', 'sum'),\n        avg_action_amount=('amount', lambda x: x.sum() \/ (x > 0).sum() if (x > 0).sum() > 0 else 0),\n        first_action_date=('timestamp', 'min'),\n        last_action_date=('timestamp', 'max')\n    ).reset_index()\n    \n    # 2. \u884d\u751f\u7279\u5f81\n    user_stats&#91;'tenure_days'] = (user_stats&#91;'last_action_date'] - user_stats&#91;'first_action_date']).dt.days + 1\n    user_stats&#91;'purchase_freq'] = user_stats&#91;'total_purchases'] \/ user_stats&#91;'tenure_days'] # \u65e5\u5747\u8d2d\u4e70\u6b21\u6570\n    user_stats&#91;'activity_freq'] = user_stats&#91;'total_actions'] \/ user_stats&#91;'tenure_days'] # \u65e5\u5747\u6d3b\u52a8\u6b21\u6570\n    user_stats&#91;'avg_order_value'] = user_stats&#91;'total_spent'] \/ user_stats&#91;'total_purchases'].replace(0, 1) # \u907f\u514d\u9664\u4ee5\u96f6\n    user_stats&#91;'cart_to_purchase_rate'] = user_stats&#91;'total_purchases'] \/ (df_actions&#91;(df_actions&#91;'user_id'].isin(user_stats&#91;'user_id'])) &amp; (df_actions&#91;'action_type'] == 'cart')].groupby('user_id').size().reindex(user_stats&#91;'user_id'], fill_value=0) + 1) # +1 \u907f\u514d\u9664\u4ee5\u96f6\n\n    # 3. \u6700\u8fd1\u6d3b\u8dc3\u5ea6 (Recency)\n    user_stats&#91;'recency_days'] = (datetime.now() - user_stats&#91;'last_action_date']).dt.days\n    \n    # \u5408\u5e76\u7528\u6237\u57fa\u7840\u4fe1\u606f\n    df_final = user_stats.merge(df_users, on='user_id', how='left')\n    \n    csv_filename = f'{REPORT_PREFIX}_\u7528\u6237\u884c\u4e3a\u7279\u5f81\u6570\u636e.csv'\n    df_final.to_csv(csv_filename, index=False, encoding='utf-8-sig')\n    print(f\"\u7528\u6237\u884c\u4e3a\u7279\u5f81\u6570\u636e\u5df2\u751f\u6210\u5e76\u4fdd\u5b58\u81f3: {csv_filename}\")\n    return df_final\n\n# --- \u6570\u636e\u9884\u5904\u7406 ---\n\ndef preprocess_data(df):\n    \"\"\"\u6570\u636e\u9884\u5904\u7406\"\"\"\n    print(\"\\n--- \u6b63\u5728\u8fdb\u884c\u6570\u636e\u9884\u5904\u7406 ---\")\n    df_processed = df.copy()\n    \n    # 1. \u7f16\u7801\u5206\u7c7b\u53d8\u91cf\n    le_age = LabelEncoder()\n    le_gender = LabelEncoder()\n    le_location = LabelEncoder()\n    \n    df_processed&#91;'age_group_encoded'] = le_age.fit_transform(df_processed&#91;'age_group'])\n    df_processed&#91;'gender_encoded'] = le_gender.fit_transform(df_processed&#91;'gender'])\n    df_processed&#91;'location_encoded'] = le_location.fit_transform(df_processed&#91;'location'])\n    \n    # 2. \u9009\u62e9\u7528\u4e8e\u805a\u7c7b\u7684\u6570\u503c\u7279\u5f81\u5217\n    # \u9009\u62e9\u80fd\u4f53\u73b0\u7528\u6237\u4ef7\u503c\u548c\u884c\u4e3a\u6a21\u5f0f\u7684\u7279\u5f81\n    feature_columns = &#91;\n        'total_actions', 'total_purchases', 'total_spent', 'avg_action_amount',\n        'tenure_days', 'purchase_freq', 'activity_freq', 'avg_order_value',\n        'cart_to_purchase_rate', 'recency_days'\n        # \u6ce8\u610f\uff1a\u8fd9\u91cc\u6ca1\u6709\u5305\u542b\u57fa\u7840\u753b\u50cf\u7279\u5f81(age, gender, location)\uff0c\u56e0\u4e3a\u805a\u7c7b\u4e3b\u8981\u5173\u6ce8\u884c\u4e3a\u6a21\u5f0f\u3002\n        # \u5982\u679c\u9700\u8981\u7ed3\u5408\u753b\u50cf\u7ec6\u5206\uff0c\u53ef\u4ee5\u52a0\u5165\u7f16\u7801\u540e\u7684\u7279\u5f81\u3002\n    ]\n    \n    X = df_processed&#91;feature_columns]\n    \n    # 3. \u5904\u7406\u7f3a\u5931\u503c (\u867d\u7136\u5728\u8fd9\u4e2a\u6a21\u62df\u6570\u636e\u4e2d\u4e0d\u592a\u53ef\u80fd\u6709\u7f3a\u5931\uff0c\u4f46\u8fd9\u662f\u597d\u4e60\u60ef)\n    X = X.fillna(0)\n    \n    # 4. \u7279\u5f81\u6807\u51c6\u5316 (\u5bf9K-Means\u81f3\u5173\u91cd\u8981)\n    scaler = StandardScaler()\n    X_scaled = scaler.fit_transform(X)\n    X_scaled_df = pd.DataFrame(X_scaled, columns=feature_columns)\n    \n    print(f\"\u9884\u5904\u7406\u5b8c\u6210\u3002\u7279\u5f81\u77e9\u9635\u5f62\u72b6: {X_scaled_df.shape}\")\n    return X_scaled_df, scaler, feature_columns\n\n# --- \u7528\u6237\u805a\u7c7b\u5206\u6790 ---\n\ndef perform_user_segmentation(X_scaled, max_clusters=10):\n    \"\"\"\u6267\u884c\u7528\u6237\u805a\u7c7b\u5206\u6790\"\"\"\n    print(\"\\n--- \u6b63\u5728\u6267\u884c\u7528\u6237\u805a\u7c7b\u5206\u6790 ---\")\n    \n    # 1. \u786e\u5b9a\u6700\u4f18\u805a\u7c7b\u6570 K (\u8098\u90e8\u6cd5\u5219\u548c\u8f6e\u5ed3\u7cfb\u6570)\n    print(\"\u5bfb\u627e\u6700\u4f18\u805a\u7c7b\u6570 K...\")\n    inertias = &#91;]\n    silhouette_scores = &#91;]\n    K_range = range(2, max_clusters+1)\n    \n    for k in K_range:\n        kmeans = KMeans(n_clusters=k, random_state=RANDOM_SEED, n_init=10)\n        kmeans.fit(X_scaled)\n        inertias.append(kmeans.inertia_)\n        score = silhouette_score(X_scaled, kmeans.labels_)\n        silhouette_scores.append(score)\n        print(f\"  K={k}, Inertia={kmeans.inertia_:.2f}, Silhouette Score={score:.3f}\")\n\n    # \u7ed8\u5236\u8098\u90e8\u6cd5\u5219\u548c\u8f6e\u5ed3\u7cfb\u6570\u56fe\n    fig, ax1 = plt.subplots(figsize=(10, 6))\n\n    color = 'tab:blue'\n    ax1.set_xlabel('\u805a\u7c7b\u6570 K')\n    ax1.set_ylabel('\u7c07\u5185\u5e73\u65b9\u548c (Inertia)', color=color)\n    ax1.plot(K_range, inertias, marker='o', color=color, label='Inertia')\n    ax1.tick_params(axis='y', labelcolor=color)\n\n    ax2 = ax1.twinx()  \n    color = 'tab:red'\n    ax2.set_ylabel('\u8f6e\u5ed3\u7cfb\u6570 (Silhouette Score)', color=color)  \n    ax2.plot(K_range, silhouette_scores, marker='s', color=color, label='Silhouette Score')\n    ax2.tick_params(axis='y', labelcolor=color)\n    \n    fig.tight_layout() \n    plt.title('\u8098\u90e8\u6cd5\u5219 &amp; \u8f6e\u5ed3\u7cfb\u6570 vs \u805a\u7c7b\u6570 K')\n    plt.xticks(K_range)\n    plt.grid(True)\n    # \u5c06\u56fe\u4f8b\u653e\u5728\u56fe\u8868\u4e0b\u65b9\n    lines, labels = ax1.get_legend_handles_labels()\n    lines2, labels2 = ax2.get_legend_handles_labels()\n    ax2.legend(lines + lines2, labels + labels2, loc='upper center', bbox_to_anchor=(0.5, -0.15), ncol=2)\n    \n    k_plot_path = f'{REPORT_PREFIX}_\u6700\u4f18K\u503c\u5206\u6790.png'\n    plt.savefig(k_plot_path, bbox_inches='tight')\n    plt.close()\n    print(f\"\u6700\u4f18K\u503c\u5206\u6790\u56fe\u8868\u5df2\u4fdd\u5b58\u81f3: {k_plot_path}\")\n    \n    # \u9009\u62e9\u6700\u4f18K (\u8fd9\u91cc\u6211\u4eec\u9009\u62e9\u8f6e\u5ed3\u7cfb\u6570\u6700\u9ad8\u7684K\uff0c\u4e5f\u53ef\u4ee5\u7ed3\u5408\u8098\u90e8\u6cd5\u5219)\n    optimal_k = K_range&#91;np.argmax(silhouette_scores)]\n    print(f\"\u9009\u62e9\u6700\u4f18\u805a\u7c7b\u6570 K = {optimal_k}\")\n    \n    # 2. \u4f7f\u7528\u6700\u4f18K\u8fdb\u884c\u805a\u7c7b\n    print(f\"\u4f7f\u7528 K={optimal_k} \u8fdb\u884c\u6700\u7ec8\u805a\u7c7b...\")\n    kmeans_final = KMeans(n_clusters=optimal_k, random_state=RANDOM_SEED, n_init=10)\n    cluster_labels = kmeans_final.fit_predict(X_scaled)\n    \n    return cluster_labels, optimal_k, kmeans_final\n\n# --- \u805a\u7c7b\u7ed3\u679c\u5206\u6790\u4e0e\u53ef\u89c6\u5316 ---\n\ndef analyze_and_visualize_clusters(df, X_scaled_df, cluster_labels, feature_columns, optimal_k):\n    \"\"\"\u5206\u6790\u548c\u53ef\u89c6\u5316\u805a\u7c7b\u7ed3\u679c\"\"\"\n    print(\"\\n--- \u6b63\u5728\u5206\u6790\u548c\u53ef\u89c6\u5316\u805a\u7c7b\u7ed3\u679c ---\")\n    \n    df&#91;'cluster'] = cluster_labels\n    \n    # 1. \u5404\u805a\u7c7b\u7684\u57fa\u672c\u7edf\u8ba1\u4fe1\u606f\n    cluster_summary = df.groupby('cluster').agg(\n        user_count=('user_id', 'count'),\n        mean_total_spent=('total_spent', 'mean'),\n        mean_total_purchases=('total_purchases', 'mean'),\n        mean_activity_freq=('activity_freq', 'mean'),\n        mean_recency_days=('recency_days', 'mean'),\n        mean_tenure_days=('tenure_days', 'mean')\n    ).round(2)\n    cluster_summary&#91;'pct_of_users'] = (cluster_summary&#91;'user_count'] \/ len(df) * 100).round(2)\n    \n    print(\"\u5404\u7528\u6237\u7fa4\u4f53\u57fa\u672c\u7edf\u8ba1\u6458\u8981:\")\n    print(cluster_summary.to_string())\n    \n    summary_csv_path = f'{REPORT_PREFIX}_\u7528\u6237\u7fa4\u4f53\u6458\u8981.csv'\n    cluster_summary.to_csv(summary_csv_path, encoding='utf-8-sig')\n    print(f\"\u7528\u6237\u7fa4\u4f53\u6458\u8981\u5df2\u4fdd\u5b58\u81f3: {summary_csv_path}\")\n\n    # 2. \u53ef\u89c6\u5316\uff1a\u4f7f\u7528PCA\u964d\u7ef4\u540e\u7ed8\u5236\u6563\u70b9\u56fe\n    print(\"\u751f\u6210PCA\u964d\u7ef4\u53ef\u89c6\u5316\u56fe...\")\n    pca = PCA(n_components=2, random_state=RANDOM_SEED)\n    X_pca = pca.fit_transform(X_scaled_df)\n    \n    plt.figure(figsize=(10, 8))\n    scatter = plt.scatter(X_pca&#91;:, 0], X_pca&#91;:, 1], c=cluster_labels, cmap='viridis', alpha=0.6)\n    plt.xlabel(f'PC1 ({pca.explained_variance_ratio_&#91;0]:.1%} variance)')\n    plt.ylabel(f'PC2 ({pca.explained_variance_ratio_&#91;1]:.1%} variance)')\n    plt.title('\u7528\u6237\u7fa4\u4f53\u805a\u7c7b (PCA\u964d\u7ef4\u53ef\u89c6\u5316)')\n    plt.colorbar(scatter, label='Cluster')\n    # \u6dfb\u52a0\u805a\u7c7b\u4e2d\u5fc3 (\u5728PCA\u7a7a\u95f4\u4e2d)\n    centers_pca = pca.transform(kmeans.cluster_centers_)\n    plt.scatter(centers_pca&#91;:, 0], centers_pca&#91;:, 1], c='red', marker='x', s=200, linewidths=3, label='Centroids')\n    plt.legend()\n    pca_plot_path = f'{REPORT_PREFIX}_\u7528\u6237\u7fa4\u4f53PCA\u53ef\u89c6\u5316.png'\n    plt.savefig(pca_plot_path)\n    plt.close()\n    print(f\"PCA\u53ef\u89c6\u5316\u56fe\u8868\u5df2\u4fdd\u5b58\u81f3: {pca_plot_path}\")\n\n    # 3. \u53ef\u89c6\u5316\uff1a\u5404\u7fa4\u4f53\u5173\u952e\u7279\u5f81\u96f7\u8fbe\u56fe (\u6bcf\u4e2a\u7fa4\u4f53\u4e00\u4e2a\u56fe)\n    print(\"\u751f\u6210\u5404\u7528\u6237\u7fa4\u4f53\u7279\u5f81\u96f7\u8fbe\u56fe...\")\n    # \u8ba1\u7b97\u6bcf\u4e2a\u7fa4\u4f53\u5728\u5404\u7279\u5f81\u4e0a\u7684\u5e73\u5747Z-score (\u6807\u51c6\u5316\u540e\u7684\u503c)\n    cluster_profiles = X_scaled_df.copy()\n    cluster_profiles&#91;'cluster'] = cluster_labels\n    cluster_averages = cluster_profiles.groupby('cluster').mean()\n    \n    # \u4e3a\u96f7\u8fbe\u56fe\u51c6\u5907\u6570\u636e\n    num_vars = len(feature_columns)\n    angles = np.linspace(0, 2 * np.pi, num_vars, endpoint=False).tolist()\n    angles += angles&#91;:1] # \u95ed\u5408\u56fe\u5f62\n    \n    fig, axes = plt.subplots(2, (optimal_k + 1) \/\/ 2, figsize=(5 * (optimal_k + 1) \/\/ 2, 10), subplot_kw=dict(polar=True))\n    if optimal_k == 2:\n         axes = axes.reshape(1, -1) # \u5982\u679c\u53ea\u67092\u4e2a\u7c07\uff0caxes\u662f\u4e00\u7ef4\u7684\n    fig.suptitle('\u7528\u6237\u7fa4\u4f53\u7279\u5f81\u96f7\u8fbe\u56fe', fontsize=16)\n    \n    for i in range(optimal_k):\n        row = i \/\/ axes.shape&#91;1]\n        col = i % axes.shape&#91;1]\n        ax = axes&#91;row, col] if axes.ndim > 1 else axes&#91;col]\n        \n        values = cluster_averages.iloc&#91;i].tolist()\n        values += values&#91;:1]\n        ax.plot(angles, values, linewidth=2, label=f'Cluster {i}')\n        ax.fill(angles, values, alpha=0.25)\n        ax.set_xticks(angles&#91;:-1])\n        ax.set_xticklabels(feature_columns, fontsize=8)\n        ax.set_ylim(&#91;-3, 3]) # \u6807\u51c6\u5316\u540e\u7684\u503c\u901a\u5e38\u5728\u8fd9\u4e2a\u8303\u56f4\n        ax.set_title(f'\u7fa4\u4f53 {i}', fontsize=12)\n        ax.yaxis.grid(True)\n        \n    # \u9690\u85cf\u591a\u4f59\u7684\u5b50\u56fe (\u5982\u679c\u7c07\u6570\u662f\u5947\u6570)\n    if optimal_k % 2 != 0 and axes.ndim > 1:\n        fig.delaxes(axes&#91;1, -1])\n\n    plt.tight_layout(rect=&#91;0, 0.03, 1, 0.95])\n    radar_plot_path = f'{REPORT_PREFIX}_\u7528\u6237\u7fa4\u4f53\u96f7\u8fbe\u56fe.png'\n    plt.savefig(radar_plot_path)\n    plt.close()\n    print(f\"\u7528\u6237\u7fa4\u4f53\u96f7\u8fbe\u56fe\u5df2\u4fdd\u5b58\u81f3: {radar_plot_path}\")\n    \n    return cluster_summary\n\n# --- \u62a5\u544a\u751f\u6210 ---\n\ndef generate_user_segmentation_report(cluster_summary, optimal_k, k_plot_path, pca_plot_path, radar_plot_path):\n    \"\"\"\u751f\u6210\u6700\u7ec8\u7684\u7528\u6237\u884c\u4e3a\u5206\u6790\u4e0e\u7ec6\u5206\u62a5\u544a\"\"\"\n    print(\"\\n--- \u6b63\u5728\u751f\u6210\u7528\u6237\u884c\u4e3a\u5206\u6790\u4e0e\u7ec6\u5206\u62a5\u544a ---\")\n    from datetime import datetime\n    report_filename = f\"{REPORT_PREFIX}_{datetime.now().strftime('%Y%m%d_%H%M%S')}.txt\"\n    \n    with open(report_filename, 'w', encoding='utf-8') as f:\n        f.write(\"=\" * 50 + \"\\n\")\n        f.write(\"        \u7535\u5546\u5e73\u53f0\u7528\u6237\u884c\u4e3a\u5206\u6790\u4e0e\u7ec6\u5206\u62a5\u544a\\n\")\n        f.write(f\"        \u751f\u6210\u65f6\u95f4: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\\n\")\n        f.write(\"=\" * 50 + \"\\n\\n\")\n\n        f.write(\"--- 1. \u9879\u76ee\u6982\u8ff0 ---\\n\")\n        f.write(\"\u672c\u9879\u76ee\u65e8\u5728\u901a\u8fc7\u5bf9\u7535\u5546\u5e73\u53f0\u7528\u6237\u884c\u4e3a\u6570\u636e\u7684\u6df1\u5165\u5206\u6790\uff0c\u5c06\u7528\u6237\u5212\u5206\u4e3a\u4e0d\u540c\u7684\u7fa4\u4f53\u3002\\n\")\n        f.write(\"\u76ee\u6807\u662f\u7406\u89e3\u4e0d\u540c\u7528\u6237\u7fa4\u4f53\u7684\u884c\u4e3a\u7279\u5f81\uff0c\u4e3a\u7cbe\u7ec6\u5316\u8fd0\u8425\u3001\u4e2a\u6027\u5316\u63a8\u8350\u548c\u7cbe\u51c6\u8425\u9500\u63d0\u4f9b\u6570\u636e\u652f\u6301\u3002\\n\\n\")\n\n        f.write(\"--- 2. \u6570\u636e\u6982\u89c8 ---\\n\")\n        f.write(\"\u6570\u636e\u6765\u6e90: \u6a21\u62df\u751f\u6210\u7684\u7535\u5546\u5e73\u53f0\u7528\u6237\u884c\u4e3a\u6570\u636e\u3002\\n\")\n        f.write(\"\u6570\u636e\u89c4\u6a21: 2000 \u540d\u7528\u6237\uff0c500 \u79cd\u5546\u54c1\u3002\\n\")\n        f.write(\"\u5173\u952e\u884c\u4e3a: \u6d4f\u89c8 (view), \u52a0\u8d2d (cart), \u8d2d\u4e70 (purchase)\u3002\\n\")\n        f.write(\"\u5173\u952e\u5b57\u6bb5: \u7528\u6237ID, \u5546\u54c1ID, \u884c\u4e3a\u7c7b\u578b, \u65f6\u95f4\u6233, \u91d1\u989d\u7b49\u3002\\n\")\n        f.write(\"\u539f\u59cb\u7279\u5f81\u6570\u636e\u5df2\u4fdd\u5b58\u4e3a CSV \u6587\u4ef6\u3002\\n\\n\")\n\n        f.write(\"--- 3. \u7528\u6237\u884c\u4e3a\u7279\u5f81\u5de5\u7a0b ---\\n\")\n        f.write(\"\u4ece\u539f\u59cb\u884c\u4e3a\u65e5\u5fd7\u4e2d\u63d0\u53d6\u4e86\u80fd\u53cd\u6620\u7528\u6237\u4ef7\u503c\u548c\u884c\u4e3a\u6a21\u5f0f\u7684\u5173\u952e\u6307\u6807\uff1a\\n\")\n        f.write(\"- \u603b\u884c\u4e3a\u6b21\u6570 (total_actions)\\n\")\n        f.write(\"- \u603b\u8d2d\u4e70\u6b21\u6570 (total_purchases)\\n\")\n        f.write(\"- \u603b\u6d88\u8d39\u91d1\u989d (total_spent)\\n\")\n        f.write(\"- \u5e73\u5747\u884c\u4e3a\u91d1\u989d (avg_action_amount)\\n\")\n        f.write(\"- \u7528\u6237\u751f\u547d\u5468\u671f (tenure_days)\\n\")\n        f.write(\"- \u8d2d\u4e70\u9891\u7387 (purchase_freq)\\n\")\n        f.write(\"- \u6d3b\u52a8\u9891\u7387 (activity_freq)\\n\")\n        f.write(\"- \u5e73\u5747\u8ba2\u5355\u4ef7\u503c (avg_order_value)\\n\")\n        f.write(\"- \u52a0\u8d2d\u8f6c\u5316\u7387 (cart_to_purchase_rate)\\n\")\n        f.write(\"- \u6700\u8fd1\u6d3b\u8dc3\u5ea6 (recency_days)\\n\\n\")\n\n        f.write(\"--- 4. \u7528\u6237\u7fa4\u4f53\u7ec6\u5206 ---\\n\")\n        f.write(f\"\u91c7\u7528 K-Means \u805a\u7c7b\u7b97\u6cd5\uff0c\u901a\u8fc7\u8098\u90e8\u6cd5\u5219\u548c\u8f6e\u5ed3\u7cfb\u6570\u5206\u6790\uff0c\u786e\u5b9a\u6700\u4f18\u805a\u7c7b\u6570 K = {optimal_k}\u3002\\n\")\n        f.write(\"\u5206\u6790\u8fc7\u7a0b\u56fe\u8868\u5df2\u751f\u6210\uff0c\u5305\u62ec\uff1a\\n\")\n        f.write(f\"- \u6700\u4f18K\u503c\u9009\u62e9\u5206\u6790\u56fe: {k_plot_path}\\n\")\n        f.write(f\"- PCA\u964d\u7ef4\u53ef\u89c6\u5316\u56fe: {pca_plot_path}\\n\")\n        f.write(f\"- \u7528\u6237\u7fa4\u4f53\u7279\u5f81\u96f7\u8fbe\u56fe: {radar_plot_path}\\n\\n\")\n        \n        f.write(\"--- 5. \u7528\u6237\u7fa4\u4f53\u753b\u50cf ---\\n\")\n        f.write(\"\u6839\u636e\u805a\u7c7b\u7ed3\u679c\uff0c\u7528\u6237\u88ab\u5212\u5206\u4e3a\u4ee5\u4e0b\u7fa4\u4f53\uff0c\u5404\u7fa4\u4f53\u7279\u5f81\u6458\u8981\u5982\u4e0b:\\n\")\n        f.write(cluster_summary.to_string())\n        f.write(\"\\n\\n\u57fa\u4e8e\u4ee5\u4e0a\u6570\u636e\uff0c\u53ef\u4ee5\u5bf9\u6bcf\u4e2a\u7fa4\u4f53\u8fdb\u884c\u547d\u540d\u548c\u63cf\u8ff0\uff1a\\n\")\n        f.write(\"(\u547d\u540d\u9700\u7ed3\u5408\u96f7\u8fbe\u56fe\u7b49\u53ef\u89c6\u5316\u4fe1\u606f\u8fdb\u884c\u4eba\u5de5\u89e3\u8bfb)\\n\")\n        f.write(\"\u4f8b\u5982\uff1a\\n\")\n        f.write(\"- \u9ad8\u4ef7\u503c\u5ba2\u6237: \u9ad8\u6d88\u8d39\u91d1\u989d\u3001\u9ad8\u8d2d\u4e70\u9891\u7387\u3001\u4f4e\u8fd1\u671f\u6d3b\u8dc3\u5ea6\u53ef\u80fd\u8868\u793a\u5fe0\u8bda\u8001\u5ba2\u6237\u3002\\n\")\n        f.write(\"- \u6f5c\u529b\u5ba2\u6237: \u4e2d\u7b49\u6d3b\u52a8\u9891\u7387\u3001\u4e2d\u7b49\u6d88\u8d39\u3001\u9ad8\u8fd1\u671f\u6d3b\u8dc3\u5ea6\u3002\\n\")\n        f.write(\"- \u65b0\u5174\u5ba2\u6237: \u77ed\u751f\u547d\u5468\u671f\u3001\u4f4e\u6d3b\u52a8\u9891\u7387\u3001\u4f46\u9ad8\u8fd1\u671f\u6d3b\u8dc3\u5ea6\u3002\\n\")\n        f.write(\"- \u6d41\u5931\u98ce\u9669\u5ba2\u6237: \u4f4e\u6d3b\u52a8\u9891\u7387\u3001\u4f4e\u8d2d\u4e70\u9891\u7387\u3001\u9ad8\u8fd1\u671f\u6d3b\u8dc3\u5ea6\u3002\\n\")\n        f.write(\"- \u4f4e\u4ef7\u503c\u5ba2\u6237: \u5404\u9879\u6307\u6807\u5747\u8f83\u4f4e\u3002\\n\\n\")\n\n        f.write(\"--- 6. \u8fd0\u8425\u7b56\u7565\u4e0e\u5efa\u8bae ---\\n\")\n        f.write(\"1. \u7cbe\u51c6\u8425\u9500:\\n\")\n        f.write(\"   - \u5bf9'\u9ad8\u4ef7\u503c\u5ba2\u6237'\u63d0\u4f9bVIP\u670d\u52a1\u3001\u4e13\u5c5e\u4f18\u60e0\uff0c\u63d0\u9ad8\u5fe0\u8bda\u5ea6\u3002\\n\")\n        f.write(\"   - \u5bf9'\u6f5c\u529b\u5ba2\u6237'\u63a8\u9001\u4e2a\u6027\u5316\u5546\u54c1\u63a8\u8350\uff0c\u523a\u6fc0\u6d88\u8d39\u3002\\n\")\n        f.write(\"   - \u5bf9'\u65b0\u5174\u5ba2\u6237'\u63d0\u4f9b\u65b0\u4eba\u793c\u5305\uff0c\u5f15\u5bfc\u5b8c\u6210\u9996\u5355\u3002\\n\")\n        f.write(\"   - \u5bf9'\u6d41\u5931\u98ce\u9669\u5ba2\u6237'\u53d1\u9001\u53ec\u56de\u4f18\u60e0\u5238\u6216\u8fdb\u884c\u7528\u6237\u5173\u6000\u3002\\n\")\n        f.write(\"2. \u4e2a\u6027\u5316\u63a8\u8350: \u6839\u636e\u4e0d\u540c\u7fa4\u4f53\u7684\u504f\u597d\u8c03\u6574\u63a8\u8350\u7b97\u6cd5\u3002\\n\")\n        f.write(\"3. \u4ea7\u54c1\u4f18\u5316: \u5206\u6790\u5404\u7fa4\u4f53\u70ed\u95e8\u5546\u54c1\uff0c\u4f18\u5316\u5546\u54c1\u7ed3\u6784\u3002\\n\")\n        f.write(\"4. \u7528\u6237\u4f53\u9a8c: \u9488\u5bf9\u4e0d\u540c\u7fa4\u4f53\u4f18\u5316App\u6216\u7f51\u7ad9\u7684\u7528\u6237\u754c\u9762\u548c\u529f\u80fd\u3002\\n\")\n        f.write(\"5. \u6a21\u578b\u8fed\u4ee3: \u5b9a\u671f\u66f4\u65b0\u7528\u6237\u884c\u4e3a\u6570\u636e\u548c\u7fa4\u4f53\u5212\u5206\uff0c\u4ee5\u9002\u5e94\u5e02\u573a\u53d8\u5316\u3002\\n\\n\")\n\n        f.write(\"=\" * 50 + \"\\n\")\n        f.write(\"                    \u62a5\u544a\u7ed3\u675f\\n\")\n        f.write(\"=\" * 50 + \"\\n\")\n\n    print(f\"\u7528\u6237\u884c\u4e3a\u5206\u6790\u4e0e\u7ec6\u5206\u62a5\u544a\u5df2\u751f\u6210: {report_filename}\")\n\n# --- \u4e3b\u51fd\u6570 ---\n\ndef main():\n    \"\"\"\u4e3b\u51fd\u6570\"\"\"\n    # 1. \u751f\u6210\u6570\u636e\n    df_user_features = generate_sample_user_behavior_data(NUM_USERS, NUM_PRODUCTS)\n    \n    # 2. \u6570\u636e\u9884\u5904\u7406\n    X_scaled_df, scaler, feature_cols = preprocess_data(df_user_features)\n    \n    # 3. \u7528\u6237\u805a\u7c7b\u5206\u6790\n    cluster_labels, optimal_k, kmeans = perform_user_segmentation(X_scaled_df)\n    \n    # 4. \u5206\u6790\u548c\u53ef\u89c6\u5316\u805a\u7c7b\u7ed3\u679c\n    cluster_summary = analyze_and_visualize_clusters(df_user_features, X_scaled_df, cluster_labels, feature_cols, optimal_k)\n    \n    # 5. \u751f\u6210\u62a5\u544a\n    generate_user_segmentation_report(cluster_summary, optimal_k, f'{REPORT_PREFIX}_\u6700\u4f18K\u503c\u5206\u6790.png', f'{REPORT_PREFIX}_\u7528\u6237\u7fa4\u4f53PCA\u53ef\u89c6\u5316.png', f'{REPORT_PREFIX}_\u7528\u6237\u7fa4\u4f53\u96f7\u8fbe\u56fe.png')\n    \n    print(\"\\n\u7528\u6237\u884c\u4e3a\u5206\u6790\u4e0e\u7ec6\u5206\u6d41\u7a0b\u5b8c\u6210\u3002\")\n\nif __name__ == \"__main__\":\n    main()<\/code><\/pre>\n","protected":false},"excerpt":{"rendered":"<p>\u5f00\u53d1\u601d\u8def \u8be5\u811a\u672c\u5c06\u5305\u542b\u4ee5\u4e0b\u529f\u80fd\uff1a<\/p>\n","protected":false},"author":1,"featured_media":0,"comment_status":"closed","ping_status":"open","sticky":false,"template":"","format":"standard","meta":[],"categories":[1],"tags":[28],"views":332,"_links":{"self":[{"href":"http:\/\/viplao.com\/index.php\/wp-json\/wp\/v2\/posts\/3899"}],"collection":[{"href":"http:\/\/viplao.com\/index.php\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"http:\/\/viplao.com\/index.php\/wp-json\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"http:\/\/viplao.com\/index.php\/wp-json\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"http:\/\/viplao.com\/index.php\/wp-json\/wp\/v2\/comments?post=3899"}],"version-history":[{"count":1,"href":"http:\/\/viplao.com\/index.php\/wp-json\/wp\/v2\/posts\/3899\/revisions"}],"predecessor-version":[{"id":3900,"href":"http:\/\/viplao.com\/index.php\/wp-json\/wp\/v2\/posts\/3899\/revisions\/3900"}],"wp:attachment":[{"href":"http:\/\/viplao.com\/index.php\/wp-json\/wp\/v2\/media?parent=3899"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"http:\/\/viplao.com\/index.php\/wp-json\/wp\/v2\/categories?post=3899"},{"taxonomy":"post_tag","embeddable":true,"href":"http:\/\/viplao.com\/index.php\/wp-json\/wp\/v2\/tags?post=3899"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}