{"id":2770,"date":"2024-10-26T07:02:10","date_gmt":"2024-10-25T23:02:10","guid":{"rendered":"http:\/\/viplao.com\/?p=2770"},"modified":"2024-10-27T22:34:30","modified_gmt":"2024-10-27T14:34:30","slug":"%e6%95%b0%e6%8d%ae%e5%88%86%e6%9e%90-%e4%bc%9a%e5%91%98%e6%95%b0%e5%ad%97%e5%8c%96%e8%bf%90%e8%90%a5%e5%ae%9e%e8%b7%b5%e6%a1%88%e4%be%8b","status":"publish","type":"post","link":"http:\/\/viplao.com\/index.php\/2024\/10\/26\/%e6%95%b0%e6%8d%ae%e5%88%86%e6%9e%90-%e4%bc%9a%e5%91%98%e6%95%b0%e5%ad%97%e5%8c%96%e8%bf%90%e8%90%a5%e5%ae%9e%e8%b7%b5%e6%a1%88%e4%be%8b\/","title":{"rendered":"\u6570\u5b57\u5316\u8fd0\u8425\u57fa\u7840\u6280\u80fd &#8211; \u4f1a\u5458\u6570\u5b57\u5316\u8fd0\u8425\u5b9e\u8df5\u6848\u4f8b"},"content":{"rendered":"\n<p>\u5bfc\u5165\u5e93<\/p>\n\n\n\n<pre class=\"wp-block-code\"><code>import time  # \u65f6\u95f4\u5e93\n\nimport numpy as np  # numpy\u5e93\nimport pandas as pd  # pandas\u5e93\nimport pymysql  # mysql\u8fde\u63a5\u5e93\nfrom sklearn.ensemble import RandomForestClassifier # RF\u5e93\n\n# from pyecharts import Bar3D # \u8001\u7248\u672c\u4ee3\u7801\uff0c3D\u67f1\u5f62\u56fe\nfrom pyecharts.charts import Bar3D # \u65b0\u7248\u672cdiam\uff0c3D\u67f1\u5f62\u56fe\nfrom pyecharts import options as opts # \u65b0\u7248\u672c\u4ee3\u7801\uff0c\u5148\u5bfc\u5165\u914d\u7f6e\u65b9\u6cd5\u5e93<\/code><\/pre>\n\n\n\n<p>\u8bfb\u53d6\u6570\u636e<\/p>\n\n\n\n<pre class=\"wp-block-code\"><code>sheet_names = &#91;'2015','2016','2017','2018','\u4f1a\u5458\u7b49\u7ea7']\nsheet_datas = &#91;pd.read_excel('sales.xlsx',sheet_name=i) for i in sheet_names]<\/code><\/pre>\n\n\n\n<p>\u6570\u7ec4\u6570\u636e\u67e5\u770b<\/p>\n\n\n\n<pre class=\"wp-block-code\"><code>for each_name,each_data in zip(sheet_names,sheet_datas):    \n    print('&#91;data summary for {0:=^50}]'.format(each_name))\n    print('Overview:','\\n',each_data.head(4))# \u5c55\u793a\u6570\u636e\u524d4\u6761\n    print('DESC:','\\n',each_data.describe())# \u6570\u636e\u63cf\u8ff0\u6027\u4fe1\u606f\n    print('NA records',each_data.isnull().any(axis=1).sum()) # \u7f3a\u5931\u503c\u8bb0\u5f55\u6570    \n    print('Dtypes',each_data.dtypes) # \u6570\u636e\u7c7b\u578b<\/code><\/pre>\n\n\n\n<p>\u6570\u636e\u9884\u5904\u7406<\/p>\n\n\n\n<pre class=\"wp-block-code\"><code># \u53bb\u9664\u7f3a\u5931\u503c\u548c\u5f02\u5e38\u503c\nfor ind,each_data in enumerate(sheet_datas&#91;:-1]):    \n    sheet_datas&#91;ind] = each_data.dropna()# \u4e22\u5f03\u7f3a\u5931\u503c\u8bb0\u5f55\n    sheet_datas&#91;ind] = each_data&#91;each_data&#91;'\u8ba2\u5355\u91d1\u989d'] &gt; 1]# \u4e22\u5f03\u8ba2\u5355\u91d1\u989d&lt;=1\u7684\u8bb0\u5f55\n    sheet_datas&#91;ind]&#91;'max_year_date'] = each_data&#91;'\u63d0\u4ea4\u65e5\u671f'].max() # \u589e\u52a0\u4e00\u5217\u6700\u5927\u65e5\u671f\u503c\n# \u6c47\u603b\u6240\u6709\u6570\u636e\ndata_merge = pd.concat(sheet_datas&#91;:-1],axis=0)\n# \u83b7\u53d6\u5404\u81ea\u5e74\u4efd\u6570\u636e\ndata_merge&#91;'date_interval'] = data_merge&#91;'max_year_date']-data_merge&#91;'\u63d0\u4ea4\u65e5\u671f']\ndata_merge&#91;'year'] = data_merge&#91;'\u63d0\u4ea4\u65e5\u671f'].dt.year\n# \u8f6c\u6362\u65e5\u671f\u95f4\u9694\u4e3a\u6570\u5b57\ndata_merge&#91;'date_interval'] = data_merge&#91;'date_interval'].apply(lambda x: x.days) # \u8f6c\u6362\u65e5\u671f\u95f4\u9694\u4e3a\u6570\u5b57\n#data_merge.head()\n\n# \u6309\u4f1a\u5458ID\u505a\u6c47\u603b\nrfm_gb = data_merge.groupby(&#91;'year','\u4f1a\u5458ID'],as_index=False).agg({'date_interval': 'min',  # \u8ba1\u7b97\u6700\u8fd1\u4e00\u6b21\u8ba2\u5355\u65f6\u95f4\n                                                   '\u63d0\u4ea4\u65e5\u671f': 'count', # \u8ba1\u7b97\u8ba2\u5355\u9891\u7387\n                                                   '\u8ba2\u5355\u91d1\u989d': 'sum'})  # \u8ba1\u7b97\u8ba2\u5355\u603b\u91d1\u989d\n# \u91cd\u547d\u540d\u5217\u540d\nrfm_gb.columns =  &#91;'year','\u4f1a\u5458ID','r','f','m']\nrfm_gb.head()<\/code><\/pre>\n\n\n\n<p>\u6570\u636e\u5206\u5757<\/p>\n\n\n\n<pre class=\"wp-block-code\"><code># \u67e5\u770b\u6570\u636e\u5206\u5e03\ndesc_pd = rfm_gb.iloc&#91;:,2:].describe().T\nprint(desc_pd)\n# \u5b9a\u4e49\u533a\u95f4\u8fb9\u754c\nr_bins = &#91;-1,79,255,365] # \u6ce8\u610f\u8d77\u59cb\u8fb9\u754c\u5c0f\u4e8e\u6700\u5c0f\u503c\nf_bins = &#91;0,2,5,130] \nm_bins = &#91;0,69,1199,206252]<\/code><\/pre>\n\n\n\n<p>\u8ba1\u7b97\u6743\u91cd<\/p>\n\n\n\n<pre class=\"wp-block-code\"><code># \u5339\u914d\u4f1a\u5458\u7b49\u7ea7\u548crfm\u5f97\u5206\nrfm_merge = pd.merge(rfm_gb,sheet_datas&#91;-1],on='\u4f1a\u5458ID',how='inner')\n\n# rf\u83b7\u5f97rfm\u56e0\u5b50\u5f97\u5206\nclf = RandomForestClassifier()\nclf = clf.fit(rfm_merge&#91;&#91;'r','f','m']],rfm_merge&#91;'\u4f1a\u5458\u7b49\u7ea7'])\nweights = clf.feature_importances_\nprint('feature importance:',weights)<\/code><\/pre>\n\n\n\n<p>RFM\u8ba1\u7b97\u8fc7\u7a0b<\/p>\n\n\n\n<p># RFM\u5206\u7bb1\u5f97\u5206<\/p>\n\n\n\n<p>rfm_gb[&#8216;r_score&#8217;] = pd.cut(rfm_gb[&#8216;r&#8217;], r_bins, labels=[i for i in range(len(r_bins)-1,0,-1)]) &nbsp;# \u8ba1\u7b97R\u5f97\u5206<\/p>\n\n\n\n<p>rfm_gb[&#8216;f_score&#8217;] = pd.cut(rfm_gb[&#8216;f&#8217;], f_bins, labels=[i+1 for i in range(len(f_bins)-1)]) &nbsp;# \u8ba1\u7b97F\u5f97\u5206<\/p>\n\n\n\n<p>rfm_gb[&#8216;m_score&#8217;] = pd.cut(rfm_gb[&#8216;m&#8217;], m_bins, labels=[i+1 for i in range(len(m_bins)-1)]) &nbsp;# \u8ba1\u7b97M\u5f97\u5206<\/p>\n\n\n\n<p><\/p>\n\n\n\n<p># \u8ba1\u7b97RFM\u603b\u5f97\u5206<\/p>\n\n\n\n<p># \u65b9\u6cd5\u4e00\uff1a\u52a0\u6743\u5f97\u5206<\/p>\n\n\n\n<p>rfm_gb = rfm_gb.apply(np.int32) # cate\u8f6c\u6570\u503c<\/p>\n\n\n\n<p>rfm_gb[&#8216;rfm_score&#8217;] = rfm_gb[&#8216;r_score&#8217;] * weights[0] + rfm_gb[&#8216;f_score&#8217;] * weights[1] + rfm_gb[<\/p>\n\n\n\n<p>&#8216;m_score&#8217;] * weights[2]<\/p>\n\n\n\n<p><\/p>\n\n\n\n<p><\/p>\n\n\n\n<p># \u65b9\u6cd5\u4e8c\uff1aRFM\u7ec4\u5408<\/p>\n\n\n\n<p>rfm_gb[&#8216;r_score&#8217;] = rfm_gb[&#8216;r_score&#8217;].astype(np.str)<\/p>\n\n\n\n<p>rfm_gb[&#8216;f_score&#8217;] = rfm_gb[&#8216;f_score&#8217;].astype(np.str)<\/p>\n\n\n\n<p>rfm_gb[&#8216;m_score&#8217;] = rfm_gb[&#8216;m_score&#8217;].astype(np.str)<\/p>\n\n\n\n<p>rfm_gb[&#8216;rfm_group&#8217;] = rfm_gb[&#8216;r_score&#8217;].str.cat(rfm_gb[&#8216;f_score&#8217;]).str.cat(<\/p>\n\n\n\n<p>rfm_gb[&#8216;m_score&#8217;])<\/p>\n\n\n\n<p>\u4fdd\u5b58\u6570\u636e<\/p>\n\n\n\n<pre class=\"wp-block-code\"><code>rfm_gb.to_excel('sales_rfm_score.xlsx')  # \u4fdd\u5b58\u6570\u636e\u4e3aExcel<\/code><\/pre>\n\n\n\n<p>\u56fe\u5f62\u5c55\u73b0<\/p>\n\n\n\n<pre class=\"wp-block-code\"><code># \u56fe\u5f62\u6570\u636e\u6c47\u603b\ndisplay_data = rfm_gb.groupby(&#91;'rfm_group','year'],as_index=False)&#91;'\u4f1a\u5458ID'].count()\ndisplay_data.columns = &#91;'rfm_group','year','number']\ndisplay_data&#91;'rfm_group'] = display_data&#91;'rfm_group'].astype(np.int32)\ndisplay_data.head()\n\n# \u65b0\u7248\u672c\nbar3d = Bar3D(init_opts=opts.InitOpts(width=\"900px\", height=\"600px\"))\nrange_color = &#91;'#313695', '#4575b4', '#74add1', '#abd9e9', '#e0f3f8', '#ffffbf',\n               '#fee090', '#fdae61', '#f46d43', '#d73027', '#a50026']\ndata = &#91;d.tolist() for d in display_data.values]\nbar3d.add(\n    series_name=\"rfm\u5206\u7ec4\u7ed3\u679c\",\n    data=data,\n    xaxis3d_opts=opts.Axis3DOpts(type_=\"category\"),\n    yaxis3d_opts=opts.Axis3DOpts(type_=\"category\"),\n    zaxis3d_opts=opts.Axis3DOpts(type_=\"value\")\n)\nbar3d.set_global_opts(\n        visualmap_opts=opts.VisualMapOpts(\n            max_ =display_data&#91;'number'].max(),\n            range_color=range_color,\n        )\n    )\nbar3d.render_notebook()<\/code><\/pre>\n\n\n\n<p><\/p>\n\n\n\n<p><\/p>\n","protected":false},"excerpt":{"rendered":"<p>\u5bfc\u5165\u5e93 \u8bfb\u53d6\u6570\u636e \u6570\u7ec4\u6570\u636e\u67e5\u770b \u6570\u636e\u9884\u5904\u7406 \u6570\u636e\u5206\u5757 \u8ba1\u7b97\u6743\u91cd RFM\u8ba1\u7b97\u8fc7\u7a0b # RFM\u5206\u7bb1\u5f97\u5206 &hellip; <a href=\"http:\/\/viplao.com\/index.php\/2024\/10\/26\/%e6%95%b0%e6%8d%ae%e5%88%86%e6%9e%90-%e4%bc%9a%e5%91%98%e6%95%b0%e5%ad%97%e5%8c%96%e8%bf%90%e8%90%a5%e5%ae%9e%e8%b7%b5%e6%a1%88%e4%be%8b\/\" class=\"more-link read-more\" rel=\"bookmark\">\u7ee7\u7eed\u9605\u8bfb <span class=\"screen-reader-text\">\u6570\u5b57\u5316\u8fd0\u8425\u57fa\u7840\u6280\u80fd &#8211; \u4f1a\u5458\u6570\u5b57\u5316\u8fd0\u8425\u5b9e\u8df5\u6848\u4f8b<\/span><i class=\"fa fa-arrow-right\"><\/i><\/a><\/p>\n","protected":false},"author":1,"featured_media":0,"comment_status":"closed","ping_status":"open","sticky":false,"template":"","format":"standard","meta":[],"categories":[1],"tags":[28],"views":493,"_links":{"self":[{"href":"http:\/\/viplao.com\/index.php\/wp-json\/wp\/v2\/posts\/2770"}],"collection":[{"href":"http:\/\/viplao.com\/index.php\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"http:\/\/viplao.com\/index.php\/wp-json\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"http:\/\/viplao.com\/index.php\/wp-json\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"http:\/\/viplao.com\/index.php\/wp-json\/wp\/v2\/comments?post=2770"}],"version-history":[{"count":2,"href":"http:\/\/viplao.com\/index.php\/wp-json\/wp\/v2\/posts\/2770\/revisions"}],"predecessor-version":[{"id":2806,"href":"http:\/\/viplao.com\/index.php\/wp-json\/wp\/v2\/posts\/2770\/revisions\/2806"}],"wp:attachment":[{"href":"http:\/\/viplao.com\/index.php\/wp-json\/wp\/v2\/media?parent=2770"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"http:\/\/viplao.com\/index.php\/wp-json\/wp\/v2\/categories?post=2770"},{"taxonomy":"post_tag","embeddable":true,"href":"http:\/\/viplao.com\/index.php\/wp-json\/wp\/v2\/tags?post=2770"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}