{"id":3533,"date":"2025-06-28T07:45:50","date_gmt":"2025-06-27T23:45:50","guid":{"rendered":"http:\/\/viplao.com\/?p=3533"},"modified":"2025-06-28T13:39:14","modified_gmt":"2025-06-28T05:39:14","slug":"%e3%80%90%e5%ae%9e%e8%b7%b5%e7%bb%8f%e9%aa%8c%e3%80%91%e7%94%b5%e5%95%86%e5%b9%b3%e5%8f%b0%e9%94%80%e5%94%ae%e6%95%b0%e6%8d%ae%e5%88%86%e6%9e%90%e5%ae%9e%e8%b7%b5%e5%88%86%e8%a7%a3-%e6%95%b0","status":"publish","type":"post","link":"http:\/\/viplao.com\/index.php\/2025\/06\/28\/%e3%80%90%e5%ae%9e%e8%b7%b5%e7%bb%8f%e9%aa%8c%e3%80%91%e7%94%b5%e5%95%86%e5%b9%b3%e5%8f%b0%e9%94%80%e5%94%ae%e6%95%b0%e6%8d%ae%e5%88%86%e6%9e%90%e5%ae%9e%e8%b7%b5%e5%88%86%e8%a7%a3-%e6%95%b0\/","title":{"rendered":"\u3010Python10\u5e74\u7ecf\u9a8c\u603b\u7ed3\u3011\u7b2c\u4e8c\u8bfe \u7535\u5546\u5e73\u53f0\u9500\u552e\u6570\u636e\u5206\u6790\u5b9e\u8df5\u5206\u89e3 &#8211; \u6570\u636e\u6e05\u6d17\uff08Data Cleaning\uff09"},"content":{"rendered":"\n<p>\u4e0b\u9762\u6211\u4eec\u5c06\u5c55\u793a\u5982\u4f55\u5904\u7406\u8fd9\u4e9b\u5e38\u89c1\u7684\u6570\u636e\u6e05\u6d17\u4efb\u52a1\u3002\u5047\u8bbe\u6211\u4eec\u6709\u4e00\u4e2a\u5305\u542b\u9500\u552e\u8ba2\u5355\u6570\u636e\u7684DataFrame\uff0c\u5e76\u9010\u6b65\u5e94\u7528\u8fd9\u4e9b\u6e05\u6d17\u64cd\u4f5c\u3002<\/p>\n\n\n\n<p>\u5e38\u89c1\u7684\u5206\u6790\u573a\u666f\uff1a<\/p>\n\n\n\n<p>\u5904\u7406\u7f3a\u5931\u503c\u5e76\u5220\u9664\u65e0\u6548\u8ba2\u5355<br>\u53bb\u9664\u91cd\u590d\u8ba2\u5355\u8bb0\u5f55<br>\u6807\u51c6\u5316\u65f6\u95f4\u683c\u5f0f\uff08\u5982\u201c2025-06-01\u201d\uff09<br>\u6e05\u6d17\u975e\u6807\u51c6\u91d1\u989d\u5b57\u6bb5\uff08\u5982\u5305\u542b\u8d27\u5e01\u7b26\u53f7\uff09<br>\u77eb\u6b63\u9519\u8bef\u7684\u4ea7\u54c1ID\u6216\u54c1\u7c7b\u7f16\u7801<br>\u586b\u5145\u7a7a\u503c\uff08\u4f7f\u7528\u5747\u503c\u3001\u4f17\u6570\u6216\u5411\u524d\u586b\u5145\uff09<br>\u8fc7\u6ee4\u6389\u6d4b\u8bd5\u8ba2\u5355\u6216\u5f02\u5e38\u7528\u6237\u6570\u636e<br>\u5bf9\u6570\u503c\u578b\u5b57\u6bb5\u8fdb\u884c\u7
c7b\u578b\u8f6c\u6362\uff08str \u2192 float\uff09<br>\u62c6\u5206\u5730\u5740\u5b57\u6bb5\u4e3a\u7701\u5e02\u533a\u4e09\u7ea7\u7ef4\u5ea6<br>\u5220\u9664\u65e0\u610f\u4e49\u5b57\u6bb5\uff08\u5982\u4e34\u65f6\u8c03\u8bd5\u5217\uff09<\/p>\n\n\n\n<p><\/p>\n\n\n\n<p>\u9996\u5148\uff0c\u8ba9\u6211\u4eec\u521b\u5efa\u4e00\u4e2a\u793a\u4f8bDataFrame\u6765\u6a21\u62df\u539f\u59cb\u6570\u636e\uff1a<\/p>\n\n\n\n<pre class=\"wp-block-code\"><code>import pandas as pd\nimport numpy as np\n\n# \u521b\u5efa\u793a\u4f8bDataFrame\ndata = {\n    'order_id': &#91;1, 2, 3, 4, 5, 6, 7, 8],\n    'product_id': &#91;'A001', 'B002', None, 'D004', 'E005', 'F006', 'G007', 'H008'],\n    'category_code': &#91;'C1', 'C2', 'C1', 'C3', 'C4', 'C5', 'C6', 'C7'],\n    'amount': &#91;'$100.00', '$200.00', '$300.00', None, '$500.00', '$600.00', '$700.00', '$800.00'],\n    'order_date': &#91;'2025-06-01', '2025-06-02', '2025-06-03', '2025-06-04', '2025-06-05', '2025-06-06', '2025-06-07', '2025-06-08'],\n    'customer_id': &#91;101, 102, 103, 104, 105, 106, 107, 108],\n    'address': &#91;'Beijing, China', 'Shanghai, China', 'Guangzhou, China', 'Shenzhen, China', 'Hangzhou, China', 'Chengdu, China', 'Nanjing, China', 'Wuhan, China'],\n    'test_order': &#91;False, True, False, False, False, False, False, False],\n    'debug_col': &#91;None, None, None, None, None, None, None, None]\n}\n\ndf = pd.DataFrame(data)\nprint(\"\u539f\u59cb\u6570\u636e:\")\nprint(df)<\/code><\/pre>\n\n\n\n<p>\u63a5\u4e0b\u6765\uff0c\u6211\u4eec\u5c06\u9010\u4e2a\u5904\u7406\u4e0a\u8ff0\u63d0\u5230\u7684\u6570\u636e\u6e05\u6d17\u4efb\u52a1\u3002<\/p>\n\n\n\n<div id=\"ez-toc-container\" class=\"ez-toc-v2_0_71 counter-hierarchy ez-toc-counter ez-toc-grey ez-toc-container-direction\">\n<div class=\"ez-toc-title-container\">\n<p class=\"ez-toc-title\" style=\"cursor:inherit\">\u6587\u7ae0\u76ee\u5f55<\/p>\n<span class=\"ez-toc-title-toggle\"><a href=\"#\" class=\"ez-toc-pull-right ez-toc-btn ez-toc-btn-xs ez-toc-btn-default 
ez-toc-toggle\" aria-label=\"Toggle Table of Content\"><span class=\"ez-toc-js-icon-con\"><span class=\"\"><span class=\"eztoc-hide\" style=\"display:none;\">Toggle<\/span><span class=\"ez-toc-icon-toggle-span\"><svg style=\"fill: #999;color:#999\" xmlns=\"http:\/\/www.w3.org\/2000\/svg\" class=\"list-377408\" width=\"20px\" height=\"20px\" viewBox=\"0 0 24 24\" fill=\"none\"><path d=\"M6 6H4v2h2V6zm14 0H8v2h12V6zM4 11h2v2H4v-2zm16 0H8v2h12v-2zM4 16h2v2H4v-2zm16 0H8v2h12v-2z\" fill=\"currentColor\"><\/path><\/svg><svg style=\"fill: #999;color:#999\" class=\"arrow-unsorted-368013\" xmlns=\"http:\/\/www.w3.org\/2000\/svg\" width=\"10px\" height=\"10px\" viewBox=\"0 0 24 24\" version=\"1.2\" baseProfile=\"tiny\"><path d=\"M18.2 9.3l-6.2-6.3-6.2 6.3c-.2.2-.3.4-.3.7s.1.5.3.7c.2.2.4.3.7.3h11c.3 0 .5-.1.7-.3.2-.2.3-.5.3-.7s-.1-.5-.3-.7zM5.8 14.7l6.2 6.3 6.2-6.3c.2-.2.3-.5.3-.7s-.1-.5-.3-.7c-.2-.2-.4-.3-.7-.3h-11c-.3 0-.5.1-.7.3-.2.2-.3.5-.3.7s.1.5.3.7z\"\/><\/svg><\/span><\/span><\/span><\/a><\/span><\/div>\n<nav><ul class='ez-toc-list ez-toc-list-level-1 eztoc-toggle-hide-by-default' ><li class='ez-toc-page-1 ez-toc-heading-level-3'><a class=\"ez-toc-link ez-toc-heading-1\" href=\"http:\/\/viplao.com\/index.php\/2025\/06\/28\/%e3%80%90%e5%ae%9e%e8%b7%b5%e7%bb%8f%e9%aa%8c%e3%80%91%e7%94%b5%e5%95%86%e5%b9%b3%e5%8f%b0%e9%94%80%e5%94%ae%e6%95%b0%e6%8d%ae%e5%88%86%e6%9e%90%e5%ae%9e%e8%b7%b5%e5%88%86%e8%a7%a3-%e6%95%b0\/#1_%E5%A4%84%E7%90%86%E7%BC%BA%E5%A4%B1%E5%80%BC%E5%B9%B6%E5%88%A0%E9%99%A4%E6%97%A0%E6%95%88%E8%AE%A2%E5%8D%95\" title=\"1. \u5904\u7406\u7f3a\u5931\u503c\u5e76\u5220\u9664\u65e0\u6548\u8ba2\u5355\">1. 
\u5904\u7406\u7f3a\u5931\u503c\u5e76\u5220\u9664\u65e0\u6548\u8ba2\u5355<\/a><\/li><li class='ez-toc-page-1 ez-toc-heading-level-3'><a class=\"ez-toc-link ez-toc-heading-2\" href=\"http:\/\/viplao.com\/index.php\/2025\/06\/28\/%e3%80%90%e5%ae%9e%e8%b7%b5%e7%bb%8f%e9%aa%8c%e3%80%91%e7%94%b5%e5%95%86%e5%b9%b3%e5%8f%b0%e9%94%80%e5%94%ae%e6%95%b0%e6%8d%ae%e5%88%86%e6%9e%90%e5%ae%9e%e8%b7%b5%e5%88%86%e8%a7%a3-%e6%95%b0\/#2_%E5%8E%BB%E9%99%A4%E9%87%8D%E5%A4%8D%E8%AE%A2%E5%8D%95%E8%AE%B0%E5%BD%95\" title=\"2. \u53bb\u9664\u91cd\u590d\u8ba2\u5355\u8bb0\u5f55\">2. \u53bb\u9664\u91cd\u590d\u8ba2\u5355\u8bb0\u5f55<\/a><\/li><li class='ez-toc-page-1 ez-toc-heading-level-3'><a class=\"ez-toc-link ez-toc-heading-3\" href=\"http:\/\/viplao.com\/index.php\/2025\/06\/28\/%e3%80%90%e5%ae%9e%e8%b7%b5%e7%bb%8f%e9%aa%8c%e3%80%91%e7%94%b5%e5%95%86%e5%b9%b3%e5%8f%b0%e9%94%80%e5%94%ae%e6%95%b0%e6%8d%ae%e5%88%86%e6%9e%90%e5%ae%9e%e8%b7%b5%e5%88%86%e8%a7%a3-%e6%95%b0\/#3_%E6%A0%87%E5%87%86%E5%8C%96%E6%97%B6%E9%97%B4%E6%A0%BC%E5%BC%8F%EF%BC%88%E5%A6%82%E2%80%9C2025-06-01%E2%80%9D%EF%BC%89\" title=\"3. \u6807\u51c6\u5316\u65f6\u95f4\u683c\u5f0f\uff08\u5982\u201c2025-06-01\u201d\uff09\">3. \u6807\u51c6\u5316\u65f6\u95f4\u683c\u5f0f\uff08\u5982\u201c2025-06-01\u201d\uff09<\/a><\/li><li class='ez-toc-page-1 ez-toc-heading-level-3'><a class=\"ez-toc-link ez-toc-heading-4\" href=\"http:\/\/viplao.com\/index.php\/2025\/06\/28\/%e3%80%90%e5%ae%9e%e8%b7%b5%e7%bb%8f%e9%aa%8c%e3%80%91%e7%94%b5%e5%95%86%e5%b9%b3%e5%8f%b0%e9%94%80%e5%94%ae%e6%95%b0%e6%8d%ae%e5%88%86%e6%9e%90%e5%ae%9e%e8%b7%b5%e5%88%86%e8%a7%a3-%e6%95%b0\/#4_%E6%B8%85%E6%B4%97%E9%9D%9E%E6%A0%87%E5%87%86%E9%87%91%E9%A2%9D%E5%AD%97%E6%AE%B5%EF%BC%88%E5%A6%82%E5%8C%85%E5%90%AB%E8%B4%A7%E5%B8%81%E7%AC%A6%E5%8F%B7%EF%BC%89\" title=\"4. \u6e05\u6d17\u975e\u6807\u51c6\u91d1\u989d\u5b57\u6bb5\uff08\u5982\u5305\u542b\u8d27\u5e01\u7b26\u53f7\uff09\">4. 
\u6e05\u6d17\u975e\u6807\u51c6\u91d1\u989d\u5b57\u6bb5\uff08\u5982\u5305\u542b\u8d27\u5e01\u7b26\u53f7\uff09<\/a><\/li><li class='ez-toc-page-1 ez-toc-heading-level-3'><a class=\"ez-toc-link ez-toc-heading-5\" href=\"http:\/\/viplao.com\/index.php\/2025\/06\/28\/%e3%80%90%e5%ae%9e%e8%b7%b5%e7%bb%8f%e9%aa%8c%e3%80%91%e7%94%b5%e5%95%86%e5%b9%b3%e5%8f%b0%e9%94%80%e5%94%ae%e6%95%b0%e6%8d%ae%e5%88%86%e6%9e%90%e5%ae%9e%e8%b7%b5%e5%88%86%e8%a7%a3-%e6%95%b0\/#5_%E7%9F%AB%E6%AD%A3%E9%94%99%E8%AF%AF%E7%9A%84%E4%BA%A7%E5%93%81ID%E6%88%96%E5%93%81%E7%B1%BB%E7%BC%96%E7%A0%81\" title=\"5. \u77eb\u6b63\u9519\u8bef\u7684\u4ea7\u54c1ID\u6216\u54c1\u7c7b\u7f16\u7801\">5. \u77eb\u6b63\u9519\u8bef\u7684\u4ea7\u54c1ID\u6216\u54c1\u7c7b\u7f16\u7801<\/a><\/li><li class='ez-toc-page-1 ez-toc-heading-level-3'><a class=\"ez-toc-link ez-toc-heading-6\" href=\"http:\/\/viplao.com\/index.php\/2025\/06\/28\/%e3%80%90%e5%ae%9e%e8%b7%b5%e7%bb%8f%e9%aa%8c%e3%80%91%e7%94%b5%e5%95%86%e5%b9%b3%e5%8f%b0%e9%94%80%e5%94%ae%e6%95%b0%e6%8d%ae%e5%88%86%e6%9e%90%e5%ae%9e%e8%b7%b5%e5%88%86%e8%a7%a3-%e6%95%b0\/#6_%E5%A1%AB%E5%85%85%E7%A9%BA%E5%80%BC%EF%BC%88%E4%BD%BF%E7%94%A8%E5%9D%87%E5%80%BC%E3%80%81%E4%BC%97%E6%95%B0%E6%88%96%E5%90%91%E5%89%8D%E5%A1%AB%E5%85%85%EF%BC%89\" title=\"6. \u586b\u5145\u7a7a\u503c\uff08\u4f7f\u7528\u5747\u503c\u3001\u4f17\u6570\u6216\u5411\u524d\u586b\u5145\uff09\">6. \u586b\u5145\u7a7a\u503c\uff08\u4f7f\u7528\u5747\u503c\u3001\u4f17\u6570\u6216\u5411\u524d\u586b\u5145\uff09<\/a><\/li><li class='ez-toc-page-1 ez-toc-heading-level-3'><a class=\"ez-toc-link ez-toc-heading-7\" href=\"http:\/\/viplao.com\/index.php\/2025\/06\/28\/%e3%80%90%e5%ae%9e%e8%b7%b5%e7%bb%8f%e9%aa%8c%e3%80%91%e7%94%b5%e5%95%86%e5%b9%b3%e5%8f%b0%e9%94%80%e5%94%ae%e6%95%b0%e6%8d%ae%e5%88%86%e6%9e%90%e5%ae%9e%e8%b7%b5%e5%88%86%e8%a7%a3-%e6%95%b0\/#7_%E8%BF%87%E6%BB%A4%E6%8E%89%E6%B5%8B%E8%AF%95%E8%AE%A2%E5%8D%95%E6%88%96%E5%BC%82%E5%B8%B8%E7%94%A8%E6%88%B7%E6%95%B0%E6%8D%AE\" title=\"7. 
\u8fc7\u6ee4\u6389\u6d4b\u8bd5\u8ba2\u5355\u6216\u5f02\u5e38\u7528\u6237\u6570\u636e\">7. \u8fc7\u6ee4\u6389\u6d4b\u8bd5\u8ba2\u5355\u6216\u5f02\u5e38\u7528\u6237\u6570\u636e<\/a><\/li><li class='ez-toc-page-1 ez-toc-heading-level-3'><a class=\"ez-toc-link ez-toc-heading-8\" href=\"http:\/\/viplao.com\/index.php\/2025\/06\/28\/%e3%80%90%e5%ae%9e%e8%b7%b5%e7%bb%8f%e9%aa%8c%e3%80%91%e7%94%b5%e5%95%86%e5%b9%b3%e5%8f%b0%e9%94%80%e5%94%ae%e6%95%b0%e6%8d%ae%e5%88%86%e6%9e%90%e5%ae%9e%e8%b7%b5%e5%88%86%e8%a7%a3-%e6%95%b0\/#8_%E5%AF%B9%E6%95%B0%E5%80%BC%E5%9E%8B%E5%AD%97%E6%AE%B5%E8%BF%9B%E8%A1%8C%E7%B1%BB%E5%9E%8B%E8%BD%AC%E6%8D%A2%EF%BC%88str_%E2%86%92_float%EF%BC%89\" title=\"8. \u5bf9\u6570\u503c\u578b\u5b57\u6bb5\u8fdb\u884c\u7c7b\u578b\u8f6c\u6362\uff08str \u2192 float\uff09\">8. \u5bf9\u6570\u503c\u578b\u5b57\u6bb5\u8fdb\u884c\u7c7b\u578b\u8f6c\u6362\uff08str \u2192 float\uff09<\/a><\/li><li class='ez-toc-page-1 ez-toc-heading-level-3'><a class=\"ez-toc-link ez-toc-heading-9\" href=\"http:\/\/viplao.com\/index.php\/2025\/06\/28\/%e3%80%90%e5%ae%9e%e8%b7%b5%e7%bb%8f%e9%aa%8c%e3%80%91%e7%94%b5%e5%95%86%e5%b9%b3%e5%8f%b0%e9%94%80%e5%94%ae%e6%95%b0%e6%8d%ae%e5%88%86%e6%9e%90%e5%ae%9e%e8%b7%b5%e5%88%86%e8%a7%a3-%e6%95%b0\/#9_%E6%8B%86%E5%88%86%E5%9C%B0%E5%9D%80%E5%AD%97%E6%AE%B5%E4%B8%BA%E7%9C%81%E5%B8%82%E5%8C%BA%E4%B8%89%E7%BA%A7%E7%BB%B4%E5%BA%A6\" title=\"9. \u62c6\u5206\u5730\u5740\u5b57\u6bb5\u4e3a\u7701\u5e02\u533a\u4e09\u7ea7\u7ef4\u5ea6\">9. 
\u62c6\u5206\u5730\u5740\u5b57\u6bb5\u4e3a\u7701\u5e02\u533a\u4e09\u7ea7\u7ef4\u5ea6<\/a><\/li><li class='ez-toc-page-1 ez-toc-heading-level-3'><a class=\"ez-toc-link ez-toc-heading-10\" href=\"http:\/\/viplao.com\/index.php\/2025\/06\/28\/%e3%80%90%e5%ae%9e%e8%b7%b5%e7%bb%8f%e9%aa%8c%e3%80%91%e7%94%b5%e5%95%86%e5%b9%b3%e5%8f%b0%e9%94%80%e5%94%ae%e6%95%b0%e6%8d%ae%e5%88%86%e6%9e%90%e5%ae%9e%e8%b7%b5%e5%88%86%e8%a7%a3-%e6%95%b0\/#10_%E5%88%A0%E9%99%A4%E6%97%A0%E6%84%8F%E4%B9%89%E5%AD%97%E6%AE%B5%EF%BC%88%E5%A6%82%E4%B8%B4%E6%97%B6%E8%B0%83%E8%AF%95%E5%88%97%EF%BC%89\" title=\"10. \u5220\u9664\u65e0\u610f\u4e49\u5b57\u6bb5\uff08\u5982\u4e34\u65f6\u8c03\u8bd5\u5217\uff09\">10. \u5220\u9664\u65e0\u610f\u4e49\u5b57\u6bb5\uff08\u5982\u4e34\u65f6\u8c03\u8bd5\u5217\uff09<\/a><\/li><\/ul><\/nav><\/div>\n<h3 class=\"wp-block-heading\"><span class=\"ez-toc-section\" id=\"1_%E5%A4%84%E7%90%86%E7%BC%BA%E5%A4%B1%E5%80%BC%E5%B9%B6%E5%88%A0%E9%99%A4%E6%97%A0%E6%95%88%E8%AE%A2%E5%8D%95\"><\/span>1. \u5904\u7406\u7f3a\u5931\u503c\u5e76\u5220\u9664\u65e0\u6548\u8ba2\u5355<span class=\"ez-toc-section-end\"><\/span><\/h3>\n\n\n\n<p>\u5047\u8bbe<code>product_id<\/code>\u4e3a\u7a7a\u6216<code>amount<\/code>\u4e3a\u7a7a\u7684\u8ba2\u5355\u662f\u65e0\u6548\u7684\u3002<\/p>\n\n\n\n<pre class=\"wp-block-code\"><code># \u5220\u9664\u65e0\u6548\u8ba2\u5355\ndf_cleaned = df.dropna(subset=&#91;'product_id', 'amount'])\nprint(\"\\n\u5904\u7406\u7f3a\u5931\u503c\u540e:\")\nprint(df_cleaned)<\/code><\/pre>\n\n\n\n<h3 class=\"wp-block-heading\"><span class=\"ez-toc-section\" id=\"2_%E5%8E%BB%E9%99%A4%E9%87%8D%E5%A4%8D%E8%AE%A2%E5%8D%95%E8%AE%B0%E5%BD%95\"><\/span>2. 
\u53bb\u9664\u91cd\u590d\u8ba2\u5355\u8bb0\u5f55<span class=\"ez-toc-section-end\"><\/span><\/h3>\n\n\n\n<pre class=\"wp-block-code\"><code># \u53bb\u9664\u91cd\u590d\u8ba2\u5355\u8bb0\u5f55\ndf_cleaned = df_cleaned.drop_duplicates()\nprint(\"\\n\u53bb\u9664\u91cd\u590d\u8ba2\u5355\u8bb0\u5f55\u540e:\")\nprint(df_cleaned)<\/code><\/pre>\n\n\n\n<h3 class=\"wp-block-heading\"><span class=\"ez-toc-section\" id=\"3_%E6%A0%87%E5%87%86%E5%8C%96%E6%97%B6%E9%97%B4%E6%A0%BC%E5%BC%8F%EF%BC%88%E5%A6%82%E2%80%9C2025-06-01%E2%80%9D%EF%BC%89\"><\/span>3. \u6807\u51c6\u5316\u65f6\u95f4\u683c\u5f0f\uff08\u5982\u201c2025-06-01\u201d\uff09<span class=\"ez-toc-section-end\"><\/span><\/h3>\n\n\n\n<pre class=\"wp-block-code\"><code># \u8f6c\u6362order_date\u4e3a\u65e5\u671f\u683c\u5f0f\ndf_cleaned&#91;'order_date'] = pd.to_datetime(df_cleaned&#91;'order_date'], format='%Y-%m-%d')\nprint(\"\\n\u6807\u51c6\u5316\u65f6\u95f4\u683c\u5f0f\u540e:\")\nprint(df_cleaned)<\/code><\/pre>\n\n\n\n<h3 class=\"wp-block-heading\"><span class=\"ez-toc-section\" id=\"4_%E6%B8%85%E6%B4%97%E9%9D%9E%E6%A0%87%E5%87%86%E9%87%91%E9%A2%9D%E5%AD%97%E6%AE%B5%EF%BC%88%E5%A6%82%E5%8C%85%E5%90%AB%E8%B4%A7%E5%B8%81%E7%AC%A6%E5%8F%B7%EF%BC%89\"><\/span>4. \u6e05\u6d17\u975e\u6807\u51c6\u91d1\u989d\u5b57\u6bb5\uff08\u5982\u5305\u542b\u8d27\u5e01\u7b26\u53f7\uff09<span class=\"ez-toc-section-end\"><\/span><\/h3>\n\n\n\n<pre class=\"wp-block-code\"><code># \u79fb\u9664\u91d1\u989d\u4e2d\u7684\u7f8e\u5143\u7b26\u53f7\u5e76\u8f6c\u6362\u4e3a\u6d6e\u70b9\u6570\ndf_cleaned&#91;'amount'] = df_cleaned&#91;'amount'].str.replace('$', '').astype(float)\nprint(\"\\n\u6e05\u6d17\u975e\u6807\u51c6\u91d1\u989d\u5b57\u6bb5\u540e:\")\nprint(df_cleaned)<\/code><\/pre>\n\n\n\n<h3 class=\"wp-block-heading\"><span class=\"ez-toc-section\" id=\"5_%E7%9F%AB%E6%AD%A3%E9%94%99%E8%AF%AF%E7%9A%84%E4%BA%A7%E5%93%81ID%E6%88%96%E5%93%81%E7%B1%BB%E7%BC%96%E7%A0%81\"><\/span>5. 
\u77eb\u6b63\u9519\u8bef\u7684\u4ea7\u54c1ID\u6216\u54c1\u7c7b\u7f16\u7801<span class=\"ez-toc-section-end\"><\/span><\/h3>\n\n\n\n<p>\u5047\u8bbe\u6211\u4eec\u9700\u8981\u5c06\u6240\u6709\u4ea7\u54c1ID\u8f6c\u6362\u4e3a\u5927\u5199\u5f62\u5f0f\u3002<\/p>\n\n\n\n<pre class=\"wp-block-code\"><code># \u5c06product_id\u8f6c\u6362\u4e3a\u5927\u5199\ndf_cleaned&#91;'product_id'] = df_cleaned&#91;'product_id'].str.upper()\nprint(\"\\n\u77eb\u6b63\u9519\u8bef\u7684\u4ea7\u54c1ID\u540e:\")\nprint(df_cleaned)<\/code><\/pre>\n\n\n\n<h3 class=\"wp-block-heading\"><span class=\"ez-toc-section\" id=\"6_%E5%A1%AB%E5%85%85%E7%A9%BA%E5%80%BC%EF%BC%88%E4%BD%BF%E7%94%A8%E5%9D%87%E5%80%BC%E3%80%81%E4%BC%97%E6%95%B0%E6%88%96%E5%90%91%E5%89%8D%E5%A1%AB%E5%85%85%EF%BC%89\"><\/span>6. \u586b\u5145\u7a7a\u503c\uff08\u4f7f\u7528\u5747\u503c\u3001\u4f17\u6570\u6216\u5411\u524d\u586b\u5145\uff09<span class=\"ez-toc-section-end\"><\/span><\/h3>\n\n\n\n<p>\u8fd9\u91cc\u6211\u4eec\u7528\u5747\u503c\u586b\u5145<code>amount<\/code>\u5217\u7684\u7a7a\u503c\u3002<\/p>\n\n\n\n<pre class=\"wp-block-code\"><code># \u4f7f\u7528\u5747\u503c\u586b\u5145amount\u5217\u7684\u7a7a\u503c\ndf_cleaned&#91;'amount'].fillna(df_cleaned&#91;'amount'].mean(), inplace=True)\nprint(\"\\n\u586b\u5145\u7a7a\u503c\u540e:\")\nprint(df_cleaned)<\/code><\/pre>\n\n\n\n<h3 class=\"wp-block-heading\"><span class=\"ez-toc-section\" id=\"7_%E8%BF%87%E6%BB%A4%E6%8E%89%E6%B5%8B%E8%AF%95%E8%AE%A2%E5%8D%95%E6%88%96%E5%BC%82%E5%B8%B8%E7%94%A8%E6%88%B7%E6%95%B0%E6%8D%AE\"><\/span>7. 
\u8fc7\u6ee4\u6389\u6d4b\u8bd5\u8ba2\u5355\u6216\u5f02\u5e38\u7528\u6237\u6570\u636e<span class=\"ez-toc-section-end\"><\/span><\/h3>\n\n\n\n<p>\u5047\u8bbe\u6211\u4eec\u8981\u8fc7\u6ee4\u6389<code>test_order<\/code>\u4e3aTrue\u7684\u8ba2\u5355\u3002<\/p>\n\n\n\n<pre class=\"wp-block-code\"><code># \u8fc7\u6ee4\u6389\u6d4b\u8bd5\u8ba2\u5355\ndf_cleaned = df_cleaned&#91;df_cleaned&#91;'test_order'] == False]\nprint(\"\\n\u8fc7\u6ee4\u6389\u6d4b\u8bd5\u8ba2\u5355\u540e:\")\nprint(df_cleaned)<\/code><\/pre>\n\n\n\n<h3 class=\"wp-block-heading\"><span class=\"ez-toc-section\" id=\"8_%E5%AF%B9%E6%95%B0%E5%80%BC%E5%9E%8B%E5%AD%97%E6%AE%B5%E8%BF%9B%E8%A1%8C%E7%B1%BB%E5%9E%8B%E8%BD%AC%E6%8D%A2%EF%BC%88str_%E2%86%92_float%EF%BC%89\"><\/span>8. \u5bf9\u6570\u503c\u578b\u5b57\u6bb5\u8fdb\u884c\u7c7b\u578b\u8f6c\u6362\uff08str \u2192 float\uff09<span class=\"ez-toc-section-end\"><\/span><\/h3>\n\n\n\n<p>\u5728\u8fd9\u4e2a\u4f8b\u5b50\u4e2d\uff0c<code>amount<\/code>\u5df2\u7ecf\u88ab\u8f6c\u6362\u4e3afloat\uff0c\u6240\u4ee5\u8fd9\u4e00\u6b65\u5df2\u7ecf\u5728\u524d\u9762\u5b8c\u6210\u4e86\u3002<\/p>\n\n\n\n<h3 class=\"wp-block-heading\"><span class=\"ez-toc-section\" id=\"9_%E6%8B%86%E5%88%86%E5%9C%B0%E5%9D%80%E5%AD%97%E6%AE%B5%E4%B8%BA%E7%9C%81%E5%B8%82%E5%8C%BA%E4%B8%89%E7%BA%A7%E7%BB%B4%E5%BA%A6\"><\/span>9. 
\u62c6\u5206\u5730\u5740\u5b57\u6bb5\u4e3a\u7701\u5e02\u533a\u4e09\u7ea7\u7ef4\u5ea6<span class=\"ez-toc-section-end\"><\/span><\/h3>\n\n\n\n<p>\u5047\u8bbe\u5730\u5740\u683c\u5f0f\u4e3a\u201c\u57ce\u5e02, \u56fd\u5bb6\u201d\uff0c\u6211\u4eec\u53ef\u4ee5\u62c6\u5206\u4e3a\u4e24\u4e2a\u65b0\u5217\u3002<\/p>\n\n\n\n<pre class=\"wp-block-code\"><code># \u62c6\u5206address\u5b57\u6bb5\u4e3acity\u548ccountry\ndf_cleaned&#91;&#91;'city', 'country']] = df_cleaned&#91;'address'].str.split(', ', expand=True)\nprint(\"\\n\u62c6\u5206\u5730\u5740\u5b57\u6bb5\u540e:\")\nprint(df_cleaned)<\/code><\/pre>\n\n\n\n<h3 class=\"wp-block-heading\"><span class=\"ez-toc-section\" id=\"10_%E5%88%A0%E9%99%A4%E6%97%A0%E6%84%8F%E4%B9%89%E5%AD%97%E6%AE%B5%EF%BC%88%E5%A6%82%E4%B8%B4%E6%97%B6%E8%B0%83%E8%AF%95%E5%88%97%EF%BC%89\"><\/span>10. \u5220\u9664\u65e0\u610f\u4e49\u5b57\u6bb5\uff08\u5982\u4e34\u65f6\u8c03\u8bd5\u5217\uff09<span class=\"ez-toc-section-end\"><\/span><\/h3>\n\n\n\n<pre class=\"wp-block-code\"><code># \u5220\u9664\u65e0\u610f\u4e49\u5b57\u6bb5\ndf_cleaned = df_cleaned.drop(columns=&#91;'debug_col'])\nprint(\"\\n\u5220\u9664\u65e0\u610f\u4e49\u5b57\u6bb5\u540e:\")\nprint(df_cleaned)<\/code><\/pre>\n\n\n\n<p>\u7efc\u5408\u4ee5\u4e0a\u6b65\u9aa4\uff0c\u5b8c\u6574\u7684\u6e05\u6d17\u4ee3\u7801\u5982\u4e0b\uff1a<\/p>\n\n\n\n<p>\u8fd9\u6bb5\u4ee3\u7801\u5c55\u793a\u4e86\u4ece\u539f\u59cb\u6570\u636e\u5230\u7ecf\u8fc7\u5168\u9762\u6e05\u6d17\u7684\u6570\u636e\u7684\u8fc7\u7a0b\u3002\u4f60\u53ef\u4ee5\u6839\u636e\u5b9e\u9645\u9700\u6c42\u8c03\u6574\u6bcf\u4e00\u6b65\u7684\u64cd\u4f5c\u3002<\/p>\n\n\n\n<pre class=\"wp-block-code\"><code>import pandas as pd\nimport numpy as np\n\n# \u521b\u5efa\u793a\u4f8bDataFrame\ndata = {\n    'order_id': &#91;1, 2, 3, 4, 5, 6, 7, 8],\n    'product_id': &#91;'A001', 'B002', None, 'D004', 'E005', 'F006', 'G007', 'H008'],\n    'category_code': &#91;'C1', 'C2', 'C1', 'C3', 'C4', 'C5', 'C6', 'C7'],\n    'amount': &#91;'$100.00', '$200.00', 
'$300.00', None, '$500.00', '$600.00', '$700.00', '$800.00'],\n    'order_date': &#91;'2025-06-01', '2025-06-02', '2025-06-03', '2025-06-04', '2025-06-05', '2025-06-06', '2025-06-07', '2025-06-08'],\n    'customer_id': &#91;101, 102, 103, 104, 105, 106, 107, 108],\n    'address': &#91;'Beijing, China', 'Shanghai, China', 'Guangzhou, China', 'Shenzhen, China', 'Hangzhou, China', 'Chengdu, China', 'Nanjing, China', 'Wuhan, China'],\n    'test_order': &#91;False, True, False, False, False, False, False, False],\n    'debug_col': &#91;None, None, None, None, None, None, None, None]\n}\n\ndf = pd.DataFrame(data)\n\n# \u5220\u9664\u65e0\u6548\u8ba2\u5355\ndf_cleaned = df.dropna(subset=&#91;'product_id', 'amount'])\n\n# \u53bb\u9664\u91cd\u590d\u8ba2\u5355\u8bb0\u5f55\ndf_cleaned = df_cleaned.drop_duplicates()\n\n# \u8f6c\u6362order_date\u4e3a\u65e5\u671f\u683c\u5f0f\ndf_cleaned&#91;'order_date'] = pd.to_datetime(df_cleaned&#91;'order_date'], format='%Y-%m-%d')\n\n# \u79fb\u9664\u91d1\u989d\u4e2d\u7684\u7f8e\u5143\u7b26\u53f7\u5e76\u8f6c\u6362\u4e3a\u6d6e\u70b9\u6570\ndf_cleaned&#91;'amount'] = df_cleaned&#91;'amount'].str.replace('$', '').astype(float)\n\n# \u5c06product_id\u8f6c\u6362\u4e3a\u5927\u5199\ndf_cleaned&#91;'product_id'] = df_cleaned&#91;'product_id'].str.upper()\n\n# \u4f7f\u7528\u5747\u503c\u586b\u5145amount\u5217\u7684\u7a7a\u503c\ndf_cleaned&#91;'amount'].fillna(df_cleaned&#91;'amount'].mean(), inplace=True)\n\n# \u8fc7\u6ee4\u6389\u6d4b\u8bd5\u8ba2\u5355\ndf_cleaned = df_cleaned&#91;df_cleaned&#91;'test_order'] == False]\n\n# \u62c6\u5206address\u5b57\u6bb5\u4e3acity\u548ccountry\ndf_cleaned&#91;&#91;'city', 'country']] = df_cleaned&#91;'address'].str.split(', ', expand=True)\n\n# \u5220\u9664\u65e0\u610f\u4e49\u5b57\u6bb5\ndf_cleaned = df_cleaned.drop(columns=&#91;'debug_col', 'address', 
'test_order'])\n\nprint(\"\u6700\u7ec8\u6e05\u7406\u540e\u7684\u6570\u636e:\")\nprint(df_cleaned)\n\n\n\n<\/code><\/pre>\n","protected":false},"excerpt":{"rendered":"<p>\u4e0b\u9762\u6211\u4eec\u5c06\u5c55\u793a\u5982\u4f55\u5904\u7406\u8fd9\u4e9b\u5e38\u89c1\u7684\u6570\u636e\u6e05\u6d17\u4efb\u52a1\u3002\u5047\u8bbe\u6211\u4eec\u6709\u4e00\u4e2a\u5305\u542b\u9500\u552e\u8ba2\u5355\u6570\u636e\u7684DataFrame\uff0c\u5e76&hellip; <a href=\"http:\/\/viplao.com\/index.php\/2025\/06\/28\/%e3%80%90%e5%ae%9e%e8%b7%b5%e7%bb%8f%e9%aa%8c%e3%80%91%e7%94%b5%e5%95%86%e5%b9%b3%e5%8f%b0%e9%94%80%e5%94%ae%e6%95%b0%e6%8d%ae%e5%88%86%e6%9e%90%e5%ae%9e%e8%b7%b5%e5%88%86%e8%a7%a3-%e6%95%b0\/\" class=\"more-link read-more\" rel=\"bookmark\">\u7ee7\u7eed\u9605\u8bfb <span class=\"screen-reader-text\">\u3010Python10\u5e74\u7ecf\u9a8c\u603b\u7ed3\u3011\u7b2c\u4e8c\u8bfe \u7535\u5546\u5e73\u53f0\u9500\u552e\u6570\u636e\u5206\u6790\u5b9e\u8df5\u5206\u89e3 &#8211; \u6570\u636e\u6e05\u6d17\uff08Data Cleaning\uff09<\/span><i class=\"fa 
fa-arrow-right\"><\/i><\/a><\/p>\n","protected":false},"author":1,"featured_media":0,"comment_status":"closed","ping_status":"open","sticky":false,"template":"","format":"standard","meta":[],"categories":[1],"tags":[28],"views":859,"_links":{"self":[{"href":"http:\/\/viplao.com\/index.php\/wp-json\/wp\/v2\/posts\/3533"}],"collection":[{"href":"http:\/\/viplao.com\/index.php\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"http:\/\/viplao.com\/index.php\/wp-json\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"http:\/\/viplao.com\/index.php\/wp-json\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"http:\/\/viplao.com\/index.php\/wp-json\/wp\/v2\/comments?post=3533"}],"version-history":[{"count":3,"href":"http:\/\/viplao.com\/index.php\/wp-json\/wp\/v2\/posts\/3533\/revisions"}],"predecessor-version":[{"id":3560,"href":"http:\/\/viplao.com\/index.php\/wp-json\/wp\/v2\/posts\/3533\/revisions\/3560"}],"wp:attachment":[{"href":"http:\/\/viplao.com\/index.php\/wp-json\/wp\/v2\/media?parent=3533"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"http:\/\/viplao.com\/index.php\/wp-json\/wp\/v2\/categories?post=3533"},{"taxonomy":"post_tag","embeddable":true,"href":"http:\/\/viplao.com\/index.php\/wp-json\/wp\/v2\/tags?post=3533"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}