{"id":2897,"date":"2024-11-16T20:37:19","date_gmt":"2024-11-16T12:37:19","guid":{"rendered":"http:\/\/viplao.com\/?p=2897"},"modified":"2024-11-16T20:37:25","modified_gmt":"2024-11-16T12:37:25","slug":"python%e5%9f%ba%e7%a1%80%e6%8a%80%e8%83%bd-23%e4%b8%aapython%e5%9c%a8%e8%87%aa%e7%84%b6%e8%af%ad%e8%a8%80%e5%a4%84%e7%90%86%e4%b8%ad%e7%9a%84%e5%ba%94%e7%94%a8%e5%ae%9e%e4%be%8b","status":"publish","type":"post","link":"http:\/\/viplao.com\/index.php\/2024\/11\/16\/python%e5%9f%ba%e7%a1%80%e6%8a%80%e8%83%bd-23%e4%b8%aapython%e5%9c%a8%e8%87%aa%e7%84%b6%e8%af%ad%e8%a8%80%e5%a4%84%e7%90%86%e4%b8%ad%e7%9a%84%e5%ba%94%e7%94%a8%e5%ae%9e%e4%be%8b\/","title":{"rendered":"PYTHON\u57fa\u7840\u6280\u80fd \u2013\u00a023\u4e2aPython\u5728\u81ea\u7136\u8bed\u8a00\u5904\u7406\u4e2d\u7684\u5e94\u7528\u5b9e\u4f8b"},"content":{"rendered":"\n<div id=\"ez-toc-container\" class=\"ez-toc-v2_0_71 counter-hierarchy ez-toc-counter ez-toc-grey ez-toc-container-direction\">\n<div class=\"ez-toc-title-container\">\n<p class=\"ez-toc-title\" style=\"cursor:inherit\">\u6587\u7ae0\u76ee\u5f55<\/p>\n<span class=\"ez-toc-title-toggle\"><a href=\"#\" class=\"ez-toc-pull-right ez-toc-btn ez-toc-btn-xs ez-toc-btn-default ez-toc-toggle\" aria-label=\"Toggle Table of Content\"><span class=\"ez-toc-js-icon-con\"><span class=\"\"><span class=\"eztoc-hide\" style=\"display:none;\">Toggle<\/span><span class=\"ez-toc-icon-toggle-span\"><svg style=\"fill: #999;color:#999\" xmlns=\"http:\/\/www.w3.org\/2000\/svg\" class=\"list-377408\" width=\"20px\" height=\"20px\" viewBox=\"0 0 24 24\" fill=\"none\"><path d=\"M6 6H4v2h2V6zm14 0H8v2h12V6zM4 11h2v2H4v-2zm16 0H8v2h12v-2zM4 16h2v2H4v-2zm16 0H8v2h12v-2z\" fill=\"currentColor\"><\/path><\/svg><svg style=\"fill: #999;color:#999\" class=\"arrow-unsorted-368013\" xmlns=\"http:\/\/www.w3.org\/2000\/svg\" width=\"10px\" height=\"10px\" viewBox=\"0 0 24 24\" version=\"1.2\" baseProfile=\"tiny\"><path d=\"M18.2 9.3l-6.2-6.3-6.2 
6.3c-.2.2-.3.4-.3.7s.1.5.3.7c.2.2.4.3.7.3h11c.3 0 .5-.1.7-.3.2-.2.3-.5.3-.7s-.1-.5-.3-.7zM5.8 14.7l6.2 6.3 6.2-6.3c.2-.2.3-.5.3-.7s-.1-.5-.3-.7c-.2-.2-.4-.3-.7-.3h-11c-.3 0-.5.1-.7.3-.2.2-.3.5-.3.7s.1.5.3.7z\"\/><\/svg><\/span><\/span><\/span><\/a><\/span><\/div>\n<nav><ul class='ez-toc-list ez-toc-list-level-1 eztoc-toggle-hide-by-default' ><li class='ez-toc-page-1 ez-toc-heading-level-2'><a class=\"ez-toc-link ez-toc-heading-1\" href=\"http:\/\/viplao.com\/index.php\/2024\/11\/16\/python%e5%9f%ba%e7%a1%80%e6%8a%80%e8%83%bd-23%e4%b8%aapython%e5%9c%a8%e8%87%aa%e7%84%b6%e8%af%ad%e8%a8%80%e5%a4%84%e7%90%86%e4%b8%ad%e7%9a%84%e5%ba%94%e7%94%a8%e5%ae%9e%e4%be%8b\/#1_%E6%96%87%E6%9C%AC%E6%B8%85%E6%B4%97\" title=\"1. \u6587\u672c\u6e05\u6d17\">1. \u6587\u672c\u6e05\u6d17<\/a><\/li><li class='ez-toc-page-1 ez-toc-heading-level-2'><a class=\"ez-toc-link ez-toc-heading-2\" href=\"http:\/\/viplao.com\/index.php\/2024\/11\/16\/python%e5%9f%ba%e7%a1%80%e6%8a%80%e8%83%bd-23%e4%b8%aapython%e5%9c%a8%e8%87%aa%e7%84%b6%e8%af%ad%e8%a8%80%e5%a4%84%e7%90%86%e4%b8%ad%e7%9a%84%e5%ba%94%e7%94%a8%e5%ae%9e%e4%be%8b\/#2_%E5%88%86%E8%AF%8D\" title=\"2. \u5206\u8bcd\">2. \u5206\u8bcd<\/a><\/li><li class='ez-toc-page-1 ez-toc-heading-level-2'><a class=\"ez-toc-link ez-toc-heading-3\" href=\"http:\/\/viplao.com\/index.php\/2024\/11\/16\/python%e5%9f%ba%e7%a1%80%e6%8a%80%e8%83%bd-23%e4%b8%aapython%e5%9c%a8%e8%87%aa%e7%84%b6%e8%af%ad%e8%a8%80%e5%a4%84%e7%90%86%e4%b8%ad%e7%9a%84%e5%ba%94%e7%94%a8%e5%ae%9e%e4%be%8b\/#3_%E5%8E%BB%E9%99%A4%E5%81%9C%E7%94%A8%E8%AF%8D\" title=\"3. \u53bb\u9664\u505c\u7528\u8bcd\">3. 
\u53bb\u9664\u505c\u7528\u8bcd<\/a><\/li><li class='ez-toc-page-1 ez-toc-heading-level-2'><a class=\"ez-toc-link ez-toc-heading-4\" href=\"http:\/\/viplao.com\/index.php\/2024\/11\/16\/python%e5%9f%ba%e7%a1%80%e6%8a%80%e8%83%bd-23%e4%b8%aapython%e5%9c%a8%e8%87%aa%e7%84%b6%e8%af%ad%e8%a8%80%e5%a4%84%e7%90%86%e4%b8%ad%e7%9a%84%e5%ba%94%e7%94%a8%e5%ae%9e%e4%be%8b\/#4_%E8%AF%8D%E5%B9%B2%E6%8F%90%E5%8F%96\" title=\"4. \u8bcd\u5e72\u63d0\u53d6\">4. \u8bcd\u5e72\u63d0\u53d6<\/a><\/li><li class='ez-toc-page-1 ez-toc-heading-level-2'><a class=\"ez-toc-link ez-toc-heading-5\" href=\"http:\/\/viplao.com\/index.php\/2024\/11\/16\/python%e5%9f%ba%e7%a1%80%e6%8a%80%e8%83%bd-23%e4%b8%aapython%e5%9c%a8%e8%87%aa%e7%84%b6%e8%af%ad%e8%a8%80%e5%a4%84%e7%90%86%e4%b8%ad%e7%9a%84%e5%ba%94%e7%94%a8%e5%ae%9e%e4%be%8b\/#5_%E8%AF%8D%E5%BD%A2%E8%BF%98%E5%8E%9F\" title=\"5. \u8bcd\u5f62\u8fd8\u539f\">5. \u8bcd\u5f62\u8fd8\u539f<\/a><\/li><li class='ez-toc-page-1 ez-toc-heading-level-2'><a class=\"ez-toc-link ez-toc-heading-6\" href=\"http:\/\/viplao.com\/index.php\/2024\/11\/16\/python%e5%9f%ba%e7%a1%80%e6%8a%80%e8%83%bd-23%e4%b8%aapython%e5%9c%a8%e8%87%aa%e7%84%b6%e8%af%ad%e8%a8%80%e5%a4%84%e7%90%86%e4%b8%ad%e7%9a%84%e5%ba%94%e7%94%a8%e5%ae%9e%e4%be%8b\/#6_%E8%AF%8D%E9%A2%91%E7%BB%9F%E8%AE%A1\" title=\"6. \u8bcd\u9891\u7edf\u8ba1\">6. \u8bcd\u9891\u7edf\u8ba1<\/a><\/li><li class='ez-toc-page-1 ez-toc-heading-level-2'><a class=\"ez-toc-link ez-toc-heading-7\" href=\"http:\/\/viplao.com\/index.php\/2024\/11\/16\/python%e5%9f%ba%e7%a1%80%e6%8a%80%e8%83%bd-23%e4%b8%aapython%e5%9c%a8%e8%87%aa%e7%84%b6%e8%af%ad%e8%a8%80%e5%a4%84%e7%90%86%e4%b8%ad%e7%9a%84%e5%ba%94%e7%94%a8%e5%ae%9e%e4%be%8b\/#7_%E6%83%85%E6%84%9F%E5%88%86%E6%9E%90\" title=\"7. \u60c5\u611f\u5206\u6790\">7. 
\u60c5\u611f\u5206\u6790<\/a><\/li><li class='ez-toc-page-1 ez-toc-heading-level-2'><a class=\"ez-toc-link ez-toc-heading-8\" href=\"http:\/\/viplao.com\/index.php\/2024\/11\/16\/python%e5%9f%ba%e7%a1%80%e6%8a%80%e8%83%bd-23%e4%b8%aapython%e5%9c%a8%e8%87%aa%e7%84%b6%e8%af%ad%e8%a8%80%e5%a4%84%e7%90%86%e4%b8%ad%e7%9a%84%e5%ba%94%e7%94%a8%e5%ae%9e%e4%be%8b\/#8_%E8%AF%8D%E5%90%91%E9%87%8F%E5%8C%96\" title=\"8. \u8bcd\u5411\u91cf\u5316\">8. \u8bcd\u5411\u91cf\u5316<\/a><\/li><li class='ez-toc-page-1 ez-toc-heading-level-2'><a class=\"ez-toc-link ez-toc-heading-9\" href=\"http:\/\/viplao.com\/index.php\/2024\/11\/16\/python%e5%9f%ba%e7%a1%80%e6%8a%80%e8%83%bd-23%e4%b8%aapython%e5%9c%a8%e8%87%aa%e7%84%b6%e8%af%ad%e8%a8%80%e5%a4%84%e7%90%86%e4%b8%ad%e7%9a%84%e5%ba%94%e7%94%a8%e5%ae%9e%e4%be%8b\/#9_%E4%B8%BB%E9%A2%98%E5%BB%BA%E6%A8%A1\" title=\"9. \u4e3b\u9898\u5efa\u6a21\">9. \u4e3b\u9898\u5efa\u6a21<\/a><\/li><li class='ez-toc-page-1 ez-toc-heading-level-2'><a class=\"ez-toc-link ez-toc-heading-10\" href=\"http:\/\/viplao.com\/index.php\/2024\/11\/16\/python%e5%9f%ba%e7%a1%80%e6%8a%80%e8%83%bd-23%e4%b8%aapython%e5%9c%a8%e8%87%aa%e7%84%b6%e8%af%ad%e8%a8%80%e5%a4%84%e7%90%86%e4%b8%ad%e7%9a%84%e5%ba%94%e7%94%a8%e5%ae%9e%e4%be%8b\/#10_%E6%96%87%E6%9C%AC%E5%88%86%E7%B1%BB\" title=\"10. \u6587\u672c\u5206\u7c7b\">10. \u6587\u672c\u5206\u7c7b<\/a><\/li><li class='ez-toc-page-1 ez-toc-heading-level-2'><a class=\"ez-toc-link ez-toc-heading-11\" href=\"http:\/\/viplao.com\/index.php\/2024\/11\/16\/python%e5%9f%ba%e7%a1%80%e6%8a%80%e8%83%bd-23%e4%b8%aapython%e5%9c%a8%e8%87%aa%e7%84%b6%e8%af%ad%e8%a8%80%e5%a4%84%e7%90%86%e4%b8%ad%e7%9a%84%e5%ba%94%e7%94%a8%e5%ae%9e%e4%be%8b\/#11_%E5%91%BD%E5%90%8D%E5%AE%9E%E4%BD%93%E8%AF%86%E5%88%AB%EF%BC%88NER%EF%BC%89\" title=\"11. \u547d\u540d\u5b9e\u4f53\u8bc6\u522b\uff08NER\uff09\">11. 
\u547d\u540d\u5b9e\u4f53\u8bc6\u522b\uff08NER\uff09<\/a><\/li><li class='ez-toc-page-1 ez-toc-heading-level-2'><a class=\"ez-toc-link ez-toc-heading-12\" href=\"http:\/\/viplao.com\/index.php\/2024\/11\/16\/python%e5%9f%ba%e7%a1%80%e6%8a%80%e8%83%bd-23%e4%b8%aapython%e5%9c%a8%e8%87%aa%e7%84%b6%e8%af%ad%e8%a8%80%e5%a4%84%e7%90%86%e4%b8%ad%e7%9a%84%e5%ba%94%e7%94%a8%e5%ae%9e%e4%be%8b\/#12_%E6%9C%BA%E5%99%A8%E7%BF%BB%E8%AF%91\" title=\"12. \u673a\u5668\u7ffb\u8bd1\">12. \u673a\u5668\u7ffb\u8bd1<\/a><\/li><li class='ez-toc-page-1 ez-toc-heading-level-2'><a class=\"ez-toc-link ez-toc-heading-13\" href=\"http:\/\/viplao.com\/index.php\/2024\/11\/16\/python%e5%9f%ba%e7%a1%80%e6%8a%80%e8%83%bd-23%e4%b8%aapython%e5%9c%a8%e8%87%aa%e7%84%b6%e8%af%ad%e8%a8%80%e5%a4%84%e7%90%86%e4%b8%ad%e7%9a%84%e5%ba%94%e7%94%a8%e5%ae%9e%e4%be%8b\/#13_%E6%96%87%E6%9C%AC%E6%91%98%E8%A6%81\" title=\"13. \u6587\u672c\u6458\u8981\">13. \u6587\u672c\u6458\u8981<\/a><\/li><li class='ez-toc-page-1 ez-toc-heading-level-2'><a class=\"ez-toc-link ez-toc-heading-14\" href=\"http:\/\/viplao.com\/index.php\/2024\/11\/16\/python%e5%9f%ba%e7%a1%80%e6%8a%80%e8%83%bd-23%e4%b8%aapython%e5%9c%a8%e8%87%aa%e7%84%b6%e8%af%ad%e8%a8%80%e5%a4%84%e7%90%86%e4%b8%ad%e7%9a%84%e5%ba%94%e7%94%a8%e5%ae%9e%e4%be%8b\/#14_%E8%AF%8D%E4%BA%91%E7%94%9F%E6%88%90\" title=\"14. \u8bcd\u4e91\u751f\u6210\">14. \u8bcd\u4e91\u751f\u6210<\/a><\/li><li class='ez-toc-page-1 ez-toc-heading-level-2'><a class=\"ez-toc-link ez-toc-heading-15\" href=\"http:\/\/viplao.com\/index.php\/2024\/11\/16\/python%e5%9f%ba%e7%a1%80%e6%8a%80%e8%83%bd-23%e4%b8%aapython%e5%9c%a8%e8%87%aa%e7%84%b6%e8%af%ad%e8%a8%80%e5%a4%84%e7%90%86%e4%b8%ad%e7%9a%84%e5%ba%94%e7%94%a8%e5%ae%9e%e4%be%8b\/#15_%E9%97%AE%E7%AD%94%E7%B3%BB%E7%BB%9F\" title=\"15. \u95ee\u7b54\u7cfb\u7edf\">15. 
\u95ee\u7b54\u7cfb\u7edf<\/a><\/li><li class='ez-toc-page-1 ez-toc-heading-level-2'><a class=\"ez-toc-link ez-toc-heading-16\" href=\"http:\/\/viplao.com\/index.php\/2024\/11\/16\/python%e5%9f%ba%e7%a1%80%e6%8a%80%e8%83%bd-23%e4%b8%aapython%e5%9c%a8%e8%87%aa%e7%84%b6%e8%af%ad%e8%a8%80%e5%a4%84%e7%90%86%e4%b8%ad%e7%9a%84%e5%ba%94%e7%94%a8%e5%ae%9e%e4%be%8b\/#16_%E4%BF%A1%E6%81%AF%E6%8A%BD%E5%8F%96\" title=\"16. \u4fe1\u606f\u62bd\u53d6\">16. \u4fe1\u606f\u62bd\u53d6<\/a><\/li><li class='ez-toc-page-1 ez-toc-heading-level-2'><a class=\"ez-toc-link ez-toc-heading-17\" href=\"http:\/\/viplao.com\/index.php\/2024\/11\/16\/python%e5%9f%ba%e7%a1%80%e6%8a%80%e8%83%bd-23%e4%b8%aapython%e5%9c%a8%e8%87%aa%e7%84%b6%e8%af%ad%e8%a8%80%e5%a4%84%e7%90%86%e4%b8%ad%e7%9a%84%e5%ba%94%e7%94%a8%e5%ae%9e%e4%be%8b\/#17_%E5%85%B3%E7%B3%BB%E6%8A%BD%E5%8F%96\" title=\"17. \u5173\u7cfb\u62bd\u53d6\">17. \u5173\u7cfb\u62bd\u53d6<\/a><\/li><li class='ez-toc-page-1 ez-toc-heading-level-2'><a class=\"ez-toc-link ez-toc-heading-18\" href=\"http:\/\/viplao.com\/index.php\/2024\/11\/16\/python%e5%9f%ba%e7%a1%80%e6%8a%80%e8%83%bd-23%e4%b8%aapython%e5%9c%a8%e8%87%aa%e7%84%b6%e8%af%ad%e8%a8%80%e5%a4%84%e7%90%86%e4%b8%ad%e7%9a%84%e5%ba%94%e7%94%a8%e5%ae%9e%e4%be%8b\/#18_%E6%96%87%E6%9C%AC%E8%81%9A%E7%B1%BB\" title=\"18. \u6587\u672c\u805a\u7c7b\">18. \u6587\u672c\u805a\u7c7b<\/a><\/li><li class='ez-toc-page-1 ez-toc-heading-level-2'><a class=\"ez-toc-link ez-toc-heading-19\" href=\"http:\/\/viplao.com\/index.php\/2024\/11\/16\/python%e5%9f%ba%e7%a1%80%e6%8a%80%e8%83%bd-23%e4%b8%aapython%e5%9c%a8%e8%87%aa%e7%84%b6%e8%af%ad%e8%a8%80%e5%a4%84%e7%90%86%e4%b8%ad%e7%9a%84%e5%ba%94%e7%94%a8%e5%ae%9e%e4%be%8b\/#19_%E4%BA%8B%E4%BB%B6%E6%A3%80%E6%B5%8B\" title=\"19. \u4e8b\u4ef6\u68c0\u6d4b\">19. 
\u4e8b\u4ef6\u68c0\u6d4b<\/a><\/li><li class='ez-toc-page-1 ez-toc-heading-level-2'><a class=\"ez-toc-link ez-toc-heading-20\" href=\"http:\/\/viplao.com\/index.php\/2024\/11\/16\/python%e5%9f%ba%e7%a1%80%e6%8a%80%e8%83%bd-23%e4%b8%aapython%e5%9c%a8%e8%87%aa%e7%84%b6%e8%af%ad%e8%a8%80%e5%a4%84%e7%90%86%e4%b8%ad%e7%9a%84%e5%ba%94%e7%94%a8%e5%ae%9e%e4%be%8b\/#20_%E8%AF%8D%E6%80%A7%E6%A0%87%E6%B3%A8\" title=\"20. \u8bcd\u6027\u6807\u6ce8\">20. \u8bcd\u6027\u6807\u6ce8<\/a><\/li><li class='ez-toc-page-1 ez-toc-heading-level-2'><a class=\"ez-toc-link ez-toc-heading-21\" href=\"http:\/\/viplao.com\/index.php\/2024\/11\/16\/python%e5%9f%ba%e7%a1%80%e6%8a%80%e8%83%bd-23%e4%b8%aapython%e5%9c%a8%e8%87%aa%e7%84%b6%e8%af%ad%e8%a8%80%e5%a4%84%e7%90%86%e4%b8%ad%e7%9a%84%e5%ba%94%e7%94%a8%e5%ae%9e%e4%be%8b\/#21_%E4%BE%9D%E5%AD%98%E5%8F%A5%E6%B3%95%E5%88%86%E6%9E%90\" title=\"21. \u4f9d\u5b58\u53e5\u6cd5\u5206\u6790\">21. \u4f9d\u5b58\u53e5\u6cd5\u5206\u6790<\/a><\/li><li class='ez-toc-page-1 ez-toc-heading-level-2'><a class=\"ez-toc-link ez-toc-heading-22\" href=\"http:\/\/viplao.com\/index.php\/2024\/11\/16\/python%e5%9f%ba%e7%a1%80%e6%8a%80%e8%83%bd-23%e4%b8%aapython%e5%9c%a8%e8%87%aa%e7%84%b6%e8%af%ad%e8%a8%80%e5%a4%84%e7%90%86%e4%b8%ad%e7%9a%84%e5%ba%94%e7%94%a8%e5%ae%9e%e4%be%8b\/#22_%E8%AF%AD%E6%B3%95%E6%A0%91%E6%9E%84%E5%BB%BA\" title=\"22. \u8bed\u6cd5\u6811\u6784\u5efa\">22. \u8bed\u6cd5\u6811\u6784\u5efa<\/a><\/li><li class='ez-toc-page-1 ez-toc-heading-level-2'><a class=\"ez-toc-link ez-toc-heading-23\" href=\"http:\/\/viplao.com\/index.php\/2024\/11\/16\/python%e5%9f%ba%e7%a1%80%e6%8a%80%e8%83%bd-23%e4%b8%aapython%e5%9c%a8%e8%87%aa%e7%84%b6%e8%af%ad%e8%a8%80%e5%a4%84%e7%90%86%e4%b8%ad%e7%9a%84%e5%ba%94%e7%94%a8%e5%ae%9e%e4%be%8b\/#23_%E8%AF%8D%E6%80%A7%E8%BD%AC%E6%8D%A2\" title=\"23. \u8bcd\u6027\u8f6c\u6362\">23. 
\u8bcd\u6027\u8f6c\u6362<\/a><\/li><li class='ez-toc-page-1 ez-toc-heading-level-2'><a class=\"ez-toc-link ez-toc-heading-24\" href=\"http:\/\/viplao.com\/index.php\/2024\/11\/16\/python%e5%9f%ba%e7%a1%80%e6%8a%80%e8%83%bd-23%e4%b8%aapython%e5%9c%a8%e8%87%aa%e7%84%b6%e8%af%ad%e8%a8%80%e5%a4%84%e7%90%86%e4%b8%ad%e7%9a%84%e5%ba%94%e7%94%a8%e5%ae%9e%e4%be%8b\/#%E5%AE%9E%E6%88%98%E6%A1%88%E4%BE%8B%EF%BC%9A%E6%83%85%E6%84%9F%E5%88%86%E6%9E%90%E5%9C%A8%E7%94%B5%E5%95%86%E8%AF%84%E8%AE%BA%E4%B8%AD%E7%9A%84%E5%BA%94%E7%94%A8\" title=\"\u5b9e\u6218\u6848\u4f8b\uff1a\u60c5\u611f\u5206\u6790\u5728\u7535\u5546\u8bc4\u8bba\u4e2d\u7684\u5e94\u7528\">\u5b9e\u6218\u6848\u4f8b\uff1a\u60c5\u611f\u5206\u6790\u5728\u7535\u5546\u8bc4\u8bba\u4e2d\u7684\u5e94\u7528<\/a><\/li><\/ul><\/nav><\/div>\n<h2 class=\"wp-block-heading\"><span class=\"ez-toc-section\" id=\"1_%E6%96%87%E6%9C%AC%E6%B8%85%E6%B4%97\"><\/span><strong>1. \u6587\u672c\u6e05\u6d17<\/strong><span class=\"ez-toc-section-end\"><\/span><\/h2>\n\n\n\n<p>\u6587\u672c\u6e05\u6d17\u662f\u4efb\u4f55 NLP \u9879\u76ee\u7684\u7b2c\u4e00\u6b65\u3002\u5b83\u6d89\u53ca\u53bb\u9664\u4e0d\u9700\u8981\u7684\u4fe1\u606f\uff0c\u5982\u6807\u70b9\u7b26\u53f7\u3001\u6570\u5b57\u3001\u7279\u6b8a\u5b57\u7b26\u7b49\u3002<\/p>\n\n\n\n<p><strong>\u4ee3\u7801\u793a\u4f8b\uff1a<\/strong><\/p>\n\n\n\n<pre 
class=\"wp-block-code\"><code>import&nbsp;re<br><br>def&nbsp;clean_text(text):<br>&nbsp;&nbsp;&nbsp;&nbsp;<em>#&nbsp;\u53bb\u9664\u6807\u70b9\u7b26\u53f7<\/em><br>&nbsp;&nbsp;&nbsp;&nbsp;text&nbsp;=&nbsp;re.sub(r'&#91;^\\w\\s]',&nbsp;'',&nbsp;text)<br>&nbsp;&nbsp;&nbsp;&nbsp;<em>#&nbsp;\u53bb\u9664\u6570\u5b57<\/em><br>&nbsp;&nbsp;&nbsp;&nbsp;text&nbsp;=&nbsp;re.sub(r'\\d+',&nbsp;'',&nbsp;text)<br>&nbsp;&nbsp;&nbsp;&nbsp;<em>#&nbsp;\u5c06\u6240\u6709\u5b57\u6bcd\u8f6c\u4e3a\u5c0f\u5199<\/em><br>&nbsp;&nbsp;&nbsp;&nbsp;text&nbsp;=&nbsp;text.lower()<br>&nbsp;&nbsp;&nbsp;&nbsp;return&nbsp;text<br><br><em>#&nbsp;\u793a\u4f8b\u6587\u672c<\/em><br>text&nbsp;=&nbsp;\"Hello,&nbsp;World!&nbsp;This&nbsp;is&nbsp;an&nbsp;example&nbsp;text&nbsp;with&nbsp;numbers&nbsp;123&nbsp;and&nbsp;symbols&nbsp;#@$.\"<br>cleaned_text&nbsp;=&nbsp;clean_text(text)<br><br>print(cleaned_text)&nbsp;&nbsp;<em>#&nbsp;\u8f93\u51fa:&nbsp;hello&nbsp;world&nbsp;this&nbsp;is&nbsp;an&nbsp;example&nbsp;text&nbsp;with&nbsp;numbers&nbsp;and&nbsp;symbols<\/em><\/code><\/pre>\n\n\n\n<p><strong>\u89e3\u91ca\uff1a<\/strong><\/p>\n\n\n\n<ul>\n<li>\u4f7f\u7528 <code>re<\/code> \u6a21\u5757\u7684 <code>sub()<\/code> \u65b9\u6cd5\u53bb\u9664\u6807\u70b9\u7b26\u53f7\u548c\u6570\u5b57\u3002<\/li>\n\n\n\n<li><code>lower()<\/code> \u65b9\u6cd5\u5c06\u6240\u6709\u5b57\u6bcd\u8f6c\u6362\u4e3a\u5c0f\u5199\u3002<\/li>\n<\/ul>\n\n\n\n<h2 class=\"wp-block-heading\"><span class=\"ez-toc-section\" id=\"2_%E5%88%86%E8%AF%8D\"><\/span><strong>2. 
\u5206\u8bcd<\/strong><span class=\"ez-toc-section-end\"><\/span><\/h2>\n\n\n\n<p>\u5206\u8bcd\u662f\u5c06\u6587\u672c\u62c6\u5206\u6210\u5355\u8bcd\u7684\u8fc7\u7a0b\u3002\u8fd9\u6709\u52a9\u4e8e\u8fdb\u4e00\u6b65\u5904\u7406\uff0c\u5982\u8bcd\u9891\u7edf\u8ba1\u3001\u60c5\u611f\u5206\u6790\u7b49\u3002<\/p>\n\n\n\n<p><strong>\u4ee3\u7801\u793a\u4f8b\uff1a<\/strong><\/p>\n\n\n\n<pre class=\"wp-block-code\"><code>from&nbsp;nltk.tokenize&nbsp;import&nbsp;word_tokenize<br><br><em>#&nbsp;\u793a\u4f8b\u6587\u672c<\/em><br>text&nbsp;=&nbsp;\"Hello,&nbsp;World!&nbsp;This&nbsp;is&nbsp;an&nbsp;example&nbsp;text.\"<br><br><em>#&nbsp;\u5206\u8bcd<\/em><br>tokens&nbsp;=&nbsp;word_tokenize(text)<br><br>print(tokens)&nbsp;&nbsp;<em>#&nbsp;\u8f93\u51fa:&nbsp;&#91;'Hello',&nbsp;',',&nbsp;'World',&nbsp;'!',&nbsp;'This',&nbsp;'is',&nbsp;'an',&nbsp;'example',&nbsp;'text',&nbsp;'.']<\/em><\/code><\/pre>\n\n\n\n<p><strong>\u89e3\u91ca\uff1a<\/strong><\/p>\n\n\n\n<ul>\n<li>\u4f7f\u7528 <code>nltk<\/code> \u5e93\u4e2d\u7684 <code>word_tokenize()<\/code> \u51fd\u6570\u8fdb\u884c\u5206\u8bcd\u3002<\/li>\n<\/ul>\n\n\n\n<h2 class=\"wp-block-heading\"><span class=\"ez-toc-section\" id=\"3_%E5%8E%BB%E9%99%A4%E5%81%9C%E7%94%A8%E8%AF%8D\"><\/span><strong>3. 
\u53bb\u9664\u505c\u7528\u8bcd<\/strong><span class=\"ez-toc-section-end\"><\/span><\/h2>\n\n\n\n<p>\u505c\u7528\u8bcd\u662f\u6307\u5728\u6587\u672c\u4e2d\u9891\u7e41\u51fa\u73b0\u4f46\u5bf9\u8bed\u4e49\u8d21\u732e\u8f83\u5c0f\u7684\u8bcd\uff0c\u5982\u201cthe\u201d\u3001\u201cis\u201d\u7b49\u3002<\/p>\n\n\n\n<p><strong>\u4ee3\u7801\u793a\u4f8b\uff1a<\/strong><\/p>\n\n\n\n<pre class=\"wp-block-code\"><code>from&nbsp;nltk.corpus&nbsp;import&nbsp;stopwords<br>from&nbsp;nltk.tokenize&nbsp;import&nbsp;word_tokenize<br><br><em>#&nbsp;\u793a\u4f8b\u6587\u672c<\/em><br>text&nbsp;=&nbsp;\"The&nbsp;quick&nbsp;brown&nbsp;fox&nbsp;jumps&nbsp;over&nbsp;the&nbsp;lazy&nbsp;dog.\"<br><br><em>#&nbsp;\u5206\u8bcd<\/em><br>tokens&nbsp;=&nbsp;word_tokenize(text)<br><br><em>#&nbsp;\u53bb\u9664\u505c\u7528\u8bcd<\/em><br>stop_words&nbsp;=&nbsp;set(stopwords.words('english'))<br>filtered_tokens&nbsp;=&nbsp;&#91;token&nbsp;for&nbsp;token&nbsp;in&nbsp;tokens&nbsp;if&nbsp;token.lower()&nbsp;not&nbsp;in&nbsp;stop_words]<br><br>print(filtered_tokens)&nbsp;&nbsp;<em>#&nbsp;\u8f93\u51fa:&nbsp;&#91;'quick',&nbsp;'brown',&nbsp;'fox',&nbsp;'jumps',&nbsp;'lazy',&nbsp;'dog',&nbsp;'.']<\/em><\/code><\/pre>\n\n\n\n<p><strong>\u89e3\u91ca\uff1a<\/strong><\/p>\n\n\n\n<ul>\n<li>\u4f7f\u7528 <code>nltk.corpus.stopwords<\/code> \u83b7\u53d6\u82f1\u8bed\u505c\u7528\u8bcd\u5217\u8868\u3002<\/li>\n\n\n\n<li>\u4f7f\u7528\u5217\u8868\u63a8\u5bfc\u5f0f\u8fc7\u6ee4\u6389\u505c\u7528\u8bcd\u3002<\/li>\n<\/ul>\n\n\n\n<h2 class=\"wp-block-heading\"><span class=\"ez-toc-section\" id=\"4_%E8%AF%8D%E5%B9%B2%E6%8F%90%E5%8F%96\"><\/span><strong>4. 
\u8bcd\u5e72\u63d0\u53d6<\/strong><span class=\"ez-toc-section-end\"><\/span><\/h2>\n\n\n\n<p>\u8bcd\u5e72\u63d0\u53d6\u662f\u5c06\u5355\u8bcd\u8fd8\u539f\u4e3a\u5176\u57fa\u672c\u5f62\u5f0f\u7684\u8fc7\u7a0b\uff0c\u6709\u52a9\u4e8e\u51cf\u5c11\u8bcd\u6c47\u91cf\u3002<\/p>\n\n\n\n<p><strong>\u4ee3\u7801\u793a\u4f8b\uff1a<\/strong><\/p>\n\n\n\n<pre class=\"wp-block-code\"><code>from&nbsp;nltk.stem&nbsp;import&nbsp;PorterStemmer<br>from&nbsp;nltk.tokenize&nbsp;import&nbsp;word_tokenize<br><br><em>#&nbsp;\u793a\u4f8b\u6587\u672c<\/em><br>text&nbsp;=&nbsp;\"running&nbsp;dogs&nbsp;are&nbsp;barking&nbsp;loudly.\"<br><br><em>#&nbsp;\u5206\u8bcd<\/em><br>tokens&nbsp;=&nbsp;word_tokenize(text)<br><br><em>#&nbsp;\u8bcd\u5e72\u63d0\u53d6<\/em><br>stemmer&nbsp;=&nbsp;PorterStemmer()<br>stemmed_tokens&nbsp;=&nbsp;&#91;stemmer.stem(token)&nbsp;for&nbsp;token&nbsp;in&nbsp;tokens]<br><br>print(stemmed_tokens)&nbsp;&nbsp;<em>#&nbsp;\u8f93\u51fa:&nbsp;&#91;'run',&nbsp;'dog',&nbsp;'are',&nbsp;'bark',&nbsp;'loudli',&nbsp;'.']<\/em><\/code><\/pre>\n\n\n\n<p><strong>\u89e3\u91ca\uff1a<\/strong><\/p>\n\n\n\n<ul>\n<li>\u4f7f\u7528 <code>PorterStemmer<\/code> \u5bf9\u5355\u8bcd\u8fdb\u884c\u8bcd\u5e72\u63d0\u53d6\u3002<\/li>\n<\/ul>\n\n\n\n<h2 class=\"wp-block-heading\"><span class=\"ez-toc-section\" id=\"5_%E8%AF%8D%E5%BD%A2%E8%BF%98%E5%8E%9F\"><\/span><strong>5. 
\u8bcd\u5f62\u8fd8\u539f<\/strong><span class=\"ez-toc-section-end\"><\/span><\/h2>\n\n\n\n<p>\u8bcd\u5f62\u8fd8\u539f\u7c7b\u4f3c\u4e8e\u8bcd\u5e72\u63d0\u53d6\uff0c\u4f46\u5b83\u4f7f\u7528\u8bcd\u5178\u6765\u627e\u5230\u5355\u8bcd\u7684\u57fa\u672c\u5f62\u5f0f\u3002<\/p>\n\n\n\n<p><strong>\u4ee3\u7801\u793a\u4f8b\uff1a<\/strong><\/p>\n\n\n\n<pre class=\"wp-block-code\"><code>from&nbsp;nltk.stem&nbsp;import&nbsp;WordNetLemmatizer<br>from&nbsp;nltk.tokenize&nbsp;import&nbsp;word_tokenize<br><br><em>#&nbsp;\u793a\u4f8b\u6587\u672c<\/em><br>text&nbsp;=&nbsp;\"running&nbsp;dogs&nbsp;are&nbsp;barking&nbsp;loudly.\"<br><br><em>#&nbsp;\u5206\u8bcd<\/em><br>tokens&nbsp;=&nbsp;word_tokenize(text)<br><br><em>#&nbsp;\u8bcd\u5f62\u8fd8\u539f<\/em><br>lemmatizer&nbsp;=&nbsp;WordNetLemmatizer()<br>lemmatized_tokens&nbsp;=&nbsp;&#91;lemmatizer.lemmatize(token)&nbsp;for&nbsp;token&nbsp;in&nbsp;tokens]<br><br>print(lemmatized_tokens)&nbsp;&nbsp;<em>#&nbsp;\u8f93\u51fa:&nbsp;&#91;'running',&nbsp;'dog',&nbsp;'are',&nbsp;'barking',&nbsp;'loudly',&nbsp;'.']<\/em><\/code><\/pre>\n\n\n\n<p><strong>\u89e3\u91ca\uff1a<\/strong><\/p>\n\n\n\n<ul>\n<li>\u4f7f\u7528 <code>WordNetLemmatizer<\/code> \u8fdb\u884c\u8bcd\u5f62\u8fd8\u539f\u3002<\/li>\n<\/ul>\n\n\n\n<h2 class=\"wp-block-heading\"><span class=\"ez-toc-section\" id=\"6_%E8%AF%8D%E9%A2%91%E7%BB%9F%E8%AE%A1\"><\/span><strong>6. 
\u8bcd\u9891\u7edf\u8ba1<\/strong><span class=\"ez-toc-section-end\"><\/span><\/h2>\n\n\n\n<p>\u8bcd\u9891\u7edf\u8ba1\u53ef\u4ee5\u5e2e\u52a9\u6211\u4eec\u4e86\u89e3\u6587\u672c\u4e2d\u6700\u5e38\u89c1\u7684\u8bcd\u6c47\u3002<\/p>\n\n\n\n<p><strong>\u4ee3\u7801\u793a\u4f8b\uff1a<\/strong><\/p>\n\n\n\n<pre class=\"wp-block-code\"><code>from&nbsp;nltk.tokenize&nbsp;import&nbsp;word_tokenize<br>from&nbsp;nltk.probability&nbsp;import&nbsp;FreqDist<br>import&nbsp;matplotlib.pyplot&nbsp;as&nbsp;plt<br><br><em>#&nbsp;\u793a\u4f8b\u6587\u672c<\/em><br>text&nbsp;=&nbsp;\"This&nbsp;is&nbsp;a&nbsp;sample&nbsp;text.&nbsp;This&nbsp;text&nbsp;contains&nbsp;some&nbsp;words&nbsp;that&nbsp;are&nbsp;repeated&nbsp;several&nbsp;times.\"<br><br><em>#&nbsp;\u5206\u8bcd<\/em><br>tokens&nbsp;=&nbsp;word_tokenize(text)<br><br><em>#&nbsp;\u8ba1\u7b97\u8bcd\u9891<\/em><br>fdist&nbsp;=&nbsp;FreqDist(tokens)<br><br><em>#&nbsp;\u7ed8\u5236\u8bcd\u9891\u56fe<\/em><br>plt.figure(figsize=(10,&nbsp;5))<br>fdist.plot(10)<br>plt.show()<\/code><\/pre>\n\n\n\n<p><strong>\u89e3\u91ca\uff1a<\/strong><\/p>\n\n\n\n<ul>\n<li>\u4f7f\u7528 <code>FreqDist<\/code> \u8ba1\u7b97\u8bcd\u9891\u3002<\/li>\n\n\n\n<li>\u4f7f\u7528 <code>matplotlib<\/code> \u7ed8\u5236\u8bcd\u9891\u56fe\u3002<\/li>\n<\/ul>\n\n\n\n<h2 class=\"wp-block-heading\"><span class=\"ez-toc-section\" id=\"7_%E6%83%85%E6%84%9F%E5%88%86%E6%9E%90\"><\/span><strong>7. 
\u60c5\u611f\u5206\u6790<\/strong><span class=\"ez-toc-section-end\"><\/span><\/h2>\n\n\n\n<p>\u60c5\u611f\u5206\u6790\u7528\u4e8e\u5224\u65ad\u6587\u672c\u7684\u60c5\u611f\u503e\u5411\uff0c\u5982\u6b63\u9762\u3001\u8d1f\u9762\u6216\u4e2d\u6027\u3002<\/p>\n\n\n\n<p><strong>\u4ee3\u7801\u793a\u4f8b\uff1a<\/strong><\/p>\n\n\n\n<pre class=\"wp-block-code\"><code>from&nbsp;nltk.sentiment&nbsp;import&nbsp;SentimentIntensityAnalyzer<br><br><em>#&nbsp;\u793a\u4f8b\u6587\u672c<\/em><br>text&nbsp;=&nbsp;\"I&nbsp;love&nbsp;this&nbsp;movie.&nbsp;It's&nbsp;amazing!\"<br><br><em>#&nbsp;\u60c5\u611f\u5206\u6790<\/em><br>sia&nbsp;=&nbsp;SentimentIntensityAnalyzer()<br>sentiment_scores&nbsp;=&nbsp;sia.polarity_scores(text)<br><br>print(sentiment_scores)&nbsp;&nbsp;<em>#&nbsp;\u8f93\u51fa:&nbsp;{'neg':&nbsp;0.0,&nbsp;'neu':&nbsp;0.429,&nbsp;'pos':&nbsp;0.571,&nbsp;'compound':&nbsp;0.8159}<\/em><\/code><\/pre>\n\n\n\n<p><strong>\u89e3\u91ca\uff1a<\/strong><\/p>\n\n\n\n<ul>\n<li>\u4f7f\u7528 <code>SentimentIntensityAnalyzer<\/code> \u8fdb\u884c\u60c5\u611f\u5206\u6790\u3002<\/li>\n<\/ul>\n\n\n\n<h2 class=\"wp-block-heading\"><span class=\"ez-toc-section\" id=\"8_%E8%AF%8D%E5%90%91%E9%87%8F%E5%8C%96\"><\/span><strong>8. 
\u8bcd\u5411\u91cf\u5316<\/strong><span class=\"ez-toc-section-end\"><\/span><\/h2>\n\n\n\n<p>\u8bcd\u5411\u91cf\u5316\u5c06\u5355\u8bcd\u8868\u793a\u4e3a\u6570\u503c\u5411\u91cf\uff0c\u4fbf\u4e8e\u8ba1\u7b97\u673a\u5904\u7406\u3002<\/p>\n\n\n\n<p><strong>\u4ee3\u7801\u793a\u4f8b\uff1a<\/strong><\/p>\n\n\n\n<pre class=\"wp-block-code\"><code>import&nbsp;gensim.downloader&nbsp;as&nbsp;api<br><br><em>#&nbsp;\u52a0\u8f7d\u9884\u8bad\u7ec3\u7684&nbsp;GloVe&nbsp;\u6a21\u578b<\/em><br>model&nbsp;=&nbsp;api.load(\"glove-twitter-25\")<br><br><em>#&nbsp;\u793a\u4f8b\u6587\u672c<\/em><br>text&nbsp;=&nbsp;\"This&nbsp;is&nbsp;a&nbsp;sample&nbsp;sentence.\"<br><br><em>#&nbsp;\u5206\u8bcd<\/em><br>tokens&nbsp;=&nbsp;text.split()<br><br><em>#&nbsp;\u5411\u91cf\u5316<\/em><br>vectorized_tokens&nbsp;=&nbsp;&#91;model&#91;token]&nbsp;for&nbsp;token&nbsp;in&nbsp;tokens&nbsp;if&nbsp;token&nbsp;in&nbsp;model.key_to_index]<br><br>print(vectorized_tokens)<\/code><\/pre>\n\n\n\n<p><strong>\u89e3\u91ca\uff1a<\/strong><\/p>\n\n\n\n<ul>\n<li>\u4f7f\u7528 <code>gensim<\/code> \u5e93\u52a0\u8f7d\u9884\u8bad\u7ec3\u7684 GloVe \u6a21\u578b\u3002<\/li>\n\n\n\n<li>\u5c06\u5355\u8bcd\u8f6c\u6362\u4e3a\u5411\u91cf\u8868\u793a\u3002<\/li>\n<\/ul>\n\n\n\n<h2 class=\"wp-block-heading\"><span class=\"ez-toc-section\" id=\"9_%E4%B8%BB%E9%A2%98%E5%BB%BA%E6%A8%A1\"><\/span><strong>9. 
\u4e3b\u9898\u5efa\u6a21<\/strong><span class=\"ez-toc-section-end\"><\/span><\/h2>\n\n\n\n<p>\u4e3b\u9898\u5efa\u6a21\u7528\u4e8e\u8bc6\u522b\u6587\u6863\u96c6\u5408\u4e2d\u7684\u4e3b\u9898\u3002<\/p>\n\n\n\n<p><strong>\u4ee3\u7801\u793a\u4f8b\uff1a<\/strong><\/p>\n\n\n\n<pre class=\"wp-block-code\"><code>from&nbsp;gensim&nbsp;import&nbsp;corpora,&nbsp;models<br><br><em>#&nbsp;\u793a\u4f8b\u6587\u672c<\/em><br>documents&nbsp;=&nbsp;&#91;<br>&nbsp;&nbsp;&nbsp;&nbsp;\"Human&nbsp;machine&nbsp;interface&nbsp;for&nbsp;lab&nbsp;abc&nbsp;computer&nbsp;applications\",<br>&nbsp;&nbsp;&nbsp;&nbsp;\"A&nbsp;survey&nbsp;of&nbsp;user&nbsp;opinion&nbsp;of&nbsp;computer&nbsp;system&nbsp;response&nbsp;time\",<br>&nbsp;&nbsp;&nbsp;&nbsp;\"The&nbsp;EPS&nbsp;user&nbsp;interface&nbsp;management&nbsp;system\",<br>&nbsp;&nbsp;&nbsp;&nbsp;\"System&nbsp;and&nbsp;human&nbsp;system&nbsp;engineering&nbsp;testing&nbsp;of&nbsp;EPS\",<br>&nbsp;&nbsp;&nbsp;&nbsp;\"Relation&nbsp;of&nbsp;user&nbsp;perceived&nbsp;response&nbsp;time&nbsp;to&nbsp;error&nbsp;measurement\",<br>&nbsp;&nbsp;&nbsp;&nbsp;\"The&nbsp;generation&nbsp;of&nbsp;random&nbsp;binary&nbsp;unordered&nbsp;trees\",<br>&nbsp;&nbsp;&nbsp;&nbsp;\"The&nbsp;intersection&nbsp;graph&nbsp;of&nbsp;paths&nbsp;in&nbsp;trees\",<br>&nbsp;&nbsp;&nbsp;&nbsp;\"Graph&nbsp;minors&nbsp;IV&nbsp;Widths&nbsp;of&nbsp;trees&nbsp;and&nbsp;well&nbsp;quasi&nbsp;ordering\",<br>&nbsp;&nbsp;&nbsp;&nbsp;\"Graph&nbsp;minors&nbsp;A&nbsp;survey\"<br>]<br><br><em>#&nbsp;\u5206\u8bcd<\/em><br>texts&nbsp;=&nbsp;&#91;&#91;word&nbsp;for&nbsp;word&nbsp;in&nbsp;document.lower().split()]&nbsp;for&nbsp;document&nbsp;in&nbsp;documents]<br><br><em>#&nbsp;\u521b\u5efa\u8bcd\u5178<\/em><br>dictionary&nbsp;=&nbsp;corpora.Dictionary(texts)<br><br><em>#&nbsp;\u8f6c\u6362\u4e3a\u6587\u6863-\u8bcd\u9891\u77e9\u9635<\/em><br>corpus&nbsp;=&nbsp;&#91;dictionary.doc2bow(text)&nbsp;for&nbsp;text&nbsp;in&nbsp;texts]<br><br><em>#&nbsp;LDA&nbsp;\u6a21\u578b<\/em><br>lda&nbsp;=&nbsp;models.LdaMod
el(corpus,&nbsp;num_topics=2,&nbsp;id2word=dictionary,&nbsp;passes=10)<br><br><em>#&nbsp;\u6253\u5370\u4e3b\u9898<\/em><br>for&nbsp;topic&nbsp;in&nbsp;lda.print_topics(num_topics=2,&nbsp;num_words=5):<br>&nbsp;&nbsp;&nbsp;&nbsp;print(topic)<\/code><\/pre>\n\n\n\n<p><strong>\u89e3\u91ca\uff1a<\/strong><\/p>\n\n\n\n<ul>\n<li>\u4f7f\u7528 <code>gensim<\/code> \u5e93\u8fdb\u884c\u4e3b\u9898\u5efa\u6a21\u3002<\/li>\n\n\n\n<li>\u4f7f\u7528 LDA \u6a21\u578b\u8bc6\u522b\u4e3b\u9898\u3002<\/li>\n<\/ul>\n\n\n\n<h2 class=\"wp-block-heading\"><span class=\"ez-toc-section\" id=\"10_%E6%96%87%E6%9C%AC%E5%88%86%E7%B1%BB\"><\/span><strong>10. \u6587\u672c\u5206\u7c7b<\/strong><span class=\"ez-toc-section-end\"><\/span><\/h2>\n\n\n\n<p>\u6587\u672c\u5206\u7c7b\u662f\u5c06\u6587\u672c\u5206\u914d\u7ed9\u9884\u5b9a\u4e49\u7c7b\u522b\u7684\u8fc7\u7a0b\u3002<\/p>\n\n\n\n<p><strong>\u4ee3\u7801\u793a\u4f8b\uff1a<\/strong><\/p>\n\n\n\n<pre class=\"wp-block-code\"><code>from&nbsp;sklearn.feature_extraction.text&nbsp;import&nbsp;CountVectorizer<br>from&nbsp;sklearn.naive_bayes&nbsp;import&nbsp;MultinomialNB<br>from&nbsp;sklearn.model_selection&nbsp;import&nbsp;train_test_split<br>from&nbsp;sklearn.metrics&nbsp;import&nbsp;accuracy_score<br><br><em>#&nbsp;\u793a\u4f8b\u6570\u636e<\/em><br>documents&nbsp;=&nbsp;&#91;<br>&nbsp;&nbsp;&nbsp;&nbsp;\"Human&nbsp;machine&nbsp;interface&nbsp;for&nbsp;lab&nbsp;abc&nbsp;computer&nbsp;applications\",<br>&nbsp;&nbsp;&nbsp;&nbsp;\"A&nbsp;survey&nbsp;of&nbsp;user&nbsp;opinion&nbsp;of&nbsp;computer&nbsp;system&nbsp;response&nbsp;time\",<br>&nbsp;&nbsp;&nbsp;&nbsp;\"The&nbsp;EPS&nbsp;user&nbsp;interface&nbsp;management&nbsp;system\",<br>&nbsp;&nbsp;&nbsp;&nbsp;\"System&nbsp;and&nbsp;human&nbsp;system&nbsp;engineering&nbsp;testing&nbsp;of&nbsp;EPS\",<br>&nbsp;&nbsp;&nbsp;&nbsp;\"Relation&nbsp;of&nbsp;user&nbsp;perceived&nbsp;response&nbsp;time&nbsp;to&nbsp;error&nbsp;measurement\",<br>&nbsp;&nbsp;&nbsp;&nbsp;\"The&nbsp;generation&nbsp;of&nbsp;random&nbsp;bina
ry&nbsp;unordered&nbsp;trees\",<br>&nbsp;&nbsp;&nbsp;&nbsp;\"The&nbsp;intersection&nbsp;graph&nbsp;of&nbsp;paths&nbsp;in&nbsp;trees\",<br>&nbsp;&nbsp;&nbsp;&nbsp;\"Graph&nbsp;minors&nbsp;IV&nbsp;Widths&nbsp;of&nbsp;trees&nbsp;and&nbsp;well&nbsp;quasi&nbsp;ordering\",<br>&nbsp;&nbsp;&nbsp;&nbsp;\"Graph&nbsp;minors&nbsp;A&nbsp;survey\"<br>]<br><br>labels&nbsp;=&nbsp;&#91;0,&nbsp;0,&nbsp;0,&nbsp;0,&nbsp;0,&nbsp;1,&nbsp;1,&nbsp;1,&nbsp;1]<br><br><em>#&nbsp;\u5206\u8bcd<\/em><br>vectorizer&nbsp;=&nbsp;CountVectorizer()<br>X&nbsp;=&nbsp;vectorizer.fit_transform(documents)<br><br><em>#&nbsp;\u5212\u5206\u8bad\u7ec3\u96c6\u548c\u6d4b\u8bd5\u96c6<\/em><br>X_train,&nbsp;X_test,&nbsp;y_train,&nbsp;y_test&nbsp;=&nbsp;train_test_split(X,&nbsp;labels,&nbsp;test_size=0.2,&nbsp;random_state=42)<br><br><em>#&nbsp;\u8bad\u7ec3\u6a21\u578b<\/em><br>classifier&nbsp;=&nbsp;MultinomialNB()<br>classifier.fit(X_train,&nbsp;y_train)<br><br><em>#&nbsp;\u9884\u6d4b<\/em><br>y_pred&nbsp;=&nbsp;classifier.predict(X_test)<br><br><em>#&nbsp;\u8bc4\u4f30\u51c6\u786e\u7387<\/em><br>accuracy&nbsp;=&nbsp;accuracy_score(y_test,&nbsp;y_pred)<br>print(f\"Accuracy:&nbsp;{accuracy:.2f}\")<\/code><\/pre>\n\n\n\n<p><strong>\u89e3\u91ca\uff1a<\/strong><\/p>\n\n\n\n<ul>\n<li>\u4f7f\u7528 <code>sklearn<\/code> \u5e93\u8fdb\u884c\u6587\u672c\u5206\u7c7b\u3002<\/li>\n\n\n\n<li>\u4f7f\u7528\u6734\u7d20\u8d1d\u53f6\u65af\u5206\u7c7b\u5668\u8fdb\u884c\u9884\u6d4b\u3002<\/li>\n<\/ul>\n\n\n\n<h2 class=\"wp-block-heading\"><span class=\"ez-toc-section\" id=\"11_%E5%91%BD%E5%90%8D%E5%AE%9E%E4%BD%93%E8%AF%86%E5%88%AB%EF%BC%88NER%EF%BC%89\"><\/span><strong>11. 
\u547d\u540d\u5b9e\u4f53\u8bc6\u522b\uff08NER\uff09<\/strong><span class=\"ez-toc-section-end\"><\/span><\/h2>\n\n\n\n<p>\u547d\u540d\u5b9e\u4f53\u8bc6\u522b\u7528\u4e8e\u8bc6\u522b\u6587\u672c\u4e2d\u7684\u7279\u5b9a\u5b9e\u4f53\uff0c\u5982\u4eba\u540d\u3001\u5730\u540d\u7b49\u3002<\/p>\n\n\n\n<p><strong>\u4ee3\u7801\u793a\u4f8b\uff1a<\/strong><\/p>\n\n\n\n<pre class=\"wp-block-code\"><code>import&nbsp;spacy<br><br><em>#&nbsp;\u52a0\u8f7d\u9884\u8bad\u7ec3\u6a21\u578b<\/em><br>nlp&nbsp;=&nbsp;spacy.load(\"en_core_web_sm\")<br><br><em>#&nbsp;\u793a\u4f8b\u6587\u672c<\/em><br>text&nbsp;=&nbsp;\"Apple&nbsp;is&nbsp;looking&nbsp;at&nbsp;buying&nbsp;U.K.&nbsp;startup&nbsp;for&nbsp;$1&nbsp;billion.\"<br><br><em>#&nbsp;\u5904\u7406\u6587\u672c<\/em><br>doc&nbsp;=&nbsp;nlp(text)<br><br><em>#&nbsp;\u63d0\u53d6\u5b9e\u4f53<\/em><br>for&nbsp;ent&nbsp;in&nbsp;doc.ents:<br>&nbsp;&nbsp;&nbsp;&nbsp;print(ent.text,&nbsp;ent.label_)<br><br><em>#&nbsp;\u8f93\u51fa:<\/em><br><em>#&nbsp;Apple&nbsp;ORG<\/em><br><em>#&nbsp;U.K.&nbsp;GPE<\/em><br><em>#&nbsp;$1&nbsp;billion&nbsp;MONEY<\/em><\/code><\/pre>\n\n\n\n<p><strong>\u89e3\u91ca\uff1a<\/strong><\/p>\n\n\n\n<ul>\n<li>\u4f7f\u7528 <code>spacy<\/code> \u5e93\u8fdb\u884c\u547d\u540d\u5b9e\u4f53\u8bc6\u522b\u3002<\/li>\n\n\n\n<li>\u63d0\u53d6\u6587\u672c\u4e2d\u7684\u5b9e\u4f53\u53ca\u5176\u7c7b\u578b\u3002<\/li>\n<\/ul>\n\n\n\n<h2 class=\"wp-block-heading\"><span class=\"ez-toc-section\" id=\"12_%E6%9C%BA%E5%99%A8%E7%BF%BB%E8%AF%91\"><\/span><strong>12. 
\u673a\u5668\u7ffb\u8bd1<\/strong><span class=\"ez-toc-section-end\"><\/span><\/h2>\n\n\n\n<p>\u673a\u5668\u7ffb\u8bd1\u7528\u4e8e\u5c06\u4e00\u79cd\u8bed\u8a00\u7684\u6587\u672c\u8f6c\u6362\u4e3a\u53e6\u4e00\u79cd\u8bed\u8a00\u3002<\/p>\n\n\n\n<p><strong>\u4ee3\u7801\u793a\u4f8b\uff1a<\/strong><\/p>\n\n\n\n<pre class=\"wp-block-code\"><code>from&nbsp;googletrans&nbsp;import&nbsp;Translator<br><br><em>#&nbsp;\u521b\u5efa\u7ffb\u8bd1\u5668\u5bf9\u8c61<\/em><br>translator&nbsp;=&nbsp;Translator()<br><br><em>#&nbsp;\u793a\u4f8b\u6587\u672c<\/em><br>text&nbsp;=&nbsp;\"Hello,&nbsp;how&nbsp;are&nbsp;you?\"<br><br><em>#&nbsp;\u7ffb\u8bd1\u6587\u672c<\/em><br>translated_text&nbsp;=&nbsp;translator.translate(text,&nbsp;src='en',&nbsp;dest='fr')<br><br>print(translated_text.text)&nbsp;&nbsp;<em>#&nbsp;\u8f93\u51fa:&nbsp;Bonjour,&nbsp;comment&nbsp;\u00e7a&nbsp;va&nbsp;?<\/em><\/code><\/pre>\n\n\n\n<p><strong>\u89e3\u91ca\uff1a<\/strong><\/p>\n\n\n\n<ul>\n<li>\u4f7f\u7528 <code>googletrans<\/code> \u5e93\u8fdb\u884c\u6587\u672c\u7ffb\u8bd1\u3002<\/li>\n\n\n\n<li>\u5c06\u82f1\u6587\u6587\u672c\u7ffb\u8bd1\u6210\u6cd5\u6587\u3002<\/li>\n<\/ul>\n\n\n\n<h2 class=\"wp-block-heading\"><span class=\"ez-toc-section\" id=\"13_%E6%96%87%E6%9C%AC%E6%91%98%E8%A6%81\"><\/span><strong>13. 
\u6587\u672c\u6458\u8981<\/strong><span class=\"ez-toc-section-end\"><\/span><\/h2>\n\n\n\n<p>\u6587\u672c\u6458\u8981\u662f\u751f\u6210\u6587\u672c\u7684\u7b80\u6d01\u7248\u672c\uff0c\u4fdd\u7559\u4e3b\u8981\u4fe1\u606f\u3002<\/p>\n\n\n\n<p><strong>\u4ee3\u7801\u793a\u4f8b\uff1a<\/strong><\/p>\n\n\n\n<pre class=\"wp-block-code\"><code>from&nbsp;transformers&nbsp;import&nbsp;pipeline<br><br><em>#&nbsp;\u521b\u5efa\u6458\u8981\u751f\u6210\u5668<\/em><br>summarizer&nbsp;=&nbsp;pipeline(\"summarization\")<br><br><em>#&nbsp;\u793a\u4f8b\u6587\u672c<\/em><br>text&nbsp;=&nbsp;\"\"\"<br>Natural&nbsp;language&nbsp;processing&nbsp;(NLP)&nbsp;is&nbsp;a&nbsp;subfield&nbsp;of&nbsp;linguistics,&nbsp;computer&nbsp;science,&nbsp;<br>and&nbsp;artificial&nbsp;intelligence&nbsp;concerned&nbsp;with&nbsp;the&nbsp;interactions&nbsp;between&nbsp;computers&nbsp;and&nbsp;<br>human&nbsp;(natural)&nbsp;languages.&nbsp;As&nbsp;such,&nbsp;NLP&nbsp;is&nbsp;related&nbsp;to&nbsp;the&nbsp;area&nbsp;of&nbsp;human\u2013computer&nbsp;interaction.<br>Many&nbsp;challenges&nbsp;in&nbsp;NLP&nbsp;involve&nbsp;natural&nbsp;language&nbsp;understanding,&nbsp;that&nbsp;is,&nbsp;enabling&nbsp;computers&nbsp;<br>to&nbsp;derive&nbsp;meaning&nbsp;from&nbsp;human&nbsp;or&nbsp;natural&nbsp;language&nbsp;input,&nbsp;and&nbsp;others&nbsp;involve&nbsp;natural&nbsp;language&nbsp;<br>generation.<br>\"\"\"<br><br><em>#&nbsp;\u751f\u6210\u6458\u8981<\/em><br>summary&nbsp;=&nbsp;summarizer(text,&nbsp;max_length=100,&nbsp;min_length=30,&nbsp;do_sample=False)<br><br>print(summary&#91;0]&#91;'summary_text'])<\/code><\/pre>\n\n\n\n<p>Python \u5728\u81ea\u7136\u8bed\u8a00\u5904\u7406\uff08NLP\uff09\u4e2d\u768423\u4e2a\u5e94\u7528\u5b9e\u4f8b\uff08\u7eed\uff09<\/p>\n\n\n\n<h2 class=\"wp-block-heading\"><span class=\"ez-toc-section\" id=\"14_%E8%AF%8D%E4%BA%91%E7%94%9F%E6%88%90\"><\/span><strong>14. 
\u8bcd\u4e91\u751f\u6210<\/strong><span class=\"ez-toc-section-end\"><\/span><\/h2>\n\n\n\n<p>\u8bcd\u4e91\u662f\u4e00\u79cd\u53ef\u89c6\u5316\u5de5\u5177\uff0c\u53ef\u4ee5\u76f4\u89c2\u5730\u5c55\u793a\u6587\u672c\u4e2d\u6700\u5e38\u51fa\u73b0\u7684\u8bcd\u6c47\u3002<\/p>\n\n\n\n<p><strong>\u4ee3\u7801\u793a\u4f8b\uff1a<\/strong><\/p>\n\n\n\n<pre class=\"wp-block-code\"><code>from&nbsp;wordcloud&nbsp;import&nbsp;WordCloud<br>import&nbsp;matplotlib.pyplot&nbsp;as&nbsp;plt<br><br><em>#&nbsp;\u793a\u4f8b\u6587\u672c<\/em><br>text&nbsp;=&nbsp;\"Natural&nbsp;language&nbsp;processing&nbsp;(NLP)&nbsp;is&nbsp;a&nbsp;subfield&nbsp;of&nbsp;linguistics,&nbsp;computer&nbsp;science,&nbsp;and&nbsp;artificial&nbsp;intelligence&nbsp;concerned&nbsp;with&nbsp;the&nbsp;interactions&nbsp;between&nbsp;computers&nbsp;and&nbsp;human&nbsp;(natural)&nbsp;languages.\"<br><br><em>#&nbsp;\u751f\u6210\u8bcd\u4e91<\/em><br>wordcloud&nbsp;=&nbsp;WordCloud(width=800,&nbsp;height=400,&nbsp;background_color='white').generate(text)<br><br><em>#&nbsp;\u663e\u793a\u8bcd\u4e91<\/em><br>plt.figure(figsize=(10,&nbsp;5))<br>plt.imshow(wordcloud,&nbsp;interpolation='bilinear')<br>plt.axis('off')<br>plt.show()<\/code><\/pre>\n\n\n\n<p><strong>\u89e3\u91ca\uff1a<\/strong><\/p>\n\n\n\n<ul>\n<li>\u4f7f\u7528 <code>wordcloud<\/code> \u5e93\u751f\u6210\u8bcd\u4e91\u3002<\/li>\n\n\n\n<li>\u8bbe\u7f6e\u8bcd\u4e91\u7684\u5bbd\u5ea6\u3001\u9ad8\u5ea6\u548c\u80cc\u666f\u989c\u8272\u3002<\/li>\n\n\n\n<li>\u4f7f\u7528 <code>matplotlib<\/code> \u663e\u793a\u8bcd\u4e91\u56fe\u50cf\u3002<\/li>\n<\/ul>\n\n\n\n<h2 class=\"wp-block-heading\"><span class=\"ez-toc-section\" id=\"15_%E9%97%AE%E7%AD%94%E7%B3%BB%E7%BB%9F\"><\/span><strong>15. 
\u95ee\u7b54\u7cfb\u7edf<\/strong><span class=\"ez-toc-section-end\"><\/span><\/h2>\n\n\n\n<p>\u95ee\u7b54\u7cfb\u7edf\u7528\u4e8e\u56de\u7b54\u7528\u6237\u63d0\u51fa\u7684\u95ee\u9898\u3002<\/p>\n\n\n\n<p><strong>\u4ee3\u7801\u793a\u4f8b\uff1a<\/strong><\/p>\n\n\n\n<pre class=\"wp-block-code\"><code>from&nbsp;transformers&nbsp;import&nbsp;pipeline<br><br><em>#&nbsp;\u521b\u5efa\u95ee\u7b54\u6a21\u578b<\/em><br>qa_pipeline&nbsp;=&nbsp;pipeline(\"question-answering\",&nbsp;model=\"distilbert-base-cased-distilled-squad\")<br><br><em>#&nbsp;\u793a\u4f8b\u95ee\u9898\u548c\u4e0a\u4e0b\u6587<\/em><br>context&nbsp;=&nbsp;\"Natural&nbsp;language&nbsp;processing&nbsp;(NLP)&nbsp;is&nbsp;a&nbsp;subfield&nbsp;of&nbsp;linguistics,&nbsp;computer&nbsp;science,&nbsp;and&nbsp;artificial&nbsp;intelligence&nbsp;concerned&nbsp;with&nbsp;the&nbsp;interactions&nbsp;between&nbsp;computers&nbsp;and&nbsp;human&nbsp;(natural)&nbsp;languages.\"<br>question&nbsp;=&nbsp;\"What&nbsp;is&nbsp;NLP?\"<br><br><em>#&nbsp;\u751f\u6210\u7b54\u6848<\/em><br>answer&nbsp;=&nbsp;qa_pipeline(question=question,&nbsp;context=context)<br><br>print(answer&#91;'answer'])&nbsp;&nbsp;<em>#&nbsp;\u8f93\u51fa:&nbsp;Natural&nbsp;language&nbsp;processing&nbsp;(NLP)&nbsp;is&nbsp;a&nbsp;subfield&nbsp;of&nbsp;linguistics,&nbsp;computer&nbsp;science,&nbsp;and&nbsp;artificial&nbsp;intelligence&nbsp;concerned&nbsp;with&nbsp;the&nbsp;interactions&nbsp;between&nbsp;computers&nbsp;and&nbsp;human&nbsp;(natural)&nbsp;languages.<\/em><\/code><\/pre>\n\n\n\n<p><strong>\u89e3\u91ca\uff1a<\/strong><\/p>\n\n\n\n<ul>\n<li>\u4f7f\u7528 <code>transformers<\/code> \u5e93\u521b\u5efa\u95ee\u7b54\u6a21\u578b\u3002<\/li>\n\n\n\n<li>\u63d0\u4f9b\u95ee\u9898\u548c\u4e0a\u4e0b\u6587\u6587\u672c\u3002<\/li>\n\n\n\n<li>\u751f\u6210\u7b54\u6848\u5e76\u6253\u5370\u3002<\/li>\n<\/ul>\n\n\n\n<h2 class=\"wp-block-heading\"><span class=\"ez-toc-section\" id=\"16_%E4%BF%A1%E6%81%AF%E6%8A%BD%E5%8F%96\"><\/span><strong>16. 
\u4fe1\u606f\u62bd\u53d6<\/strong><span class=\"ez-toc-section-end\"><\/span><\/h2>\n\n\n\n<p>\u4fe1\u606f\u62bd\u53d6\u662f\u4ece\u975e\u7ed3\u6784\u5316\u6587\u672c\u4e2d\u63d0\u53d6\u6709\u7528\u4fe1\u606f\u7684\u8fc7\u7a0b\u3002<\/p>\n\n\n\n<p><strong>\u4ee3\u7801\u793a\u4f8b\uff1a<\/strong><\/p>\n\n\n\n<pre class=\"wp-block-code\"><code>from&nbsp;transformers&nbsp;import&nbsp;pipeline<br><br><em>#&nbsp;\u521b\u5efa\u4fe1\u606f\u62bd\u53d6\u6a21\u578b<\/em><br>ner_pipeline&nbsp;=&nbsp;pipeline(\"ner\",&nbsp;model=\"dbmdz\/bert-large-cuneiform-sumerian-ner\")<br><br><em>#&nbsp;\u793a\u4f8b\u6587\u672c<\/em><br>text&nbsp;=&nbsp;\"Sargon&nbsp;was&nbsp;a&nbsp;king&nbsp;of&nbsp;Akkad.\"<br><br><em>#&nbsp;\u63d0\u53d6\u4fe1\u606f<\/em><br>entities&nbsp;=&nbsp;ner_pipeline(text)<br><br>print(entities)<br><em>#&nbsp;\u8f93\u51fa:<\/em><br><em>#&nbsp;&#91;{'entity':&nbsp;'B-PER',&nbsp;'score':&nbsp;0.9999799728393555,&nbsp;'index':&nbsp;0,&nbsp;'word':&nbsp;'Sargon',&nbsp;'start':&nbsp;0,&nbsp;'end':&nbsp;6},<\/em><br><em>#&nbsp;&nbsp;{'entity':&nbsp;'B-LOC',&nbsp;'score':&nbsp;0.9999675750732422,&nbsp;'index':&nbsp;5,&nbsp;'word':&nbsp;'Akkad',&nbsp;'start':&nbsp;14,&nbsp;'end':&nbsp;19}]<\/em><\/code><\/pre>\n\n\n\n<p><strong>\u89e3\u91ca\uff1a<\/strong><\/p>\n\n\n\n<ul>\n<li>\u4f7f\u7528 <code>transformers<\/code> \u5e93\u521b\u5efa\u4fe1\u606f\u62bd\u53d6\u6a21\u578b\u3002<\/li>\n\n\n\n<li>\u63d0\u53d6\u6587\u672c\u4e2d\u7684\u5b9e\u4f53\u53ca\u5176\u7c7b\u578b\u3002<\/li>\n\n\n\n<li>\u6253\u5370\u63d0\u53d6\u7ed3\u679c\u3002<\/li>\n<\/ul>\n\n\n\n<h2 class=\"wp-block-heading\"><span class=\"ez-toc-section\" id=\"17_%E5%85%B3%E7%B3%BB%E6%8A%BD%E5%8F%96\"><\/span><strong>17. 
\u5173\u7cfb\u62bd\u53d6<\/strong><span class=\"ez-toc-section-end\"><\/span><\/h2>\n\n\n\n<p>\u5173\u7cfb\u62bd\u53d6\u662f\u4ece\u6587\u672c\u4e2d\u8bc6\u522b\u5b9e\u4f53\u4e4b\u95f4\u7684\u5173\u7cfb\u3002<\/p>\n\n\n\n<p><strong>\u4ee3\u7801\u793a\u4f8b\uff1a<\/strong><\/p>\n\n\n\n<pre class=\"wp-block-code\"><code>from&nbsp;transformers&nbsp;import&nbsp;pipeline<br><br><em>#&nbsp;\u521b\u5efa\u5173\u7cfb\u62bd\u53d6\u6a21\u578b<\/em><br>re_pipeline&nbsp;=&nbsp;pipeline(\"relation-extraction\",&nbsp;model=\"joeddav\/xlm-roberta-large-xnli\")<br><br><em>#&nbsp;\u793a\u4f8b\u6587\u672c<\/em><br>text&nbsp;=&nbsp;\"Sargon&nbsp;was&nbsp;a&nbsp;king&nbsp;of&nbsp;Akkad.\"<br><br><em>#&nbsp;\u5b9a\u4e49\u5b9e\u4f53\u5bf9<\/em><br>entity_pairs&nbsp;=&nbsp;&#91;<br>&nbsp;&nbsp;&nbsp;&nbsp;{\"entity\":&nbsp;\"Sargon\",&nbsp;\"offset\":&nbsp;(0,&nbsp;6)},<br>&nbsp;&nbsp;&nbsp;&nbsp;{\"entity\":&nbsp;\"king\",&nbsp;\"offset\":&nbsp;(10,&nbsp;14)},<br>&nbsp;&nbsp;&nbsp;&nbsp;{\"entity\":&nbsp;\"Akkad\",&nbsp;\"offset\":&nbsp;(17,&nbsp;22)}<br>]<br><br><em>#&nbsp;\u63d0\u53d6\u5173\u7cfb<\/em><br>relations&nbsp;=&nbsp;re_pipeline(text,&nbsp;entity_pairs)<br><br>print(relations)<br><em>#&nbsp;\u8f93\u51fa:<\/em><br><em>#&nbsp;&#91;{'score':&nbsp;0.9999675750732422,&nbsp;'entity':&nbsp;'was&nbsp;a',&nbsp;'label':&nbsp;'is_a',&nbsp;'entity_pair':&nbsp;{'entity_0':&nbsp;'Sargon',&nbsp;'entity_1':&nbsp;'king'},&nbsp;'index':&nbsp;0,&nbsp;'confidence':&nbsp;0.9999675750732422}]<\/em><\/code><\/pre>\n\n\n\n<p><strong>\u89e3\u91ca\uff1a<\/strong><\/p>\n\n\n\n<ul>\n<li>\u4f7f\u7528 <code>transformers<\/code> \u5e93\u521b\u5efa\u5173\u7cfb\u62bd\u53d6\u6a21\u578b\u3002<\/li>\n\n\n\n<li>\u5b9a\u4e49\u5b9e\u4f53\u5bf9\u3002<\/li>\n\n\n\n<li>\u63d0\u53d6\u5b9e\u4f53\u4e4b\u95f4\u7684\u5173\u7cfb\u3002<\/li>\n\n\n\n<li>\u6253\u5370\u63d0\u53d6\u7ed3\u679c\u3002<\/li>\n<\/ul>\n\n\n\n<h2 class=\"wp-block-heading\"><span class=\"ez-toc-section\" 
id=\"18_%E6%96%87%E6%9C%AC%E8%81%9A%E7%B1%BB\"><\/span><strong>18. \u6587\u672c\u805a\u7c7b<\/strong><span class=\"ez-toc-section-end\"><\/span><\/h2>\n\n\n\n<p>\u6587\u672c\u805a\u7c7b\u662f\u5c06\u76f8\u4f3c\u7684\u6587\u6863\u5f52\u4e3a\u4e00\u7c7b\u7684\u8fc7\u7a0b\u3002<\/p>\n\n\n\n<p><strong>\u4ee3\u7801\u793a\u4f8b\uff1a<\/strong><\/p>\n\n\n\n<pre class=\"wp-block-code\"><code>from&nbsp;sklearn.feature_extraction.text&nbsp;import&nbsp;TfidfVectorizer<br>from&nbsp;sklearn.cluster&nbsp;import&nbsp;KMeans<br>from&nbsp;sklearn.metrics&nbsp;import&nbsp;silhouette_score<br><br><em>#&nbsp;\u793a\u4f8b\u6587\u672c<\/em><br>documents&nbsp;=&nbsp;&#91;<br>&nbsp;&nbsp;&nbsp;&nbsp;\"Human&nbsp;machine&nbsp;interface&nbsp;for&nbsp;lab&nbsp;abc&nbsp;computer&nbsp;applications\",<br>&nbsp;&nbsp;&nbsp;&nbsp;\"A&nbsp;survey&nbsp;of&nbsp;user&nbsp;opinion&nbsp;of&nbsp;computer&nbsp;system&nbsp;response&nbsp;time\",<br>&nbsp;&nbsp;&nbsp;&nbsp;\"The&nbsp;EPS&nbsp;user&nbsp;interface&nbsp;management&nbsp;system\",<br>&nbsp;&nbsp;&nbsp;&nbsp;\"System&nbsp;and&nbsp;human&nbsp;system&nbsp;engineering&nbsp;testing&nbsp;of&nbsp;EPS\",<br>&nbsp;&nbsp;&nbsp;&nbsp;\"Relation&nbsp;of&nbsp;user&nbsp;perceived&nbsp;response&nbsp;time&nbsp;to&nbsp;error&nbsp;measurement\",<br>&nbsp;&nbsp;&nbsp;&nbsp;\"The&nbsp;generation&nbsp;of&nbsp;random&nbsp;binary&nbsp;unordered&nbsp;trees\",<br>&nbsp;&nbsp;&nbsp;&nbsp;\"The&nbsp;intersection&nbsp;graph&nbsp;of&nbsp;paths&nbsp;in&nbsp;trees\",<br>&nbsp;&nbsp;&nbsp;&nbsp;\"Graph&nbsp;minors&nbsp;IV&nbsp;Widths&nbsp;of&nbsp;trees&nbsp;and&nbsp;well&nbsp;quasi&nbsp;ordering\",<br>&nbsp;&nbsp;&nbsp;&nbsp;\"Graph&nbsp;minors&nbsp;A&nbsp;survey\"<br>]<br><br><em>#&nbsp;TF-IDF&nbsp;\u5411\u91cf\u5316<\/em><br>vectorizer&nbsp;=&nbsp;TfidfVectorizer()<br>X&nbsp;=&nbsp;vectorizer.fit_transform(documents)<br><br><em>#&nbsp;K-Means&nbsp;\u805a\u7c7b<\/em><br>kmeans&nbsp;=&nbsp;KMeans(n_clusters=2,&nbsp;random_state=42)<br>kmeans.fit(X)<br><br><em>#&nbsp;\u8bc4\u4f3
0\u805a\u7c7b\u8d28\u91cf<\/em><br>silhouette_avg&nbsp;=&nbsp;silhouette_score(X,&nbsp;kmeans.labels_)<br>print(f\"Silhouette&nbsp;Score:&nbsp;{silhouette_avg:.2f}\")<br><br><em>#&nbsp;\u6253\u5370\u805a\u7c7b\u7ed3\u679c<\/em><br>for&nbsp;i,&nbsp;doc&nbsp;in&nbsp;enumerate(documents):<br>&nbsp;&nbsp;&nbsp;&nbsp;print(f\"{doc}&nbsp;-&gt;&nbsp;Cluster&nbsp;{kmeans.labels_&#91;i]}\")<\/code><\/pre>\n\n\n\n<p><strong>\u89e3\u91ca\uff1a<\/strong><\/p>\n\n\n\n<ul>\n<li>\u4f7f\u7528 <code>TfidfVectorizer<\/code> \u5bf9\u6587\u6863\u8fdb\u884c TF-IDF \u5411\u91cf\u5316\u3002<\/li>\n\n\n\n<li>\u4f7f\u7528 <code>KMeans<\/code> \u8fdb\u884c\u805a\u7c7b\u3002<\/li>\n\n\n\n<li>\u8bc4\u4f30\u805a\u7c7b\u8d28\u91cf\u3002<\/li>\n\n\n\n<li>\u6253\u5370\u6bcf\u4e2a\u6587\u6863\u7684\u805a\u7c7b\u7ed3\u679c\u3002<\/li>\n<\/ul>\n\n\n\n<h2 class=\"wp-block-heading\"><span class=\"ez-toc-section\" id=\"19_%E4%BA%8B%E4%BB%B6%E6%A3%80%E6%B5%8B\"><\/span><strong>19. \u4e8b\u4ef6\u68c0\u6d4b<\/strong><span class=\"ez-toc-section-end\"><\/span><\/h2>\n\n\n\n<p>\u4e8b\u4ef6\u68c0\u6d4b\u662f\u4ece\u6587\u672c\u4e2d\u8bc6\u522b\u7279\u5b9a\u4e8b\u4ef6\u7684\u8fc7\u7a0b\u3002<\/p>\n\n\n\n<p><strong>\u4ee3\u7801\u793a\u4f8b\uff1a<\/strong><\/p>\n\n\n\n<pre 
class=\"wp-block-code\"><code>from&nbsp;transformers&nbsp;import&nbsp;pipeline<br><br><em>#&nbsp;\u521b\u5efa\u4e8b\u4ef6\u68c0\u6d4b\u6a21\u578b<\/em><br>event_pipeline&nbsp;=&nbsp;pipeline(\"event-extraction\",&nbsp;model=\"microsoft\/layoutlmv2-base-uncased-finetuned-funsd\")<br><br><em>#&nbsp;\u793a\u4f8b\u6587\u672c<\/em><br>text&nbsp;=&nbsp;\"The&nbsp;company&nbsp;announced&nbsp;a&nbsp;new&nbsp;product&nbsp;launch&nbsp;on&nbsp;Monday.\"<br><br><em>#&nbsp;\u4e8b\u4ef6\u68c0\u6d4b<\/em><br>events&nbsp;=&nbsp;event_pipeline(text)<br><br>print(events)<br><em>#&nbsp;\u8f93\u51fa:<\/em><br><em>#&nbsp;&#91;{'event_type':&nbsp;'Product&nbsp;Launch',&nbsp;'trigger':&nbsp;'launch',&nbsp;'trigger_start':&nbsp;35,&nbsp;'trigger_end':&nbsp;40,&nbsp;'arguments':&nbsp;&#91;{'entity':&nbsp;'company',&nbsp;'entity_start':&nbsp;4,&nbsp;'entity_end':&nbsp;10,&nbsp;'role':&nbsp;'Company'},&nbsp;{'entity':&nbsp;'Monday',&nbsp;'entity_start':&nbsp;38,&nbsp;'entity_end':&nbsp;44,&nbsp;'role':&nbsp;'Date'}]}]<\/em><\/code><\/pre>\n\n\n\n<p><strong>\u89e3\u91ca\uff1a<\/strong><\/p>\n\n\n\n<ul>\n<li>\u4f7f\u7528 <code>transformers<\/code> \u5e93\u521b\u5efa\u4e8b\u4ef6\u68c0\u6d4b\u6a21\u578b\u3002<\/li>\n\n\n\n<li>\u63d0\u53d6\u6587\u672c\u4e2d\u7684\u4e8b\u4ef6\u53ca\u5176\u89e6\u53d1\u8bcd\u548c\u53c2\u6570\u3002<\/li>\n\n\n\n<li>\u6253\u5370\u4e8b\u4ef6\u68c0\u6d4b\u7ed3\u679c\u3002<\/li>\n<\/ul>\n\n\n\n<h2 class=\"wp-block-heading\"><span class=\"ez-toc-section\" id=\"20_%E8%AF%8D%E6%80%A7%E6%A0%87%E6%B3%A8\"><\/span><strong>20. 
\u8bcd\u6027\u6807\u6ce8<\/strong><span class=\"ez-toc-section-end\"><\/span><\/h2>\n\n\n\n<p>\u8bcd\u6027\u6807\u6ce8\u662f\u5c06\u6587\u672c\u4e2d\u7684\u6bcf\u4e2a\u5355\u8bcd\u6807\u8bb0\u4e3a\u5176\u5bf9\u5e94\u7684\u8bcd\u6027\u3002<\/p>\n\n\n\n<p><strong>\u4ee3\u7801\u793a\u4f8b\uff1a<\/strong><\/p>\n\n\n\n<pre class=\"wp-block-code\"><code>from&nbsp;nltk&nbsp;import&nbsp;pos_tag<br>from&nbsp;nltk.tokenize&nbsp;import&nbsp;word_tokenize<br><br><em>#&nbsp;\u793a\u4f8b\u6587\u672c<\/em><br>text&nbsp;=&nbsp;\"John&nbsp;likes&nbsp;to&nbsp;watch&nbsp;movies.&nbsp;Mary&nbsp;likes&nbsp;movies&nbsp;too.\"<br><br><em>#&nbsp;\u5206\u8bcd<\/em><br>tokens&nbsp;=&nbsp;word_tokenize(text)<br><br><em>#&nbsp;\u8bcd\u6027\u6807\u6ce8<\/em><br>tagged_tokens&nbsp;=&nbsp;pos_tag(tokens)<br><br>print(tagged_tokens)<br><em>#&nbsp;\u8f93\u51fa:<\/em><br><em>#&nbsp;&#91;('John',&nbsp;'NNP'),&nbsp;('likes',&nbsp;'VBZ'),&nbsp;('to',&nbsp;'TO'),&nbsp;('watch',&nbsp;'VB'),&nbsp;('movies',&nbsp;'NNS'),&nbsp;('.',&nbsp;'.'),&nbsp;('Mary',&nbsp;'NNP'),&nbsp;('likes',&nbsp;'VBZ'),&nbsp;('movies',&nbsp;'NNS'),&nbsp;('too',&nbsp;'RB'),&nbsp;('.',&nbsp;'.')]<\/em><\/code><\/pre>\n\n\n\n<p><strong>\u89e3\u91ca\uff1a<\/strong><\/p>\n\n\n\n<ul>\n<li>\u4f7f\u7528 <code>nltk<\/code> \u5e93\u8fdb\u884c\u5206\u8bcd\u3002<\/li>\n\n\n\n<li>\u4f7f\u7528 <code>pos_tag<\/code> \u8fdb\u884c\u8bcd\u6027\u6807\u6ce8\u3002<\/li>\n\n\n\n<li>\u6253\u5370\u6807\u6ce8\u7ed3\u679c\u3002<\/li>\n<\/ul>\n\n\n\n<h2 class=\"wp-block-heading\"><span class=\"ez-toc-section\" id=\"21_%E4%BE%9D%E5%AD%98%E5%8F%A5%E6%B3%95%E5%88%86%E6%9E%90\"><\/span><strong>21. 
\u4f9d\u5b58\u53e5\u6cd5\u5206\u6790<\/strong><span class=\"ez-toc-section-end\"><\/span><\/h2>\n\n\n\n<p>\u4f9d\u5b58\u53e5\u6cd5\u5206\u6790\u662f\u5206\u6790\u53e5\u5b50\u4e2d\u8bcd\u4e0e\u8bcd\u4e4b\u95f4\u7684\u4f9d\u5b58\u5173\u7cfb\u3002<\/p>\n\n\n\n<p><strong>\u4ee3\u7801\u793a\u4f8b\uff1a<\/strong><\/p>\n\n\n\n<pre class=\"wp-block-code\"><code>import&nbsp;spacy<br><br><em>#&nbsp;\u52a0\u8f7d\u9884\u8bad\u7ec3\u6a21\u578b<\/em><br>nlp&nbsp;=&nbsp;spacy.load(\"en_core_web_sm\")<br><br><em>#&nbsp;\u793a\u4f8b\u6587\u672c<\/em><br>text&nbsp;=&nbsp;\"John&nbsp;likes&nbsp;to&nbsp;watch&nbsp;movies.&nbsp;Mary&nbsp;likes&nbsp;movies&nbsp;too.\"<br><br><em>#&nbsp;\u5904\u7406\u6587\u672c<\/em><br>doc&nbsp;=&nbsp;nlp(text)<br><br><em>#&nbsp;\u4f9d\u5b58\u53e5\u6cd5\u5206\u6790<\/em><br>for&nbsp;token&nbsp;in&nbsp;doc:<br>&nbsp;&nbsp;&nbsp;&nbsp;print(token.text,&nbsp;token.dep_,&nbsp;token.head.text,&nbsp;token.head.pos_,<br>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&#91;child&nbsp;for&nbsp;child&nbsp;in&nbsp;token.children])<br><br><em>#&nbsp;\u8f93\u51fa:<\/em><br><em>#&nbsp;John&nbsp;nsubj&nbsp;likes&nbsp;VERB&nbsp;&#91;]<\/em><br><em>#&nbsp;likes&nbsp;ROOT&nbsp;likes&nbsp;VERB&nbsp;&#91;to]<\/em><br><em>#&nbsp;to&nbsp;mark&nbsp;likes&nbsp;VERB&nbsp;&#91;watch]<\/em><br><em>#&nbsp;watch&nbsp;xcomp&nbsp;likes&nbsp;VERB&nbsp;&#91;]<\/em><br><em>#&nbsp;movies&nbsp;dobj&nbsp;likes&nbsp;VERB&nbsp;&#91;]<\/em><br><em>#&nbsp;.&nbsp;punct&nbsp;likes&nbsp;PUNCT&nbsp;&#91;]<\/em><br><em>#&nbsp;Mary&nbsp;nsubj&nbsp;likes&nbsp;VERB&nbsp;&#91;]<\/em><br><em>#&nbsp;likes&nbsp;ROOT&nbsp;likes&nbsp;VERB&nbsp;&#91;]<\/em><br><em>#&nbsp;movies&nbsp;dobj&nbsp;likes&nbsp;VERB&nbsp;&#91;]<\/em><br><em>#&nbsp;too&nbsp;advmod&nbsp;likes&nbsp;VERB&nbsp;&#91;]<\/em><br><em>#&nbsp;.&nbsp;punct&nbsp;likes&nbsp;PUNCT&nbsp;&#91;]<\/em><\/code><\/pre>\n\n\n\n<p><strong>\u89e3\u91ca\uff1a<\/strong><\/p>\n\n\n\n<ul>\n<li>\u4f7f\u7528 <code>spacy<\/code> 
\u5e93\u8fdb\u884c\u4f9d\u5b58\u53e5\u6cd5\u5206\u6790\u3002<\/li>\n\n\n\n<li>\u6253\u5370\u6bcf\u4e2a\u8bcd\u7684\u4f9d\u5b58\u5173\u7cfb\u53ca\u5176\u7236\u8282\u70b9\u548c\u5b50\u8282\u70b9\u3002<\/li>\n<\/ul>\n\n\n\n<h2 class=\"wp-block-heading\"><span class=\"ez-toc-section\" id=\"22_%E8%AF%AD%E6%B3%95%E6%A0%91%E6%9E%84%E5%BB%BA\"><\/span><strong>22. \u8bed\u6cd5\u6811\u6784\u5efa<\/strong><span class=\"ez-toc-section-end\"><\/span><\/h2>\n\n\n\n<p>\u8bed\u6cd5\u6811\u6784\u5efa\u662f\u5c06\u53e5\u5b50\u7684\u8bed\u6cd5\u7ed3\u6784\u8868\u793a\u4e3a\u6811\u72b6\u7ed3\u6784\u3002<\/p>\n\n\n\n<p><strong>\u4ee3\u7801\u793a\u4f8b\uff1a<\/strong><\/p>\n\n\n\n<pre class=\"wp-block-code\"><code>import&nbsp;nltk<br>from&nbsp;nltk&nbsp;import&nbsp;Tree<br><br><em>#&nbsp;\u793a\u4f8b\u6587\u672c<\/em><br>text&nbsp;=&nbsp;\"John&nbsp;likes&nbsp;to&nbsp;watch&nbsp;movies.&nbsp;Mary&nbsp;likes&nbsp;movies&nbsp;too.\"<br><br><em>#&nbsp;\u5206\u8bcd<\/em><br>tokens&nbsp;=&nbsp;nltk.word_tokenize(text)<br><br><em>#&nbsp;\u8bcd\u6027\u6807\u6ce8<\/em><br>tagged_tokens&nbsp;=&nbsp;nltk.pos_tag(tokens)<br><br><em>#&nbsp;\u6784\u5efa\u8bed\u6cd5\u6811<\/em><br>grammar&nbsp;=&nbsp;\"NP:&nbsp;{&lt;DT&gt;?&lt;JJ&gt;*&lt;NN&gt;}\"<br>cp&nbsp;=&nbsp;nltk.RegexpParser(grammar)<br>result&nbsp;=&nbsp;cp.parse(tagged_tokens)<br><br><em>#&nbsp;\u663e\u793a\u8bed\u6cd5\u6811<\/em><br>result.draw()<\/code><\/pre>\n\n\n\n<p><strong>\u89e3\u91ca\uff1a<\/strong><\/p>\n\n\n\n<ul>\n<li>\u4f7f\u7528 <code>nltk<\/code> \u5e93\u8fdb\u884c\u5206\u8bcd\u548c\u8bcd\u6027\u6807\u6ce8\u3002<\/li>\n\n\n\n<li>\u4f7f\u7528\u6b63\u5219\u8868\u8fbe\u5f0f\u6784\u5efa\u8bed\u6cd5\u6811\u3002<\/li>\n\n\n\n<li>\u4f7f\u7528 <code>draw<\/code> \u65b9\u6cd5\u663e\u793a\u8bed\u6cd5\u6811\u3002<\/li>\n<\/ul>\n\n\n\n<h2 class=\"wp-block-heading\"><span class=\"ez-toc-section\" id=\"23_%E8%AF%8D%E6%80%A7%E8%BD%AC%E6%8D%A2\"><\/span><strong>23. 
\u8bcd\u6027\u8f6c\u6362<\/strong><span class=\"ez-toc-section-end\"><\/span><\/h2>\n\n\n\n<p>\u8bcd\u6027\u8f6c\u6362\u662f\u5c06\u4e00\u4e2a\u8bcd\u4ece\u4e00\u79cd\u8bcd\u6027\u8f6c\u6362\u4e3a\u53e6\u4e00\u79cd\u8bcd\u6027\u3002<\/p>\n\n\n\n<p><strong>\u4ee3\u7801\u793a\u4f8b\uff1a<\/strong><\/p>\n\n\n\n<pre class=\"wp-block-code\"><code>from&nbsp;nltk.stem&nbsp;import&nbsp;WordNetLemmatizer<br>from&nbsp;nltk.corpus&nbsp;import&nbsp;wordnet<br><br><em>#&nbsp;\u793a\u4f8b\u6587\u672c<\/em><br>text&nbsp;=&nbsp;\"running&nbsp;dogs&nbsp;are&nbsp;barking&nbsp;loudly.\"<br><br><em>#&nbsp;\u5206\u8bcd<\/em><br>tokens&nbsp;=&nbsp;text.split()<br><br><em>#&nbsp;\u8bcd\u6027\u8f6c\u6362<\/em><br>lemmatizer&nbsp;=&nbsp;WordNetLemmatizer()<br>converted_tokens&nbsp;=&nbsp;&#91;]<br><br>for&nbsp;token&nbsp;in&nbsp;tokens:<br>&nbsp;&nbsp;&nbsp;&nbsp;<em>#&nbsp;\u83b7\u53d6\u8bcd\u6027<\/em><br>&nbsp;&nbsp;&nbsp;&nbsp;pos&nbsp;=&nbsp;wordnet.VERB&nbsp;if&nbsp;token.endswith('ing')&nbsp;else&nbsp;wordnet.NOUN<br>&nbsp;&nbsp;&nbsp;&nbsp;converted_token&nbsp;=&nbsp;lemmatizer.lemmatize(token,&nbsp;pos=pos)<br>&nbsp;&nbsp;&nbsp;&nbsp;converted_tokens.append(converted_token)<br><br>print(converted_tokens)<br><em>#&nbsp;\u8f93\u51fa:<\/em><br><em>#&nbsp;&#91;'run',&nbsp;'dog',&nbsp;'are',&nbsp;'bark',&nbsp;'loudly.']<\/em><\/code><\/pre>\n\n\n\n<p><strong>\u89e3\u91ca\uff1a<\/strong><\/p>\n\n\n\n<ul>\n<li>\u4f7f\u7528 <code>WordNetLemmatizer<\/code> \u8fdb\u884c\u8bcd\u6027\u8f6c\u6362\u3002<\/li>\n\n\n\n<li>\u6839\u636e\u8bcd\u5c3e\u5224\u65ad\u8bcd\u6027\u3002<\/li>\n\n\n\n<li>\u6253\u5370\u8f6c\u6362\u540e\u7684\u7ed3\u679c\u3002<\/li>\n<\/ul>\n\n\n\n<h2 class=\"wp-block-heading\"><span class=\"ez-toc-section\" 
id=\"%E5%AE%9E%E6%88%98%E6%A1%88%E4%BE%8B%EF%BC%9A%E6%83%85%E6%84%9F%E5%88%86%E6%9E%90%E5%9C%A8%E7%94%B5%E5%95%86%E8%AF%84%E8%AE%BA%E4%B8%AD%E7%9A%84%E5%BA%94%E7%94%A8\"><\/span><strong>\u5b9e\u6218\u6848\u4f8b\uff1a\u60c5\u611f\u5206\u6790\u5728\u7535\u5546\u8bc4\u8bba\u4e2d\u7684\u5e94\u7528<\/strong><span class=\"ez-toc-section-end\"><\/span><\/h2>\n\n\n\n<p>\u5047\u8bbe\u6211\u4eec\u6b63\u5728\u4e3a\u4e00\u5bb6\u7535\u5546\u5e73\u53f0\u5f00\u53d1\u4e00\u4e2a\u60c5\u611f\u5206\u6790\u7cfb\u7edf\uff0c\u7528\u4e8e\u81ea\u52a8\u5206\u6790\u7528\u6237\u8bc4\u8bba\u7684\u60c5\u611f\u503e\u5411\u3002\u5177\u4f53\u6b65\u9aa4\u5982\u4e0b\uff1a<\/p>\n\n\n\n<p><strong>1.<\/strong> <strong>\u6570\u636e\u6536\u96c6<\/strong>\uff1a<\/p>\n\n\n\n<ul>\n<li>\u6536\u96c6\u7535\u5546\u5e73\u53f0\u4e0a\u7684\u7528\u6237\u8bc4\u8bba\u6570\u636e\u3002<\/li>\n<\/ul>\n\n\n\n<p><strong>2.<\/strong> <strong>\u6570\u636e\u9884\u5904\u7406<\/strong>\uff1a<\/p>\n\n\n\n<ul>\n<li>\u6e05\u6d17\u6587\u672c\u6570\u636e\uff0c\u53bb\u9664\u65e0\u5173\u4fe1\u606f\u3002<\/li>\n\n\n\n<li>\u5206\u8bcd\u5e76\u53bb\u9664\u505c\u7528\u8bcd\u3002<\/li>\n<\/ul>\n\n\n\n<p><strong>3.<\/strong> <strong>\u60c5\u611f\u5206\u6790<\/strong>\uff1a<\/p>\n\n\n\n<ul>\n<li>\u4f7f\u7528 <code>SentimentIntensityAnalyzer<\/code> \u8fdb\u884c\u60c5\u611f\u5206\u6790\u3002<\/li>\n\n\n\n<li>\u8ba1\u7b97\u6bcf\u4e2a\u8bc4\u8bba\u7684\u60c5\u611f\u5f97\u5206\u3002<\/li>\n<\/ul>\n\n\n\n<p><strong>4.<\/strong> <strong>\u7ed3\u679c\u5c55\u793a<\/strong>\uff1a<\/p>\n\n\n\n<ul>\n<li>\u5c06\u5206\u6790\u7ed3\u679c\u53ef\u89c6\u5316\uff0c\u5c55\u793a\u6b63\u9762\u3001\u8d1f\u9762\u548c\u4e2d\u6027\u8bc4\u8bba\u7684\u6bd4\u4f8b\u3002<\/li>\n<\/ul>\n\n\n\n<p><strong>\u4ee3\u7801\u793a\u4f8b\uff1a<\/strong><\/p>\n\n\n\n<pre 
class=\"wp-block-code\"><code>import&nbsp;pandas&nbsp;as&nbsp;pd<br>from&nbsp;nltk.sentiment&nbsp;import&nbsp;SentimentIntensityAnalyzer<br>import&nbsp;matplotlib.pyplot&nbsp;as&nbsp;plt<br><br><em>#&nbsp;\u52a0\u8f7d\u8bc4\u8bba\u6570\u636e<\/em><br>data&nbsp;=&nbsp;pd.read_csv('reviews.csv')<br>comments&nbsp;=&nbsp;data&#91;'comment'].tolist()<br><br><em>#&nbsp;\u60c5\u611f\u5206\u6790<\/em><br>sia&nbsp;=&nbsp;SentimentIntensityAnalyzer()<br><br>sentiments&nbsp;=&nbsp;&#91;]<br>for&nbsp;comment&nbsp;in&nbsp;comments:<br>&nbsp;&nbsp;&nbsp;&nbsp;sentiment_scores&nbsp;=&nbsp;sia.polarity_scores(comment)<br>&nbsp;&nbsp;&nbsp;&nbsp;sentiments.append(sentiment_scores&#91;'compound'])<br><br><em>#&nbsp;\u8ba1\u7b97\u60c5\u611f\u7c7b\u522b<\/em><br>positive_count&nbsp;=&nbsp;sum(1&nbsp;for&nbsp;score&nbsp;in&nbsp;sentiments&nbsp;if&nbsp;score&nbsp;&gt;&nbsp;0)<br>negative_count&nbsp;=&nbsp;sum(1&nbsp;for&nbsp;score&nbsp;in&nbsp;sentiments&nbsp;if&nbsp;score&nbsp;&lt;&nbsp;0)<br>neutral_count&nbsp;=&nbsp;sum(1&nbsp;for&nbsp;score&nbsp;in&nbsp;sentiments&nbsp;if&nbsp;score&nbsp;==&nbsp;0)<br><br><em>#&nbsp;\u53ef\u89c6\u5316\u7ed3\u679c<\/em><br>labels&nbsp;=&nbsp;&#91;'Positive',&nbsp;'Negative',&nbsp;'Neutral']<br>sizes&nbsp;=&nbsp;&#91;positive_count,&nbsp;negative_count,&nbsp;neutral_count]<br><br>plt.figure(figsize=(8,&nbsp;8))<br>plt.pie(sizes,&nbsp;labels=labels,&nbsp;autopct='%1.1f%%',&nbsp;startangle=140)<br>plt.title('Sentiment&nbsp;Analysis&nbsp;of&nbsp;Product&nbsp;Reviews')<br>plt.show()<\/code><\/pre>\n\n\n\n<p><strong>\u89e3\u91ca\uff1a<\/strong><\/p>\n\n\n\n<ul>\n<li>\u4f7f\u7528 <code>pandas<\/code> \u5e93\u52a0\u8f7d\u8bc4\u8bba\u6570\u636e\u3002<\/li>\n\n\n\n<li>\u4f7f\u7528 <code>SentimentIntensityAnalyzer<\/code> \u8ba1\u7b97\u6bcf\u4e2a\u8bc4\u8bba\u7684\u60c5\u611f\u5f97\u5206\u3002<\/li>\n\n\n\n<li>\u7edf\u8ba1\u6b63\u9762\u3001\u8d1f\u9762\u548c\u4e2d\u6027\u8bc4\u8bba\u7684\u6570\u91cf\u3002<\/li>\n\n\n\n<li>\u4f7f\u7528 <code>matplotlib<\/code> 
\u7ed8\u5236\u997c\u56fe\u5c55\u793a\u7ed3\u679c\u3002<\/li>\n<\/ul>\n","protected":false},"excerpt":{"rendered":"<p>1. \u6587\u672c\u6e05\u6d17 \u6587\u672c\u6e05\u6d17\u662f\u4efb\u4f55 NLP \u9879\u76ee\u7684\u7b2c\u4e00\u6b65\u3002\u5b83\u6d89\u53ca\u53bb\u9664\u4e0d\u9700\u8981\u7684\u4fe1\u606f\uff0c\u5982\u6807\u70b9\u7b26\u53f7\u3001\u6570\u5b57\u3001\u7279\u6b8a&hellip; <a href=\"http:\/\/viplao.com\/index.php\/2024\/11\/16\/python%e5%9f%ba%e7%a1%80%e6%8a%80%e8%83%bd-23%e4%b8%aapython%e5%9c%a8%e8%87%aa%e7%84%b6%e8%af%ad%e8%a8%80%e5%a4%84%e7%90%86%e4%b8%ad%e7%9a%84%e5%ba%94%e7%94%a8%e5%ae%9e%e4%be%8b\/\" class=\"more-link read-more\" rel=\"bookmark\">\u7ee7\u7eed\u9605\u8bfb <span class=\"screen-reader-text\">PYTHON\u57fa\u7840\u6280\u80fd \u2013\u00a023\u4e2aPython\u5728\u81ea\u7136\u8bed\u8a00\u5904\u7406\u4e2d\u7684\u5e94\u7528\u5b9e\u4f8b<\/span><i class=\"fa fa-arrow-right\"><\/i><\/a><\/p>\n","protected":false},"author":1,"featured_media":0,"comment_status":"closed","ping_status":"open","sticky":false,"template":"","format":"standard","meta":[],"categories":[1],"tags":[28],"views":1015,"_links":{"self":[{"href":"http:\/\/viplao.com\/index.php\/wp-json\/wp\/v2\/posts\/2897"}],"collection":[{"href":"http:\/\/viplao.com\/index.php\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"http:\/\/viplao.com\/index.php\/wp-json\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"http:\/\/viplao.com\/index.php\/wp-json\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"http:\/\/viplao.com\/index.php\/wp-json\/wp\/v2\/comments?post=2897"}],"version-history":[{"count":1,"href":"http:\/\/viplao.com\/index.php\/wp-json\/wp\/v2\/posts\/2897\/revisions"}],"predecessor-version":[{"id":2898,"href":"http:\/\/viplao.com\/index.php\/wp-json\/wp\/v2\/posts\/2897\/revisions\/2898"}],"wp:attachment":[{"href":"http:\/\/viplao.com\/index.php\/wp-json\/wp\/v2\/media?parent=2897"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"http:\/\/viplao.com\/index.php\
/wp-json\/wp\/v2\/categories?post=2897"},{"taxonomy":"post_tag","embeddable":true,"href":"http:\/\/viplao.com\/index.php\/wp-json\/wp\/v2\/tags?post=2897"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}