{"id":3438,"date":"2023-06-13T10:30:37","date_gmt":"2023-06-13T02:30:37","guid":{"rendered":"http:\/\/cnliutz.ipyingshe.net\/?p=3438"},"modified":"2023-06-13T10:31:11","modified_gmt":"2023-06-13T02:31:11","slug":"pyrhon-ocr%e6%96%87%e5%ad%97%e8%af%86%e5%88%ab","status":"publish","type":"post","link":"http:\/\/g1n29wqq.ipyingshe.net:5347\/?p=3438","title":{"rendered":"Python OCR\u6587\u5b57\u8bc6\u522b"},"content":{"rendered":"\n<pre class=\"wp-block-code\"><code>#\u5c06\u8bc6\u522b\u7684\u6587\u672c\u6574\u7406\u6210word\u3001txt\u683c\u5f0f\u7684\u6587\u4ef6\uff1a\n\nimport pytesseract\nfrom PIL import Image\nimport docx\nimport os\n\n# \u6253\u5f00\u56fe\u7247\nimg = Image.open('example.png')\n\n# \u8bc6\u522b\u56fe\u7247\u4e2d\u7684\u6587\u672c\ntext = pytesseract.image_to_string(img, lang='eng')\n\n# \u53bb\u9664\u591a\u4f59\u7684\u7a7a\u683c\u548c\u6362\u884c\u7b26\ntext = ' '.join(text.split())\ntext = text.replace('\\n', ' ')\n\n# \u5c06\u6587\u672c\u6309\u7167\u6bb5\u843d\u8fdb\u884c\u5206\u5272\nparagraphs = text.split('\\n\\n')\n\n# \u5c06\u6587\u672c\u6574\u7406\u6210word\u683c\u5f0f\u7684\u6587\u4ef6\ndoc = docx.Document()\nfor p in paragraphs:\n    doc.add_paragraph(p)\ndoc.save('example.docx')\n\n# \u5c06\u6587\u672c\u6574\u7406\u6210txt\u683c\u5f0f\u7684\u6587\u4ef6\nwith open('example.txt', 'w') as f:\n    for p in paragraphs:\n        f.write(p + '\\n\\n')\n\n# \u6253\u5f00\u751f\u6210\u7684\u6587\u4ef6\nos.system('start example.docx')\nos.system('start example.txt')<\/code><\/pre>\n","protected":false},"excerpt":{"rendered":"","protected":false},"author":1,"featured_media":0,"comment_status":"closed","ping_status":"open","sticky":false,"template":"","format":"standard","meta":{"footnotes":""},"categories":[2,10],"tags":[],"class_list":["post-3438","post","type-post","status-publish","format-standard","hentry","category-2","category-python"],"_links":{"self":[{"href":"http:\/\/g1n29wqq.ipyingshe.net:5347\/index.php?rest_route=\/wp\/v2\/posts\/3438","targetHints":{"allow":["GET"]}}],"collection":[{"href":"http:\/\/g1n29wqq.ipyingshe.net:5347\/index.php?rest_route=\/wp\/v2\/posts"}],"about":[{"href":"http:\/\/g1n29wqq.ipyingshe.net:5347\/index.php?rest_route=\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"http:\/\/g1n29wqq.ipyingshe.net:5347\/index.php?rest_route=\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"http:\/\/g1n29wqq.ipyingshe.net:5347\/index.php?rest_route=%2Fwp%2Fv2%2Fcomments&post=3438"}],"version-history":[{"count":1,"href":"http:\/\/g1n29wqq.ipyingshe.net:5347\/index.php?rest_route=\/wp\/v2\/posts\/3438\/revisions"}],"predecessor-version":[{"id":3439,"href":"http:\/\/g1n29wqq.ipyingshe.net:5347\/index.php?rest_route=\/wp\/v2\/posts\/3438\/revisions\/3439"}],"wp:attachment":[{"href":"http:\/\/g1n29wqq.ipyingshe.net:5347\/index.php?rest_route=%2Fwp%2Fv2%2Fmedia&parent=3438"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"http:\/\/g1n29wqq.ipyingshe.net:5347\/index.php?rest_route=%2Fwp%2Fv2%2Fcategories&post=3438"},{"taxonomy":"post_tag","embeddable":true,"href":"http:\/\/g1n29wqq.ipyingshe.net:5347\/index.php?rest_route=%2Fwp%2Fv2%2Ftags&post=3438"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}