{"id":242,"date":"2021-08-01T07:51:00","date_gmt":"2021-08-01T07:51:00","guid":{"rendered":"https:\/\/262235.xyz\/?p=242"},"modified":"2021-08-01T07:51:00","modified_gmt":"2021-08-01T07:51:00","slug":"242","status":"publish","type":"post","link":"https:\/\/lyvba.com\/index.php\/2021\/08\/01\/242\/","title":{"rendered":"Python\u628a\u7f51\u9875\u8f6c\u6362\u6210 Markdown \u6587\u672c"},"content":{"rendered":"<p><img decoding=\"async\" src=\"https:\/\/lyvba.com\/wp-content\/uploads\/2021\/08\/2281641762.png\" alt=\"python.png\" title=\"python.png\"><\/p>\n<ul>\n<li>html2text \u662f\u4e00\u4e2a Python \u811a\u672c\u5e93\uff0c\u53ef\u5c06 HTML \u9875\u9762\u8f6c\u6362\u4e3a\u5e72\u51c0\u3001\u6613\u4e8e\u9605\u8bfb\u7684\u7eaf ASCII \u6587\u672c\u3002 \u66f4\u597d\u7684\u662f\uff0c\u8be5 ASCII \u4e5f\u6070\u597d\u662f\u6709\u6548\u7684 Markdown\uff08\u4e00\u79cd\u6587\u672c\u5230 HTML \u7684\u683c\u5f0f\uff09\u3002<\/li>\n<\/ul>\n<h3>\u5b89\u88c5 html2text\u5e93<\/h3>\n<pre><code>pip3 install html2text<\/code><\/pre>\n<h3>\u4f60\u53ef\u4ee5\u5728 Python \u4e2d\u4f7f\u7528\u5b83\uff1a<\/h3>\n<pre><code>import html2text\nprint (html2text.html2text(\"&lt;p&gt;Hello, world.&lt;\/p&gt;\"))<\/code><\/pre>\n<h3>\u6216\u8005\u4f7f\u7528\u4e00\u4e9b\u914d\u7f6e\u9009\u9879\uff1a<\/h3>\n<pre><code>import html2text\nh = html2text.HTML2Text()\nh.ignore_links = True\nprint (h.handle(\"&lt;p&gt;Hello, &lt;a href='http:\/\/earth.google.com\/'&gt;world&lt;\/a&gt;!\"))<\/code><\/pre>\n<h3>\u81ea\u5df1\u5199\u7684\u811a\u672c html2md.py \u7528\u6765\u628a\u63d0\u53d6\u6307\u5b9a\u7f51\u5740\u7684 Markdown \u6587\u672c<\/h3>\n<ul>\n<li>\u4f7f\u7528:   python3 html2md.py [url] <\/li>\n<\/ul>\n<pre><code>#!\/usr\/bin\/python3\n\nimport requests\nimport html2text\nfrom sys import argv\n\n\n# \u62fc\u63a5\u8bf7\u6c42\u5730\u5740\nurl = 'https:\/\/www.lyvba.com\/index.php\/category\/learn\/'\n\nif (len(argv) &gt; 1) :\n    url = argv[1]\n\n# \u8bf7\u6c42\u5934\uff0c\u6a21\u62df\u6d4f\u89c8\u5668UA\nheaders = {\n    'User-Agent': ' '.join(['Mozilla\/5.0 (Windows NT 10.0; Win64; x64; ServiceUI 14)',\n                            'AppleWebKit\/537.36 (KHTML, like Gecko)', 'Chrome\/70.0.3538.102', 'Safari\/537.36',\n                            'Edge\/18.18363'])\n}\n\n# \u53d1\u9001\u8bf7\u6c42\nr = requests.get(url=url, headers=headers)\n\n# html \u8f6c\u6362 markdown\nhtml = r.text\ntext = html2text.html2text(html)\n\n# \u8f6c\u6362\u65f6\u5ffd\u7565\u94fe\u63a5\nif (len(argv) &gt; 2) :\n    url = argv[1]\n    h = html2text.HTML2Text()\n    # Ignore converting links from HTML\n    h.ignore_links = True\n    text = h.handle(html)\n\nprint(text)\n<\/code><\/pre>\n","protected":false},"excerpt":{"rendered":"<p>html2text \u662f\u4e00\u4e2a Python \u811a\u672c\u5e93\uff0c\u53ef\u5c06 HTML \u9875\u9762\u8f6c\u6362\u4e3a\u5e72\u51c0\u3001\u6613\u4e8e\u9605\u8bfb\u7684\u7eaf AS [&hellip;]<\/p>\n","protected":false},"author":1,"featured_media":0,"comment_status":"open","ping_status":"open","sticky":false,"template":"","format":"standard","meta":{"footnotes":""},"categories":[12],"tags":[45],"class_list":["post-242","post","type-post","status-publish","format-standard","hentry","category-learn","tag-python"],"_links":{"self":[{"href":"https:\/\/lyvba.com\/index.php\/wp-json\/wp\/v2\/posts\/242","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/lyvba.com\/index.php\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/lyvba.com\/index.php\/wp-json\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/lyvba.com\/index.php\/wp-json\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"https:\/\/lyvba.com\/index.php\/wp-json\/wp\/v2\/comments?post=242"}],"version-history":[{"count":0,"href":"https:\/\/lyvba.com\/index.php\/wp-json\/wp\/v2\/posts\/242\/revisions"}],"wp:attachment":[{"href":"https:\/\/lyvba.com\/index.php\/wp-json\/wp\/v2\/media?parent=242"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/lyvba.com\/index.php\/wp-json\/wp\/v2\/categories?post=242"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/lyvba.com\/index.php\/wp-json\/wp\/v2\/tags?post=242"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}