{"id":1604,"date":"2024-09-15T10:41:33","date_gmt":"2024-09-15T02:41:33","guid":{"rendered":"https:\/\/lyvba.com\/?p=1604"},"modified":"2024-09-15T11:02:03","modified_gmt":"2024-09-15T03:02:03","slug":"python3-speech-synthesis","status":"publish","type":"post","link":"https:\/\/lyvba.com\/index.php\/2024\/09\/15\/python3-speech-synthesis\/","title":{"rendered":"\u5b66\u4e60\u7f16\u7a0b: \u4f7f\u7528\u5fae\u8f6f\u8bed\u97f3\u628a\u6587\u672c\u8f6c\u8bed\u97f3\uff0c\u4f7f\u7528FFMPEG\u628a\u8bed\u97f3\u5408\u5e76\u5230\u89c6\u9891\u4e2d"},"content":{"rendered":"<p><img decoding=\"async\" src=\"https:\/\/lyvba.com\/wp-content\/uploads\/2024\/09\/SPEECH.webp\" alt=\"\" \/><\/p>\n<h2>\u4f7f\u7528\u5fae\u8f6f\u8bed\u97f3\u628a\u6587\u672c\u8f6c\u8bed\u97f3-\u524d\u63d0\u6761\u4ef6\uff0c\u6fc0\u6d3b\u514d\u8d39\u7684\u5fae\u8f6fAI\u8bed\u97f3<\/h2>\n<ul>\n<li>\u4f7f\u7528\u5b66\u751f\u8ba4\u8bc1\u6fc0\u6d3b\u5fae\u8f6fAzure\u5e10\u6237\u4e2d\u7684AI\u8bed\u97f3\u529f\u80fd\uff0c\u597d\u6d88\u606f\u662f\u5fae\u8f6fAI\u8bed\u97f3\u8f6c\u6362\u662f\u5b8c\u5168\u514d\u8d39\u7684\uff0c\u6240\u4ee5\u4f60\u6709\u4fe1\u7528\u5361\u4e5f\u53ef\u4ee5\u6fc0\u6d3b\u5fae\u8f6fAzure\u5e10\u6237\u7684AI\u8bed\u97f3\u529f\u80fd\uff0c\u5b8c\u5168\u514d\u8d39\uff0c\u4e0d\u7528\u82b1\u94b1\u3002<\/li>\n<li>\u5f00\u542f\u8bed\u97f3\u9879\u76ee\u540e\uff0c\u53ef\u4ee5\u5f97\u5230\u5fae\u8f6f\u7ed9\u7684 <code>API-KEY<\/code>\uff0c\u7c7b\u4f3c\u5982\u4e0b\uff0c\u628a\u4ed6\u6dfb\u52a0\u5230\u73af\u5883\u53d8\u91cf\n<pre><code>SPEECH_KEY      73a1a****************76015b44217\nSPEECH_REGION   eastasia<\/code><\/pre>\n<p><img decoding=\"async\" src=\"https:\/\/lyvba.com\/wp-content\/uploads\/2024\/09\/API-KEY.png\" alt=\"\" \/><\/p>\n<\/li>\n<li>\u53c2\u8003\u56fe\u7247\u628a  <code>API-KEY<\/code> \u6dfb\u52a0\u5230 Windows \u73af\u5883\u53d8\u91cf\u540e\uff0c\u5c31\u53ef\u4ee5\u4f7f\u7528 Python \u4ee3\u7801\u628a\u6587\u672c\u8f6c\u8bed\u97f3\u4e86<\/li>\n<\/ul>\n<h3>\u5fae\u8f6f\u8bed\u97f3 
\u6653\u6653 \u6587\u672c\u8f6c\u8bed\u97f3 \u6e90\u7801 <code>speech_synthesis.py<\/code><\/h3>\n<pre><code class=\"language-python\">import os\nimport azure.cognitiveservices.speech as speechsdk\n\nclass SPEECH:\n    def __init__(self):\n        # \u9700\u8981\u8f6c\u8bed\u97f3\u7684\u6587\u672c\u6587\u5b57\n        # \u8bbe\u7f6e\u97f3\u9891\u4fdd\u5b58\u672c\u5730\u5730\u5740\n        self.output_file = &quot;r:\\\\output.wav&quot;\n        # \u4ece\u73af\u5883\u53d8\u91cf\u4e2d\u83b7\u53d6\u8ba2\u9605\u5bc6\u94a5\u548c\u533a\u57df\u4fe1\u606f\n        self.speech_config = speechsdk.SpeechConfig(subscription=os.environ.get(&#039;SPEECH_KEY&#039;),\n                                                    region=os.environ.get(&#039;SPEECH_REGION&#039;))\n        # \u8bbe\u7f6e\u8bed\u97f3\u5408\u6210\u4f7f\u7528\u7684\u8bed\u8a00\u2014\u2014  \u4e91\u67ab  \n        self.speech_config.speech_synthesis_language = &quot;zh-CN&quot;\n        # self.speech_config.speech_synthesis_voice_name = &#039;zh-CN-YunfengNeural&#039;    # \u4e91\u67ab\n        self.speech_config.speech_synthesis_voice_name = &#039;zh-CN-XiaoxiaoNeural&#039;     # \u6653\u6653\n\n    def speak_txt(self):\n        # \u914d\u7f6e\u97f3\u9891\u8f93\u51fa\u4e3a\u9ed8\u8ba4\u626c\u58f0\u5668\n        audio_config = speechsdk.audio.AudioOutputConfig(use_default_speaker=True)\n        return audio_config\n\n    def down_audio(self):\n        # \u914d\u7f6e\u97f3\u9891\u8f93\u51fa\u4e3a\u9ed8\u8ba4\u626c\u58f0\u5668\n        audio_config = speechsdk.audio.AudioOutputConfig(filename=self.output_file)\n        return audio_config\n\n    def run(self, text, audio_config):\n\n        # \u521b\u5efa\u8bed\u97f3\u5408\u6210\u5668\n        speech_synthesizer = speechsdk.SpeechSynthesizer(speech_config=self.speech_config, audio_config=audio_config)\n\n        # \u8c03\u7528\u8bed\u97f3\u5408\u6210API\u5c06\u6587\u672c\u8f6c\u6362\u4e3a\u8bed\u97f3\n        speech_synthesis_result = 
speech_synthesizer.speak_text_async(text).get()\n\n        # \u68c0\u67e5\u8bed\u97f3\u5408\u6210\u7684\u7ed3\u679c\u5e76\u8fdb\u884c\u76f8\u5e94\u7684\u5904\u7406\n        if speech_synthesis_result.reason == speechsdk.ResultReason.SynthesizingAudioCompleted:\n            print(&quot;Speech synthesized for text [{}]&quot;.format(text))\n        elif speech_synthesis_result.reason == speechsdk.ResultReason.Canceled:\n            cancellation_details = speech_synthesis_result.cancellation_details\n            print(&quot;Speech synthesis canceled: {}&quot;.format(cancellation_details.reason))\n            if cancellation_details.reason == speechsdk.CancellationReason.Error:\n                if cancellation_details.error_details:\n                    print(&quot;Error details: {}&quot;.format(cancellation_details.error_details))\n                    print(&quot;Did you set the speech resource key and region values?&quot;)\n\nif __name__ == &quot;__main__&quot;:\n    my_speech = SPEECH()\n    while True:\n        # \u4ece\u63a7\u5236\u53f0\u83b7\u53d6\u8981\u8f6c\u6362\u4e3a\u8bed\u97f3\u7684\u6587\u672c\n        print(&quot;Enter some text that you want to speak &gt;&quot;)\n        text = input()\n        print(&quot;\u6539\u6210\u8bfb\u53d6 input.txt \u6587\u4ef6\u7f16\u7801 utf-8&quot;)\n        with open(&#039;input.txt&#039;, encoding=&quot;utf-8&quot;) as f:\n            text = f.read()\n        f.closed\n\n        # \u4fdd\u5b58\u5230\u672c\u5730\n        my_speech.run(text, my_speech.down_audio())\n\n        # \u8f93\u51fa\u8bed\u97f3\u64ad\u653e\n       # my_speech.run(text, my_speech.speak_txt())<\/code><\/pre>\n<h3>\u6253\u5f00\u63a7\u5236\u53f0\uff0c\u4f7f\u7528 <code>Python<\/code> \u6267\u884c\u811a\u672c\u628a\u6587\u672c\u8f6c\u6362\u8bed\u97f3<\/h3>\n<pre><code># \u8bbe\u7f6e\u97f3\u9891\u4fdd\u5b58\u672c\u5730\u5730\u5740, \u6e90\u7801\u4e2d\u4fee\u6539\nself.output_file = &quot;r:\\\\output.wav&quot;\n# 
\u8f93\u5165\u7684\u6587\u672c\u6587\u4ef6\uff0c\u4fdd\u5b58\u81ea\u5df1\u5199\u7684\u89c6\u9891\u6587\u6848\nwith open(&#039;input.txt&#039;, encoding=&quot;utf-8&quot;) as f:\n\n# \u6267\u884c\u8fd9\u4e2a\u547d\u4ee4\uff0c\u7a0b\u5e8f\u5de5\u4f5c\u8fd0\u884c\npython.exe .\\speech_synthesis.py<\/code><\/pre>\n<h2>\u4f7f\u7528FFMPEG\u628a\u8bed\u97f3\u5408\u5e76\u5230\u89c6\u9891\u4e2d<\/h2>\n<ul>\n<li>\u8981\u5c06\u65b0\u8bed\u97f3\u97f3\u9891\u5408\u5e76\u5230\u89c6\u9891\u4e2d\uff0c\u540c\u65f6\u4fdd\u7559\u539f\u89c6\u9891\u7684\u80cc\u666f\u97f3\u4e50\uff0c\u5e76\u5c06\u65b0\u58f0\u97f3\u7684\u97f3\u91cf\u8c03\u6574\u4e3a1.2\u500d\uff0c\u53ef\u4ee5\u4f7f\u7528\u4ee5\u4e0b <code>ffmpeg<\/code> \u547d\u4ee4\uff1a<\/li>\n<\/ul>\n<pre><code class=\"language-bash\">ffmpeg -filter_complex  &quot;[1:a]volume=1.2[a1];[0:a][a1]amix=inputs=2:duration=longest&quot; \\\n  -i 1.mp4  -i new.wav  -c:v copy  new.mp4<\/code><\/pre>\n<h3>\u547d\u4ee4\u89e3\u6790\uff1a<\/h3>\n<ul>\n<li><code>[1:a]volume=1.2[a1]<\/code>\uff1a\u5c06\u65b0\u97f3\u9891\u7684\u97f3\u91cf\u8c03\u6574\u4e3a1.2\u500d\uff0c\u5e76\u5c06\u7ed3\u679c\u547d\u540d\u4e3a <code>[a1]<\/code>\u3002<\/li>\n<li><code>[0:a][a1]amix=inputs=2:duration=longest<\/code>\uff1a\u5c06\u539f\u97f3\u9891\u548c\u8c03\u6574\u8fc7\u7684\u97f3\u9891\u6df7\u5408\u5728\u4e00\u8d77\u3002<\/li>\n<li><code>-c:v copy<\/code>\uff1a\u590d\u5236\u89c6\u9891\u6d41\uff0c\u4e0d\u8fdb\u884c\u8f6c\u7801\u3002<\/li>\n<li><code>new.mp4<\/code>\uff1a\u6307\u5b9a\u8f93\u51fa\u7684\u89c6\u9891\u6587\u4ef6\u540d\u3002<\/li>\n<\/ul>\n<h3>\u4e5f\u53ef\u4ee5\u628a\u80cc\u666f\u97f3\u4e50\u6df7\u5408\u5230\u89c6\u9891\u4e2d\uff0c<code>volume=0.3<\/code> \u6bd4\u8f83\u5408\u9002<\/h3>\n<pre><code class=\"language-bash\">ffmpeg -filter_complex  &quot;[1:a]volume=0.3[a1];[0:a][a1]amix=inputs=2:duration=longest&quot; \\\n  -i 1.mp4  -i bg.mp3  -c:v copy  new.mp4<\/code><\/pre>\n<h2>\u81ea\u5df1\u5199 MP4CUT \u5de5\u5177\uff0c\u8c03\u7528ffmpeg 
\u526a\u8f91\u89c6\u9891<\/h2>\n<pre><code class=\"language-bash\">$ mp4cut.exe\nUsage: mp4cut.exe  sample.mp4  00:08  01:18\n\n$ mp4cut new.MP4  0:08  0:18\n\nffmpeg version 5.0.1-full_build-www.gyan.dev Copyright (c) 2000-2022 the FFmpeg developers\n  built with gcc 11.2.0 (Rev7, Built by MSYS2 project)\n\nInput #0, mov,mp4,m4a,3gp,3g2,mj2, from &#039;new.MP4&#039;:\n  Metadata:\n    major_brand     : isom\n    minor_version   : 512\n    compatible_brands: isomiso2avc1mp41\n    encoder         : Lavf59.16.100\n  Duration: 00:02:45.00, start: 0.000000, bitrate: 6123 kb\/s\n  Stream #0:0[0x1](und): Audio: aac (LC) (mp4a \/ 0x6134706D), 48000 Hz, stereo, fltp, 128 kb\/s (default)\nStream mapping:\n  Stream #0:1 -&gt; #0:0 (copy)\n  Stream #0:0 -&gt; #0:1 (copy)\nPress [q] to stop, [?] for help\nframe=  300 fps=0.0 q=-1.0 Lsize=    7552kB time=00:00:10.00 bitrate=6181.3kbits\/s speed=1.07e+03x\nvideo:7385kB audio:156kB subtitle:0kB other streams:0kB global headers:0kB muxing overhead: 0.131130%\n<\/code><\/pre>\n<h3>FFMPEG \u6309\u65f6\u95f4\u622a\u53d6\u89c6\u9891\u547d\u4ee4\u5982\u4e0b\uff0c\u6211\u4eec\u4f7f\u7528 C\/C++ \u8bed\u8a00\u7f16\u5199\u4ee3\u7801\u6765\u8c03\u7528 <code>ffmpeg<\/code><\/h3>\n<pre><code class=\"language-bash\">ffmpeg  -i .\/plutopr.mp4   \\\n-vcodec copy -acodec copy -ss 00:18:45 -to 00:19:36  \\\n.\/cutout1.mp4  -y<\/code><\/pre>\n<h3>MP4CUT\u5de5\u5177\u6e90\u7801:  mp4cut.cpp<\/h3>\n<pre><code class=\"language-cpp\">#include &lt;string&gt;\n#include &lt;cstring&gt;\n#include &lt;cstdio&gt;\n#include &lt;cctype&gt;\n\nvoid replace_colon(char* str)\n{\n    while (*str) {\n        if (*str == &#039;:&#039;)\n            *str = &#039;-&#039;;\n        str++;\n    }\n}\n\nint main(int argc, char* argv[])\n{\n    if (4 != argc) {\n        puts(&quot;Usage: mp4cut.exe  sample.mp4  00:08  01:18 &quot;);\n        return -1;\n    }\n\n    \/*******\n    FFMPEG  \u6309\u65f6\u95f4\u622a\u53d6\u89c6\u9891\n    ffmpeg  -i .\/plutopr.mp4   \\\n    
-vcodec copy -acodec copy -ss 00:18:45 -to 00:19:36  \\\n     .\/cutout1.mp4  -y\n    *******\/\n\n    char cmdline[4096];\n    sprintf(cmdline, &quot;ffmpeg -i  \\&quot;%s\\&quot;  -vcodec copy -acodec copy -ss %s  -to %s  -y  \\&quot;%s\\&quot; &quot;,\n            argv[1], argv[2], argv[3], argv[1]);\n\n    char newfile[512];    \/\/ \u65f6\u95f4\u6233\u6587\u4ef6\u540d\u540e\u7f00\n    sprintf(newfile, &quot;.Cut_%s_%s.mp4\\&quot; &quot;, argv[2], argv[3]);\n    replace_colon(newfile);\n\n    char* pch = strrchr(cmdline, &#039;.&#039;);\n\n    FILE* pFile;\n    pFile = fopen(argv[1], &quot;r&quot;);\n    if (pch != NULL) {\n        strcpy(pch, newfile);\n\n        if (pFile != NULL) {\n            \/\/  puts(cmdline);\n\n            system(cmdline);\n        }\n\n    }\n}<\/code><\/pre>\n","protected":false},"excerpt":{"rendered":"<p>\u4f7f\u7528\u5fae\u8f6f\u8bed\u97f3\u628a\u6587\u672c\u8f6c\u8bed\u97f3-\u524d\u63d0\u6761\u4ef6\uff0c\u6fc0\u6d3b\u514d\u8d39\u7684\u5fae\u8f6fAI\u8bed\u97f3 \u4f7f\u7528\u5b66\u751f\u8ba4\u8bc1\u6fc0\u6d3b\u5fae\u8f6fAzure\u5e10\u6237\u4e2d\u7684A 
[&hellip;]<\/p>\n","protected":false},"author":1,"featured_media":1605,"comment_status":"open","ping_status":"open","sticky":false,"template":"","format":"standard","meta":{"footnotes":""},"categories":[12],"tags":[31,39,45],"class_list":["post-1604","post","type-post","status-publish","format-standard","has-post-thumbnail","hentry","category-learn","tag-cpp","tag-ffmpeg","tag-python"],"_links":{"self":[{"href":"https:\/\/lyvba.com\/index.php\/wp-json\/wp\/v2\/posts\/1604","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/lyvba.com\/index.php\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/lyvba.com\/index.php\/wp-json\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/lyvba.com\/index.php\/wp-json\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"https:\/\/lyvba.com\/index.php\/wp-json\/wp\/v2\/comments?post=1604"}],"version-history":[{"count":2,"href":"https:\/\/lyvba.com\/index.php\/wp-json\/wp\/v2\/posts\/1604\/revisions"}],"predecessor-version":[{"id":1608,"href":"https:\/\/lyvba.com\/index.php\/wp-json\/wp\/v2\/posts\/1604\/revisions\/1608"}],"wp:featuredmedia":[{"embeddable":true,"href":"https:\/\/lyvba.com\/index.php\/wp-json\/wp\/v2\/media\/1605"}],"wp:attachment":[{"href":"https:\/\/lyvba.com\/index.php\/wp-json\/wp\/v2\/media?parent=1604"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/lyvba.com\/index.php\/wp-json\/wp\/v2\/categories?post=1604"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/lyvba.com\/index.php\/wp-json\/wp\/v2\/tags?post=1604"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}