聊天机器人

相信很多人无聊的时候曾经调戏过手机上的语音助手，那么如何用Python实现一个属于自己的语音助手？

起因

突然刷到了一篇博文：
【深度讲解】手把手教你python制作萝莉音智能对话语音机器人，附全部源码！速速学起来！！
版权声明：本文为CSDN博主「川川菜鸟」的原创文章，遵循CC 4.0 BY-SA版权协议，转载请附上原文出处链接及本声明。
原文链接：https://blog.csdn.net/weixin_46211269/article/details/119847830
对了，有个比较大的问题是playsound不能解除占用，虽然看着playsound的源码中加入了close，但实际上不起作用（我安装的playsound和原文中的貌似版本不太一样）
原文中是自己写了个play.py来实现重复播放语音，实际上也是根据playsound改的，但用起来不是很好用；这里参考了下面这篇文章中的方法来解决编码问题
Python playsound 播放MP3
原文链接：https://stackoverflow.com/questions/58659364/how-to-use-unicode-version-windows-api-mcisendstring-python#
增加一句代码实现解除占用

def _playsoundWin(sound, block = True):
    '''
    Play the audio file ``sound`` through the Windows MCI interface
    (windll.winmm) and release the file once playback is over.

    The wide-character MCI entry points are used so that paths containing
    non-ASCII characters work, following the suggestion at
    https://stackoverflow.com/questions/58659364/how-to-use-unicode-version-windows-api-mcisendstring-python#

    Derived from playsound's Windows backend, which was inspired by (but not
    copied from) Michael Gundlach <gundlach@gmail.com>'s mp3play:
    https://github.com/michaelgundlach/mp3play

    Parameters:
        sound: path of the audio file to play.
        block: kept for interface compatibility. Playback is issued with the
            MCI ``wait`` flag, so this call returns only after the sound has
            finished, whatever the value of ``block``.

    Raises:
        PlaysoundException: when an MCI command reports a non-zero error code.
    '''
    from ctypes import create_unicode_buffer, windll
    from random import random

    def winCommand(*command):
        # The ...W functions exchange wide-character strings, so both the
        # command (a str) and the return buffers must be Unicode.
        buf = create_unicode_buffer(255)
        command = ' '.join(command)
        errorCode = int(windll.winmm.mciSendStringW(command, buf, 254, 0))
        if errorCode:
            errorBuffer = create_unicode_buffer(255)
            windll.winmm.mciGetErrorStringW(errorCode, errorBuffer, 254)
            exceptionMessage = ('\n    Error ' + str(errorCode) + ' for command:'
                                '\n        ' + command +
                                '\n    ' + errorBuffer.value)
            raise PlaysoundException(exceptionMessage)
        return buf.value

    # A random alias keeps separate calls from sharing one MCI device name.
    alias = 'playsound_' + str(random())
    winCommand('open "' + sound + '" alias', alias)
    try:
        # 'wait' makes MCI return only after playback has completed.
        winCommand('play', alias, 'wait')
    finally:
        # Always close the device, even if 'play' failed, so the file is no
        # longer held open and can be overwritten by the caller.
        winCommand('close', alias)

最后发现还是有一点美中不足，虽然和我们聊天的是萌萌的萝莉音，但是我们自己输入的时候是手打的，这哪里有聊天的感觉嘛。

语音转文字

如何将自己的声音转化为文字？这里用的是百度的语音识别，参考的是如下文章：
python实现语音录入识别
https://www.cnblogs.com/angelyan/p/12005974.html
使用的时候发现我的麦克风好像不是特别好，或者说话的声音可能不够大，偶尔会出现识别不到的现象，这时程序就会报错停止。于是修改了一下代码，识别失败时自动重新录制并再次识别，直至成功识别为止，代码如下：

def ASR():
    """Record speech and recognize it, repeating until recognition succeeds.

    Each attempt calls record(), then sends the contents of 'output.wav' to
    the AipSpeech client.

    Returns:
        The response dict of the first attempt whose 'err_msg' equals
        'success.'.
    """
    # Your APPID / AK / SK
    APP_ID = '你的 APPID'
    API_KEY = '你的API_KEY'
    SECRET_KEY = '你的SECRET_KEY'

    # One client is enough for every attempt.
    client = AipSpeech(APP_ID, API_KEY, SECRET_KEY)

    def get_file_content(filePath):
        # Return the raw bytes of the file.
        with open(filePath, 'rb') as fp:
            return fp.read()

    # Retry in a loop rather than by recursion, so a long run of failed
    # attempts cannot exhaust the call stack.
    while True:
        record()
        res = client.asr(get_file_content('output.wav'), 'wav', 16000, {
            'dev_pid': 1536,
        })
        # .get() treats a reply without an 'err_msg' key as a failure
        # instead of raising KeyError.
        if res.get('err_msg') == 'success.':
            return res
        print('识别失败，请重新录制')

好了，那么从录制自己的声音、识别语音、把文字传给聊天机器人，再到语音输出，整个流程就齐活了。

完整代码

需要自己申请自己的百度智能云的APPID AK SK哦！

# python 
# -*- coding:utf-8 -*-
# 作者：dunpaiprince
import requests
import json
from aip import AipSpeech
from playsound import playsound
from record import ASR


# Baidu AI Cloud credentials -- replace the placeholders with your own values.
APP_ID = '你的 APPID'
API_KEY = '你的API_KEY'
SECRET_KEY = '你的SECRET_KEY'

# Speech client used by main() for text-to-speech synthesis.
client = AipSpeech(APP_ID, API_KEY, SECRET_KEY)
# Chat-bot endpoint; the text to send is appended after 'spoken='.
url = 'https://api.ownthink.com/bot?appid=ce71072c2802ecfef630ea19d4b2436c&userid=DItLcaO6&spoken='

def main():
    """Run the voice chat loop: listen, query the chat bot, speak the reply.

    Every round takes the first recognition result from ASR(), sends it to
    the endpoint in ``url``, prints the bot's reply, synthesizes it with
    ``client`` into 'audio.mp3' and plays that file. The loop never exits on
    its own.
    """
    from urllib.parse import quote

    print("我们来聊天吧")
    while True:
        talk_man = ASR()['result'][0]
        print(talk_man)
        # Percent-encode the utterance so characters such as '&', '#' or
        # spaces cannot corrupt the query string.
        res_robot = requests.get(url + quote(talk_man, safe='')).text
        talk_robot = json.loads(res_robot)['data']['info']['text']
        result = client.synthesis(talk_robot, 'zh', 1, {
            'vol': 8,  # volume
            'spd': 5,  # speed
            'pit': 9,  # pitch
            'per': 0,  # voice: 0 = female, 1 = male, 3 = Xiaoyao, 4 = little girl
        })
        print(talk_robot)
        # synthesis() returns the audio bytes on success and a dict on error.
        if isinstance(result, dict):
            # No fresh audio was produced; skip playback so the previous
            # round's audio.mp3 is not replayed.
            print('语音合成失败:', result)
            continue
        with open('audio.mp3', 'wb') as f:
            f.write(result)
        try:
            playsound('audio.mp3')
        except Exception as exc:
            # Playback is best-effort: report the problem and keep chatting.
            print('语音播放失败:', exc)

# Start the chat loop only when this file is executed as a script, so that
# importing it does not begin recording.
if __name__ == '__main__':
    main()

这里的record.py如下：

import wave
import pyaudio
from aip import AipSpeech

def record():
    """Record five seconds from the default input device into 'output.wav'.

    Audio is captured as paInt16 samples, one channel, at 16000 Hz, and
    written as a WAV file. The input stream and the PyAudio instance are
    released even if recording fails part-way.
    """
    # Frames read from the stream per call
    CHUNK = 1024
    FORMAT = pyaudio.paInt16
    # Baidu recognition requires these two settings (they give a 256 kbps
    # bit rate with 16-bit samples)
    CHANNELS = 1
    RATE = 16000
    # Recording length in seconds
    RECORD_SECONDS = 5
    # Output file name
    WAVE_OUTPUT_FILENAME = "output.wav"

    p = pyaudio.PyAudio()
    try:
        # Query the sample width while the PyAudio instance is still active.
        sample_width = p.get_sample_size(FORMAT)

        stream = p.open(format=FORMAT,
                        channels=CHANNELS,
                        rate=RATE,
                        input=True,
                        frames_per_buffer=CHUNK)
        try:
            print("* recording")
            frames = [stream.read(CHUNK)
                      for _ in range(int(RATE / CHUNK * RECORD_SECONDS))]
            print("* done recording")
        finally:
            stream.stop_stream()
            stream.close()
    finally:
        p.terminate()

    # The context manager closes the file even if a write fails.
    with wave.open(WAVE_OUTPUT_FILENAME, 'wb') as wf:
        wf.setnchannels(CHANNELS)
        wf.setsampwidth(sample_width)
        wf.setframerate(RATE)
        wf.writeframes(b''.join(frames))



def ASR():
    """Record speech and recognize it, repeating until recognition succeeds.

    Each attempt calls record(), which writes 'output.wav', then sends that
    file's contents to the AipSpeech client.

    Returns:
        The response dict of the first attempt whose 'err_msg' equals
        'success.'.
    """
    # Your APPID / AK / SK
    APP_ID = '你的 APPID'
    API_KEY = '你的API_KEY'
    SECRET_KEY = '你的SECRET_KEY'

    # One client is enough for every attempt.
    client = AipSpeech(APP_ID, API_KEY, SECRET_KEY)

    def get_file_content(filePath):
        # Return the raw bytes of the file.
        with open(filePath, 'rb') as fp:
            return fp.read()

    # Retry in a loop rather than by recursion, so a long run of failed
    # attempts cannot exhaust the call stack.
    while True:
        record()
        res = client.asr(get_file_content('output.wav'), 'wav', 16000, {
            'dev_pid': 1536,
        })
        # .get() treats a reply without an 'err_msg' key as a failure
        # instead of raising KeyError.
        if res.get('err_msg') == 'success.':
            return res
        print('识别失败，请重新录制')

# Running record.py directly performs one record-and-recognize pass.
if __name__ == "__main__":
    ASR()

人工智能最新文章

2022吴恩达机器学习课程——第二课（神经网

第十五章规则学习

FixMatch: Simplifying Semi-Supervised Le

数据挖掘Java——Kmeans算法的实现

大脑皮层的分割方法

【翻译】GPT-3是如何工作的

论文笔记:TEACHTEXT: CrossModal Generaliz

python从零学（六）

详解Python 3.x 导入(import)

【答读者问27】backtrader不支持最新版本的

加:2021-09-01 11:55:38 更:2021-09-01 11:57:43

360图书馆购物三丰科技阅读网日历万年历 2025年12日历

-2025/12/31 2:42:33-

图片自动播放器
↓图片自动播放器↓

TxT小说阅读器
↓语音阅读,小说下载,古典文学↓

一键清除垃圾
↓轻轻一点,清除系统垃圾↓

图片批量下载器
↓批量下载图片,美女图库↓

网站联系: qq:121756557 email:121756557@qq.com IT数码