Unity语音识别[百度AI语音识别&Unity原生短语语音识别]
一、百度AI语音识别
1.代码块讲解
(1)首先,初始化一些常量信息,我们在这个工程中需要API Key、Secret Key,这些会在下面流程中讲到,并通过这两个key获取URL请求地址中的Token参数,用于对百度AI语音识别API进行请求
/// <summary>
/// Unity start-up hook: creates the Baidu speech client, wires the hold-to-talk button to the
/// recording methods, and finally requests the access token.
/// </summary>
void Start()
{
    aipClient = new Asr(API_KEY, SECRET_KEY);
    aipClient.Timeout = 6000;

    // Resolve local components before any network work so that a failed token request
    // cannot leave the recorder half-initialised.
    recordSource = GetComponent<AudioSource>();

    listenBtn = GetComponentInChildren<ListenButton>();
    if (listenBtn != null)
    {
        listenBtn.OnStartRecordEvent += StartRecord;
        listenBtn.OnStopRecordEvent += StopRecord;
    }
    else
    {
        // Previously this case crashed with a NullReferenceException on the first subscription.
        Debug.LogError("AipController: no ListenButton found in children; recording cannot be triggered from the UI.");
    }

    // NOTE(review): GetAccessToken blocks the calling thread until the HTTP request finishes.
    accessToken = GetAccessToken();
}
/// <summary>
/// Posts the client credentials (API key / secret key) to <c>authHost</c> and returns the
/// response body as a string.
/// </summary>
/// <returns>
/// The raw response text exactly as sent by the server.
/// NOTE(review): presumably a JSON document containing the token - confirm against the service docs.
/// </returns>
/// <remarks>
/// The call is synchronous: it blocks on the underlying tasks until the request completes.
/// </remarks>
private string GetAccessToken()
{
    List<KeyValuePair<string, string>> paraList = new List<KeyValuePair<string, string>>
    {
        new KeyValuePair<string, string>("grant_type", "client_credentials"),
        new KeyValuePair<string, string>("client_id", API_KEY),
        new KeyValuePair<string, string>("client_secret", SECRET_KEY)
    };

    // HttpClient, the request content and the response are all IDisposable; release them
    // deterministically instead of leaking sockets/handles until finalisation.
    using (HttpClient client = new HttpClient())
    using (FormUrlEncodedContent content = new FormUrlEncodedContent(paraList))
    using (HttpResponseMessage response = client.PostAsync(authHost, content).Result)
    {
        return response.Content.ReadAsStringAsync().Result;
    }
}
(2)第二步,开始和停止麦克风录音的方法
/// <summary>
/// Begins recording from the first microphone reported by Unity, or reports the missing
/// microphone through the UI text and the listen button when there is none.
/// </summary>
public void StartRecord()
{
    // No input device: show the tip and let the button release its pressed state.
    if (Microphone.devices.Length <= 0)
    {
        SetRecognizeText(TipsReference.CANT_FIND_MICROPHONE);
        listenBtn.ReleaseClickEvent(TipsReference.RECORD_TYPE.NoMicroPhone);
        return;
    }

    // Looping recording into a 60-second clip sampled at 16000 Hz.
    AudioClip captured = Microphone.Start(Microphone.devices[0], true, 60, 16000);
    recordSource.clip = captured;
    recordClip = captured;
}
/// <summary>
/// Stops the microphone recording and starts the recognition coroutine on the recorded clip.
/// Does nothing harmful when no microphone is present or nothing has been recorded yet.
/// </summary>
public void StopRecord()
{
    // Indexing devices[0] unconditionally throws when no microphone is attached.
    string[] devices = Microphone.devices;
    if (devices.Length > 0)
    {
        Microphone.End(devices[0]);
    }

    // StartRecord never produced a clip (e.g. no microphone): there is nothing to recognise.
    if (recordClip == null)
    {
        return;
    }

    StartCoroutine(Recognition(recordClip));
}
(3)第三步,将录音数据转换为16位PCM并提交给百度AI进行语音识别的方法
/// <summary>
/// Converts the given clip to 16-bit PCM bytes, sends it to the speech client and shows the
/// first recognised result (or the "nothing recorded" tip) in the UI.
/// </summary>
/// <param name="clip2Send">The recorded clip to recognise.</param>
/// <returns>Coroutine enumerator; all work happens in a single step.</returns>
IEnumerator Recognition(AudioClip clip2Send)
{
    if (clip2Send == null)
    {
        SetRecognizeText(TipsReference.NOTHING_RECORD);
        yield break;
    }

    // Use the clip that was actually passed in rather than silently reading the field.
    float[] sample = new float[clip2Send.samples];
    clip2Send.GetData(sample, 0);

    // Float samples -> signed 16-bit integers. Clamp first so an out-of-range sample
    // cannot overflow the cast.
    short[] intData = new short[sample.Length];
    for (int i = 0; i < sample.Length; i++)
    {
        intData[i] = (short)(Mathf.Clamp(sample[i], -1f, 1f) * short.MaxValue);
    }

    byte[] byteData = new byte[intData.Length * 2];
    Buffer.BlockCopy(intData, 0, byteData, 0, byteData.Length);

    // NOTE(review): Recognize is called synchronously here, so this step blocks until it returns.
    var result = aipClient.Recognize(byteData, "pcm", 16000);
    var speaking = result != null ? result.GetValue("result") : null;

    // Missing or empty result: report it and end THIS coroutine only. The previous
    // StopAllCoroutines() call also killed every other coroutine on this behaviour.
    if (speaking == null || speaking.First == null)
    {
        SetRecognizeText(TipsReference.NOTHING_RECORD);
        yield break;
    }

    SetRecognizeText(speaking.First.ToString());
}
2.操作流程
1.首先进入百度智能云官网,注册并登录账号后,按页面指引点击“立即使用”,进入百度智能云控制台。 2.点击“创建应用”,按提示填入相应信息。 3.应用创建完成后,即可获得对应的 API Key 和 Secret Key。 4.将这两个 key 填入代码中 API_KEY、SECRET_KEY 对应的位置即可。
3.主要功能完整代码
因为其中包含了按键相关的脚本,它们在其他脚本中,所以不直接下载相关工程的同学需要将其剔除,以方便自己使用(与listenButton相关的所有内容),或者使用第三部分中的内容,重写按钮方法。
using System.Collections;
using System.Collections.Generic;
using UnityEngine;
using Baidu.Aip.Speech;
using System.Net.Http;
using UnityEngine.UI;
using System;
/// <summary>
/// Records microphone audio while the child <see cref="ListenButton"/> is held down and sends
/// the recording to the Baidu speech client, showing the recognised text in the UI.
/// </summary>
[RequireComponent(typeof(AudioSource))]
public class AipController : MonoBehaviour
{
    // Recording format: one clip of at most RecordSeconds seconds at SampleRate Hz.
    private const int RecordSeconds = 60;
    private const int SampleRate = 16000;

    private ListenButton listenBtn;
    private AudioSource recordSource;
    private AudioClip recordClip;

    // Device currently being recorded, or null when no recording is in progress.
    private string recordDevice;
    // Microphone write position (in samples) captured when the last recording was stopped.
    private int recordedSamples;

    #region UI panel controls
    public Text recognizeText;
    // Not referenced anywhere in this class.
    public Color tokenGotColor;
    #endregion

    // Response of the token request; stored but not read anywhere in this class.
    private string accessToken;

    #region Baidu speech application settings
    // NOTE(review): credentials are hard-coded in source. Replace them with your own keys and
    // prefer loading them from configuration that is not committed/published.
    private string API_KEY = "IBCbYFAbZBGkfbqpf6UBO1svY6WB6ISQ";
    private string SECRET_KEY = "XcPMlGudUm8o558YCT6Rjz1pGRSqcHW7";
    private string authHost = "https://aip.baidubce.com/oauth/2.0/token";
    #endregion

    private Asr aipClient;

    /// <summary>
    /// Creates the speech client, wires the hold-to-talk button and requests the access token.
    /// </summary>
    void Start()
    {
        aipClient = new Asr(API_KEY, SECRET_KEY);
        aipClient.Timeout = 6000;

        // Resolve local components before any network work so that a failed token request
        // cannot leave the recorder half-initialised.
        recordSource = GetComponent<AudioSource>();

        listenBtn = GetComponentInChildren<ListenButton>();
        if (listenBtn != null)
        {
            listenBtn.OnStartRecordEvent += StartRecord;
            listenBtn.OnStopRecordEvent += StopRecord;
        }
        else
        {
            Debug.LogError("AipController: no ListenButton found in children; recording cannot be triggered from the UI.");
        }

        // NOTE(review): GetAccessToken blocks the calling thread until the HTTP request finishes.
        accessToken = GetAccessToken();
    }

    /// <summary>
    /// Detaches the button handlers so the button does not keep calling into a destroyed object.
    /// </summary>
    void OnDestroy()
    {
        if (listenBtn != null)
        {
            listenBtn.OnStartRecordEvent -= StartRecord;
            listenBtn.OnStopRecordEvent -= StopRecord;
        }
    }

    /// <summary>
    /// Starts recording from the first available microphone, or reports the missing microphone
    /// through the UI text and the listen button.
    /// </summary>
    public void StartRecord()
    {
        string[] devices = Microphone.devices;
        if (devices.Length == 0)
        {
            SetRecognizeText(TipsReference.CANT_FIND_MICROPHONE);
            if (listenBtn != null)
            {
                listenBtn.ReleaseClickEvent(TipsReference.RECORD_TYPE.NoMicroPhone);
            }
            return;
        }

        recordDevice = devices[0];
        recordedSamples = 0;

        // Non-looping: recording stops when the clip is full instead of wrapping around and
        // overwriting the beginning of the utterance.
        AudioClip clip = Microphone.Start(recordDevice, false, RecordSeconds, SampleRate);
        recordSource.clip = clip;
        recordClip = clip;
    }

    /// <summary>
    /// Stops the current recording and starts recognition of the recorded clip.
    /// Does nothing when no recording was started.
    /// </summary>
    public void StopRecord()
    {
        if (recordDevice == null || recordClip == null)
        {
            return;
        }

        // The write position must be read before End(); it tells how much of the clip
        // actually contains recorded audio.
        recordedSamples = Microphone.GetPosition(recordDevice);
        Microphone.End(recordDevice);
        recordDevice = null;

        StartCoroutine(Recognition(recordClip));
    }

    /// <summary>
    /// Shows <paramref name="result"/> in the recognition text control, if one is assigned.
    /// </summary>
    /// <param name="result">Text to display.</param>
    public void SetRecognizeText(string result)
    {
        if (recognizeText != null)
        {
            recognizeText.text = result;
        }
    }

    /// <summary>
    /// Converts the given clip to 16-bit PCM bytes, sends it to the speech client and shows the
    /// first recognised result (or the "nothing recorded" tip) in the UI.
    /// </summary>
    /// <param name="clip2Send">The recorded clip to recognise.</param>
    /// <returns>Coroutine enumerator; all work happens in a single step.</returns>
    IEnumerator Recognition(AudioClip clip2Send)
    {
        if (clip2Send == null)
        {
            SetRecognizeText(TipsReference.NOTHING_RECORD);
            yield break;
        }

        // Send only the recorded part of the buffer instead of the whole fixed-length clip.
        // Fall back to the full clip when the position is unknown (0) or out of range.
        int sampleCount = clip2Send.samples;
        if (clip2Send == recordClip && recordedSamples > 0 && recordedSamples < sampleCount)
        {
            sampleCount = recordedSamples;
        }

        // GetData reads as many samples as the array holds, starting at offset 0.
        // NOTE(review): assumes a single-channel clip - confirm for multi-channel devices.
        float[] sample = new float[sampleCount];
        clip2Send.GetData(sample, 0);

        // Float samples -> signed 16-bit integers. Clamp first so an out-of-range sample
        // cannot overflow the cast.
        short[] intData = new short[sample.Length];
        for (int i = 0; i < sample.Length; i++)
        {
            intData[i] = (short)(Mathf.Clamp(sample[i], -1f, 1f) * short.MaxValue);
        }

        byte[] byteData = new byte[intData.Length * 2];
        Buffer.BlockCopy(intData, 0, byteData, 0, byteData.Length);

        // NOTE(review): Recognize is called synchronously here, so this step blocks until it returns.
        var result = aipClient.Recognize(byteData, "pcm", SampleRate);
        var speaking = result != null ? result.GetValue("result") : null;

        // Missing or empty result: report it and end THIS coroutine only. The previous
        // StopAllCoroutines() call also killed every other coroutine on this behaviour.
        if (speaking == null || speaking.First == null)
        {
            SetRecognizeText(TipsReference.NOTHING_RECORD);
            yield break;
        }

        SetRecognizeText(speaking.First.ToString());
    }

    /// <summary>
    /// Posts the client credentials to <c>authHost</c> and returns the response body as a string.
    /// </summary>
    /// <returns>
    /// The raw response text exactly as sent by the server.
    /// NOTE(review): presumably a JSON document containing the token - confirm against the service docs.
    /// </returns>
    private string GetAccessToken()
    {
        List<KeyValuePair<string, string>> paraList = new List<KeyValuePair<string, string>>
        {
            new KeyValuePair<string, string>("grant_type", "client_credentials"),
            new KeyValuePair<string, string>("client_id", API_KEY),
            new KeyValuePair<string, string>("client_secret", SECRET_KEY)
        };

        // HttpClient, the request content and the response are all IDisposable; release them
        // deterministically instead of leaking sockets/handles until finalisation.
        using (HttpClient client = new HttpClient())
        using (FormUrlEncodedContent content = new FormUrlEncodedContent(paraList))
        using (HttpResponseMessage response = client.PostAsync(authHost, content).Result)
        {
            return response.Content.ReadAsStringAsync().Result;
        }
    }

    /// <summary>
    /// Plays back the clip currently assigned to the record audio source.
    /// </summary>
    public void DisplayClip()
    {
        recordSource.Play();
    }
}
二、Unity原生语音识别
主要功能完整代码
由于代码量较小且注释详细,所以直接放源码
因为其中包含了按键相关的脚本(与listenButton相关的所有内容),它们在其他脚本中,所以不直接下载相关工程的同学需要将其剔除,以方便自己使用,或者学习使用第三部分中的内容,重写按钮方法。
using System.Collections;
using System.Collections.Generic;
using UnityEngine;
using UnityEngine.Windows.Speech;
/// <summary>
/// Listens for a fixed set of keywords with Unity's built-in phrase recognizer while the child
/// <see cref="ListenButton"/> is held down, and logs the recognised command.
/// </summary>
public class PhraseRecognition : MonoBehaviour
{
    private ListenButton listenButton;
    private PhraseRecognizer m_PhraseRecognizer;

    // Phrases the recognizer listens for.
    public string[] keywords = { "开始测试", "长语音", "结束测试" };
    // Confidence level passed to the recognizer when it is created.
    public ConfidenceLevel m_confidenceLevel = ConfidenceLevel.Medium;

    /// <summary>
    /// Hooks the start/stop recognition methods to the button's press/release events.
    /// </summary>
    void Start()
    {
        listenButton = GetComponentInChildren<ListenButton>();
        if (listenButton == null)
        {
            Debug.LogError("PhraseRecognition: no ListenButton found in children; recognition cannot be triggered from the UI.");
            return;
        }

        listenButton.OnStartRecordEvent += StartRecognizePhrase;
        listenButton.OnStopRecordEvent += StopRecognizePhrase;
    }

    /// <summary>
    /// Creates the keyword recognizer on first use and (re)starts listening.
    /// </summary>
    void StartRecognizePhrase()
    {
        if (m_PhraseRecognizer == null)
        {
            m_PhraseRecognizer = new KeywordRecognizer(keywords, m_confidenceLevel);
            m_PhraseRecognizer.OnPhraseRecognized += M_PhraseRecognizer_OnPhraseRecognized;
        }

        // The recognizer is reused across presses. Previously every press after the first one
        // called Stop() here, so recognition only ever worked once.
        if (!m_PhraseRecognizer.IsRunning)
        {
            m_PhraseRecognizer.Start();
            Debug.Log("正在监听");
        }
    }

    /// <summary>
    /// Stops listening (if a recognizer exists and is running) and logs the end of the session.
    /// </summary>
    void StopRecognizePhrase()
    {
        if (m_PhraseRecognizer != null && m_PhraseRecognizer.IsRunning)
        {
            m_PhraseRecognizer.Stop();
        }
        Debug.Log("结束监听");
    }

    /// <summary>
    /// Recognizer callback: dispatches the recognised text and prints it.
    /// </summary>
    /// <param name="args">Recognition event data; only <c>args.text</c> is used.</param>
    private void M_PhraseRecognizer_OnPhraseRecognized(PhraseRecognizedEventArgs args)
    {
        _SpeechRecognition(args.text);
        print(args.text);
    }

    /// <summary>
    /// Detaches all handlers and releases the recognizer.
    /// </summary>
    private void OnDestroy()
    {
        if (listenButton != null)
        {
            listenButton.OnStartRecordEvent -= StartRecognizePhrase;
            listenButton.OnStopRecordEvent -= StopRecognizePhrase;
        }

        if (m_PhraseRecognizer != null)
        {
            m_PhraseRecognizer.OnPhraseRecognized -= M_PhraseRecognizer_OnPhraseRecognized;
            if (m_PhraseRecognizer.IsRunning)
            {
                m_PhraseRecognizer.Stop();
            }
            m_PhraseRecognizer.Dispose();
            m_PhraseRecognizer = null;
        }
    }

    /// <summary>
    /// Maps a recognised keyword to its action; currently each keyword only writes a log line.
    /// Unknown text is ignored.
    /// </summary>
    /// <param name="msg">The recognised phrase.</param>
    void _SpeechRecognition(string msg)
    {
        switch (msg)
        {
            case "长语音":
                Debug.Log("转换长语音识别");
                break;
            case "开始测试":
                Debug.Log("开始测试");
                break;
            case "结束测试":
                Debug.Log("结束测试");
                break;
        }
    }
}
三、Button长按点击方法的重写
1. 主要功能完整代码
using System.Collections;
using System.Collections.Generic;
using UnityEngine;
using UnityEngine.EventSystems;
using UnityEngine.UI;
/// <summary>
/// A <see cref="Button"/> that raises an event when it is pressed and another when it is
/// released, so callers can implement hold-to-record behaviour.
/// </summary>
public class ListenButton : Button
{
    public delegate void RecordDelegate();

    // Raised from OnPointerDown.
    public event RecordDelegate OnStartRecordEvent;
    // Raised from OnPointerUp unless the current press was flagged as "no microphone".
    public event RecordDelegate OnStopRecordEvent;

    // Event data of the most recent pointer-down; null until the first press.
    public PointerEventData currentData;

    private TipsReference.RECORD_TYPE _TYPE = TipsReference.RECORD_TYPE.None;

    /// <summary>
    /// Lets a subscriber report the outcome of the current press. For
    /// <c>NoMicroPhone</c> the base pointer-up handling is run immediately and the stop
    /// event is suppressed when the pointer is actually released.
    /// </summary>
    /// <param name="type">Outcome reported by the subscriber.</param>
    public void ReleaseClickEvent(TipsReference.RECORD_TYPE type)
    {
        _TYPE = type;
        switch (type)
        {
            case TipsReference.RECORD_TYPE.Normal:
                break;
            case TipsReference.RECORD_TYPE.NoMicroPhone:
                // currentData is only set after a pointer-down; skip when called before any press.
                if (currentData != null)
                {
                    base.OnPointerUp(currentData);
                }
                break;
        }
    }

    /// <summary>
    /// Runs the base press handling, remembers the event data and raises the start event.
    /// </summary>
    /// <param name="eventData">Pointer event supplied by the event system.</param>
    public override void OnPointerDown(PointerEventData eventData)
    {
        base.OnPointerDown(eventData);
        currentData = eventData;

        // Each press starts from a clean state. Without this reset a single NoMicroPhone
        // press would suppress the stop event for every later press as well.
        _TYPE = TipsReference.RECORD_TYPE.None;

        // Copy to a local so a missing subscriber does not cause a NullReferenceException.
        RecordDelegate startHandler = OnStartRecordEvent;
        if (startHandler != null)
        {
            startHandler();
        }
    }

    /// <summary>
    /// Runs the base release handling and raises the stop event, unless the press was
    /// flagged as <c>NoMicroPhone</c>.
    /// </summary>
    /// <param name="eventData">Pointer event supplied by the event system.</param>
    public override void OnPointerUp(PointerEventData eventData)
    {
        base.OnPointerUp(eventData);

        if (_TYPE == TipsReference.RECORD_TYPE.NoMicroPhone)
        {
            return;
        }

        RecordDelegate stopHandler = OnStopRecordEvent;
        if (stopHandler != null)
        {
            stopHandler();
        }
    }
}
2.使用方法
使用起来很简单只需要将Button原有的Button组件删除,替换成这个脚本即可,它会监听按下(OnPointerDown),抬起(OnPointerUp)的事件
四、工程下载链接
当然,没有什么比直接下载工程更便于学习,工程链接点击此处,内容分别放在两个Scenes中。
|