语音识别控制(软件、硬件)

1. 环境

python版本：3.11.9

2. 完整代码

import sqlite3
import time
import wave  # 使用wave库可读、写wav类型的音频文件
from funasr import AutoModel
import sounddevice as sd
import numpy as np
from modelscope import pipeline, Tasks
from pypinyin import lazy_pinyin
import pyaudio  # 使用pyaudio库可以进行录音，播放，生成wav文件
# Model parameters. The three values below are passed straight to
# model.generate() in the wake-word loop.
chunk_size = [0, 10, 5]
encoder_chunk_look_back = 7
decoder_chunk_look_back = 5

# Loop flag for the wake-word listener.
is_task_running = True

# Raw string: the Windows path contains "\S" and "\s", which are invalid
# escape sequences in a normal string literal (SyntaxWarning on Python 3.12+).
# The resulting path is unchanged.
model = AutoModel(model=r"D:\SpeechRecognize\speech_paraformer-large-vad-punc_asr_nat-zh-cn-16k-common-vocab8404-pytorch")

# The model is assumed to require a 16000 Hz sample rate.
fs = 16000
duration = 3  # seconds captured per listening window (fs * duration samples)
chunk_stride = chunk_size[1] * 960  # not referenced by the code visible here
cache = {}  # handed to model.generate() on every call
window_size = 3  # length of the moving-average kernel used for noise smoothing

# Connect to the SQLite database; the file is created if it does not exist.
conn = sqlite3.connect('speech_recognition.db')
cursor = conn.cursor()  # shared cursor for the table creation and the inserts
# Create the results table on first run.
cursor.execute('''CREATE TABLE IF NOT EXISTS speech_data(text TEXT, time_stamp TEXT, batch TEXT)
''')


def record(time):
    """Record mono 16 kHz, 16-bit audio and save it to ``./output.wav``.

    Args:
        time: Recording length in seconds. The name shadows the ``time``
            module inside this function only; it is kept so existing callers
            keep working.
    """
    CHUNK = 1024  # frames fetched per stream.read() call
    FORMAT = pyaudio.paInt16  # 16-bit samples
    CHANNELS = 1  # mono
    RATE = 16000  # samples per second
    RECORD_SECONDS = time
    WAVE_OUTPUT_FILENAME = "./output.wav"

    p = pyaudio.PyAudio()
    try:
        sample_width = p.get_sample_size(FORMAT)
        stream = p.open(format=FORMAT,
                        channels=CHANNELS,
                        rate=RATE,
                        input=True,  # this is an input (capture) stream
                        frames_per_buffer=CHUNK)
        try:
            print("* 开始录音")
            # reads needed = sample rate / frames per read * seconds
            read_count = int(RATE / CHUNK * RECORD_SECONDS)
            frames = [stream.read(CHUNK) for _ in range(read_count)]
            print("* 结束语音")
        finally:
            # Release the audio device even if a read fails.
            stream.stop_stream()
            stream.close()
    finally:
        p.terminate()

    # The context manager closes the file even if writing fails.
    with wave.open(WAVE_OUTPUT_FILENAME, 'wb') as wf:
        wf.setnchannels(CHANNELS)
        wf.setsampwidth(sample_width)  # must match FORMAT
        wf.setframerate(RATE)  # must match RATE
        wf.writeframes(b''.join(frames))


# Wake word and command keyword, converted to pinyin once (loop-invariant).
# Recognized text is converted the same way before the substring checks.
s1 = ''.join(lazy_pinyin(str("小爱")))
g1 = ''.join(lazy_pinyin(str("打开空调")))

# Command-recognition pipeline: built on the first wake-up and then reused,
# instead of reloading the model on every wake-up.
inference_pipeline = None

try:
    while is_task_running:
        start_time = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
        myrecording = sd.rec(int(fs * duration), samplerate=fs, channels=1)
        sd.wait()
        speech_chunk = myrecording.flatten()

        # Noise handling: moving average over window_size samples.
        speech_chunk = np.convolve(speech_chunk, np.ones(window_size) / window_size, mode='same')

        is_final = False
        res = model.generate(input=speech_chunk, cache=cache, is_final=is_final,
                             chunk_size=chunk_size,
                             encoder_chunk_look_back=encoder_chunk_look_back,
                             decoder_chunk_look_back=decoder_chunk_look_back)
        # Guard against an empty result list instead of raising IndexError.
        heard = res[0]['text'] if res else ''
        text_result = ''.join(lazy_pinyin(str(heard))).replace(" ", "")

        if s1 in text_result:
            # The original wrote "is_task_running ==False" / "== True", which are
            # comparisons with no effect; these are the intended assignments.
            # NOTE(review): the original indentation was lost; the flag is assumed
            # to be restored after every wake-up so listening continues - confirm.
            is_task_running = False
            print("已唤醒，开始录音")
            record(5)  # command recording length, in seconds

            if inference_pipeline is None:
                inference_pipeline = pipeline(
                    task=Tasks.auto_speech_recognition,
                    model='D:/SpeechRecognize/speech_seaco_paraformer_large_asr_nat-zh-cn-16k-common-vocab8404-pytorch',
                    model_revision="v2.0.4")
            rec_result = inference_pipeline('./output.wav', hotword='')
            spoken = rec_result[0]["text"] if rec_result else ''
            same = ''.join(lazy_pinyin(spoken.replace(" ", "")))
            print("语音转文字" + same)

            # Keyword matching (keyword 1; further keywords follow the same pattern).
            if g1 in same:
                # Send the command to the device here. Five transports are covered
                # by this article: MQTT, Socket, Modbus TCP/IP, serial port, HTTP.
                print("发送给设备")
            is_task_running = True

        # NOTE(review): assumed to run once per listening window - confirm.
        cursor.execute("INSERT INTO speech_data (text, time_stamp, batch) VALUES (?,?,?)",
                       (text_result, start_time, 'eerr'))
        conn.commit()
finally:
    # Close the database when the loop ends or is interrupted.
    conn.close()

3. 硬件通讯

很多人不清楚如何用软件控制硬件，其实并没有想象中那么复杂：一般的硬件都会提供通讯接口，只要向厂家索取设备的通讯方式和通讯内容（协议），就可以用软件控制硬件。

3.1 Modbus TCP/IP

比较通用的工业通讯协议，读写PLC数据

from pymodbus.client import ModbusTcpClient
def read_data(ip, port, postion):
    """Read one holding register from a Modbus TCP device.

    Args:
        ip: Device IP address (replace with the real device address).
        port: Device TCP port.
        postion: Address of the holding register to read. The spelling is
            kept so keyword callers keep working.

    Returns:
        The register value as a string, or ``"0"`` when connecting or
        reading fails. Failures are printed, not raised.
    """
    num = 0
    client = ModbusTcpClient(ip, port=port)
    try:
        # connect() reports failure through its return value.
        if not client.connect():
            print("Exception:", "unable to connect to %s:%s" % (ip, port))
        else:
            # count is passed by keyword: recent pymodbus releases make it
            # keyword-only, and older 3.x releases accept the keyword too.
            result = client.read_holding_registers(postion, count=1)
            if result.isError():
                # Error responses carry no .registers attribute.
                print("Exception:", result)
            else:
                for value in result.registers:
                    print(value)
                    num = value
    except Exception as e:
        print("Exception:", e)
    finally:
        # Always close the connection.
        client.close()
    return str(num)


def send_data(ip, port, postion, num):
    """Write one value to a holding register of a Modbus TCP device.

    Args:
        ip: Device IP address.
        port: Device TCP port.
        postion: Address of the holding register to write (spelling kept).
        num: Value to write.

    Returns:
        Always ``2``. Success or failure is reported only through the
        printed messages.
    """
    client = ModbusTcpClient(ip, port=port)
    try:
        if not client.connect():
            print("Exception:", "unable to connect to %s:%s" % (ip, port))
        else:
            # Write a single register starting at address `postion`.
            write_result = client.write_registers(postion, [num])
            if not write_result.isError():
                print("Write Success")
            else:
                print("Write Error:", write_result)
    except Exception as e:
        print("Exception:", e)
    finally:
        client.close()
    return 2

3.2 MQTT

最近比较流行的工业协议
读取消息

import paho.mqtt.client as mqtt


def on_connect(client, userdata, flags, rc):
    """Print the connection result code and subscribe to the topic."""
    print("Connected with result code "+str(rc))
    client.subscribe("your_topic")


def on_message(client, userdata, msg):
    """Print the topic and raw payload of each received message."""
    print(msg.topic+" "+str(msg.payload))


# paho-mqtt 2.x expects the callback API version as the first Client()
# argument; 1.x has no CallbackAPIVersion. VERSION1 matches the
# (client, userdata, flags, rc) callback signature used above.
_callback_api = getattr(mqtt, "CallbackAPIVersion", None)
if _callback_api is None:
    client = mqtt.Client()
else:
    client = mqtt.Client(_callback_api.VERSION1)

client.on_connect = on_connect
client.on_message = on_message
client.connect("broker_ip_address", 1883, 60)
client.loop_forever()  # blocks and dispatches the callbacks

写入消息

import paho.mqtt.publish as publish

# Publish a single message to the broker.
publish.single("your_topic", "your_message", hostname="broker_ip_address", port=1883)

3.3 Socket TCP/IP

def socketddd(ip, port, code):
    """Send ``code`` to a TCP server as one text frame, then disconnect.

    The frame is ``str(code)`` followed by BEL, CR and LF (``'\\a\\r\\n'``),
    encoded with the default ``str.encode()`` encoding.

    Args:
        ip: Server address; converted with ``str()``.
        port: Server port; converted with ``int()``.
        code: Value to send; converted with ``str()``.

    Raises:
        OSError: If connecting or sending fails.
    """
    # Local import: no top-level "import socket" is visible in this file.
    import socket

    # Content to send.
    content = str(code) + '\a\r\n'
    server_address = (str(ip), int(port))

    # The context manager closes the socket even if connect/sendall raises.
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
        s.connect(server_address)
        s.sendall(content.encode())

3.4 串口

比较基础的串口

def task1():
    """Send a fixed 5-byte frame over COM5 once per second while the flag is set.

    Opens ``COM5`` at 9600 baud with a 0.5 s timeout and writes the bytes
    ``5A 06 00 00 60`` on each pass for as long as ``is_task_running1`` is
    truthy. The port is closed when the loop ends or an error occurs.
    """
    # Local imports: no top-level "import serial" is visible in this file.
    import time

    import serial  # pyserial

    # bytes.fromhex() skips ASCII whitespace, so the "\r\n" in the literal is
    # NOT transmitted; the payload is exactly 5A 06 00 00 60.
    # NOTE(review): if the device needs a trailing CR LF, append b"\r\n".
    send_data_hex = bytes.fromhex('5A 06 00 00 60\r\n')

    serials = serial.Serial('COM5', 9600, timeout=0.5)
    try:
        # NOTE(review): is_task_running1 is not defined in the visible code;
        # presumably a module-level flag set elsewhere - confirm.
        while is_task_running1:
            if serials.is_open:
                print("open success")
                serials.write(send_data_hex)
            else:
                print("open failed")
            time.sleep(1)  # one pass per second
    finally:
        serials.close()