麦克风获取
# Open a microphone input stream with the capture parameters defined below.
stream = audio.open(
    format=FORMAT,
    channels=CHANNELS,
    rate=RATE,
    input=True,
    frames_per_buffer=CHUNK,
)
设置参数
# Audio capture parameters.
FORMAT = pyaudio.paInt16  # 16-bit signed integer samples
CHANNELS = 1              # mono capture
RATE = 16000              # sample rate in Hz (matches the 16k ASR models)
CHUNK = 1024              # frames read from the stream per buffer
RECORD_SECONDS = 5        # fixed recording length for this snippet
WAVE_OUTPUT_FILENAME = "output.wav"  # destination WAV file
读取数据块
# Pull RECORD_SECONDS worth of audio off the stream, one CHUNK at a time.
num_chunks = int(RATE / CHUNK * RECORD_SECONDS)
for _ in range(num_chunks):
    frames.append(stream.read(CHUNK))
阿里语音识别模型加载
# Local paths of the ModelScope (iic) models used by the pipeline.
paraformer_path = "./iic/speech_paraformer-large-vad-punc-spk_asr_nat-zh-cn"
fsmn_path = "./iic/speech_fsmn_vad_zh-cn-16k-common-pytorch"
ct_punc_path = "./iic/punc_ct-transformer_cn-en-common-vocab471067-large"
cam_path = "./iic/speech_campplus_sv_zh-cn_16k-common"

# Assemble the FunASR pipeline on CPU: Paraformer ASR + FSMN voice-activity
# detection + CT-Transformer punctuation + CAM++ speaker model.
model = AutoModel(
    model=paraformer_path,
    model_revision="v2.0.4",
    vad_model=fsmn_path,
    vad_model_revision="v2.0.4",
    punc_model=ct_punc_path,
    punc_model_revision="v2.0.4",
    spk_model=cam_path,
    spk_model_revision="v2.0.2",
    device="cpu",
)
阿里语音识别
# Recognize the saved WAV file; '魔搭' is supplied as a boosted hotword.
res = model.generate(
    input=WAVE_OUTPUT_FILENAME,
    batch_size_s=16000,
    hotword='魔搭',
)
整体代码
import pyaudio
import wave
import threading
import keyboard
from funasr import AutoModel
# Audio capture parameters for the full program.
FORMAT = pyaudio.paInt16  # 16-bit signed integer samples
CHANNELS = 1              # mono capture
RATE = 16000              # sample rate in Hz (matches the 16k ASR models)
CHUNK = 1024              # frames read from the stream per buffer
WAVE_OUTPUT_FILENAME = "./wav_data/output.wav"  # destination WAV file
# Global recording state shared between the hotkey callbacks and the
# background capture thread.
audio = pyaudio.PyAudio()
frames = []
stream = None
recording = False
record_thread = None  # kept so stop_recording() can join the capture thread

# Local paths of the ModelScope (iic) models used by the pipeline.
paraformer_path = "./iic/speech_paraformer-large-vad-punc-spk_asr_nat-zh-cn"
fsmn_path = "./iic/speech_fsmn_vad_zh-cn-16k-common-pytorch"
ct_punc_path = "./iic/punc_ct-transformer_cn-en-common-vocab471067-large"
cam_path = "./iic/speech_campplus_sv_zh-cn_16k-common"

# FunASR pipeline on CPU: Paraformer ASR + FSMN VAD + CT-Transformer
# punctuation + CAM++ speaker model.
model = AutoModel(
    model=paraformer_path,
    model_revision="v2.0.4",
    vad_model=fsmn_path,
    vad_model_revision="v2.0.4",
    punc_model=ct_punc_path,
    punc_model_revision="v2.0.4",
    spk_model=cam_path,
    spk_model_revision="v2.0.2",
    device="cpu",
)
print("加载模型完成!!!")


def start_recording():
    """Open the microphone and start capturing audio on a background thread."""
    global stream, recording, record_thread
    if not recording:
        print("开始录音...")
        # BUG FIX: drop frames from any previous recording so consecutive
        # recordings do not get concatenated into the output file.
        frames.clear()
        recording = True
        stream = audio.open(
            format=FORMAT,
            channels=CHANNELS,
            rate=RATE,
            input=True,
            frames_per_buffer=CHUNK,
        )
        record_thread = threading.Thread(target=record_audio)
        record_thread.start()


def stop_recording():
    """Stop capturing, save the WAV file and run speech recognition on it."""
    global stream, recording
    if recording:
        print("录音结束.")
        recording = False
        # BUG FIX: wait for the capture thread to exit before closing the
        # stream; previously the thread could still be inside stream.read()
        # when the stream was closed, crashing the recorder.
        if record_thread is not None:
            record_thread.join()
        stream.stop_stream()
        stream.close()
        stream = None
        save_audio()
        # BUG FIX: do not call audio.terminate() here — terminating the
        # PyAudio instance made every recording after the first one fail
        # when start_recording() called audio.open() again.  Terminate only
        # at program exit if needed.
        print("开始识别!!!")
        res = model.generate(
            input=WAVE_OUTPUT_FILENAME,
            batch_size_s=16000,
            hotword='魔搭',
        )
        print("识别结束!!!")
        print("识别结果:", res)


def record_audio():
    """Capture loop run on the background thread while ``recording`` is True."""
    while recording:
        data = stream.read(CHUNK)
        frames.append(data)


def save_audio():
    """Write the captured frames to WAVE_OUTPUT_FILENAME as a WAV file."""
    # Context manager guarantees the file is closed even if a write fails.
    with wave.open(WAVE_OUTPUT_FILENAME, 'wb') as wf:
        wf.setnchannels(CHANNELS)
        wf.setsampwidth(audio.get_sample_size(FORMAT))
        wf.setframerate(RATE)
        wf.writeframes(b''.join(frames))
# Hotkey bindings: Ctrl starts a recording, Alt stops it and runs recognition.
keyboard.add_hotkey('ctrl', start_recording)
keyboard.add_hotkey('alt', stop_recording)
print("按 Ctrl 开始录音,按 Alt 结束录音")
# Block forever so the hotkey listeners stay active.
keyboard.wait()