python调用百度语音识别实现大音频文件语音识别功能

作者:septwolves2015 时间:2023-11-29 00:59:53 

本文为大家分享了python实现大音频文件语音识别功能的具体代码,供大家参考,具体内容如下

实现思路:先用ffmpeg将其他非wav格式的音频转换为wav格式,并转换音频的声道(百度支持的声道数为1)和采样率(值为8000);格式转换完成后,再用ffmpeg将音频切成百度支持的时长(30秒和60秒2种,本程序用的是30秒)。


# coding: utf-8
import json
import time
import base64
from inc import rtysdb
import urllib2
import requests
import os
import uuid
from inc import db_config

class BaiduRest:
    """Small client for the Baidu REST speech endpoints used by this script.

    Creating an instance immediately requests an OAuth access token, so the
    constructor performs a network call.
    """

    def __init__(self, cu_id, api_key, api_secert):
        """Record the endpoints and device id, then fetch an access token.

        Args:
            cu_id: Value sent as ``cuid`` with every request.
            api_key: Substituted into the token URL as ``client_id``.
            api_secert: Substituted into the token URL as ``client_secret``.
        """
        self.token_url = "https://openapi.baidu.com/oauth/2.0/token?grant_type=client_credentials&client_id=%s&client_secret=%s"
        self.getvoice_url = "http://tsn.baidu.com/text2audio?tex=%s&lan=zh&cuid=%s&ctp=1&tok=%s"
        self.upvoice_url = 'http://vop.baidu.com/server_api'

        self.cu_id = cu_id
        self.get_token(api_key, api_secert)

    def get_token(self, api_key, api_secert):
        """Request an access token and store it in ``self.token_str``.

        Returns:
            True once the token has been stored.

        Raises:
            KeyError: If the JSON response has no ``access_token`` field.
        """
        token_url = self.token_url % (api_key, api_secert)
        response = urllib2.urlopen(token_url)
        try:
            r_str = response.read()
        finally:
            # Release the connection even if reading fails.
            response.close()
        token_data = json.loads(r_str)
        self.token_str = token_data['access_token']
        return True

    # Speech synthesis
    def text2audio(self, text, filename):
        """Fetch synthesized audio for ``text`` and write it to ``filename``.

        The response body is written as-is; it is not inspected.

        Returns:
            True after the file has been written.
        """
        get_url = self.getvoice_url % (urllib2.quote(text), self.cu_id, self.token_str)
        response = urllib2.urlopen(get_url)
        try:
            voice_data = response.read()
        finally:
            response.close()
        # The context manager guarantees the file is closed on error too.
        with open(filename, 'wb') as voice_fp:
            voice_fp.write(voice_data)
        return True

    def _build_recognition_payload(self, voice_data):
        """Build the JSON body for a recognition request from raw wav bytes."""
        return {
            'format': 'wav',
            'rate': 8000,
            'channel': 1,
            'cuid': self.cu_id,
            'token': self.token_str,
            # Length of the raw audio, not of the base64 text.
            'len': len(voice_data),
            # Decode to text so the value is JSON-serializable on both
            # Python 2 and Python 3; b64encode never emits newlines.
            'speech': base64.b64encode(voice_data).decode('ascii'),
        }

    # Speech recognition
    def audio2text(self, filename):
        """Upload the wav file ``filename`` and return the recognized text.

        Returns:
            The first entry of the response's ``result`` list when the
            response's ``err_msg`` equals ``'success.'``; otherwise False.
        """
        with open(filename, 'rb') as wav_fp:
            voice_data = wav_fp.read()
        payload = self._build_recognition_payload(voice_data)
        response = requests.post(
            self.upvoice_url,
            json=payload,
            headers={'Content-Type': 'application/json'},
        )
        data_result = response.json()
        # .get() so a response without err_msg counts as a failure instead
        # of raising KeyError.
        if data_result.get('err_msg') == 'success.':
            return data_result['result'][0]
        return False

def test_voice(voice_file,
               api_key="vossGHIgEETS6IMRxBDeahv8",
               api_secert="3c1fe6a6312f41fa21fa2c394dad5510",
               cu_id="0-57-7B-9F-1F-A1"):
    """Recognize the speech in ``voice_file`` through a new BaiduRest client.

    A new client is created on every call, and its constructor requests an
    access token, so each call makes a token request plus a recognition
    request.

    Args:
        voice_file: Path of the wav file to recognize.
        api_key: Baidu API key; defaults to the key this script shipped with.
        api_secert: Baidu API secret; defaults to the script's original value.
        cu_id: Device id sent as ``cuid``; defaults to the original value.

    Returns:
        Whatever ``BaiduRest.audio2text`` returns: the recognized text, or
        False when recognition did not succeed.
    """
    # NOTE(review): the default credentials are still embedded in source;
    # callers should pass their own and the defaults should move to config.
    client = BaiduRest(cu_id, api_key, api_secert)
    return client.audio2text(voice_file)

def get_master_audio(check_status='cut_status'):
    """Fetch master recordings that are still waiting for a processing step.

    Args:
        check_status: ``'cut_status'`` selects rows whose ``status`` is 0;
            ``'finished_status'`` selects rows whose ``finished_status`` is 0.

    Returns:
        The rows from ``rtysdb.select_data`` when there are any; False when
        the query yields nothing or ``check_status`` is not recognized.
    """
    # Guard clause: reject unknown selectors before touching the database.
    if check_status not in ('cut_status', 'finished_status'):
        return False

    if check_status == 'cut_status':
        query = "SELECT id,url, time_long,sharps FROM ocenter_recognition WHERE status=0"
    else:
        query = "SELECT id,url, time_long,sharps FROM ocenter_recognition WHERE finished_status=0"

    rows = rtysdb.select_data(query, 'more')
    # An empty result is normalised to False.
    return rows or False

def _sql_quote(text):
    """Double single quotes so ``text`` can sit inside a '...' SQL literal."""
    return text.replace("'", "''")


def go_recognize(master_id):
    """Recognize up to 10 pending sections of one master recording.

    For each pending section whose file exists, the audio is sent to
    ``test_voice``. On success the text is stored on the section and appended
    to the master record's content; on failure the section is only marked as
    processed. After the batch, the master row gets ``finished_status=1``.

    Args:
        master_id: Integer id of the ``ocenter_recognition`` row.

    Returns:
        False when the master has no pending sections; otherwise None.
    """
    # NOTE(review): the query is limited to 10 rows, yet the master is marked
    # finished after this single batch. Confirm whether recordings with more
    # than 10 sections are meant to be truncated.
    section_path = db_config.SYS_PATH
    sql = "SELECT id,rid,url,status FROM ocenter_section WHERE rid=%d AND status=0 order by id asc limit 10" % (master_id)
    record = rtysdb.select_data(sql, 'more')
    if not record:
        return False

    for rec in record:
        section_id, parent_id, section_url = rec[0], rec[1], rec[2]
        voice_file = section_path + '/' + section_url
        if not os.path.exists(voice_file):
            # Segment file is missing; leave the row pending.
            continue

        # The leftover debug `print result; exit(0)` was removed here: it
        # terminated the process after the first section.
        result = test_voice(voice_file)
        if result:
            # NOTE(review): quotes are doubled because the text is spliced
            # into the SQL string. Switch to parameterized queries if rtysdb
            # offers them.
            sql = "update ocenter_section set content='%s', status='%d' where id=%d" % (_sql_quote(result), 1, section_id)
            rtysdb.do_exec_sql(sql)
            parent_content = rtysdb.select_data("SELECT id,content FROM ocenter_recognition WHERE id=%d" % (parent_id))
            if parent_content:
                # The stored content may be NULL before the first section.
                new_content = (parent_content[1] or '') + result
                update_content_sql = "update ocenter_recognition set content='%s' where id=%d" % (_sql_quote(new_content), parent_id)
                rtysdb.do_exec_sql(update_content_sql)
        else:
            # The original passed three arguments to two placeholders, which
            # raises TypeError. Presumably the intent is to mark the failed
            # section as processed (status 1) - TODO confirm.
            rtysdb.do_exec_sql("update ocenter_section set status='%d' where id=%d" % (1, section_id))
        # Pause between requests, as the original did.
        time.sleep(5)

    # The original used for/else with no break, so this always ran after the
    # loop; a plain statement is equivalent.
    rtysdb.do_exec_sql("UPDATE ocenter_recognition SET finished_status=1 WHERE id=%d" % (master_id))
# Convert audio files that Baidu speech recognition cannot handle as-is
def ffmpeg_convert():
    """Convert every not-yet-cut master recording to 8000 Hz mono wav.

    For each row returned by ``get_master_audio('cut_status')`` the source
    file is converted with ffmpeg into ``Uploads/Convert/convert_<uuid>.wav``
    under ``db_config.SYS_PATH``. When the conversion succeeds, the result is
    cut into segments by ``ffmpeg_cut`` and the master row is updated with
    ``status=1`` and the converted file name.
    """
    # Function-scope import keeps this block self-contained.
    import subprocess

    section_path = db_config.SYS_PATH
    used_audio = get_master_audio('cut_status')
    if not used_audio:
        return

    for audio in used_audio:
        audio_path = section_path + '/' + audio[1]
        convert_name = "Uploads/Convert/convert_" + str(uuid.uuid1()) + ".wav"
        target_path = section_path + "/" + convert_name

        # An argument list (no shell) stops spaces or shell metacharacters in
        # the stored path from breaking or hijacking the command.
        # subprocess.call also waits for ffmpeg to finish before the output
        # file is inspected.
        command = ["ffmpeg", "-i", audio_path,
                   "-ar", "8000", "-ac", "1", "-f", "wav", target_path]
        try:
            exit_code = subprocess.call(command)
        except OSError:
            # ffmpeg could not be started; skip this file as a failed
            # conversion, matching the original best-effort behavior.
            continue

        # Require a clean exit and the output file, so a partial file left by
        # a failed run is not cut and marked as done.
        if exit_code == 0 and os.path.exists(target_path):
            ffmpeg_cut(convert_name, audio[3], audio[0])
            sql = "UPDATE ocenter_recognition SET status=1,convert_name='%s' where id=%d" % (convert_name, audio[0])
            rtysdb.do_exec_sql(sql)
# Cut a large audio file into short segments
def ffmpeg_cut(convert_name, sharps, master_id):
    """Cut a converted wav into consecutive 30-second segments.

    Each segment is written to ``Uploads/Section/<uuid>-<n>.wav`` under
    ``db_config.SYS_PATH`` and recorded in ``ocenter_section`` with
    ``status`` 0, whether or not ffmpeg produced the file (as in the
    original).

    Args:
        convert_name: Path of the converted wav, relative to the system path.
        sharps: Number of segments to produce; None or a value <= 0 does
            nothing.
        master_id: Id of the master recording, stored as ``rid``.
    """
    # Function-scope import keeps this block self-contained.
    import subprocess

    if not sharps or sharps <= 0:
        return

    section_path = db_config.SYS_PATH
    # Loop-invariant: every segment is read from the same source file.
    cut_name = section_path + '/' + convert_name

    for i in range(sharps):
        # Compute HH:MM:SS arithmetically. The original formatted
        # time.localtime() and subtracted 8 hours, which is only correct on a
        # machine in UTC+8 and mis-pads the hour from 10 hours on.
        offset = i * 30
        start_time = "%02d:%02d:%02d" % (offset // 3600, (offset % 3600) // 60, offset % 60)

        db_store_name = "Uploads/Section/" + str(uuid.uuid1()) + '-' + str(i + 1) + ".wav"
        section_name = section_path + "/" + db_store_name

        # Argument list (no shell), and "ffmpeg" rather than "ffmpeg.exe" to
        # match ffmpeg_convert.
        command = ["ffmpeg", "-i", cut_name, "-vn", "-acodec", "copy",
                   "-ss", start_time, "-t", "00:00:30", section_name]
        try:
            subprocess.call(command)
        except OSError:
            # ffmpeg could not be started; keep the original best-effort
            # behavior and still record the section below.
            pass

        data = {
            'rid': master_id,
            'url': db_store_name,
            'create_time': int(time.time()),
            'status': 0,
        }
        rtysdb.insert_one('ocenter_section', data)

if __name__ == "__main__":
    # Step 1: convert and cut every master recording that is still uncut.
    ffmpeg_convert()
    # Step 2: run recognition for every master not yet marked finished.
    # get_master_audio returns False when nothing is pending, so fall back to
    # an empty tuple.
    pending_masters = get_master_audio('finished_status')
    for master_row in (pending_masters or ()):
        go_recognize(master_row[0])

来源:https://blog.csdn.net/septwolves2015/article/details/78554524

标签:python,语音识别,百度语音
0
投稿

猜你喜欢

  • mysql 8.0.12 安装图文教程

    2024-01-28 05:19:04
  • Python入门之布尔值详解

    2023-01-17 06:29:58
  • Go语言学习网络编程与Http教程示例

    2024-02-08 10:33:12
  • Python+OpenCV实现将图像转换为二进制格式

    2021-06-25 08:10:33
  • Pycharm新建模板默认添加个人信息的实例

    2022-03-30 20:59:50
  • Golang切片Slice功能操作详情

    2024-04-29 13:06:54
  • python实现web方式logview的方法

    2023-12-23 17:07:54
  • MySQL无法启动几种常见问题小结

    2024-01-18 06:05:32
  • Vue内部渲染视图的方法

    2024-04-28 09:19:57
  • 详解MySQL中Order By排序和filesort排序的原理及实现

    2024-01-15 08:24:17
  • Python 如何在字符串中插入变量

    2023-02-16 11:06:27
  • python 实现socket服务端并发的四种方式

    2022-08-09 22:19:46
  • javascript中正则表达式语法详解

    2024-05-02 16:17:04
  • pycharm2020.2 配置使用的方法详解

    2022-10-01 12:23:23
  • Python根据字典值对字典进行排序的三种方法实例

    2022-07-29 16:24:21
  • fastapi与django异步的并发对比分析

    2023-01-03 19:49:30
  • Thinkphp模板中使用自定义函数的方法

    2024-05-13 09:57:29
  • MySql数据库基本命令集会

    2011-08-05 18:43:23
  • JS通过FSO将unicode字符写入文本

    2009-06-01 12:26:00
  • Python中json库的操作指南

    2021-10-11 11:04:43
  • asp之家 网络编程 m.aspxhome.com