摘要:本文将带你了解iOS开发入门中的语音听写功能(语音转文本),希望本文对大家学习iOS有所帮助。
本文将带你了解iOS开发入门中的语音听写功能(语音转文本),希望本文对大家学习iOS有所帮助。
Demo下载地址
最近在项目开发中,需要实现将语音识别并转换成文本的功能。为此研究了一下科大讯飞的语音SDK,附上Demo分享给大家。
研发前先得做一些准备。
1、注册科大讯飞开发者帐号(https://www.xfyun.cn)
2、下载开发平台(iOS、或Android,或其他)所需要的SDK(SDK包含:说明文档、SDK即iflyMSC.framework、Demo)
3、项目中添加SDK(添加时,先将SDK复制粘贴到项目目录,再通过添加framework的方式加入到项目引用),以及相关联的framework
添加方法:TARGETS-Build Phases-Link Binary With Libraries-"+"-Choose frameworks and libraries to add-add other,或选择对应的framework-add
4、使用时要添加对应的头文件
特别说明:
1、使用SDK关联的APPID存在于下载的Demo中,如果SDK有替换的话APPID应该跟着一起替换。
2、添加其他framework:
libz.tbd
libc++.tbd
CoreGraphics.framework
QuartzCore.framework
AddressBook.framework
CoreLocation.framework
UIKit.framework
AudioToolbox.framework
Foundation.framework
SystemConfiguration.framework
AVFoundation.framework
CoreTelephony.framework
3、Bitcode属性设置为NO(TARGETS-Build Settings-Build Options-Enable Bitcode-NO)
4、在使用前,务必在AppDelegate的方法
"- (BOOL)application:(UIApplication *)application didFinishLaunchingWithOptions:(NSDictionary *)launchOptions {}"中进行初始化操作。
5、需要有网络的情况下才能使用。
如图
下载的科大讯飞SDK文件
Demo中的APPID
添加SDK,及添加关联framework
设置Bitcode属性为 NO
语音转文本实现代码
.h文件
#import
//导入头文件
#import"iflyMSC.framework/Headers/IFlyMSC.h"
#import"iflyMSC.framework/Headers/IFlySpeechUtility.h"
#import"iflyMSC/IFlySpeechConstant.h"
#pragma mark - 初始化参数类
/**************************************************************************/
/**
 Holder for the speech-dictation parameters.

 The class methods return the fixed identifier strings accepted for each
 parameter; the properties hold the currently selected values. String and
 array properties are declared `copy` so that a caller handing in a mutable
 object cannot change the stored configuration afterwards.
 */
@interface IATConfig : NSObject

/// Returns the shared configuration instance.
+ (IATConfig *)sharedInstance;

/// Accent identifier @"mandarin".
+ (NSString *)mandarin;
/// Accent identifier @"cantonese".
+ (NSString *)cantonese;
/// Accent identifier @"henanese".
+ (NSString *)henanese;
/// Language identifier @"zh_cn".
+ (NSString *)chinese;
/// Language identifier @"en_us".
+ (NSString *)english;
/// Sample-rate value @"8000".
+ (NSString *)lowSampleRate;
/// Sample-rate value @"16000".
+ (NSString *)highSampleRate;
/// Punctuation flag @"1".
+ (NSString *)isDot;
/// Punctuation flag @"0".
+ (NSString *)noDot;

/**
 The following parameters have to be applied to the IFlySpeechRecognizer.
 */
/// Maximum recording time (defaults to @"30000"; presumably milliseconds - confirm against the SDK docs).
@property (nonatomic, copy) NSString *speechTimeout;
/// End-of-speech detection value (defaults to @"3000").
@property (nonatomic, copy) NSString *vadEos;
/// Begin-of-speech detection value (defaults to @"3000").
@property (nonatomic, copy) NSString *vadBos;
/// Recognition language; one of +chinese / +english.
@property (nonatomic, copy) NSString *language;
/// Accent used when the language is Chinese; see +mandarin, +cantonese, +henanese.
@property (nonatomic, copy) NSString *accent;
/// Whether punctuation is returned; see +isDot / +noDot.
@property (nonatomic, copy) NSString *dot;
/// Audio sample rate; see +lowSampleRate / +highSampleRate.
@property (nonatomic, copy) NSString *sampleRate;

/**
 The following parameters do not need to be set by callers.
 */
/// Whether a recognition UI is shown; defaults to NO.
@property (nonatomic, assign) BOOL haveView;
/// Accent identifiers (not populated by the default settings).
@property (nonatomic, copy) NSArray *accentIdentifer;
/// Display names of the selectable accents.
@property (nonatomic, copy) NSArray *accentNickName;

@end
/**************************************************************************/
#pragma mark - 语音听写类
/**
 Speech-dictation helper: starts, stops and cancels a recording session and
 reports progress and results through block callbacks.
 */
@interface VoiceConversion : NSObject

/// Initializes the speech SDK; call once at application start-up.
+ (void)VoiceInitialize;

/**
 Starts recording.

 @param startListening Called with the outcome of the start request.
 @param begin          Called when speech begins.
 @param end            Called when recording has stopped.
 @param error          Called when the session ends; isSuccess is YES when the error code is 0.
 @param result         Called with the recognized text once the last result has arrived.
 @param volume         Called with the current input volume.
 */
- (void)voiceStart:(void (^)(BOOL isStart))startListening
       speechBegin:(void (^)(void))begin
         speechEnd:(void (^)(void))end
       speechError:(void (^)(BOOL isSuccess))error
      speechResult:(void (^)(NSString *text))result
      speechVolume:(void (^)(int volume))volume;

/// Cancels the current recording session.
- (void)voiceCancel;

/// Stops recording.
- (void)voiceStop;

@end
.m文件
#import"VoiceConversion.h"
#pragma mark - 初始化参数类
/**************************************************************************/
// Language identifiers handed out by the IATConfig class methods.
static NSString * const CHINESE   = @"zh_cn";
static NSString * const ENGLISH   = @"en_us";

// Accent identifiers handed out by the IATConfig class methods.
static NSString * const PUTONGHUA = @"mandarin";
static NSString * const YUEYU     = @"cantonese";
static NSString * const HENANHUA  = @"henanese";
@implementation IATConfig

/// Creates a configuration populated with the default settings.
- (instancetype)init
{
    self = [super init];
    if (self)
    {
        [self defaultSetting];
    }
    // self is already nil when [super init] failed, so a single return covers both paths.
    return self;
}

/// Returns the shared instance, created once on first use.
+ (IATConfig *)sharedInstance
{
    static IATConfig *instance = nil;
    static dispatch_once_t predict;
    dispatch_once(&predict, ^{
        instance = [[IATConfig alloc] init];
    });
    return instance;
}

/// Resets every parameter to its default value.
- (void)defaultSetting
{
    _speechTimeout = @"30000";
    _vadEos = @"3000";
    _vadBos = @"3000";
    _dot = @"1";
    _sampleRate = @"16000";
    _language = CHINESE;
    _accent = PUTONGHUA;
    _haveView = NO; // no recognition UI by default
    _accentNickName = @[@"粤语", @"普通话", @"河南话", @"英文"];
}

/// Accent identifier for Mandarin.
+ (NSString *)mandarin
{
    return PUTONGHUA;
}

/// Accent identifier for Cantonese.
+ (NSString *)cantonese
{
    return YUEYU;
}

/// Accent identifier for the Henan dialect.
+ (NSString *)henanese
{
    return HENANHUA;
}

/// Language identifier for Chinese.
+ (NSString *)chinese
{
    return CHINESE;
}

/// Language identifier for English.
+ (NSString *)english
{
    return ENGLISH;
}

/// The lower of the two offered sample rates.
+ (NSString *)lowSampleRate
{
    return @"8000";
}

/// The higher of the two offered sample rates.
+ (NSString *)highSampleRate
{
    return @"16000";
}

/// Flag value requesting punctuation in the result.
+ (NSString *)isDot
{
    return @"1";
}

/// Flag value suppressing punctuation in the result.
+ (NSString *)noDot
{
    return @"0";
}

@end
/**************************************************************************/
#pragma mark - 语音听写类
// Application ID passed to the speech SDK by +VoiceInitialize.
static NSString * const VoiceAPPID = @"572016e4";
// Timeout value; equal to the network timeout configured on the recognizer.
static NSString * const VoiceTimeOut = @"20000";

/**
 Private state of VoiceConversion.

 The class installs itself as the recognizer's delegate, so the protocol
 conformance is declared here rather than in the public header.
 */
@interface VoiceConversion () <IFlySpeechRecognizerDelegate>

/// Accumulates the text of the partial results of the current session.
@property (nonatomic, strong) NSMutableString *resultText;
/// Recognizer used for dictation; created lazily by its getter.
@property (nonatomic, strong) IFlySpeechRecognizer *iFlySpeechRecognizer;

/// Callback invoked when speech begins.
@property (nonatomic, copy) void (^beginSpeech)(void);
/// Callback invoked when recording has stopped.
@property (nonatomic, copy) void (^endSpeech)(void);
/// Callback invoked when the session ends; isSuccess is YES when the error code is 0.
@property (nonatomic, copy) void (^errorSpeech)(BOOL isSuccess);
/// Callback invoked with the recognized text.
@property (nonatomic, copy) void (^resultSpeech)(NSString *text);
/// Callback invoked with the current input volume.
@property (nonatomic, copy) void (^volumeSpeech)(int volume);

@end
@implementation VoiceConversion

#pragma mark 初始化------------

/// Initializes the speech SDK: logging, working path and application ID.
+ (void)VoiceInitialize
{
    // Set the SDK log level; the log is stored under the working path set below.
    [IFlySetting setLogFile:LVL_ALL];

    // Also print the SDK log to the console.
    [IFlySetting showLogcat:YES];

    // Use the caches directory as the SDK working path.
    NSArray *paths = NSSearchPathForDirectoriesInDomains(NSCachesDirectory, NSUserDomainMask, YES);
    NSString *cachePath = paths.firstObject;
    [IFlySetting setLogFilePath:cachePath];

    // The app ID identifies the application and must be supplied at initialization.
    // Initialization is asynchronous, so it is best done at app launch. Using the
    // service before initialization typically yields error code 10111.
    NSString *initString = [[NSString alloc] initWithFormat:@"appid=%@", VoiceAPPID];
    [IFlySpeechUtility createUtility:initString];
}

#pragma mark 实例化------------

- (void)dealloc
{
    // Use the ivar directly: the lazy accessor would create and configure the
    // recognizer during teardown if this object never used it.
    // NOTE(review): the SDK's delegate property is believed to be non-zeroing
    // (assign); detach first so no callback reaches a freed object - confirm.
    if (_iFlySpeechRecognizer.delegate == (id)self)
    {
        _iFlySpeechRecognizer.delegate = nil;
    }
    [_iFlySpeechRecognizer cancel];
}

/// Lazily created buffer that accumulates the recognized text.
- (NSMutableString *)resultText
{
    if (!_resultText)
    {
        _resultText = [[NSMutableString alloc] init];
    }
    return _resultText;
}

/// Lazily obtains the recognizer and switches it to dictation mode.
- (IFlySpeechRecognizer *)iFlySpeechRecognizer
{
    if (_iFlySpeechRecognizer == nil)
    {
        _iFlySpeechRecognizer = [IFlySpeechRecognizer sharedInstance];
        [_iFlySpeechRecognizer setParameter:@"" forKey:[IFlySpeechConstant PARAMS]];

        // Dictation mode.
        [_iFlySpeechRecognizer setParameter:@"iat" forKey:[IFlySpeechConstant IFLY_DOMAIN]];
    }
    return _iFlySpeechRecognizer;
}

/// Installs self as delegate and applies the IATConfig values to the recognizer.
- (void)initializeVoice
{
    IFlySpeechRecognizer *recognizer = self.iFlySpeechRecognizer;
    recognizer.delegate = self;

    IATConfig *instance = [IATConfig sharedInstance];

    // Maximum recording time.
    [recognizer setParameter:instance.speechTimeout forKey:[IFlySpeechConstant SPEECH_TIMEOUT]];
    // End-of-speech detection.
    [recognizer setParameter:instance.vadEos forKey:[IFlySpeechConstant VAD_EOS]];
    // Begin-of-speech detection.
    [recognizer setParameter:instance.vadBos forKey:[IFlySpeechConstant VAD_BOS]];
    // Network timeout; uses the file constant instead of repeating its literal value.
    [recognizer setParameter:VoiceTimeOut forKey:[IFlySpeechConstant NET_TIMEOUT]];
    // Sample rate; 16K is recommended.
    [recognizer setParameter:instance.sampleRate forKey:[IFlySpeechConstant SAMPLE_RATE]];

    if ([instance.language isEqualToString:[IATConfig chinese]])
    {
        // Language.
        [recognizer setParameter:instance.language forKey:[IFlySpeechConstant LANGUAGE]];
        // Accent (only applied for Chinese).
        [recognizer setParameter:instance.accent forKey:[IFlySpeechConstant ACCENT]];
    }
    else if ([instance.language isEqualToString:[IATConfig english]])
    {
        [recognizer setParameter:instance.language forKey:[IFlySpeechConstant LANGUAGE]];
    }

    // Whether punctuation is returned.
    [recognizer setParameter:instance.dot forKey:[IFlySpeechConstant ASR_PTT]];
}

#pragma mark 语音听写方法------------

/**
 Starts recording.

 Clears the accumulated text, stores the callbacks, configures the recognizer,
 cancels any session in progress and then starts listening.

 @param startListening Called synchronously with the result of the start request.
 @param begin          Called when speech begins.
 @param end            Called when recording has stopped.
 @param error          Called when the session ends; isSuccess is YES when the error code is 0.
 @param result         Called with the recognized text once the last result has arrived.
 @param volume         Called with the current input volume.
 */
- (void)voiceStart:(void (^)(BOOL isStart))startListening
       speechBegin:(void (^)(void))begin
         speechEnd:(void (^)(void))end
       speechError:(void (^)(BOOL isSuccess))error
      speechResult:(void (^)(NSString *text))result
      speechVolume:(void (^)(int volume))volume
{
    [self.resultText setString:@""];

    // Store the callbacks (the properties are declared copy).
    self.beginSpeech = begin;
    self.endSpeech = end;
    self.errorSpeech = error;
    self.resultSpeech = result;
    self.volumeSpeech = volume;

    // Apply the configuration.
    [self initializeVoice];

    [self.iFlySpeechRecognizer cancel];

    // Take audio from the microphone.
    [self.iFlySpeechRecognizer setParameter:IFLY_AUDIO_SOURCE_MIC forKey:@"audio_source"];

    // Ask for JSON-formatted dictation results.
    [self.iFlySpeechRecognizer setParameter:@"json" forKey:[IFlySpeechConstant RESULT_TYPE]];

    // Save the recording in the SDK working path (library/cache when no path was set).
    [self.iFlySpeechRecognizer setParameter:@"asr.pcm" forKey:[IFlySpeechConstant ASR_AUDIO_PATH]];

    BOOL isStart = [self.iFlySpeechRecognizer startListening];
    if (startListening)
    {
        // A failed start may mean the previous request has not finished;
        // concurrent sessions are not supported.
        startListening(isStart);
    }
}

/// Cancels dictation.
- (void)voiceCancel
{
    [self.iFlySpeechRecognizer cancel];
}

/// Stops recording.
- (void)voiceStop
{
    [self.iFlySpeechRecognizer stopListening];
}

#pragma mark IFlySpeechRecognizerDelegate------------

/**
 Delegate callback delivering a recognition result.

 @param results The result; the keys of its first dictionary are concatenated and parsed as JSON.
 @param isLast  YES when this is the final result of the session.
 */
- (void)onResults:(NSArray *)results isLast:(BOOL)isLast
{
    NSMutableString *resultString = [[NSMutableString alloc] init];

    // firstObject is nil-safe, unlike results[0], which throws on an empty array.
    NSDictionary *dic = results.firstObject;
    if ([dic isKindOfClass:[NSDictionary class]])
    {
        for (NSString *key in dic)
        {
            [resultString appendFormat:@"%@", key];
        }
    }

    NSString *resultFromJson = [[self class] stringFromJson:resultString];
    if (resultFromJson.length > 0)
    {
        [self.resultText appendString:resultFromJson];
    }

    // Keep the session alive until the final result has arrived; cancelling
    // after every partial result would drop the remaining ones.
    if (!isLast)
    {
        return;
    }

    if (self.resultSpeech)
    {
        // Strip only a trailing full stop. Cutting at the last full stop anywhere
        // in the text would discard whatever follows it.
        if ([self.resultText hasSuffix:@"。"])
        {
            [self.resultText deleteCharactersInRange:NSMakeRange(self.resultText.length - 1, 1)];
        }

        // Hand out an immutable snapshot so later sessions cannot change the caller's text.
        self.resultSpeech([self.resultText copy]);
    }

    [self voiceCancel];
}

/**
 Delegate callback signalling the end of the recognition session.

 @param error Error object; error.errorCode == 0 means the session ended normally.
 */
- (void)onError:(IFlySpeechError *)error
{
    if (self.errorSpeech)
    {
        BOOL isSuccess = (0 == error.errorCode);
        self.errorSpeech(isSuccess);
    }
}

/**
 Delegate callback: recording has stopped.
 */
- (void)onEndOfSpeech
{
    if (self.endSpeech)
    {
        self.endSpeech();
    }
}

/**
 Delegate callback: recognition has begun.
 */
- (void)onBeginOfSpeech
{
    if (self.beginSpeech)
    {
        self.beginSpeech();
    }
}

/**
 Delegate callback reporting the input volume (0-30).
 */
- (void)onVolumeChanged:(int)volume
{
    if (self.volumeSpeech)
    {
        self.volumeSpeech(volume);
    }
}

#pragma mark 解析方法------------
/**************************************************************************/

/**
 Parses a command-word recognition result.

 Every line that contains "confidence=", "grammar=" and "input=" contributes
 one "<input>置信度<confidence>" line to the output. Malformed lines are skipped
 instead of raising a range exception.

 @param params Newline-separated result text.
 @return The formatted lines, or @"" as soon as a line carries id=nomatch.
 */
+ (NSString *)stringFromAsr:(NSString *)params
{
    NSMutableString *resultString = [[NSMutableString alloc] init];
    NSCharacterSet *blanks = [NSCharacterSet whitespaceAndNewlineCharacterSet];

    for (NSString *line in [params componentsSeparatedByString:@"\n"])
    {
        NSRange idRange = [line rangeOfString:@"id="];
        NSRange nameRange = [line rangeOfString:@"name="];
        NSRange confidenceRange = [line rangeOfString:@"confidence="];
        NSRange grammarRange = [line rangeOfString:@"grammar="];
        NSRange inputRange = [line rangeOfString:@"input="];

        if (confidenceRange.length == 0 || grammarRange.length == 0 || inputRange.length == 0)
        {
            continue;
        }

        // Check for nomatch. The id value runs from the end of "id=" up to "name=";
        // without a following "name=" the unsigned subtraction would underflow.
        NSUInteger idStart = NSMaxRange(idRange);
        if (idRange.length != 0 && nameRange.location != NSNotFound && nameRange.location >= idStart)
        {
            NSRange valueRange = NSMakeRange(idStart, nameRange.location - idStart);
            NSString *idValue = [[line substringWithRange:valueRange] stringByTrimmingCharactersInSet:blanks];
            if ([idValue isEqualToString:@"nomatch"])
            {
                return @"";
            }
        }

        // The confidence value runs from the end of "confidence=" up to "grammar=".
        NSUInteger confidenceStart = NSMaxRange(confidenceRange);
        if (grammarRange.location < confidenceStart)
        {
            continue; // fields out of order; cannot extract the confidence
        }
        NSString *score = [line substringWithRange:NSMakeRange(confidenceStart, grammarRange.location - confidenceStart)];

        // The input text is everything after "input=".
        NSString *inputString = [line substringFromIndex:NSMaxRange(inputRange)];

        [resultString appendFormat:@"%@置信度%@\n", inputString, score];
    }

    return resultString;
}

/// Parses `params` as JSON and returns, in order, every dictionary found under
/// ws[].cw[]; anything that does not have the expected container type is skipped.
+ (NSArray *)candidateWordsFromJson:(NSString *)params
{
    NSMutableArray *candidates = [NSMutableArray array];

    // The text must be UTF-8 encodable; NSJSONSerialization rejects nil data.
    NSData *dataJSON = [params dataUsingEncoding:NSUTF8StringEncoding];
    if (dataJSON == nil)
    {
        return candidates;
    }

    NSDictionary *resultDic = [NSJSONSerialization JSONObjectWithData:dataJSON options:kNilOptions error:nil];
    if (![resultDic isKindOfClass:[NSDictionary class]])
    {
        return candidates;
    }

    NSArray *wordArray = [resultDic objectForKey:@"ws"];
    if (![wordArray isKindOfClass:[NSArray class]])
    {
        return candidates;
    }

    for (NSDictionary *wsDic in wordArray)
    {
        if (![wsDic isKindOfClass:[NSDictionary class]])
        {
            continue;
        }

        NSArray *cwArray = [wsDic objectForKey:@"cw"];
        if (![cwArray isKindOfClass:[NSArray class]])
        {
            continue;
        }

        for (NSDictionary *wDic in cwArray)
        {
            if ([wDic isKindOfClass:[NSDictionary class]])
            {
                [candidates addObject:wDic];
            }
        }
    }

    return candidates;
}

/**
 Parses dictation data in JSON format and concatenates the recognized words.

 Example of params:
 {"sn":1,"ls":true,"bg":0,"ed":0,"ws":[{"bg":0,"cw":[{"w":"白日","sc":0}]},{"bg":0,"cw":[{"w":"依山","sc":0}]},{"bg":0,"cw":[{"w":"尽","sc":0}]},{"bg":0,"cw":[{"w":"黄河入海流","sc":0}]},{"bg":0,"cw":[{"w":"。","sc":0}]}]}

 @param params JSON text.
 @return nil when params is nil; otherwise the concatenated "w" values
         (empty when the text cannot be parsed).
 */
+ (NSString *)stringFromJson:(NSString *)params
{
    if (params == nil)
    {
        return nil;
    }

    NSMutableString *tempStr = [[NSMutableString alloc] init];
    for (NSDictionary *wDic in [self candidateWordsFromJson:params])
    {
        // appendString: throws on nil, so only append genuine strings.
        NSString *str = [wDic objectForKey:@"w"];
        if ([str isKindOfClass:[NSString class]])
        {
            [tempStr appendString:str];
        }
    }
    return tempStr;
}

/**
 Parses a grammar-recognition result in JSON format.

 @param params JSON text.
 @return nil when params is nil; otherwise one "<word>置信度:<score>" line per
         candidate word (empty when the text cannot be parsed).
 */
+ (NSString *)stringFromABNFJson:(NSString *)params
{
    if (params == nil)
    {
        return nil;
    }

    NSMutableString *tempStr = [[NSMutableString alloc] init];
    for (NSDictionary *wDic in [self candidateWordsFromJson:params])
    {
        NSString *str = [wDic objectForKey:@"w"];
        if (![str isKindOfClass:[NSString class]])
        {
            continue; // appendString: would throw on a missing or non-string word
        }

        // The score is whatever JSON value is stored under "sc".
        id score = [wDic objectForKey:@"sc"];

        [tempStr appendString:str];
        [tempStr appendFormat:@"置信度:%@", score];
        [tempStr appendString:@"\n"];
    }
    return tempStr;
}

/**************************************************************************/

@end
使用
初始化方法
/// Initializes the speech SDK: logging, working path and application ID.
+ (void)VoiceInitialize
{
    // SDK log level; the log is stored under the working path configured below.
    [IFlySetting setLogFile:LVL_ALL];

    // Mirror the SDK log to the console.
    [IFlySetting showLogcat:YES];

    // SDK working path: the first caches directory of the user domain.
    NSArray *cacheDirectories = NSSearchPathForDirectoriesInDomains(NSCachesDirectory, NSUserDomainMask, YES);
    [IFlySetting setLogFilePath:cacheDirectories[0]];

    // The app ID identifies the application and is mandatory at initialization.
    // Initialization is asynchronous and can be performed at app launch (see the
    // demo's MSCAppDelegate.m). Using the service before initialization typically
    // yields error code 10111.
    NSString *appIDParameter = [NSString stringWithFormat:@"appid=%@", VoiceAPPID];
    [IFlySpeechUtility createUtility:appIDParameter];
}
初始化调用
/// Launch hook: initializes the speech SDK before any recognition is attempted.
- (BOOL)application:(UIApplication *)application
    didFinishLaunchingWithOptions:(NSDictionary *)launchOptions
{
    // Override point for customization after application launch.
    [VoiceConversion VoiceInitialize];

    return YES;
}
#import"VoiceConversion.h"
/// Private state of the demo view controller.
@interface ViewController ()

/// Speech-dictation helper; created on first access by its getter.
@property (nonatomic, strong) VoiceConversion *voiceConversion;

/// Label showing the current status, the volume or the recognized text.
@property (nonatomic, strong) UILabel *messageLabel;

@end
@implementation ViewController

- (void)viewDidLoad {
    [super viewDidLoad];
    // Do any additional setup after loading the view, typically from a nib.

    // Navigation bar: start / stop / cancel buttons.
    self.navigationItem.rightBarButtonItems = @[
        [self barItemWithTitle:@"start" action:@selector(startItemClick:)],
        [self barItemWithTitle:@"stop" action:@selector(stopItemClick:)],
        [self barItemWithTitle:@"cancel" action:@selector(cancelItemClick:)],
    ];
    self.title = @"科大讯飞语音";

    [self setUI];
}

- (void)didReceiveMemoryWarning {
    [super didReceiveMemoryWarning];
    // Dispose of any resources that can be recreated.
}

#pragma mark - 视图

/// Builds a "done"-style bar button that sends `action` to this controller.
- (UIBarButtonItem *)barItemWithTitle:(NSString *)title action:(SEL)action
{
    return [[UIBarButtonItem alloc] initWithTitle:title
                                            style:UIBarButtonItemStyleDone
                                           target:self
                                           action:action];
}

/// Creates the message label and adds it to the view.
- (void)setUI
{
    if ([self respondsToSelector:@selector(setEdgesForExtendedLayout:)])
    {
        [self setEdgesForExtendedLayout:UIRectEdgeNone];
    }

    // A 40-point-high label inset 10 points from the left, top and right edges.
    CGFloat labelWidth = CGRectGetWidth(self.view.bounds) - 10.0 * 2;
    UILabel *label = [[UILabel alloc] initWithFrame:CGRectMake(10.0, 10.0, labelWidth, 40.0)];
    self.messageLabel = label;
    [self.view addSubview:label];
    label.backgroundColor = [UIColor colorWithWhite:0.5 alpha:0.3];
    label.textAlignment = NSTextAlignmentCenter;
}

#pragma mark - 响应

/// Starts a dictation session and mirrors its progress in the message label.
- (void)startItemClick:(UIBarButtonItem *)item
{
    // The callbacks are retained by voiceConversion, which this controller owns.
    __weak typeof(self) weakSelf = self;

    [self.voiceConversion voiceStart:^(BOOL isStart) {
        NSLog(@"1start");
        weakSelf.messageLabel.text = isStart ? @"正在录音" : @"启动识别服务失败,请稍后重试";
    } speechBegin:^{
        NSLog(@"2begin");
    } speechEnd:^{
        NSLog(@"3end");
    } speechError:^(BOOL isSuccess) {
        NSLog(@"4error");
    } speechResult:^(NSString *text) {
        NSLog(@"5result");
        weakSelf.messageLabel.text = text;
    } speechVolume:^(int volume) {
        NSLog(@"6volume");
        weakSelf.messageLabel.text = [NSString stringWithFormat:@"音量:%d", volume];
    }];
}

/// Stops recording and shows the corresponding status text.
- (void)stopItemClick:(UIBarButtonItem *)item
{
    [self.voiceConversion voiceStop];
    self.messageLabel.text = @"停止录音";
}

/// Cancels recognition and shows the corresponding status text.
- (void)cancelItemClick:(UIBarButtonItem *)item
{
    [self.voiceConversion voiceCancel];
    self.messageLabel.text = @"取消识别";
}

#pragma mark - getter

/// Lazily creates the speech-dictation helper.
- (VoiceConversion *)voiceConversion
{
    if (_voiceConversion)
    {
        return _voiceConversion;
    }

    _voiceConversion = [[VoiceConversion alloc] init];
    return _voiceConversion;
}

@end
本文由职坐标整理并发布,希望对同学们有所帮助。了解更多详情请关注职坐标移动开发之IOS频道!
您输入的评论内容中包含违禁敏感词
我知道了
请输入正确的手机号码
请输入正确的验证码
您今天的短信下发次数太多了,明天再试试吧!
我们会在第一时间安排职业规划师联系您!
您也可以联系我们的职业规划师咨询:
版权所有 职坐标-一站式IT培训就业服务领导者 沪ICP备13042190号-4
上海海同信息科技有限公司 Copyright ©2015 www.zhizuobiao.com,All Rights Reserved.
沪公网安备 31011502005948号