From 9283589bb084c4b97de6ff7836af67e5551dda93 Mon Sep 17 00:00:00 2001 From: jax <273277355@qq.com> Date: Sun, 5 Mar 2023 15:40:11 +0800 Subject: [PATCH 1/7] #8 complete #8 complete --- Example/Podfile.lock | 4 +- OSSSpeechKit/Classes/OSSSpeech.swift | 57 +++++++++++++++++++++++++++- 2 files changed, 58 insertions(+), 3 deletions(-) diff --git a/Example/Podfile.lock b/Example/Podfile.lock index 5a657c7..560f642 100644 --- a/Example/Podfile.lock +++ b/Example/Podfile.lock @@ -11,6 +11,6 @@ EXTERNAL SOURCES: SPEC CHECKSUMS: OSSSpeechKit: ea0fd8151e7e338bc6ddc6bb749455fc3b33cfde -PODFILE CHECKSUM: 619c7767d93bbf8bc7a5c2d0a1d118e435561c49 +PODFILE CHECKSUM: 74abb7e61e1f9880a3040420923d3dad8dfbc311 -COCOAPODS: 1.11.3 +COCOAPODS: 1.12.0 diff --git a/OSSSpeechKit/Classes/OSSSpeech.swift b/OSSSpeechKit/Classes/OSSSpeech.swift index e20b6b1..12dbb32 100755 --- a/OSSSpeechKit/Classes/OSSSpeech.swift +++ b/OSSSpeechKit/Classes/OSSSpeech.swift @@ -150,6 +150,8 @@ public enum OSSSpeechRecognitionTaskType: Int { public protocol OSSSpeechDelegate: AnyObject { /// When the microphone has finished accepting audio, this delegate will be called with the final best text output. func didFinishListening(withText text: String) + ///When the microphone has finished accepting audio, this delegate will be called with the final best text output or voice file path. + func didFinishListening(withAudioFileURL url:URL,withText text:String) /// Handle returning authentication status to user - primary use is for non-authorized state. func authorizationToMicrophone(withAuthentication type: OSSSpeechKitAuthorizationStatus) /// If the speech recogniser and request fail to set up, this method will be called. @@ -165,6 +167,9 @@ public class OSSSpeech: NSObject { // MARK: - Private Properties + private var audioRecorder:AVAudioRecorder? + private var audioFileURL:URL! + /// An object that produces synthesized speech from text utterances and provides controls for monitoring or controlling ongoing speech. private var speechSynthesizer: AVSpeechSynthesizer! @@ -344,8 +349,9 @@ public class OSSSpeech: NSObject { #if !os(macOS) do { let category: AVAudioSession.Category = isRecording ? .playAndRecord : .playback - try audioSession.setCategory(category, options: .duckOthers) + try audioSession.setCategory(category, options: isRecording ? .defaultToSpeaker : .duckOthers) try audioSession.setActive(true, options: .notifyOthersOnDeactivation) + isRecording ? try audioSession.setActive(true) : try audioSession.setActive(true, options: .notifyOthersOnDeactivation) return true } catch { if isRecording { @@ -422,6 +428,9 @@ public class OSSSpeech: NSObject { } let node = engine.inputNode node.removeTap(onBus: 0) + + audioRecorder?.stop() + if node.inputFormat(forBus: 0).channelCount == 0 { node.reset() } @@ -535,6 +544,40 @@ public class OSSSpeech: NSObject { delegate?.didFailToCommenceSpeechRecording() delegate?.didFailToProcessRequest(withError: OSSSpeechKitErrorType.invalidSpeechRequest.error) } + + readyToRecord() + } + + func readyToRecord() + { + let dateFormatter = DateFormatter() + dateFormatter.dateFormat = "yyyy-MM-dd HH:mm:ss" + let dateString = dateFormatter.string(from: Date()) + + self.audioFileURL = getDocumentsDirectory().appendingPathComponent("\(dateString).m4a") + + let audioSettings = [AVFormatIDKey: Int(kAudioFormatMPEG4AAC), + AVSampleRateKey: 12000, + AVNumberOfChannelsKey: 1, + AVEncoderAudioQualityKey: AVAudioQuality.high.rawValue + ] + + do { + audioRecorder = try AVAudioRecorder(url: audioFileURL, settings: audioSettings) + audioRecorder?.delegate = self + audioRecorder?.record() + + } + catch + { + print(error.localizedDescription) + } + } + + func getDocumentsDirectory() -> URL { + let paths = FileManager.default.urls(for: .documentDirectory, in: .userDomainMask) + let documentsDirectory = paths[0] + return documentsDirectory } } @@ -547,6 +590,7 @@ extension OSSSpeech: SFSpeechRecognitionTaskDelegate, SFSpeechRecognizerDelegate public func speechRecognitionTask(_ task: SFSpeechRecognitionTask, didFinishSuccessfully successfully: Bool) { recognitionTask = nil delegate?.didFinishListening(withText: spokenText) + delegate?.didFinishListening(withAudioFileURL: audioFileURL, withText: spokenText) setSession(isRecording: false) } @@ -568,6 +612,17 @@ extension OSSSpeech: SFSpeechRecognitionTaskDelegate, SFSpeechRecognizerDelegate /// Docs available by Google searching for SFSpeechRecognizerDelegate public func speechRecognizer(_ speechRecognizer: SFSpeechRecognizer, availabilityDidChange available: Bool) {} +} +//MARK: AVAudioRecorderDelegate +extension OSSSpeech:AVAudioRecorderDelegate +{ + public func audioRecorderDidFinishRecording(_ recorder: AVAudioRecorder, successfully flag: Bool) { + if flag + { + audioRecorder?.stop() + print("Audio file save") + } + } } #endif From 0eb9f3980e6dd01e2caf2ff832c139e60c5d4b1e Mon Sep 17 00:00:00 2001 From: jax <273277355@qq.com> Date: Tue, 7 Mar 2023 00:21:14 +0800 Subject: [PATCH 2/7] =?UTF-8?q?=E4=BF=AE=E5=BE=A9=E4=B8=80=E4=BA=9B?= =?UTF-8?q?=E6=98=86=E8=9F=B2?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 修復一些昆蟲 --- .../OSSSpeechKit.xcodeproj/project.pbxproj | 8 +-- ...untryLanguageListTableViewController.swift | 3 ++ Example/Tests/OSSSpeechTests.swift | 4 ++ OSSSpeechKit/Classes/OSSSpeech.swift | 53 ++++++++++++------- 4 files changed, 44 insertions(+), 24 deletions(-) diff --git a/Example/OSSSpeechKit.xcodeproj/project.pbxproj b/Example/OSSSpeechKit.xcodeproj/project.pbxproj index 969bc43..2a96887 100644 --- a/Example/OSSSpeechKit.xcodeproj/project.pbxproj +++ b/Example/OSSSpeechKit.xcodeproj/project.pbxproj @@ -520,7 +520,7 @@ ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon; CODE_SIGN_IDENTITY = "iPhone Developer"; CODE_SIGN_STYLE = Automatic; - DEVELOPMENT_TEAM = ""; + DEVELOPMENT_TEAM = JL4FNX8668; GCC_OPTIMIZATION_LEVEL = 0; INFOPLIST_FILE = OSSSpeechKit/Info.plist; IPHONEOS_DEPLOYMENT_TARGET = 13.0; @@ -544,7 +544,7 @@ ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon; CODE_SIGN_IDENTITY = "iPhone Developer"; CODE_SIGN_STYLE = Automatic; - DEVELOPMENT_TEAM = ""; + DEVELOPMENT_TEAM = JL4FNX8668; GCC_OPTIMIZATION_LEVEL = 0; INFOPLIST_FILE = OSSSpeechKit/Info.plist; IPHONEOS_DEPLOYMENT_TARGET = 13.0; @@ -567,7 +567,7 @@ buildSettings = { CODE_SIGN_IDENTITY = "iPhone Developer"; CODE_SIGN_STYLE = Automatic; - DEVELOPMENT_TEAM = ""; + DEVELOPMENT_TEAM = JL4FNX8668; FRAMEWORK_SEARCH_PATHS = ( "$(SDKROOT)/Developer/Library/Frameworks", "$(inherited)", @@ -599,7 +599,7 @@ buildSettings = { CODE_SIGN_IDENTITY = "iPhone Developer"; CODE_SIGN_STYLE = Automatic; - DEVELOPMENT_TEAM = ""; + DEVELOPMENT_TEAM = JL4FNX8668; FRAMEWORK_SEARCH_PATHS = ( "$(SDKROOT)/Developer/Library/Frameworks", "$(inherited)", diff --git a/Example/OSSSpeechKit/CountryLanguageListTableViewController.swift b/Example/OSSSpeechKit/CountryLanguageListTableViewController.swift index a318c06..440eade 100644 --- a/Example/OSSSpeechKit/CountryLanguageListTableViewController.swift +++ b/Example/OSSSpeechKit/CountryLanguageListTableViewController.swift @@ -112,6 +112,9 @@ extension CountryLanguageListTableViewController { } extension CountryLanguageListTableViewController: OSSSpeechDelegate { + func didFinishListening(withAudioFileURL url: URL, withText text: String) { + print("Translation completed: \(text). And user voice file path: \(url.absoluteString)") + } func didCompleteTranslation(withText text: String) { print("Translation completed: \(text)") diff --git a/Example/Tests/OSSSpeechTests.swift b/Example/Tests/OSSSpeechTests.swift index 24c3e81..23cacb3 100644 --- a/Example/Tests/OSSSpeechTests.swift +++ b/Example/Tests/OSSSpeechTests.swift @@ -313,6 +313,10 @@ class OSSSpeechTests: XCTestCase { } extension OSSSpeechTests: OSSSpeechDelegate { + func didFinishListening(withAudioFileURL url: URL, withText text: String) { + print("Translation completed with text: \(text). And user voice file path: \(url.absoluteString)") + } + func didCompleteTranslation(withText text: String) { print("Translation completed with text: \(text)") } diff --git a/OSSSpeechKit/Classes/OSSSpeech.swift b/OSSSpeechKit/Classes/OSSSpeech.swift index 12dbb32..4dfa614 100755 --- a/OSSSpeechKit/Classes/OSSSpeech.swift +++ b/OSSSpeechKit/Classes/OSSSpeech.swift @@ -68,6 +68,10 @@ public enum OSSSpeechKitErrorType: Int { case invalidAudioEngine = -6 /// Voice recognition is unavailable. case recogniserUnavailble = -7 + /// Voice record is invalid + case invalidRecordVoice = -8 + /// Voice record file path is Invalid + case invalidVoiceFilePath = -9 /// The OSSSpeechKit error message string. /// @@ -88,6 +92,10 @@ public enum OSSSpeechKitErrorType: Int { return OSSSpeechUtility().getString(forLocalizedName: "OSSSpeechKitErrorType_messageInvalidAudioEngine", defaultValue: "The audio engine is unavailable. Please try again soon.") case .recogniserUnavailble: return OSSSpeechUtility().getString(forLocalizedName: "OSSSpeechKitErrorType_messageRecogniserUnavailable", defaultValue: "The Speech Recognition service is currently unavailable.") + case .invalidRecordVoice: + return OSSSpeechUtility().getString(forLocalizedName: "OSSSpeechKitErrorType_messageInvalidRecordVoice", defaultValue: "The user voice recoeder service is not working.") + case .invalidVoiceFilePath: + return OSSSpeechUtility().getString(forLocalizedName: "OSSSpeechKitErrorType_messageInvalidVoiceFolePath", defaultValue: "The user voice file path can not create.") } } @@ -97,7 +105,8 @@ public enum OSSSpeechKitErrorType: Int { public var errorRequestType: String { switch self { case .noMicrophoneAccess, - .invalidAudioEngine: + .invalidAudioEngine, + .invalidRecordVoice: return OSSSpeechUtility().getString(forLocalizedName: "OSSSpeechKitErrorType_requestTypeNoMicAccess", defaultValue: "Recording") case .invalidUtterance: return OSSSpeechUtility().getString(forLocalizedName: "OSSSpeechKitErrorType_requestTypeInvalidUtterance", defaultValue: "Speech or Recording") @@ -106,6 +115,8 @@ public enum OSSSpeechKitErrorType: Int { .invalidSpeechRequest, .recogniserUnavailble: return OSSSpeechUtility().getString(forLocalizedName: "OSSSpeechKitErrorType_requestTypeInvalidSpeech", defaultValue: "Speech") + case .invalidVoiceFilePath: + return OSSSpeechUtility().getString(forLocalizedName: "OSSSpeechKitErrorType_requestTypeInvalidFilePath", defaultValue: "File") } } @@ -150,8 +161,8 @@ public enum OSSSpeechRecognitionTaskType: Int { public protocol OSSSpeechDelegate: AnyObject { /// When the microphone has finished accepting audio, this delegate will be called with the final best text output. func didFinishListening(withText text: String) - ///When the microphone has finished accepting audio, this delegate will be called with the final best text output or voice file path. - func didFinishListening(withAudioFileURL url:URL,withText text:String) + ///When the microphone has finished accepting recording, this function will be called with the final best text output or voice file path. + func didFinishListening(withAudioFileURL url: URL,withText text: String) /// Handle returning authentication status to user - primary use is for non-authorized state. func authorizationToMicrophone(withAuthentication type: OSSSpeechKitAuthorizationStatus) /// If the speech recogniser and request fail to set up, this method will be called. @@ -162,13 +173,16 @@ public protocol OSSSpeechDelegate: AnyObject { func didFailToProcessRequest(withError error: Error?) } + /// Speech is the primary interface. To use, set the voice and then call `.speak(string: "your string")` public class OSSSpeech: NSObject { // MARK: - Private Properties - private var audioRecorder:AVAudioRecorder? - private var audioFileURL:URL! + /// A user voice recoder + private var audioRecorder: AVAudioRecorder? + /// When we record the user voice and success,so return audio URL options. + private var audioFileURL: URL? /// An object that produces synthesized speech from text utterances and provides controls for monitoring or controlling ongoing speech. private var speechSynthesizer: AVSpeechSynthesizer! @@ -548,13 +562,17 @@ public class OSSSpeech: NSObject { readyToRecord() } - func readyToRecord() + /// When we use the speech function then record the user voice + private func readyToRecord() { let dateFormatter = DateFormatter() dateFormatter.dateFormat = "yyyy-MM-dd HH:mm:ss" let dateString = dateFormatter.string(from: Date()) - self.audioFileURL = getDocumentsDirectory().appendingPathComponent("\(dateString).m4a") + guard audioFileURL == getDocumentsDirectory().appendingPathComponent("\(dateString).m4a") else { + delegate?.didFailToProcessRequest(withError: OSSSpeechKitErrorType.invalidVoiceFilePath.error) + return + } let audioSettings = [AVFormatIDKey: Int(kAudioFormatMPEG4AAC), AVSampleRateKey: 12000, @@ -563,17 +581,15 @@ public class OSSSpeech: NSObject { ] do { - audioRecorder = try AVAudioRecorder(url: audioFileURL, settings: audioSettings) + audioRecorder = try AVAudioRecorder(url: self.audioFileURL!, settings: audioSettings) audioRecorder?.delegate = self audioRecorder?.record() - - } - catch - { - print(error.localizedDescription) + } catch { + delegate?.didFailToProcessRequest(withError: OSSSpeechKitErrorType.invalidRecordVoice.error) } } + /// Get documents directory func getDocumentsDirectory() -> URL { let paths = FileManager.default.urls(for: .documentDirectory, in: .userDomainMask) let documentsDirectory = paths[0] @@ -590,7 +606,7 @@ extension OSSSpeech: SFSpeechRecognitionTaskDelegate, SFSpeechRecognizerDelegate public func speechRecognitionTask(_ task: SFSpeechRecognitionTask, didFinishSuccessfully successfully: Bool) { recognitionTask = nil delegate?.didFinishListening(withText: spokenText) - delegate?.didFinishListening(withAudioFileURL: audioFileURL, withText: spokenText) + delegate?.didFinishListening(withAudioFileURL: audioFileURL!, withText: spokenText) setSession(isRecording: false) } @@ -614,14 +630,11 @@ extension OSSSpeech: SFSpeechRecognitionTaskDelegate, SFSpeechRecognizerDelegate public func speechRecognizer(_ speechRecognizer: SFSpeechRecognizer, availabilityDidChange available: Bool) {} } -//MARK: AVAudioRecorderDelegate -extension OSSSpeech:AVAudioRecorderDelegate -{ +// MARK: AVAudioRecorderDelegate +extension OSSSpeech: AVAudioRecorderDelegate { public func audioRecorderDidFinishRecording(_ recorder: AVAudioRecorder, successfully flag: Bool) { - if flag - { + if flag { audioRecorder?.stop() - print("Audio file save") } } } From 596deeca2fb24e44e8ea1a9a8320f7d33d7a7766 Mon Sep 17 00:00:00 2001 From: jax <273277355@qq.com> Date: Thu, 9 Mar 2023 12:11:07 +0800 Subject: [PATCH 3/7] Can delete voice file MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 1.主要添加删除用户声音的方法/主要添加刪除用戶聲音的方法/Add methods to delete user voice file(s)/Añadir métodos para eliminar el(los) archivo(s) de voz del usuario 2.添加了可以选择候录音的选项/添加了可以選擇錄音的選項/Added the option to wait for recording/Se ha añadido la opción de esperar la grabación 3.修复了一些昆虫/修復了一些昆蟲/Fix some bugs/Corregir algunos errores --- OSSSpeechKit/Classes/OSSSpeech.swift | 59 ++++++++++++++++++++++------ 1 file changed, 48 insertions(+), 11 deletions(-) diff --git a/OSSSpeechKit/Classes/OSSSpeech.swift b/OSSSpeechKit/Classes/OSSSpeech.swift index 4dfa614..ae35e2a 100755 --- a/OSSSpeechKit/Classes/OSSSpeech.swift +++ b/OSSSpeechKit/Classes/OSSSpeech.swift @@ -72,6 +72,8 @@ public enum OSSSpeechKitErrorType: Int { case invalidRecordVoice = -8 /// Voice record file path is Invalid case invalidVoiceFilePath = -9 + /// Voice record file path can not delete + case invalidDeleteVoiceFilePath = -10 /// The OSSSpeechKit error message string. /// @@ -96,6 +98,8 @@ public enum OSSSpeechKitErrorType: Int { return OSSSpeechUtility().getString(forLocalizedName: "OSSSpeechKitErrorType_messageInvalidRecordVoice", defaultValue: "The user voice recoeder service is not working.") case .invalidVoiceFilePath: return OSSSpeechUtility().getString(forLocalizedName: "OSSSpeechKitErrorType_messageInvalidVoiceFolePath", defaultValue: "The user voice file path can not create.") + case .invalidDeleteVoiceFilePath: + return OSSSpeechUtility().getString(forLocalizedName: "OSSSpeechKitErrorType_messageInvalidDeleteVoiceFilePath", defaultValue: "The user voice file path can not delete.") } } @@ -115,7 +119,7 @@ public enum OSSSpeechKitErrorType: Int { .invalidSpeechRequest, .recogniserUnavailble: return OSSSpeechUtility().getString(forLocalizedName: "OSSSpeechKitErrorType_requestTypeInvalidSpeech", defaultValue: "Speech") - case .invalidVoiceFilePath: + case .invalidVoiceFilePath,.invalidDeleteVoiceFilePath: return OSSSpeechUtility().getString(forLocalizedName: "OSSSpeechKitErrorType_requestTypeInvalidFilePath", defaultValue: "File") } } @@ -171,6 +175,8 @@ public protocol OSSSpeechDelegate: AnyObject { func didCompleteTranslation(withText text: String) /// Error handling function. func didFailToProcessRequest(withError error: Error?) + /// When delete some voice file,this delegate will be return success or not + func deleteVoiceFile(withFinish finish: Bool ,withError error: Error?) } @@ -182,7 +188,9 @@ public class OSSSpeech: NSObject { /// A user voice recoder private var audioRecorder: AVAudioRecorder? /// When we record the user voice and success,so return audio URL options. - private var audioFileURL: URL? + private var audioFileURL: URL! + /// User can save audio record or not defult true + public var saveRecord:Bool = true /// An object that produces synthesized speech from text utterances and provides controls for monitoring or controlling ongoing speech. private var speechSynthesizer: AVSpeechSynthesizer! @@ -559,20 +567,19 @@ public class OSSSpeech: NSObject { delegate?.didFailToProcessRequest(withError: OSSSpeechKitErrorType.invalidSpeechRequest.error) } - readyToRecord() + if self.saveRecord { + readyToRecord() + } } /// When we use the speech function then record the user voice private func readyToRecord() { let dateFormatter = DateFormatter() - dateFormatter.dateFormat = "yyyy-MM-dd HH:mm:ss" + dateFormatter.dateFormat = "yyyy-MM-dd-HH:mm:ss" let dateString = dateFormatter.string(from: Date()) - - guard audioFileURL == getDocumentsDirectory().appendingPathComponent("\(dateString).m4a") else { - delegate?.didFailToProcessRequest(withError: OSSSpeechKitErrorType.invalidVoiceFilePath.error) - return - } + + audioFileURL = getDocumentsDirectory().appendingPathComponent("\(dateString)-osKit.m4a") let audioSettings = [AVFormatIDKey: Int(kAudioFormatMPEG4AAC), AVSampleRateKey: 12000, @@ -590,11 +597,39 @@ public class OSSSpeech: NSObject { } /// Get documents directory - func getDocumentsDirectory() -> URL { + private func getDocumentsDirectory() -> URL { let paths = FileManager.default.urls(for: .documentDirectory, in: .userDomainMask) let documentsDirectory = paths[0] return documentsDirectory } + + ///Delete one voice file(s) + public func deleteVoiceFolderItem(url:URL?) { + + let fileManager = FileManager.default + let folderURL = getDocumentsDirectory() + do { + let contents = try fileManager.contentsOfDirectory(at: folderURL, includingPropertiesForKeys: nil,options: .skipsHiddenFiles) + for fileURL in contents { + guard let pathUrl = url else { + if fileURL.absoluteString.contains("-osKit.m4a") { + try fileManager.removeItem(at: fileURL) + } + return + } + if fileURL.absoluteString == pathUrl.absoluteString { + try fileManager.removeItem(at: fileURL) + delegate?.deleteVoiceFile(withFinish: true, withError: nil) + } + } + guard let pathUrl = url else { + delegate?.deleteVoiceFile(withFinish: true, withError: nil) + return + } + } catch { + delegate?.deleteVoiceFile(withFinish: false, withError: OSSSpeechKitErrorType.invalidDeleteVoiceFilePath.error) + } + } } /// Extension to handle the SFSpeechRecognitionTaskDelegate and SFSpeechRecognizerDelegate methods. @@ -606,7 +641,9 @@ extension OSSSpeech: SFSpeechRecognitionTaskDelegate, SFSpeechRecognizerDelegate public func speechRecognitionTask(_ task: SFSpeechRecognitionTask, didFinishSuccessfully successfully: Bool) { recognitionTask = nil delegate?.didFinishListening(withText: spokenText) - delegate?.didFinishListening(withAudioFileURL: audioFileURL!, withText: spokenText) + if saveRecord{ + delegate?.didFinishListening(withAudioFileURL: audioFileURL!, withText: spokenText) + } setSession(isRecording: false) } From a51a3d063cc01866ed77cd09fb3a29e99eed99f5 Mon Sep 17 00:00:00 2001 From: jax <273277355@qq.com> Date: Sat, 11 Mar 2023 11:02:49 +0800 Subject: [PATCH 4/7] Transcription voice file path Transcription voice file path --- OSSSpeechKit/Classes/OSSSpeech.swift | 47 +++++++++++++++++++++++----- 1 file changed, 40 insertions(+), 7 deletions(-) diff --git a/OSSSpeechKit/Classes/OSSSpeech.swift b/OSSSpeechKit/Classes/OSSSpeech.swift index ae35e2a..6b7d8b6 100755 --- a/OSSSpeechKit/Classes/OSSSpeech.swift +++ b/OSSSpeechKit/Classes/OSSSpeech.swift @@ -74,6 +74,8 @@ public enum OSSSpeechKitErrorType: Int { case invalidVoiceFilePath = -9 /// Voice record file path can not delete case invalidDeleteVoiceFilePath = -10 + /// Voice record file path can not transcription + case invalidTranscriptionFilePath = -11 /// The OSSSpeechKit error message string. /// @@ -100,6 +102,8 @@ public enum OSSSpeechKitErrorType: Int { return OSSSpeechUtility().getString(forLocalizedName: "OSSSpeechKitErrorType_messageInvalidVoiceFolePath", defaultValue: "The user voice file path can not create.") case .invalidDeleteVoiceFilePath: return OSSSpeechUtility().getString(forLocalizedName: "OSSSpeechKitErrorType_messageInvalidDeleteVoiceFilePath", defaultValue: "The user voice file path can not delete.") + case .invalidTranscriptionFilePath: + return OSSSpeechUtility().getString(forLocalizedName: "OSSSpeechKitErrorType_messageInvalidTranscriptionFilePath", defaultValue: "Voice record file path can not transcription.") } } @@ -121,6 +125,8 @@ public enum OSSSpeechKitErrorType: Int { return OSSSpeechUtility().getString(forLocalizedName: "OSSSpeechKitErrorType_requestTypeInvalidSpeech", defaultValue: "Speech") case .invalidVoiceFilePath,.invalidDeleteVoiceFilePath: return OSSSpeechUtility().getString(forLocalizedName: "OSSSpeechKitErrorType_requestTypeInvalidFilePath", defaultValue: "File") + case .invalidTranscriptionFilePath: + return OSSSpeechUtility().getString(forLocalizedName: "OSSSpeechKitErrorType_requestTypeInvalidTranscriptionFilePath", defaultValue: "Transcription") } } @@ -177,6 +183,8 @@ public protocol OSSSpeechDelegate: AnyObject { func didFailToProcessRequest(withError error: Error?) /// When delete some voice file,this delegate will be return success or not func deleteVoiceFile(withFinish finish: Bool ,withError error: Error?) + /// Get the content according to the path of the voice file + func voiceFilePathTranscription(withText text:String) } @@ -220,7 +228,7 @@ public class OSSSpeech: NSObject { /// The object used to enable translation of strings to synthsized voice. public var utterance: OSSUtterance? - #if !os(macOS) +#if !os(macOS) /// An AVAudioSession that ensure volume controls are correct in various scenarios private var session: AVAudioSession? @@ -236,7 +244,7 @@ public class OSSSpeech: NSObject { session = newValue } } - #endif +#endif /// This property handles permission authorization. /// This property is intentionally named vaguely to prevent accidental overriding. @@ -416,10 +424,10 @@ public class OSSSpeech: NSObject { private func requestMicPermission() { #if !os(macOS) audioSession.requestRecordPermission {[weak self] allowed in - guard let self = self else { return } + guard let self = self else { return } if !allowed { - self.debugLog(object: self, message: "Microphone permission was denied.") - self.delegate?.authorizationToMicrophone(withAuthentication: .denied) + self.debugLog(object: self, message: "Microphone permission was denied.") + self.delegate?.authorizationToMicrophone(withAuthentication: .denied) return } self.getMicroPhoneAuthorization() @@ -603,7 +611,7 @@ public class OSSSpeech: NSObject { return documentsDirectory } - ///Delete one voice file(s) + /// Delete one voice file(s) public func deleteVoiceFolderItem(url:URL?) { let fileManager = FileManager.default @@ -622,7 +630,7 @@ public class OSSSpeech: NSObject { delegate?.deleteVoiceFile(withFinish: true, withError: nil) } } - guard let pathUrl = url else { + guard url != nil else { delegate?.deleteVoiceFile(withFinish: true, withError: nil) return } @@ -630,6 +638,31 @@ public class OSSSpeech: NSObject { delegate?.deleteVoiceFile(withFinish: false, withError: OSSSpeechKitErrorType.invalidDeleteVoiceFilePath.error) } } + + /// Transcription voice file path + public func recognizeSpeech(filePath: URL,finalBlock:((_ text:String)->Void)? = nil) { + let identifier = voice?.voiceType.rawValue ?? OSSVoiceEnum.UnitedStatesEnglish.rawValue + speechRecognizer = SFSpeechRecognizer(locale: Locale(identifier: identifier)) + guard let audioFile = try? AVAudioFile(forReading: filePath) else { + return + } + let request = SFSpeechURLRecognitionRequest(url: audioFile.url) + speechRecognizer!.recognitionTask(with: request, resultHandler: { (result, error) in + if let result = result { + if result.isFinal { + let transcription = result.bestTranscription.formattedString + if finalBlock != nil { + finalBlock!(transcription) + } + else { + self.delegate?.voiceFilePathTranscription(withText: transcription) + } + } + } else if let error = error { + self.delegate?.didFailToProcessRequest(withError: OSSSpeechKitErrorType.invalidTranscriptionFilePath.error) + } + }) + } } /// Extension to handle the SFSpeechRecognitionTaskDelegate and SFSpeechRecognizerDelegate methods. From 8379a20fc8ff4f54f9a4d0dde835024630562a7a Mon Sep 17 00:00:00 2001 From: jax <273277355@qq.com> Date: Sat, 11 Mar 2023 11:20:52 +0800 Subject: [PATCH 5/7] bug fix --- OSSSpeechKit/Classes/OSSSpeech.swift | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/OSSSpeechKit/Classes/OSSSpeech.swift b/OSSSpeechKit/Classes/OSSSpeech.swift index 6b7d8b6..8cbb5c9 100755 --- a/OSSSpeechKit/Classes/OSSSpeech.swift +++ b/OSSSpeechKit/Classes/OSSSpeech.swift @@ -228,7 +228,7 @@ public class OSSSpeech: NSObject { /// The object used to enable translation of strings to synthsized voice. public var utterance: OSSUtterance? -#if !os(macOS) + #if !os(macOS) /// An AVAudioSession that ensure volume controls are correct in various scenarios private var session: AVAudioSession? @@ -244,7 +244,7 @@ public class OSSSpeech: NSObject { session = newValue } } -#endif + #endif /// This property handles permission authorization. /// This property is intentionally named vaguely to prevent accidental overriding. @@ -605,7 +605,7 @@ public class OSSSpeech: NSObject { } /// Get documents directory - private func getDocumentsDirectory() -> URL { + public func getDocumentsDirectory() -> URL { let paths = FileManager.default.urls(for: .documentDirectory, in: .userDomainMask) let documentsDirectory = paths[0] return documentsDirectory @@ -658,7 +658,7 @@ public class OSSSpeech: NSObject { self.delegate?.voiceFilePathTranscription(withText: transcription) } } - } else if let error = error { + } else if error != nil { self.delegate?.didFailToProcessRequest(withError: OSSSpeechKitErrorType.invalidTranscriptionFilePath.error) } }) From 114bf79025ecbf2662c5bf87b414a2fd6cca2ca3 Mon Sep 17 00:00:00 2001 From: jax <273277355@qq.com> Date: Wed, 22 Mar 2023 02:26:55 +0800 Subject: [PATCH 6/7] Add display sound ripple parameter callback for more UI display --- ...untryLanguageListTableViewController.swift | 8 ++++ Example/Tests/OSSSpeechTests.swift | 8 ++++ OSSSpeechKit/Classes/OSSSpeech.swift | 38 ++++++++++++++++++- 3 files changed, 53 insertions(+), 1 deletion(-) diff --git a/Example/OSSSpeechKit/CountryLanguageListTableViewController.swift b/Example/OSSSpeechKit/CountryLanguageListTableViewController.swift index 440eade..fd171ea 100644 --- a/Example/OSSSpeechKit/CountryLanguageListTableViewController.swift +++ b/Example/OSSSpeechKit/CountryLanguageListTableViewController.swift @@ -112,6 +112,14 @@ extension CountryLanguageListTableViewController { } extension CountryLanguageListTableViewController: OSSSpeechDelegate { + func deleteVoiceFile(withFinish finish: Bool, withError error: Error?) { + + } + + func voiceFilePathTranscription(withText text: String) { + + } + func didFinishListening(withAudioFileURL url: URL, withText text: String) { print("Translation completed: \(text). And user voice file path: \(url.absoluteString)") } diff --git a/Example/Tests/OSSSpeechTests.swift b/Example/Tests/OSSSpeechTests.swift index 23cacb3..1a00e4b 100644 --- a/Example/Tests/OSSSpeechTests.swift +++ b/Example/Tests/OSSSpeechTests.swift @@ -313,6 +313,14 @@ class OSSSpeechTests: XCTestCase { } extension OSSSpeechTests: OSSSpeechDelegate { + func deleteVoiceFile(withFinish finish: Bool, withError error: Error?) { + + } + + func voiceFilePathTranscription(withText text: String) { + + } + func didFinishListening(withAudioFileURL url: URL, withText text: String) { print("Translation completed with text: \(text). And user voice file path: \(url.absoluteString)") } diff --git a/OSSSpeechKit/Classes/OSSSpeech.swift b/OSSSpeechKit/Classes/OSSSpeech.swift index 8cbb5c9..6da8461 100755 --- a/OSSSpeechKit/Classes/OSSSpeech.swift +++ b/OSSSpeechKit/Classes/OSSSpeech.swift @@ -199,7 +199,13 @@ public class OSSSpeech: NSObject { private var audioFileURL: URL! /// User can save audio record or not defult true public var saveRecord:Bool = true - + /// Sound wave values + private var soundSamples = [Float]() + /// Show sound wave timer + private var levelTimer:Timer? + /// Show sound wave value + public var onUpdate: (([Float]) -> Void)? + /// An object that produces synthesized speech from text utterances and provides controls for monitoring or controlling ongoing speech. private var speechSynthesizer: AVSpeechSynthesizer! @@ -460,6 +466,7 @@ public class OSSSpeech: NSObject { node.removeTap(onBus: 0) audioRecorder?.stop() + stopVisualizerTimer() if node.inputFormat(forBus: 0).channelCount == 0 { node.reset() @@ -597,13 +604,41 @@ public class OSSSpeech: NSObject { do { audioRecorder = try AVAudioRecorder(url: self.audioFileURL!, settings: audioSettings) + audioRecorder?.isMeteringEnabled = true audioRecorder?.delegate = self + audioRecorder?.prepareToRecord() audioRecorder?.record() + soundSamples.removeAll() + visualizerTimer() } catch { delegate?.didFailToProcessRequest(withError: OSSSpeechKitErrorType.invalidRecordVoice.error) } } + /// Get sound wave values + private func visualizerTimer() { + let interval:Double = 0.01 + audioRecorder?.record(forDuration: interval) + + levelTimer = Timer(timeInterval: interval, repeats: true, block: { [weak self] _ in + self?.audioRecorder?.updateMeters() + let decibels = self?.audioRecorder?.averagePower(forChannel: 0) ?? -160 + let normalizedValue = pow(10, decibels / 20) + self?.soundSamples.append(normalizedValue) + self?.onUpdate?(self?.soundSamples ?? []) + self?.audioRecorder?.record(forDuration: interval) + }) + + RunLoop.current.add(levelTimer!, forMode: .default) + } + + /// Stop get sound wave + private func stopVisualizerTimer() { + onUpdate?(soundSamples) + soundSamples.removeAll() + levelTimer?.invalidate() + } + /// Get documents directory public func getDocumentsDirectory() -> URL { let paths = FileManager.default.urls(for: .documentDirectory, in: .userDomainMask) @@ -705,6 +740,7 @@ extension OSSSpeech: AVAudioRecorderDelegate { public func audioRecorderDidFinishRecording(_ recorder: AVAudioRecorder, successfully flag: Bool) { if flag { audioRecorder?.stop() + stopVisualizerTimer() } } } From feea042263459a9f7d298dfdff92f27dc7b1e155 Mon Sep 17 00:00:00 2001 From: jax <273277355@qq.com> Date: Sat, 1 Apr 2023 12:27:44 +0800 Subject: [PATCH 7/7] bug fix --- OSSSpeechKit/Classes/OSSSpeech.swift | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/OSSSpeechKit/Classes/OSSSpeech.swift b/OSSSpeechKit/Classes/OSSSpeech.swift index 6da8461..3a839da 100755 --- a/OSSSpeechKit/Classes/OSSSpeech.swift +++ b/OSSSpeechKit/Classes/OSSSpeech.swift @@ -571,9 +571,11 @@ public class OSSSpeech: NSObject { return } if let audioRequest = request { - if recogniser.supportsOnDeviceRecognition { - audioRequest.requiresOnDeviceRecognition = shouldUseOnDeviceRecognition - } + if #available(iOS 13, *) { + if recogniser.supportsOnDeviceRecognition { + audioRequest.requiresOnDeviceRecognition = shouldUseOnDeviceRecognition + } + } recogniser.delegate = self recogniser.defaultTaskHint = recognitionTaskType.taskType recognitionTask = recogniser.recognitionTask(with: audioRequest, delegate: self)