# voice_analysis/voice_analysis.py at main · Sookpeech/voice_analysis · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
# -*- coding: utf-8 -*-

from calendar import c
import pause_detection as pd
import transcribe as ts
import tone_analysis as tone
import chars_analysis as ca
import print_to_user as pr
import wave

import time

def getDurationSec(path):
    """Return the playing time of a WAV file in seconds.

    Args:
        path: The WAV file to inspect; handed unchanged to ``wave.open``.

    Returns:
        The number of audio frames divided by the frame rate, as a float.
    """
    # Use the reader as a context manager so the file handle is released;
    # previously it was opened and never closed.
    with wave.open(path) as audio:
        frames = audio.getnframes()
        rate = audio.getframerate()
    return frames / float(rate)

# Record when the run starts; the elapsed time is printed further down.
start_time = time.time()

# The recording lives at "<wav_file_path><wav_file_title>.wav".
wav_file_title = "practice"
wav_file_path = ".\\audio_files\\"  # directory containing the wav file
wav_file_duration = getDurationSec(wav_file_path + wav_file_title + ".wav")

gender = "W"

# Frequency pair (in Hertz) passed to tone.measurePitch: 155/334 when gender
# is "W", 85/196 for any other value.
# NOTE(review): presumably the pitch floor and ceiling - confirm in tone_analysis.
low_hz, high_hz = (155, 334) if gender == "W" else (85, 196)
shimmer, jitter = tone.measurePitch(
    low_hz, high_hz, "Hertz", wav_file_title, wav_file_path
)


# split wav file using pause_detection
# NOTE(review): the "+ 1" suggests splitByPause returns one less than the
# number of chunks - confirm against pause_detection.
chunk_count = pd.splitByPause(wav_file_path, wav_file_title) + 1

# upload all split wav files to s3
saved_file_count = ts.uploadTos3(wav_file_title, wav_file_path, chunk_count)

# Everything after the upload runs under try/finally so that the transcribe
# jobs and s3 objects are deleted even when a later step raises. On success
# the statements execute in the same order as before.
try:
    # start transcribe
    transcripts = ts.return_transcripts_async(saved_file_count, wav_file_title)

    # preprocessing transcripts
    result = ca.adjustSpacing(transcripts)

    # check speech speed & closing remarks
    words_count = 0  # running total of ca.countNumOfWords over all entries
    closing_remark_count = 0  # running total of ca.checkClosingRemarks
    for sentence in result:
        words_count += ca.countNumOfWords(sentence.checked)
        closing_remark_count += ca.checkClosingRemarks(sentence.checked)

    # end time check (printed label means "elapsed time")
    end_time = time.time()
    print()
    print(f">>> 소요시간: {end_time-start_time}")

    # print notice for user: [1] speaking speed, [2] closing remarks,
    # [3] voice emphasis
    print("[1] 말하기 속도")
    pr.printWPM(words_count, wav_file_duration)
    print("[2] 맺음말")
    pr.printClosingRemarks(closing_remark_count, chunk_count)
    print("[3] 목소리 강조")
    pr.printTone(shimmer*100, jitter*100, gender)
finally:
    # delete s3 files and transcribe job
    print(">>> deleting transcribe jobs and s3 objects")
    try:
        ts.deleteTranscribeJob(wav_file_title, saved_file_count)
    finally:
        # Still attempt the s3 cleanup when deleting the jobs fails.
        ts.deleteS3WavFile(wav_file_title)
print(">>> complete!")