-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathparseCMU.js
59 lines (54 loc) · 1.27 KB
/
parseCMU.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
// GLOBAL VARS
// Node's file-system module; readCmudictFile uses it to load the dictionary.
var fs = require("fs");
// Full text of ./cmudict.txt, read synchronously when this module is loaded.
// The relative path is resolved against the process's working directory.
var cmudictFile = readCmudictFile('./cmudict.txt');
// Reassigned by formatData to an array of word lists indexed by syllable count.
var bySyll = [];
// Returned by the exported function under the key `byWord`.
var byWords = {};
//FUNCTIONS
/**
 * Synchronously read a file and return its contents as a string.
 *
 * @param {string} file - Path of the file to read.
 * @returns {string} The file contents, decoded with Buffer#toString()'s
 *   default encoding.
 * @throws Whatever fs.readFileSync throws (e.g. when the file is missing).
 */
function readCmudictFile(file){
  var contents = fs.readFileSync(file);
  return contents.toString();
}
/**
 * Parse CMU-dictionary text and publish two lookups in module-level state:
 *   - bySyll:  array of 8 arrays; bySyll[n] lists the words with n syllables
 *              (words with more than 7 syllables are left out of this array).
 *   - byWords: object mapping each word to its syllable count.
 *
 * Each entry line is "<WORD><whitespace><phonemes separated by spaces>".
 * Blank lines, ";;;" comment lines and lines without a pronunciation are
 * skipped. Alternate pronunciations ("WORD(1)") are stored under the cropped
 * word, so a word may appear more than once in bySyll and byWords keeps the
 * count of the last variant seen.
 *
 * @param {string|Buffer} data - Raw dictionary text.
 * @returns {undefined} Results are written to `bySyll` and `byWords`.
 */
function formatData(data){
  var MAX_SYLLABLES = 7;
  var syllArr = [];
  var wordObj = {};

  // One bucket per syllable count, 0 through MAX_SYLLABLES inclusive.
  for (var i = 0; i <= MAX_SYLLABLES; i++) {
    syllArr[i] = [];
  }

  data.toString().split(/\r?\n/).forEach(function(line){
    var entry = line.trim();

    // Skip blank lines (e.g. after a trailing newline) and ";;;" comments.
    if (entry === "" || entry.indexOf(";;;") === 0) {
      return;
    }

    // The headword ends at the first whitespace; everything after it is the
    // pronunciation. This works for both one- and two-space separators.
    var separator = entry.search(/\s/);
    if (separator === -1) {
      return; // headword with no pronunciation: nothing to count
    }

    var word = cropWord(entry.slice(0, separator));
    var pronunciation = entry.slice(separator).trim();
    var syllcount = countSyllables(pronunciation);

    // Add to lookup objects.
    if (syllcount <= MAX_SYLLABLES) {
      syllArr[syllcount].push(word);
    }
    wordObj[word] = syllcount;
  });

  bySyll = syllArr;
  // Assign to the declared module variable `byWords`; writing to `byWord`
  // would create an implicit global and leave the exported map empty.
  byWords = wordObj;
}
/**
 * Strip a trailing alternate-pronunciation marker such as "(1)" or "(12)"
 * from a dictionary headword.
 *
 * Only a parenthesised run of digits at the very end is removed, so
 * multi-digit markers are handled and a headword that merely ends in ")"
 * is returned unchanged.
 *
 * @param {string} word - Headword as it appears in the dictionary.
 * @returns {string} The headword without its variant marker.
 */
function cropWord(word) {
  return word.replace(/\(\d+\)$/, "");
}
/**
 * Count the space-separated tokens of a pronunciation that contain at least
 * one digit. Callers in this file use the result as the syllable count.
 *
 * @param {string} pronunciation - Phoneme tokens separated by single spaces.
 * @returns {number} Number of tokens containing a digit.
 */
function countSyllables(pronunciation) {
  var digitPattern = /\d/;
  var numbered = pronunciation.split(" ").filter(function(phoneme) {
    return digitPattern.test(phoneme);
  });
  return numbered.length;
}
// EXPORT
module.exports = function() {
formatData(cmudictFile);
return {
bySyll: bySyll,
byWord: byWords
};
}