@@ -7,22 +7,22 @@ import {
7
7
cheerioLoad
8
8
} from './common' ;
9
9
10
- import dictionary from './lists/stopwords.en' ;
11
-
12
- /**
13
- * https://www.ranks.nl/stopwords
14
- * http://xpo6.com/list-of-english-stop-words/
15
- */
16
- const stopWords = new Set ( dictionary ) ;
10
+ import dictionary from './lists/stopwords.fr' ;
11
+ import dictionaryEn from './lists/stopwords.en' ;
17
12
18
13
/**
 * Normalizes a raw token for word counting.
 *
 * Strips every character that is not an ASCII word character, whitespace,
 * or a Latin-1 accented letter (À-Ö, Ø-ö, ø-ÿ), removes underscores, and
 * lower-cases the result.
 *
 * @param {*} input - Value to normalize; coerced to a string.
 *   `null` and `undefined` are treated as an empty string.
 * @returns {string} The lower-cased token with punctuation removed.
 */
function normalize(input) {
  // String(undefined) is the truthy string 'undefined', so a `|| ''`
  // fallback after coercion never fires; handle nullish input up front.
  const text = input == null ? '' : String(input);

  // `\w` only covers [A-Za-z0-9_], so accented letters must be listed
  // explicitly or words such as "été" would be reduced to "t". The ranges
  // deliberately skip × (U+00D7) and ÷ (U+00F7), which are symbols.
  return text.replace(/[^\w\sÀ-ÖØ-öø-ÿ]|_/g, '').toLowerCase();
}
22
17
23
18
async function extractWords ( recv , archivable ) {
24
- const loaded = cheerioLoad ( recv ) ;
25
- return loaded . then ( shard => {
19
+ /**
20
+ * https://www.ranks.nl/stopwords
21
+ * http://xpo6.com/list-of-english-stop-words/
22
+ */
23
+ const stopWordsSet = new Set ( [ ...dictionary , ...dictionaryEn ] ) ;
24
+
25
+ return cheerioLoad ( recv ) . then ( shard => {
26
26
const truncate = archivable . truncate ;
27
27
shard ( truncate ) . remove ( ) ;
28
28
const text = shard . text ( ) . split ( ' ' ) ;
@@ -31,7 +31,7 @@ async function extractWords(recv, archivable) {
31
31
for ( let i = 0 ; i < text . length ; i ++ ) {
32
32
const word = normalize ( text [ i ] ) ;
33
33
const withinCharRange = / ^ [ a - z A - Z À - Ö Ø - ö ø - ÿ ] + $ / . test ( word ) ;
34
- const isNotStopWord = stopWords . has ( word ) === false ;
34
+ const isNotStopWord = stopWordsSet . has ( word ) === false ;
35
35
const hasAtLeastTwo = word . length > 1 ;
36
36
if ( withinCharRange && isNotStopWord && hasAtLeastTwo ) {
37
37
if ( foundOnce . has ( word ) === false ) {
@@ -117,7 +117,7 @@ async function write(file, data = {}, boolOverwrite = true) {
117
117
export default async archivable => {
118
118
const slug = archivable . slug ;
119
119
const path = `archive/${ slug } ` ;
120
- const cacheFile = `${ path } /document .html` ;
120
+ const cacheFile = `${ path } /cache .html` ;
121
121
const file = `${ path } /analyze.json` ;
122
122
return Promise . resolve ( cacheFile )
123
123
. then ( cacheFile => analyze ( cacheFile , archivable ) )
0 commit comments