Skip to content
This repository was archived by the owner on Sep 21, 2021. It is now read-only.

Commit 31aa398

Browse files
committed
only count email addresses as one word
1 parent bbbe6d6 commit 31aa398

File tree

3 files changed

+29
-2
lines changed

3 files changed

+29
-2
lines changed

Diff for: README.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ JavaScript port of [TextStatistics.php](https://github.com/DaveChild/Text-Statis
66
I've done what I think is a reasonably faithful port. Documentation incoming!
77
I removed a lot of the original comments during the port, but seeing as the API remained largely the same, I'll add them in shortly.
88

9-
Same goes for a test suite - I'll get something working in node in a bit. :)
9+
The beginning of a test suite in [Mocha](https://mochajs.org/) is here, covering cleaning the text and some cases of word and sentence counting.
1010

1111
## Installation
1212

Diff for: index.js

+1-1
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
.replace(/[,:;()\/&+]|\-\-/g, " ") // Replace commas, hyphens etc (count them as spaces)
2020
.replace(/[\.!?]/g, ".") // Unify terminators
2121
.replace(/^\s+/, "") // Strip leading whitespace
22+
.replace(/[\.]?(\w+)[\.]?(\w+)@(\w+)[\.](\w+)[\.]?/g, "$1$2@$3$4") // strip periods in email addresses (so they remain counted as one word)
2223
.replace(/[ ]*(\n|\r\n|\r)[ ]*/g, ".") // Replace new lines with periods
2324
.replace(/([\.])[\.]+/g, ".") // Check for duplicated terminators
2425
.replace(/[ ]*([\.])/g, ". ") // Pad sentence terminators
@@ -28,7 +29,6 @@
2829
if(text.slice(-1) != '.') {
2930
text += "."; // Add final terminator, just in case it's missing.
3031
}
31-
3232
return text;
3333
}
3434

Diff for: test/testCleanText.js

+27
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,33 @@ describe('TextStatistics', function() {
102102
});
103103
});
104104

105+
context('stripping periods from email addresses', function() {
106+
it('should replace a single period', function() {
107+
var ts = TextStatistics('textstatistics@example.com');
108+
assert.equal(ts.text, 'textstatistics@examplecom.');
109+
});
110+
111+
it('should replace a single period in the first part', function() {
112+
var ts = TextStatistics('text.statistics@example.com');
113+
assert.equal(ts.text, 'textstatistics@examplecom.');
114+
});
115+
116+
it('should replace two periods in the first part', function() {
117+
var ts = TextStatistics('text.stat.istics@example.com');
118+
assert.equal(ts.text, 'textstatistics@examplecom.');
119+
});
120+
121+
it('should replace periods with a subdomain', function() {
122+
var ts = TextStatistics('textstatistics@test.example.com');
123+
assert.equal(ts.text, 'textstatistics@testexamplecom.');
124+
});
125+
126+
it('should replace periods with a subdomain and before the @', function() {
127+
var ts = TextStatistics('text.statistics@test.example.com');
128+
assert.equal(ts.text, 'textstatistics@testexamplecom.');
129+
});
130+
});
131+
105132
context('replacing non-terminator punctuation', function() {
106133
it('should replace commas with spaces', function() {
107134
var ts = TextStatistics('Hello, hi, friend.');

0 commit comments

Comments
 (0)