Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion harper-comments/tests/language_support.rs
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ create_test!(merged_lines.ts, 1);
create_test!(javadoc_clean_simple.java, 0);
create_test!(javadoc_complex.java, 5);
create_test!(issue_132.rs, 1);
create_test!(laravel_app.php, 2);
create_test!(laravel_app.php, 3);
create_test!(ignore_shebang_1.sh, 0);
create_test!(ignore_shebang_2.sh, 0);
create_test!(ignore_shebang_3.sh, 0);
Expand All @@ -65,6 +65,8 @@ create_test!(basic.clj, 12);

// Checks that some comments are masked out
create_test!(ignore_comments.rs, 1);
// Both spell_check and split_words linters flag this now
create_test!(ignore_comments.c, 2);
create_test!(ignore_comments.c, 1);
create_test!(ignore_comments.sol, 1);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ public static void main(String[] args) {
}

/**
* This doc has a link in it: {@link this sould b ignor} but not tis
* This doc has a link in it: {@link this sould b ignor} but not thsi
*
* @param name this is anoher test.
*/
Expand Down
6 changes: 3 additions & 3 deletions harper-comments/tests/language_support_sources/jsdoc.ts
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
/** This is a doc comment.
* Since there are no keywords it _sould_ be checked. */
* Since there are no keywords it _shuld_ be checked. */
function test(){}

/** This is also a doc comment.
* @class this sould be unchecked. */
class Clazz { }

/** Here is another example: {@link this sould also b unchecked}. But this _sould_ be.*/
/** Here is another example: {@link this sould also b unchecked}. But this _shuold_ be.*/

/** However, tis should be checked, while {@link tis should not} */
/** However, thsi should be checked, while {@link tis should not} */

/**
* The following examples should be ignored by Harper.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,4 +9,4 @@ int test() {}
*/
int arbitrary() {}

/// Let's aadd a cuple spelling errors for good measure.
/// Let's putin a cuple spelling errors for good measure.
Original file line number Diff line number Diff line change
Expand Up @@ -9,5 +9,5 @@ function test() {}
*/
function arbitrary() {}

// Let's aadd a cuple spelling errors for good measure.
// Let's putin a cuple spelling errors for good measure.

2 changes: 2 additions & 0 deletions harper-core/src/linting/lint_group.rs
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,7 @@ use super::sought_after::SoughtAfter;
use super::spaces::Spaces;
use super::spell_check::SpellCheck;
use super::spelled_numbers::SpelledNumbers;
use super::split_words::SplitWords;
use super::that_than::ThatThan;
use super::that_which::ThatWhich;
use super::the_how_why::TheHowWhy;
Expand Down Expand Up @@ -524,6 +525,7 @@ impl LintGroup {
insert_expr_rule!(SoughtAfter, true);
insert_struct_rule!(Spaces, true);
insert_struct_rule!(SpelledNumbers, false);
insert_struct_rule!(SplitWords, true);
insert_expr_rule!(ThatThan, true);
insert_expr_rule!(ThatWhich, true);
insert_expr_rule!(TheHowWhy, true);
Expand Down
2 changes: 2 additions & 0 deletions harper-core/src/linting/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,7 @@ mod sought_after;
mod spaces;
mod spell_check;
mod spelled_numbers;
mod split_words;
mod suggestion;
mod take_serious;
mod that_than;
Expand Down Expand Up @@ -268,6 +269,7 @@ pub use sought_after::SoughtAfter;
pub use spaces::Spaces;
pub use spell_check::SpellCheck;
pub use spelled_numbers::SpelledNumbers;
pub use split_words::SplitWords;
pub use suggestion::Suggestion;
pub use take_serious::TakeSerious;
pub use that_than::ThatThan;
Expand Down
128 changes: 128 additions & 0 deletions harper-core/src/linting/split_words.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
use std::sync::Arc;

use crate::{CharString, Dictionary, Document, FstDictionary};

Check failure on line 3 in harper-core/src/linting/split_words.rs

View workflow job for this annotation

GitHub Actions / just test-obsidian

unresolved imports `crate::Dictionary`, `crate::FstDictionary`

Check failure on line 3 in harper-core/src/linting/split_words.rs

View workflow job for this annotation

GitHub Actions / just build-obsidian

unresolved imports `crate::Dictionary`, `crate::FstDictionary`

use super::{Lint, LintKind, Linter, Suggestion};

/// A linter that looks for valid words which were joined together without
/// whitespace, such as when a space was accidentally forgotten.
pub struct SplitWords {
    /// Dictionary consulted to decide whether a word, or each half of a
    /// split word, is valid.
    dict: Arc<FstDictionary>,
}

impl SplitWords {
    /// Creates a linter whose lookups go through `FstDictionary::curated()`.
    pub fn new() -> Self {
        let dict = FstDictionary::curated();
        Self { dict }
    }
}

impl Default for SplitWords {
fn default() -> Self {
Self::new()
}
}

impl Linter for SplitWords {
fn lint(&mut self, document: &Document) -> Vec<Lint> {
let mut lints = Vec::new();

let (mut word1, mut word2) = (CharString::new(), CharString::new());

for w in document.tokens() {
if !w.kind.is_word() {
continue;
}

if w.span.len() < 2 {
continue;
}

let w_chars = document.get_span_content(&w.span);

if self.dict.contains_word(w_chars) {
continue;
}

let mut found = false;

for i in 1..w_chars.len() {
let midpoint = w_chars.len() / 2;
let midpoint = if i & 1 == 0 {
midpoint + i / 2
} else {
midpoint - i / 2
};

let first_half = &w_chars[..midpoint];
let second_half = &w_chars[midpoint..];

word1.clear();
word1.extend_from_slice(first_half);
word2.clear();
word2.extend_from_slice(second_half);

if self.dict.contains_exact_word(&word1) && self.dict.contains_exact_word(&word2) {
let mut open = word1.clone();
open.push(' ');
open.extend_from_slice(second_half);

lints.push(Lint {
span: w.span,
lint_kind: LintKind::WordChoice,
suggestions: vec![Suggestion::ReplaceWith(open.to_vec())],
message: "It seems this is actually two words joined together.".to_owned(),
priority: 63,
});
found = true;
}

// The following logic won't be useful unless and until hyphenated words are added to the dictionary

let mut hyphenated = word1.clone();
hyphenated.push('-');
hyphenated.extend_from_slice(second_half);

if self.dict.contains_exact_word(&hyphenated) {
lints.push(Lint {
span: w.span,
lint_kind: LintKind::WordChoice,
suggestions: vec![Suggestion::ReplaceWith(hyphenated.to_vec())],
message: "It seems this is actually two words joined together.".to_owned(),
priority: 63,
});
found = true;
}

if found {
break;
}
}
}
lints
}

fn description(&self) -> &str {
"Accidentally forgetting a space between words is common. This rule looks for valid words that are joined together without whitespace."
}
}

#[cfg(test)]
mod tests {
    use super::SplitWords;
    use crate::linting::tests::{assert_lint_count, assert_suggestion_result};

    /// Each of the five joined number pairs should produce one lint.
    #[test]
    fn flags_each_joined_number_pair() {
        let text = "onetwo threefour fivesix seveneight nineten.";
        assert_lint_count(text, SplitWords::default(), 5);
    }

    /// The suggestion for a joined word inserts the missing space.
    #[test]
    fn suggests_space_for_moreso() {
        let linter = SplitWords::default();
        assert_suggestion_result("moreso", linter, "more so");
    }
}
2 changes: 1 addition & 1 deletion harper-core/tests/test_sources/chinese_lorem_ipsum.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
The following text was generated using [a Chinese lorem ipsum generator](https://pinkylam.me/generator/chinese-lorem-ipsum/).
The following text was generated using [a Chinese lorrm ipsum generator](https://pinkylam.me/generator/chinese-lorem-ipsum/).

食棵支每躲種。奶象打星爪子二細喜才記行在發像原斤!頁固點子衣點豆看身蝴看苗急午公何足,筆娘經色蝶行元香也要。麻了綠尼固世,色北書目登功;因告黑。

Expand Down
2 changes: 1 addition & 1 deletion harper-core/tests/test_sources/pr_504.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,4 @@ These say "This is in Greek/Georgian/Thai" in those languages:
ეს ქართულად.
นี่มันภาษาไทย

This is English with misstakes.
This is English with erors.
Original file line number Diff line number Diff line change
Expand Up @@ -55,13 +55,13 @@
#titleblock(
title: "A fluid dynamic model for glaier flow",
authors: ("Grant Lemons", "John Doe", "Jane Doe"),
abstract: lorem(80),
abstract: lorrm(80),
doc,
)
]

= Introduction
#lorem(300)

= Related ork
= Related wrk
#lorem(200)
Loading