Skip to content
This repository has been archived by the owner on May 15, 2024. It is now read-only.

Implement Trie Filter for special words #10

Open
wants to merge 7 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 0 additions & 11 deletions .gitignore

This file was deleted.

94 changes: 29 additions & 65 deletions pom.xml
100644 → 100755
Original file line number Diff line number Diff line change
@@ -1,66 +1,30 @@
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>

<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<scm.uri>scm:git:[email protected]:rkapsi/patricia-trie.git</scm.uri>
</properties>

<groupId>org.ardverk</groupId>
<artifactId>patricia-trie</artifactId>
<version>0.7-SNAPSHOT</version>
<packaging>jar</packaging>
<url>https://github.com/rkapsi/patricia-trie</url>

<distributionManagement>
<repository>
<id>ardverk-release</id>
<url>scp://mvn.ardverk.org/repository/release</url>
</repository>
<snapshotRepository>
<id>ardverk-snapshot</id>
<url>scp://mvn.ardverk.org/repository/snapshot</url>
</snapshotRepository>
</distributionManagement>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>

<scm>
<connection>${scm.uri}</connection>
<developerConnection>${scm.uri}</developerConnection>
<url>${project.url}</url>
</scm>

<build>
<extensions>
<extension>
<groupId>org.apache.maven.wagon</groupId>
<artifactId>wagon-ssh</artifactId>
<version>1.0</version>
</extension>
</extensions>

<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<configuration>
<source>1.6</source>
<target>1.6</target>
</configuration>
</plugin>

<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-release-plugin</artifactId>
</plugin>
</plugins>
</build>

<dependencies>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>[4.8.2,)</version>
<scope>test</scope>
</dependency>
</dependencies>
</project>
<groupId>com.kim</groupId>
<artifactId>patricia-trie</artifactId>
<version>0.0.1-SNAPSHOT</version>
<packaging>jar</packaging>

<name>patricia-trie</name>
<url>http://maven.apache.org</url>

<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
</properties>

<dependencies>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>4.7</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>commons-io</groupId>
<artifactId>commons-io</artifactId>
<version>2.4</version>
<scope>test</scope>
</dependency>
</dependencies>
</project>
Empty file modified src/main/java/org/ardverk/collection/AbstractKeyAnalyzer.java
100644 → 100755
Empty file.
Empty file modified src/main/java/org/ardverk/collection/AbstractPatriciaTrie.java
100644 → 100755
Empty file.
Empty file modified src/main/java/org/ardverk/collection/AbstractTrie.java
100644 → 100755
Empty file.
Empty file modified src/main/java/org/ardverk/collection/ByteArrayKeyAnalyzer.java
100644 → 100755
Empty file.
Empty file modified src/main/java/org/ardverk/collection/ByteKeyAnalyzer.java
100644 → 100755
Empty file.
Empty file modified src/main/java/org/ardverk/collection/CharArrayKeyAnalyzer.java
100644 → 100755
Empty file.
Empty file modified src/main/java/org/ardverk/collection/CharacterKeyAnalyzer.java
100644 → 100755
Empty file.
Empty file modified src/main/java/org/ardverk/collection/Cursor.java
100644 → 100755
Empty file.
Empty file modified src/main/java/org/ardverk/collection/DefaultKeyAnalyzer.java
100644 → 100755
Empty file.
Empty file modified src/main/java/org/ardverk/collection/IntegerKeyAnalyzer.java
100644 → 100755
Empty file.
Empty file modified src/main/java/org/ardverk/collection/Key.java
100644 → 100755
Empty file.
Empty file modified src/main/java/org/ardverk/collection/KeyAnalyzer.java
100644 → 100755
Empty file.
Empty file modified src/main/java/org/ardverk/collection/LongKeyAnalyzer.java
100644 → 100755
Empty file.
Empty file modified src/main/java/org/ardverk/collection/PatriciaTrie.java
100644 → 100755
Empty file.
Empty file modified src/main/java/org/ardverk/collection/ShortKeyAnalyzer.java
100644 → 100755
Empty file.
Empty file modified src/main/java/org/ardverk/collection/StringKeyAnalyzer.java
100644 → 100755
Empty file.
Empty file modified src/main/java/org/ardverk/collection/Trie.java
100644 → 100755
Empty file.
Empty file modified src/main/java/org/ardverk/collection/Tries.java
100644 → 100755
Empty file.
11 changes: 11 additions & 0 deletions src/main/java/org/ardverk/filter/TrieCounter.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
package org.ardverk.filter;

/**
 * Result of a filtering pass: a text together with a count.
 *
 * @author kim 2014-09-02
 */
public interface TrieCounter {

    /**
     * @return the text carried by this result
     */
    String source();

    /**
     * @return the count carried by this result
     */
    int filtered();
}
9 changes: 9 additions & 0 deletions src/main/java/org/ardverk/filter/TrieFilter.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
package org.ardverk.filter;

/**
 * Contract for a filter that processes a text and reports the outcome as a
 * {@link TrieCounter}.
 *
 * @author kim 2014-09-02
 */
public interface TrieFilter {

    /**
     * Runs the filter over the given text.
     *
     * @param source the text to process
     * @return the outcome of the pass
     * @throws Exception if the implementation fails to process the text
     */
    TrieCounter filter(String source) throws Exception;
}
123 changes: 123 additions & 0 deletions src/main/java/org/ardverk/filter/impl/SimpleTrieFilter.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
package org.ardverk.filter.impl;

import java.util.Map;
import java.util.concurrent.atomic.AtomicInteger;

import org.ardverk.collection.PatriciaTrie;
import org.ardverk.collection.StringKeyAnalyzer;
import org.ardverk.filter.TrieCounter;
import org.ardverk.filter.TrieFilter;

/**
 * {@link TrieFilter} that scans a text from left to right and replaces every
 * occurrence of a configured word with its configured replacement. The words
 * and replacements are held in a {@link PatriciaTrie}.
 *
 * <p>
 * At each position the candidate fragment grows one character at a time and
 * the first exact key match wins, so when one key is a prefix of another the
 * shorter key is the one that gets replaced.
 *
 * <p>
 * Text inserted as a replacement is never scanned again. This keeps the pass
 * finite even when a replacement itself contains a configured word.
 *
 * @author kim 2014-09-02
 */
public class SimpleTrieFilter implements TrieFilter {

    /** Length of the first (shortest) fragment probed at each position. */
    private final static int ONE_STEP = 1;

    private final PatriciaTrie<String, String> trie = new PatriciaTrie<String, String>(StringKeyAnalyzer.CHAR);

    /**
     * Creates a filter from a word-to-replacement mapping.
     *
     * @param properies
     *            words to look for (keys) and the text each one is replaced
     *            with (values); all entries are copied into the internal trie
     */
    public SimpleTrieFilter(Map<String, String> properies) {
        this.trie.putAll(properies);
    }

    /**
     * Replaces every configured word found in {@code source}.
     *
     * @param source
     *            the text to filter
     * @return the filtered text together with the number of replacements made
     * @throws NullPointerException
     *             if {@code source} is {@code null}
     * @throws Exception
     *             declared to match {@link TrieFilter#filter(String)}; this
     *             method contains no {@code throw} of a checked exception
     */
    public MergedTrieCounter filter(String source) throws Exception {
        // Method-local buffer: no need for the synchronised StringBuffer.
        StringBuilder buffer = new StringBuilder(source);
        int filtered = 0;
        int index = 0;
        while (index < buffer.length()) {
            Map.Entry<String, String> matched = this.match(buffer, index);
            if (matched == null) {
                index++;
                continue;
            }
            String replacement = matched.getValue();
            buffer.replace(index, index + matched.getKey().length(), replacement);
            filtered++;
            // Continue right after the inserted text. Advancing by exactly one
            // character would either re-filter the replacement (unbounded
            // growth when it contains a key) or, for an empty replacement,
            // skip the character that just moved into this position.
            index += replacement.length();
        }
        return new MergedTrieCounter(buffer.toString(), filtered);
    }

    /**
     * Looks for a configured word that starts exactly at {@code index}.
     *
     * @return the entry whose key equals the shortest matching fragment, or
     *         {@code null} when no configured word starts at this position
     */
    private Map.Entry<String, String> match(StringBuilder buffer, int index) {
        int remaining = buffer.length() - index;
        for (int step = SimpleTrieFilter.ONE_STEP; step <= remaining; step++) {
            String fragment = buffer.substring(index, index + step);
            Map.Entry<String, String> selected = this.trie.select(fragment);
            if (selected == null) {
                // Nothing to select from, e.g. an empty trie.
                return null;
            }
            String key = selected.getKey();
            if (key.equals(fragment)) {
                return selected;
            }
            if (!key.startsWith(fragment)) {
                // The selected key does not extend this fragment, so growing
                // the fragment further is not attempted.
                return null;
            }
        }
        return null;
    }

    /**
     * {@link TrieCounter} that can be extended after creation: more text can
     * be appended and the count can be increased, either directly or by
     * merging another counter into this one.
     */
    protected class MergedTrieCounter implements TrieCounter {

        private final StringBuffer buffer = new StringBuffer();

        private final AtomicInteger counter = new AtomicInteger();

        /**
         * @param source
         *            initial text
         * @param counter
         *            initial count
         */
        protected MergedTrieCounter(String source, int counter) {
            this.buffer.append(source);
            this.counter.addAndGet(counter);
        }

        /**
         * Appends the other counter's text to this one and adds its count.
         *
         * @return this counter, for chaining
         */
        public MergedTrieCounter merge(MergedTrieCounter counter) {
            this.append(counter.source());
            this.incr(counter.filtered());
            return this;
        }

        /**
         * Increases the count by one.
         *
         * @return this counter, for chaining
         */
        public MergedTrieCounter incr() {
            this.counter.incrementAndGet();
            return this;
        }

        /**
         * Increases the count by {@code count}.
         *
         * @return this counter, for chaining
         */
        public MergedTrieCounter incr(int count) {
            this.counter.addAndGet(count);
            return this;
        }

        /**
         * Appends {@code buffer} to the text held by this counter.
         *
         * @return this counter, for chaining
         */
        public MergedTrieCounter append(String buffer) {
            this.buffer.append(buffer);
            return this;
        }

        /**
         * @return the text accumulated so far
         */
        @Override
        public String source() {
            return this.buffer.toString();
        }

        /**
         * @return the count accumulated so far
         */
        @Override
        public int filtered() {
            return this.counter.get();
        }
    }
}
Empty file.
Empty file modified src/test/java/org/ardverk/collection/SerializationTest.java
100644 → 100755
Empty file.
Loading