Skip to content

Commit

Permalink
Merge pull request #7 from siro53/pysan3
Browse files Browse the repository at this point in the history
ahocorasick
  • Loading branch information
pysan3 authored Dec 3, 2019
2 parents c61255d + 948f102 commit 8bf78ef
Show file tree
Hide file tree
Showing 13 changed files with 136 additions and 42 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
grpwk
distance
result
*.sh
*.py
!analyze.py
Expand Down
13 changes: 3 additions & 10 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,14 +1,7 @@
PROG = grpwk
OBJS = test_ahocorasick.o ahocorasick.o ahotrie.o ahotext.o queue.o
CC = gcc
CFLAGS = -W -Wall -Wextra -Wconversion -Wshadow
LDFLAGS =

.SUFFIXES: .c

$(PROG): $(OBJS)
$(CC) $(LDFLAGS) -o $(PROG) $^
.c.o:
$(CC) $(CFLAGS) -c $<
clean:
rm $(OBJS) $(PROG)
FILES = $(shell gcc test_distance.c -o distance | find ./ \( -name "*.c" \) \( -not -name "*test*" \))
default:
$(CC) $(CFLAGS) -o $(PROG) $(FILES)
2 changes: 0 additions & 2 deletions ahotext.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
#pragma once

#include <stdlib.h>
#include <string.h>

Expand Down
6 changes: 6 additions & 0 deletions ahotrie.c
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,8 @@ void trie_connect(aho_trie * restrict t) {
while (connect_link(tmp, child) == FALSE) tmp = tmp->failure_link;
}
}

que_destroy(&que);
}

void trie_delete(aho_trie * restrict t) {
Expand All @@ -99,6 +101,8 @@ void trie_delete(aho_trie * restrict t) {

if (node->parent == NULL) continue;
}

que_destroy(&que);
}

int find_node(aho_node ** restrict node, const char text) {
Expand Down Expand Up @@ -137,4 +141,6 @@ void trie_print(aho_trie * restrict t) {

printf("%c, refs:%d, fail: %p, output:%p\n", node->data + 'a', node->ref_count, node->failure_link, node->output_link);
}

que_destroy(&que);
}
Binary file added distance
Binary file not shown.
11 changes: 11 additions & 0 deletions example/grpwk.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
#include <stdio.h>
#include <stdlib.h>

#include "grpwk.h"

/*
 * Example grpwk implementation.
 *
 * Ignores the contents of `t` and `s` and returns a heap-allocated,
 * NUL-terminated message reporting how many strings were received.
 *
 * t   - target string (unused by this example)
 * s   - array of pattern strings (unused by this example)
 * len - number of entries in `s`; echoed in the returned message
 *
 * Returns a buffer obtained from malloc() that the caller is responsible for
 * releasing, or NULL if the allocation fails.
 */
char *grpwk(const string_s t, const string_s s[], int len) {
    enum { MSG_CAPACITY = 100 };

    /* Silence unused-parameter warnings; the example only reports `len`. */
    (void)t;
    (void)s;

    char *str = malloc(MSG_CAPACITY);
    if (str == NULL) return NULL;

    /* snprintf bounds the write to the allocated capacity. */
    snprintf(str, MSG_CAPACITY, "I got %d data!", len);

    return str;
}
5 changes: 5 additions & 0 deletions example/grpwk.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
#pragma once

#include "../string_s.h"

char *grpwk(const string_s t, const string_s s[], int len);
45 changes: 45 additions & 0 deletions input_win.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>

#include "string_s.h"

// Change the include below to point at the header of your own grpwk implementation.
#include "example/grpwk.h"

int main_prg(int, char **);

/*
 * Program entry point: runs main_prg() and prints the processor time it
 * consumed, in seconds, to standard output.
 *
 * Returns the status reported by main_prg().
 */
int main(int argc, char **argv)
{
    // clock_t is the standard <time.h> type returned by clock(); __clock_t is
    // a reserved implementation-internal name and is not portable.
    clock_t c_start, c_end;

    c_start = clock();
    int status = main_prg(argc, argv);
    c_end = clock();

    printf("%f\n", (double)(c_end - c_start) / CLOCKS_PER_SEC);
    return status;
}

/*
 * Reads the input file named by argv[1], passes its contents to grpwk(), and
 * writes the returned string followed by a newline to the file named by
 * argv[2].
 *
 * The first whitespace-delimited token of the input becomes `t`; every
 * following token becomes one entry of `s[]` (at most 50000 entries).
 * Tokens longer than 119 characters are split, because string_s.str holds
 * 120 bytes including the terminator.
 *
 * Returns 0. Aborts via assert() if the argument count is wrong or a file
 * cannot be opened.
 */
int main_prg(int argc, char **argv)
{
    // Static storage: 50000 string_s entries (120-byte buffer + int each) are
    // roughly 6 MB, which is too large for many default stacks.
    static string_s s[50000];
    const int max_entries = (int)(sizeof s / sizeof s[0]);
    string_s t;

    assert(argc == 3);
    FILE *fp_in = fopen(argv[1], "r");
    assert(fp_in != NULL);
    FILE *fp_out = fopen(argv[2], "w");
    assert(fp_out != NULL);

    // input t (width 119 keeps the read inside the 120-byte buffer)
    if (fscanf(fp_in, "%119s", t.str) != 1) t.str[0] = '\0';
    t.len = (int)strlen(t.str);

    // input s[]
    // The length is stored before the index advances; doing both in a single
    // expression would read and modify `i` without sequencing.
    int i = 0;
    while (i < max_entries && fscanf(fp_in, "%119s", s[i].str) == 1)
    {
        s[i].len = (int)strlen(s[i].str);
        i++;
    }

    // NOTE(review): ownership of the buffer returned by grpwk() is not
    // documented in grpwk.h, so it is intentionally not freed here.
    const char *result = grpwk(t, s, i);
    fprintf(fp_out, "%s\n", result != NULL ? result : "");

    fclose(fp_in);
    fclose(fp_out);

    return 0;
}
27 changes: 16 additions & 11 deletions queue.c
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,15 @@

/*
 * Initializes an empty queue: allocates storage for MAX_SIZE elements and
 * resets the front/back indices and the element count to zero.
 *
 * The function has no way to report failure to its caller, so it terminates
 * the program if the storage cannot be allocated rather than leaving a queue
 * whose data pointer is NULL.
 */
void que_init(queue *que)
{
    que->data = malloc(sizeof *que->data * MAX_SIZE);
    if (que->data == NULL)
    {
        fprintf(stderr, "que_init: out of memory\n");
        abort();
    }

    que->front = 0;
    que->back = 0;
    que->size = 0;
}

void que_push(queue *que, QUE_TYPE value)
{
if (que->back >= MAX_SIZE)
if (que->size >= MAX_SIZE)
{
printf("キューの容量がいっぱいです\n");
return;
Expand All @@ -24,7 +25,7 @@ void que_push(queue *que, QUE_TYPE value)

QUE_TYPE que_pop(queue *que)
{
if (que->front >= que->back)
if (que->size == 0)
{
printf("キューの中身は空です\n");
return NULL;
Expand All @@ -35,13 +36,17 @@ QUE_TYPE que_pop(queue *que)
return top;
}

void printQue(queue *que)
{
int i;
printf("data : ");
for (i = que->front; i < que->back; i++)
{
printf("%p ", que->data[i]);
}
printf("\n");
/*
 * Releases the storage owned by the queue.
 *
 * The data pointer is cleared and the indices and count are reset so that a
 * repeated call is harmless (free(NULL) is a no-op) and no dangling pointer
 * is left behind.
 */
void que_destroy(queue *que) {
    free(que->data);
    que->data = NULL;
    que->front = 0;
    que->back = 0;
    que->size = 0;
}

// void printQue(queue *que)
// {
// int i;
// printf("data : ");
// for (i = que->front; i < que->back; i++)
// {
// printf("%p ", que->data[i]);
// }
// printf("\n");
// }
5 changes: 3 additions & 2 deletions queue.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@

typedef struct
{
QUE_TYPE data[MAX_SIZE];
QUE_TYPE *data;
int front;
int back;
int size;
Expand All @@ -19,6 +19,7 @@ typedef struct
void que_init(queue *);
void que_push(queue *, QUE_TYPE);
QUE_TYPE que_pop(queue *);
void printQue(queue *);
void que_destroy(queue *);
// void printQue(queue *);

#endif
6 changes: 6 additions & 0 deletions string_s.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
#pragma once

/*
 * Fixed-capacity string record: a character buffer paired with an explicitly
 * stored length.
 */
typedef struct string_s {
    char str[120]; /* character storage */
    int len;       /* length value kept alongside str; set by the code that fills it */
} string_s;
33 changes: 16 additions & 17 deletions test_ahocorasick.c
Original file line number Diff line number Diff line change
Expand Up @@ -5,21 +5,21 @@

int test_trie(void) {
char *s[] = {
// "ab",
// "abc",
// "aaaaabddbdaaacccaacbaabacbaadb",
"ab",
"abc",
"aaaaabddbdaaacccaacbaabacbaadb",
"aaacdbdbcbcdbdbadaacbaadbdbdaacaaaaaadacababdadddacaacbaaaabdacdadadbabbbddaaddaaaaa",
"abacbadaadbcaaabaaacbbaabadbababdbcadbd",
// "dbbbdaabaaabaabab",
// "daaadaaa",
// "dbaac",
// "ad",
// "bdadaabbaaadaabdd",
// "ddaabdd",
// "bdbabb",
// "abdb",
// "adbab",
// "aaabaaabcadba",
"dbbbdaabaaabaabab",
"daaadaaa",
"dbaac",
"ad",
"bdadaabbaaadaabdd",
"ddaabdd",
"bdbabb",
"abdb",
"adbab",
"aaabaaabcadba",
};
aho_text text[sizeof(s)/sizeof(s[0])];
for (int i=0; i<sizeof(s)/sizeof(s[0]); i++) {
Expand All @@ -32,7 +32,7 @@ int test_trie(void) {
for (int i=0; i<sizeof(text)/sizeof(text[0]); i++) if (!trie_add(t, &text[i])) printf("error (unexpected input [^a-d]\n");
trie_connect(t);

// trie_print(t);
trie_print(t);

return 0;
}
Expand Down Expand Up @@ -65,7 +65,6 @@ int test_ahocora(void) {
printf("total match: %d\n", aho_search(&aho, test, strlen(test)));

aho_destroy(&aho);
printf("end of func\n");

return 0;
}
Expand All @@ -78,8 +77,8 @@ int test_input(void) {
FILE *fp = fopen("data/dat0_in", "r");
fscanf(fp, "%s", s);
while (~fscanf(fp, "%s", s))
printf("%d\n", aho_add_match_text(&aho, s, strlen(s)));
printf("here\n");
if (strlen(s) >= 60)
aho_add_match_text(&aho, s, strlen(s));

fp = fopen("data/dat0_ref", "r");
fscanf(fp, "%s", s);
Expand Down
23 changes: 23 additions & 0 deletions test_distance.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
 * Compares the first whitespace-delimited token of two files and prints a
 * distance figure for them.
 *
 * Usage: <program> <file_a> <file_b>
 *
 * The printed value is the number of positions at which the two tokens
 * differ over their common length, plus the difference in their lengths.
 * The output label says "edit distance", but the computation is position-wise
 * and not a true edit distance.
 *
 * Returns 0 on success, EXIT_FAILURE on a usage error or if a file cannot be
 * opened.
 */
int main(int argc, char *argv[]) {
    /* Static so the two 500000-byte buffers do not live on the stack. */
    static char in[500000], out[500000];

    if (argc != 3) {
        fprintf(stderr, "usage: %s <file_a> <file_b>\n", argc > 0 ? argv[0] : "distance");
        return EXIT_FAILURE;
    }

    FILE *fp_in = fopen(argv[1], "r");
    if (fp_in == NULL) {
        perror(argv[1]);
        return EXIT_FAILURE;
    }
    FILE *fp_out = fopen(argv[2], "r");
    if (fp_out == NULL) {
        perror(argv[2]);
        fclose(fp_in);
        return EXIT_FAILURE;
    }

    /* Width 499999 keeps each read inside its buffer; an empty file yields "". */
    if (fscanf(fp_in, "%499999s", in) != 1) in[0] = '\0';
    if (fscanf(fp_out, "%499999s", out) != 1) out[0] = '\0';

    fclose(fp_in);
    fclose(fp_out);

    const size_t in_len = strlen(in);
    const size_t out_len = strlen(out);
    const size_t common = in_len < out_len ? in_len : out_len;

    /* Compare only where both tokens have characters, so neither buffer is
     * read past its terminator. */
    int counter = 0;
    for (size_t i = 0; i < common; i++) counter += (in[i] != out[i]);

    /* Add the length difference without subtracting unsigned values in the
     * wrong order. */
    counter += (int)(in_len > out_len ? in_len - out_len : out_len - in_len);

    printf("edit distance: %d\n", counter);

    return 0;
}

0 comments on commit 8bf78ef

Please sign in to comment.