Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Middlesub #8

Merged
merged 40 commits into from
Jan 7, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
40 commits
Select commit Hold shift + click to select a range
7048d9f
compile files
pysan3 Dec 4, 2019
094baf6
test env created
pysan3 Dec 4, 2019
69a8f5d
file name changed: string_s.h -> string_info.h
pysan3 Dec 4, 2019
344aed1
test
pysan3 Dec 4, 2019
1259ed3
revert Makefile
pysan3 Dec 4, 2019
c090a01
test
pysan3 Dec 5, 2019
08b61b1
analyze
pysan3 Dec 5, 2019
ac88b3d
algo ver1 O(2^n)
pysan3 Dec 6, 2019
b7b4bef
changed aho_text to string_info
pysan3 Dec 6, 2019
2858d33
deleted "const"
pysan3 Dec 6, 2019
58b14cb
bug fixed
pysan3 Dec 6, 2019
5a6bf90
compiler changed
pysan3 Dec 9, 2019
a84ce0a
test_ahocorasick.c fixed
pysan3 Dec 10, 2019
aff8ed3
コメント、説明を追加
pysan3 Dec 10, 2019
d6f2994
small changes
pysan3 Dec 10, 2019
75a0500
small bug fixed
pysan3 Dec 10, 2019
e392cbf
count options
pysan3 Dec 15, 2019
c3addc2
testing...
pysan3 Dec 15, 2019
c55d807
small changes
pysan3 Dec 20, 2019
ce8acdf
aho_search to linked list
pysan3 Dec 20, 2019
9da074f
ahocora finished?
pysan3 Dec 20, 2019
6f56ae5
bug not fixed
pysan3 Dec 21, 2019
7fba9bc
no bugs but slow
pysan3 Dec 21, 2019
8511d4d
function names changed
pysan3 Dec 21, 2019
f1e80c6
ready to connect with BM
pysan3 Dec 21, 2019
eeebf16
準備完了
udemegane Dec 21, 2019
d967041
Merge
pysan3 Dec 21, 2019
20eb382
bm test
pysan3 Dec 21, 2019
9157170
バグ修正
udemegane Dec 22, 2019
bb98d4c
Merge branch 'pysan_tests' of https://github.com/siro53/grpwk into py…
udemegane Dec 22, 2019
fd6aa65
some bug fixed
pysan3 Dec 22, 2019
6c98129
aho added
pysan3 Dec 22, 2019
7c6b918
バグ治った?
udemegane Dec 22, 2019
1aa4156
final ver
pysan3 Dec 22, 2019
ffdbb60
中間計測用(checked by itoi)
pysan3 Dec 22, 2019
025064d
Makefile
pysan3 Dec 22, 2019
68bd06e
提出
udemegane Dec 22, 2019
3998589
Merge branch 'pysan_tests' of https://github.com/siro53/grpwk into py…
udemegane Dec 22, 2019
c78a1e3
Update 4.tar
udemegane Dec 22, 2019
f8f17d9
middle submission
pysan3 Dec 27, 2019
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 13 additions & 8 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,13 +1,18 @@
# exec files
grpwk
distance
result
*.sh
*.py
!analyze.py
.*/
*.o
Makefile
*.exe
*.out

ahocorasick/
*.vscode
*.o
*.exe
# analyze and others
*.py

# shell scripts
*.sh
!compile.sh

# for vscode users
*.vscode
12 changes: 9 additions & 3 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,7 +1,13 @@
# NOTE(review): this span comes from a rendered diff without +/- markers, so
# lines from both versions of the Makefile appear together (presumably the
# FILES/default lines are the removed ones -- confirm against the repository).
# Recipe indentation is not present in this rendering.
# Name of the executable to build.
PROG = grpwk
# Explicit list of the sources that are compiled into $(PROG).
SRCS = input_win.c ahocorasick.c ahotrie.c constructions.c linked_list.c queue.c middlesub/BM+.c middlesub/grpwk.c middlesub/itoi.c
CC = gcc
# Warning flags handed to every compiler invocation below.
CFLAGS = -W -Wall -Wextra -Wconversion -Wshadow
LDFLAGS =
# One object per entry of SRCS (the .c suffix replaced by .o).
OBJS = $(SRCS:.c=.o)

# Runs gcc on test_distance.c (output file: distance) and, in the same
# pipeline, lists every *.c under ./ whose name does not match *test*.
FILES = $(shell gcc test_distance.c -o distance | find ./ \( -name "*.c" \) \( -not -name "*test*" \))
default:
$(CC) $(CFLAGS) -o $(PROG) $(FILES)
# Suffix rule: compile one .c into the matching .o.
.c.o:
$(CC) $(CFLAGS) -c $< -o $@
# Link all objects into the executable.
$(PROG): $(OBJS)
$(CC) $(LDFLAGS) -o $(PROG) $^
# Remove the objects and the executable.
# NOTE(review): plain `rm` reports an error for files that do not exist;
# `rm -f` may be what is wanted here.
clean:
rm $(OBJS) $(PROG)
104 changes: 36 additions & 68 deletions ahocorasick.c
Original file line number Diff line number Diff line change
@@ -1,106 +1,74 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <limits.h>

#include "ahocorasick.h"

// NOTE(review): diff rendering -- two signatures of aho_init appear here with a
// single body; presumably the first is the replaced one. Confirm against the repository.
void aho_init(ahocorasick * restrict aho) {
// Initialize a variable of type ahocorasick: zero the whole struct, then
// store the string_s pointer s in aho->s.
void aho_init(ahocorasick * aho, string_s *s) {
memset(aho, 0, sizeof(ahocorasick));
aho->s = s;
}

// NOTE(review): diff rendering -- the first signature and the
// aho_clear_match_text() call presumably belong to the replaced version.
void aho_destroy(ahocorasick * restrict aho) {
aho_clear_match_text(aho);
// Tear down aho by destroying its trie via aho_clear_trie().
void aho_destroy(ahocorasick * aho) {
aho_clear_trie(aho);
}

// NOTE(review): diff rendering -- a list-based body and a trie-based body of
// aho_add_match_text appear back to back and share the final closing brace;
// presumably the list-based one is the replaced version.
int aho_add_match_text(ahocorasick * restrict aho, const char *data, int len) {
// Refuse to add once the id counter has reached INT_MAX.
if (aho->text_id == INT_MAX) return -1;

// printf("%d ", len);
// Create the record using the next id; fail with -1 if it or its data is NULL.
aho_text *text = text_init(aho->text_id++, data, len);
if (text == NULL || text->data == NULL) return -1;

// Empty list: the new record becomes both head and tail.
if (aho->head == NULL) {
aho->head = text;
aho->tail = text;
aho->text_count++;
return text->id;
}

// Otherwise link the record after the current tail.
aho->tail->next = text;
text->prev = aho->tail;
aho->tail = text;
aho->text_count++;
return text->id;
/**
 * Add a key (string_s *s_i) to the tree.
 * return: TRUE when the insertion succeeds, FALSE when capacity is exceeded.
 * NOTE(review): the body below returns TRUE unconditionally and does not
 * inspect any result of trie_add() -- confirm whether the FALSE case is still
 * meant to exist.
 */
int aho_add_match_text(ahocorasick * aho, string_s *text) {
trie_add(&aho->trie, text, text->str);
return TRUE;
}

// NOTE(review): diff rendering -- the body of aho_del_match_text (presumably
// the removed function) runs straight into aho_add_similar_text and shares its
// closing brace.
// Walks the list from aho->head, frees the record whose id matches and
// returns TRUE; returns FALSE when no record has that id.
int aho_del_match_text(ahocorasick * restrict aho, const int id) {
for (aho_text *iter = aho->head; iter != NULL; iter = iter->next) {
if (iter->id == id) {
// NOTE(review): the head branch only moves aho->head and the tail branch
// only moves aho->tail; neither updates the neighbouring prev/next link or
// the other end of the list -- verify this is sufficient for the callers.
if (iter == aho->head) {
aho->head = iter->next;
free(iter->data);
} else if (iter == aho->tail) {
aho->tail = iter->prev;
free(iter->data);
} else {
// Interior record: bridge its two neighbours.
iter->prev->next = iter->next;
iter->next->prev = iter->prev;
free(iter->data);
}
free(iter);
aho->text_count--;
return TRUE;
}
}
return FALSE;
/**
 * Insertion of strings for fuzzy search (used when the string inserted into
 * the tree differs from the string to be reported on a successful search).
 * Avoids having to keep the inserted string itself in memory.
 * data: the fuzzy string; original: the data before it was damaged
 * ("worm-eaten").
 * Passes original and data to trie_add() and always returns TRUE.
 */
int aho_add_similar_text(ahocorasick * aho, char * data, string_s * original) {
trie_add(&aho->trie, original, data);
return TRUE;
}

/* Call aho_del_match_text() for every id below aho->text_id, then reset
 * the id counter to zero. */
void aho_clear_match_text(ahocorasick * restrict aho) {
    int id = 0;

    /* aho->text_id is re-read before every pass. */
    while (id < aho->text_id) {
        aho_del_match_text(aho, id);
        id++;
    }

    aho->text_id = 0;
}

// NOTE(review): diff rendering -- the first signature and the stand-alone
// for-loop further down presumably belong to the replaced aho_create_trie.
void aho_create_trie(ahocorasick * restrict aho) {
// Create the trie: initialise aho->trie with trie_init().
void aho_create_trie(ahocorasick * aho) {
trie_init(&aho->trie);
}

// Adds every listed text to the trie and prints an error line whenever
// trie_add() reports failure.
for (aho_text *iter = aho->head; iter != NULL; iter = iter->next) if (!trie_add(&aho->trie, iter)) printf("error (unexpected input [^a-d])\n");
// Build the Aho-Corasick automaton from the trie (delegates to trie_connect()).
void aho_connect_trie(ahocorasick * aho) {
trie_connect(&aho->trie);
}

// NOTE(review): diff rendering -- two signatures of aho_clear_trie with one
// body; presumably the `restrict` one is the replaced version.
void aho_clear_trie(ahocorasick * restrict aho) {
// Destroy aho->trie via trie_destroy().
void aho_clear_trie(ahocorasick * aho) {
trie_destroy(&aho->trie);
}

// NOTE(review): diff rendering -- statements of two versions of aho_search are
// interleaved below (aho_text/aho_match_t lines vs. linked_list lines);
// confirm against the repository which ones are current.
int aho_search(ahocorasick * restrict aho, const char *data, int len) {
/**
 * Search function (data: t', len: len(t')).
 * return: the number of matched keys. In the code below the counter is
 * incremented once for each position at which trie_find() returns non-NULL.
 */
int aho_search(ahocorasick * aho, char *data, int len) {
int counter = 0;
// Current automaton state, starting at the trie root; trie_find() receives
// its address together with the next input character.
aho_node *current = &aho->trie.root;

for (int i=0; i<len; i++) {
aho_match_t match;
aho_text *result = trie_find(&current, data[i]);
linked_list *result = trie_find(&current, data[i]);
// No result at this position: move on to the next character.
if (result == NULL) continue;

// Match ends at index i, so it starts at i - len + 1.
match.id = result->id;
match.len = result->len;
match.pos = i - result->len + 1;

counter++;
if (aho->callback_match) aho->callback_match(aho->callback_arg, &match);
// printf("substitute to %s from ", result->data);
// NOTE(review): unlike the guarded call above, this call does not check
// callback_match for NULL; aho_init() zeroes the struct, so searching before
// a callback is registered would call through a null pointer -- confirm.
aho->callback_match(aho, result, i);
}

return counter;
}

// NOTE(review): diff rendering -- the first signature and the callback_arg
// assignment presumably belong to the replaced version.
inline void aho_register_match_callback(ahocorasick * restrict aho, void (*callback_match)(void *arg, aho_match_t *), void *arg) {
aho->callback_arg = arg;
// Sets the function that is executed when a search succeeds: stores
// callback_match in aho->callback_match.
inline void aho_register_match_callback(ahocorasick * aho, void (*callback_match)(ahocorasick * aho, linked_list* l, int pos)) {
aho->callback_match = callback_match;
}

/* Print one line (id, text, len) for every record in the list that starts
 * at aho->head, following the next links until NULL. */
void aho_print_match_text(ahocorasick * restrict aho) {
    aho_text *node = aho->head;

    while (node != NULL) {
        printf("id: %d, text:%s, len:%d\n", node->id, node->data, node->len);
        node = node->next;
    }
}
/* Attach the two caller-supplied lists to aho: t_opt is stored in
 * aho->t_opt and s_count in aho->s_count. Nothing is copied. */
void aho_register_option_lists(ahocorasick * aho, linked_list *t_opt, linked_list *s_count) {
    aho->s_count = s_count;
    aho->t_opt = t_opt;
}
40 changes: 18 additions & 22 deletions ahocorasick.h
Original file line number Diff line number Diff line change
@@ -1,35 +1,31 @@
#pragma once

#include "ahotrie.h"
#include "ahotext.h"
#include "string_info.h"
#include "linked_list.h"

// NOTE(review): diff rendering -- members of two versions of the ahocorasick
// struct are interleaved below (two struct openers, two callback_match
// members); confirm against the repository which ones are current.
// Description of one match: pattern id, start position and length.
typedef struct {
int id, pos, len;
} aho_match_t;

typedef struct {
// Next id to hand out, list of registered texts, and their count.
int text_id;
aho_text *head, *tail;
int text_count;
// NOTE(review): the meaning of 45000 is not visible in this file -- document
// what this size bounds.
#define AHO_SIZE 45000

typedef struct _ahocorasick {
aho_trie trie;
// String info pointer stored by aho_init().
string_s *s;

void (*callback_match)(void *arg, aho_match_t *m);
void *callback_arg;
// Callback taking the searcher itself, a linked_list result and a position.
void (*callback_match)(struct _ahocorasick * aho, linked_list* l, int pos);
// Lists attached through aho_register_option_lists().
linked_list *t_opt;
linked_list *s_count;
} ahocorasick;

// NOTE(review): diff rendering -- prototypes of two versions of the API are
// interleaved below, so several functions are declared twice with different
// parameter lists (presumably the `restrict` ones are the replaced version).
// Lifetime: initialise / tear down a searcher.
void aho_init(ahocorasick * restrict aho);
void aho_destroy(ahocorasick * restrict aho);

// List-based pattern registration: add, delete by id, clear all.
int aho_add_match_text(ahocorasick * restrict aho, const char *data, int len);
int aho_del_match_text(ahocorasick * restrict aho, const int id);
void aho_clear_match_text(ahocorasick * restrict aho);
void aho_init(ahocorasick * aho, string_s *s);
void aho_destroy(ahocorasick * aho);

void aho_create_trie(ahocorasick * restrict aho);
void aho_clear_trie(ahocorasick * restrict aho);
// Trie-based pattern registration: exact key, or fuzzy text mapped to its original.
int aho_add_match_text(ahocorasick * aho, string_s *text);
int aho_add_similar_text(ahocorasick * aho, char * data, string_s * original);

int aho_search(ahocorasick * restrict aho, const char *text, int len);
// Trie lifecycle: create, connect, clear.
void aho_create_trie(ahocorasick * aho);
void aho_connect_trie(ahocorasick * aho);
void aho_clear_trie(ahocorasick * aho);

void aho_register_match_callback(ahocorasick * restrict aho, void (*callback_match)(void* arg, aho_match_t* m), void *arg);
// Scan text of length len; returns an int match count.
int aho_search(ahocorasick * aho, char *text, int len);

void aho_print_match_text(ahocorasick * restrict aho);
// Registration of the match callback and of the two option lists.
void aho_register_match_callback(ahocorasick * aho, void (*callback_match)(ahocorasick * aho, linked_list* l, int pos));
void aho_register_option_lists(ahocorasick * aho, linked_list *t_opt, linked_list *s_count);
15 changes: 0 additions & 15 deletions ahotext.c

This file was deleted.

10 changes: 0 additions & 10 deletions ahotext.h

This file was deleted.

Loading