Skip to content

Commit

Permalink
Merge pull request #8 from siro53/middlesub
Browse files Browse the repository at this point in the history
Middlesub
  • Loading branch information
pysan3 authored Jan 7, 2020
2 parents 8bf78ef + f8f17d9 commit e47aa59
Show file tree
Hide file tree
Showing 33 changed files with 1,102 additions and 326 deletions.
21 changes: 13 additions & 8 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,13 +1,18 @@
# exec files
grpwk
distance
result
*.sh
*.py
!analyze.py
.*/
*.o
Makefile
*.exe
*.out

ahocorasick/
*.vscode
*.o
*.exe
# analyze and others
*.py

# shell scripts
*.sh
!compile.sh

# for vscode users
*.vscode
12 changes: 9 additions & 3 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,7 +1,13 @@
# NOTE(review): this fragment mixes an explicit SRCS/OBJS build with a
# find-based FILES build, and no recipe line carries the leading TAB that
# make requires -- presumably lost when the text was extracted; confirm
# against the real Makefile before using it.

# Name of the executable produced by the link step.
PROG = grpwk
# Explicit list of C sources that are compiled into $(PROG).
SRCS = input_win.c ahocorasick.c ahotrie.c constructions.c linked_list.c queue.c middlesub/BM+.c middlesub/grpwk.c middlesub/itoi.c
CC = gcc
# Warning flags handed to every $(CC) compile command below.
CFLAGS = -W -Wall -Wextra -Wconversion -Wshadow
LDFLAGS =
# One object per entry of SRCS (the .c suffix is replaced by .o).
OBJS = $(SRCS:.c=.o)

# Runs gcc on test_distance.c (output binary: distance) and then expands to
# every *.c file under ./ whose name does not contain "test"; find ignores
# what gcc writes into the pipe, so only find's output ends up in FILES.
FILES = $(shell gcc test_distance.c -o distance | find ./ \( -name "*.c" \) \( -not -name "*test*" \))
# Compile and link all of $(FILES) in a single compiler invocation.
default:
$(CC) $(CFLAGS) -o $(PROG) $(FILES)
# Suffix rule: build foo.o from foo.c.
.c.o:
$(CC) $(CFLAGS) -c $< -o $@
# Link every prerequisite object into $(PROG).
$(PROG): $(OBJS)
$(CC) $(LDFLAGS) -o $(PROG) $^
# Remove the objects and the executable; plain rm reports an error if any of them is missing.
clean:
rm $(OBJS) $(PROG)
104 changes: 36 additions & 68 deletions ahocorasick.c
Original file line number Diff line number Diff line change
@@ -1,106 +1,74 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <limits.h>

#include "ahocorasick.h"

// NOTE(review): two opening signatures for aho_init follow each other here
// (a one-argument restrict form and a two-argument form) -- this looks like
// interleaved diff output; confirm which signature is the live one.
void aho_init(ahocorasick * restrict aho) {
// Initialize a variable of type ahocorasick: every byte of *aho is zeroed,
// then the caller's string_s pointer is stored in aho->s (the pointer is
// kept as-is, nothing is copied).
void aho_init(ahocorasick * aho, string_s *s) {
memset(aho, 0, sizeof(ahocorasick));
aho->s = s;
}

// Tear down an ahocorasick object by delegating to the clear helpers.
// NOTE(review): two opening signatures are visible; the restrict-qualified
// one is followed by a call to aho_clear_match_text, the other only by
// aho_clear_trie -- looks like interleaved diff output, confirm.
void aho_destroy(ahocorasick * restrict aho) {
aho_clear_match_text(aho);
void aho_destroy(ahocorasick * aho) {
// Releases the trie through aho_clear_trie.
aho_clear_trie(aho);
}

// List-based variant: wraps (data, len) in a new aho_text via text_init,
// appends it to the aho->head / aho->tail list and returns the id given to
// it, or -1 when the id counter has reached INT_MAX or text_init failed.
// NOTE(review): this body has no closing brace of its own before the next
// signature -- looks like interleaved diff output; confirm.
int aho_add_match_text(ahocorasick * restrict aho, const char *data, int len) {
// Refuse to hand out another id once the counter is at INT_MAX.
if (aho->text_id == INT_MAX) return -1;

// printf("%d ", len);
// The current text_id becomes the new entry's id; the counter is then bumped.
aho_text *text = text_init(aho->text_id++, data, len);
if (text == NULL || text->data == NULL) return -1;

// Empty list: the new entry is both head and tail.
if (aho->head == NULL) {
aho->head = text;
aho->tail = text;
aho->text_count++;
return text->id;
}

// Non-empty list: link the new entry after the current tail.
aho->tail->next = text;
text->prev = aho->tail;
aho->tail = text;
aho->text_count++;
return text->id;
/** Add a key (string_s *s_i) to the tree.
 * return: TRUE when the insertion succeeded, FALSE when capacity is exceeded.
 * NOTE(review): the code below always returns TRUE and does not look at
 * what trie_add reports, so the FALSE case is never produced here.
 */
int aho_add_match_text(ahocorasick * aho, string_s *text) {
// text is passed both as the entry and (through text->str) as the characters to insert.
trie_add(&aho->trie, text, text->str);
return TRUE;
}

// Remove the list entry whose id equals `id`: unlink it, free its data
// buffer and the node itself, decrement text_count.
// Returns TRUE when an entry was removed, FALSE when no entry has that id.
// NOTE(review): the head and tail branches move only aho->head / aho->tail;
// they do not touch the new end node's prev/next link or the opposite end
// pointer -- verify this is safe for one-element lists.
// NOTE(review): this body has no closing brace of its own before the next
// signature -- looks like interleaved diff output; confirm.
int aho_del_match_text(ahocorasick * restrict aho, const int id) {
for (aho_text *iter = aho->head; iter != NULL; iter = iter->next) {
if (iter->id == id) {
if (iter == aho->head) {
aho->head = iter->next;
free(iter->data);
} else if (iter == aho->tail) {
aho->tail = iter->prev;
free(iter->data);
} else {
// Interior node: splice its neighbours together.
iter->prev->next = iter->next;
iter->next->prev = iter->prev;
free(iter->data);
}
free(iter);
aho->text_count--;
return TRUE;
}
}
return FALSE;
/** Insertion helper for fuzzy (approximate) search: used when the string
 * inserted into the tree differs from the string to return on a
 * successful search, so the inserted string need not be kept in memory.
 * data: the fuzzy string, original: the data before characters were masked out.
 * Always returns TRUE; the result of trie_add is not inspected.
 */
int aho_add_similar_text(ahocorasick * aho, char * data, string_s * original) {
trie_add(&aho->trie, original, data);
return TRUE;
}

// Ask aho_del_match_text to delete every id from 0 up to (but excluding)
// the current id counter, then reset that counter to 0.
void aho_clear_match_text(ahocorasick * restrict aho) {
    int id = 0;

    // aho->text_id is re-read on every pass, exactly like a for-loop condition.
    while (id < aho->text_id) {
        aho_del_match_text(aho, id);
        id++;
    }

    aho->text_id = 0;
}

// NOTE(review): two opening signatures for aho_create_trie are visible, and
// the for-statement after the closing brace sits outside any function body
// -- looks like interleaved diff output; confirm.
void aho_create_trie(ahocorasick * restrict aho) {
// Create the trie: hands aho->trie to trie_init.
void aho_create_trie(ahocorasick * aho) {
trie_init(&aho->trie);
}

// Walks the aho->head list and feeds each entry to trie_add, printing an
// error line to stdout whenever trie_add returns zero.
for (aho_text *iter = aho->head; iter != NULL; iter = iter->next) if (!trie_add(&aho->trie, iter)) printf("error (unexpected input [^a-d])\n");
// Build the Aho-Corasick automaton from the trie.
// Thin wrapper: passes the address of aho's embedded trie to trie_connect.
void aho_connect_trie(ahocorasick * aho) {
    trie_connect(&(aho->trie));
}

// Release the trie owned by aho by handing it to trie_destroy.
// NOTE(review): two opening signatures (with and without restrict) are
// visible for this one body -- looks like interleaved diff output; confirm.
void aho_clear_trie(ahocorasick * restrict aho) {
void aho_clear_trie(ahocorasick * aho) {
trie_destroy(&aho->trie);
}

// NOTE(review): this span holds two signatures, two declarations of
// `result` and two callback invocations -- looks like interleaved diff
// output (an aho_match_t-based variant and a linked_list-based variant);
// confirm which lines are live.
int aho_search(ahocorasick * restrict aho, const char *data, int len) {
/** Search function (data: t', len: len(t')).
 * Feeds data[0..len-1] one character at a time to trie_find, starting
 * from the trie root, and invokes the registered callback on every hit.
 * return: the number of positions at which trie_find reported a match.
 */
int aho_search(ahocorasick * aho, char *data, int len) {
int counter = 0;
// Cursor into the automaton; trie_find receives its address, presumably to advance it -- confirm in ahotrie.
aho_node *current = &aho->trie.root;

for (int i=0; i<len; i++) {
aho_match_t match;
aho_text *result = trie_find(&current, data[i]);
linked_list *result = trie_find(&current, data[i]);
// No match ends at this position.
if (result == NULL) continue;

match.id = result->id;
match.len = result->len;
// i is the index of the last matched character, so this is the start index.
match.pos = i - result->len + 1;

counter++;
if (aho->callback_match) aho->callback_match(aho->callback_arg, &match);
// printf("substitute to %s from ", result->data);
// NOTE(review): this call is not guarded by a NULL check on
// aho->callback_match; aho_init zeroes the struct, so a search before a
// callback is registered would call through a null pointer -- verify.
aho->callback_match(aho, result, i);
}

return counter;
}

// NOTE(review): two opening signatures are visible (one taking an extra
// `void *arg` that is stored in aho->callback_arg, one without it) -- looks
// like interleaved diff output; confirm which one is live.
// NOTE(review): a plain `inline` definition in a .c file provides no
// external definition in C99/C11 unless an extern declaration is also
// present in this translation unit -- verify how this links.
inline void aho_register_match_callback(ahocorasick * restrict aho, void (*callback_match)(void *arg, aho_match_t *), void *arg) {
aho->callback_arg = arg;
// Sets the function that is run whenever a search finds a match; the
// pointer is stored in aho->callback_match.
inline void aho_register_match_callback(ahocorasick * aho, void (*callback_match)(ahocorasick * aho, linked_list* l, int pos)) {
aho->callback_match = callback_match;
}

// Dump the list that starts at aho->head to stdout: one line per entry
// showing its id, its text and its length, in list order.
void aho_print_match_text(ahocorasick * restrict aho) {
    aho_text *entry = aho->head;

    while (entry != NULL) {
        printf("id: %d, text:%s, len:%d\n", entry->id, entry->data, entry->len);
        entry = entry->next;
    }
}
// Attach two caller-supplied lists to aho. Both pointers are stored as-is
// in the fields of the same name; nothing is copied or inspected here.
void aho_register_option_lists(ahocorasick * aho, linked_list *t_opt, linked_list *s_count) {
    aho->s_count = s_count;
    aho->t_opt = t_opt;
}
40 changes: 18 additions & 22 deletions ahocorasick.h
Original file line number Diff line number Diff line change
@@ -1,35 +1,31 @@
#pragma once

#include "ahotrie.h"
#include "ahotext.h"
#include "string_info.h"
#include "linked_list.h"

// Three-int record describing a single match.
typedef struct {
    int id;   // identifier of the matched text
    int pos;  // position of the match
    int len;  // length of the matched text
} aho_match_t;

// NOTE(review): two struct headers (anonymous and `struct _ahocorasick`)
// and two `callback_match` members are visible in this span -- looks like
// interleaved diff output; confirm which members are live.
// NOTE(review): `_ahocorasick` is a file-scope identifier that begins with
// an underscore, which the C standard reserves -- consider renaming.
typedef struct {
// Fields of the list-based text registry.
int text_id;
aho_text *head, *tail;
int text_count;
// NOTE(review): no use of AHO_SIZE is visible in this excerpt; its meaning
// cannot be determined from here.
#define AHO_SIZE 45000

typedef struct _ahocorasick {
// Embedded trie / automaton.
aho_trie trie;
// String information pointer associated with this matcher.
string_s *s;

// Function invoked on a match, plus (in the two-argument form) an opaque argument for it.
void (*callback_match)(void *arg, aho_match_t *m);
void *callback_arg;
void (*callback_match)(struct _ahocorasick * aho, linked_list* l, int pos);
// Caller-supplied option lists; their contents are not interpreted anywhere in this excerpt.
linked_list *t_opt;
linked_list *s_count;
} ahocorasick;

// NOTE(review): several functions are declared twice below with different
// parameter lists (restrict / non-restrict forms) -- looks like interleaved
// diff output; conflicting declarations of one name will not compile, so
// confirm which set is live.

// Lifecycle.
void aho_init(ahocorasick * restrict aho);
void aho_destroy(ahocorasick * restrict aho);

// Registration, removal and bulk clearing of match texts.
int aho_add_match_text(ahocorasick * restrict aho, const char *data, int len);
int aho_del_match_text(ahocorasick * restrict aho, const int id);
void aho_clear_match_text(ahocorasick * restrict aho);
void aho_init(ahocorasick * aho, string_s *s);
void aho_destroy(ahocorasick * aho);

// Trie construction / teardown and key insertion.
void aho_create_trie(ahocorasick * restrict aho);
void aho_clear_trie(ahocorasick * restrict aho);
int aho_add_match_text(ahocorasick * aho, string_s *text);
int aho_add_similar_text(ahocorasick * aho, char * data, string_s * original);

// Search over a text of the given length.
int aho_search(ahocorasick * restrict aho, const char *text, int len);
void aho_create_trie(ahocorasick * aho);
void aho_connect_trie(ahocorasick * aho);
void aho_clear_trie(ahocorasick * aho);

// Match callback registration.
void aho_register_match_callback(ahocorasick * restrict aho, void (*callback_match)(void* arg, aho_match_t* m), void *arg);
int aho_search(ahocorasick * aho, char *text, int len);

// Debug printing and option-list registration.
void aho_print_match_text(ahocorasick * restrict aho);
void aho_register_match_callback(ahocorasick * aho, void (*callback_match)(ahocorasick * aho, linked_list* l, int pos));
void aho_register_option_lists(ahocorasick * aho, linked_list *t_opt, linked_list *s_count);
15 changes: 0 additions & 15 deletions ahotext.c

This file was deleted.

10 changes: 0 additions & 10 deletions ahotext.h

This file was deleted.

Loading

0 comments on commit e47aa59

Please sign in to comment.