Skip to content

Commit

Permalink
Merge pull request #1 from MSeal/master
Browse files Browse the repository at this point in the history
OS Independence
  • Loading branch information
twrodriguez committed Jun 16, 2014
2 parents afac20e + af75fac commit ead13ed
Show file tree
Hide file tree
Showing 14 changed files with 299 additions and 151 deletions.
6 changes: 6 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -54,3 +54,9 @@ docs/_build/

# Cython-generated
hunspell.c*

# Downloaded content
external
libs/gcc
libs/unix
libs/tmp
5 changes: 0 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,6 @@ cython to link between the C++ and Python code, with some additional features. T
library will cache any corrections, you can use persistent caching by adding the `use_disk_cache`
argument to a Hunspell constructor, otherwise it uses in-memory caching.

NOTE: This repository only works on Unix environments until pthreads can be replicated with an
mthread implementation.

## Dependencies
cacheman -- for persistent caching

Expand All @@ -33,8 +30,6 @@ All unit tests for the repo.
* Object Oriented (with a few exceptions)

## TODO
* Add mthreads alongside pthreads for bulk operations
* Fix blocking issues for Windows usage (see above)
* Remove cacheman dependency

## Author
Expand Down
120 changes: 96 additions & 24 deletions find_library.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,16 @@
import platform
import re
import commands
import sys
import shutil
from subprocess import check_call
from tar_download import download_and_extract

# Script entry point: when this module is run directly, hand the hunspell 1.3.3
# tarball URL and the target name 'external' to download_and_extract (imported
# from tar_download). Judging by the helper's name it fetches and unpacks the
# archive there -- confirm against tar_download.
if __name__ == '__main__':
download_and_extract('http://downloads.sourceforge.net/hunspell/hunspell-1.3.3.tar.gz', 'external')

def get_architecture():
    """Return the architecture tag of the running interpreter.

    The tag is 'x64' when ``sys.maxsize`` exceeds 2**32 and 'x86' otherwise.
    """
    if sys.maxsize > 2**32:
        return 'x64'
    return 'x86'

def form_possible_names(lib, exts):
ret = []
Expand All @@ -27,9 +37,10 @@ def do_search(paths, names=[], test_fn=None):
for filepath in globbed:
if test_fn:
if test_fn(filepath):
return filepath
return filepath, pn
elif os.path.exists(filepath):
return filepath
return filepath, pn
return None, None

def is_library(filepath, acceptable_exts):
# TODO - This is broken for ".dll.a"
Expand All @@ -39,13 +50,17 @@ def is_header(filepath):
return os.path.isfile(filepath)

# Build the list of candidate header directories and return only those that
# currently exist as directories on disk.
# NOTE(review): this span comes from a rendered diff, so removed and added lines
# appear together; the three double-quoted Unix paths inside the list literal
# duplicate the single-quoted ones appended in the non-Windows branch below.
def include_dirs():
# TODO - Windows?
dirs = [
# Current working directory and its 'hunspell' subdirectory.
os.path.abspath(os.curdir),
"/usr/local/include",
"/opt/include",
"/usr/include",
os.path.abspath(os.path.join(os.curdir, 'hunspell')),
# Download path for windows if missing
os.path.abspath(os.path.join(os.curdir, 'external', 'hunspell-1.3.3', 'src')),
]
# System-wide include locations are only added when not running on Windows.
if platform.system() != 'Windows':
dirs.extend([
'/usr/local/include',
'/opt/include',
'/usr/include'
])
# Drop every candidate that is not an existing directory.
return [path for path in dirs if os.path.isdir(path)]

def library_dirs():
Expand All @@ -57,11 +72,14 @@ def library_dirs():
os.path.join(os.environ.get('SystemRoot'), 'system'),
os.path.join(os.environ.get('SystemRoot'), 'system32'),
os.environ.get('SystemRoot'),
# Built binaries home
os.path.join(os.path.dirname(__file__), 'libs', 'msvc')
])
dirs.extend(list(set(os.environ.get('PATH').split(os.path.pathsep))))
dirs = [os.path.abspath(path) for path in dirs]
else:
dirs.extend([
os.path.join(os.path.dirname(__file__), 'libs', 'unix'),
'/usr/local/lib64',
'/usr/local/lib',
'/usr/local/libdata',
Expand Down Expand Up @@ -90,25 +108,73 @@ def get_library_path(lib):
if platform.system() == 'Windows':
acceptable_exts = [
'',
'.dll',
'.dll.a'
'.lib'
]
elif platform.system() == 'Darwin':
acceptable_exts.append('.dylib')

names = form_possible_names(lib, acceptable_exts)

return do_search(paths, names, lambda filepath: is_library(filepath, acceptable_exts))
found_lib, found_path = do_search(paths, names, lambda filepath: is_library(filepath, acceptable_exts))
if found_lib and platform.system() == 'Windows':
found_lib = os.path.splitext(found_lib)[0]
return found_lib, found_path

# Resolve `lib` through get_library_path and reduce the result to a bare linker
# name. Returns a (found_lib, found_path) pair: found_lib is the last path
# component with a trailing library suffix removed (and a leading 'lib' removed
# when not on Windows); found_path is passed through from get_library_path.
# Both values are returned unchanged (falsy) when nothing was found.
# NOTE(review): this span comes from a rendered diff; the three lines right
# after the `def` (the `lib_path` variant) look like the pre-change
# implementation shown next to its replacement -- confirm before relying on them.
def get_library_linker_name(lib):
lib_path = get_library_path('hunspell')
if lib_path:
return re.sub(r'^lib|.dylib$|.so$|.dll$|.dll.a$|.a$', '', lib_path.split(os.path.sep)[-1])
found_lib, found_path = get_library_path(lib)
if not found_lib:
# Try x86 or x64
# i.e. retry with the architecture tag from get_architecture() appended to the name.
found_lib, found_path = get_library_path(lib + get_architecture())

if found_lib:
# NOTE(review): the dots in this pattern are unescaped, so e.g. `.a$` matches
# ANY character followed by a final 'a', not only a literal ".a" suffix.
found_lib = re.sub(r'.dylib$|.so$|.dll$|.dll.a$|.a$', '', found_lib.split(os.path.sep)[-1])
if platform.system() != 'Windows':
# NOTE(review): the trailing `|` adds an empty alternative; because the
# replacement is '', it does not change the result, but it is probably a typo.
found_lib = re.sub(r'^lib|', '', found_lib)

return found_lib, found_path

def package_found(package, include_dirs):
    """Tell whether `package` is present and readable in any include directory.

    Each entry of `include_dirs` is joined with `package`; the result is True
    as soon as one joined path exists and passes an ``os.R_OK`` access check,
    and False when no entry qualifies (including an empty `include_dirs`).
    """
    candidates = (os.path.join(root, package) for root in include_dirs)
    return any(
        os.path.exists(candidate) and os.access(candidate, os.R_OK)
        for candidate in candidates
    )

# Build `package` from the source tree in `directory` using
# ./configure, make and make install, with the install prefix set to a temporary
# 'libs/tmp' directory next to this file. For the 'hunspell' package the built
# shared object is then copied to 'libs/unix/libhunspell.so' next to this file.
# check_call raises subprocess.CalledProcessError when a build step exits non-zero.
# NOTE(review): the leading indentation of this listing was lost, so it cannot be
# told from here whether the final rmtree runs for every package or only inside
# the `package == 'hunspell'` branch -- confirm against the real file.
def build_package(package, directory):
# Temporary install prefix; created on demand.
tmp_lib_path = os.path.abspath(os.path.join(os.path.dirname(__file__), 'libs', 'tmp'))
if not os.path.exists(tmp_lib_path):
os.makedirs(tmp_lib_path)

# The build commands run inside `directory`; remember where we started so the
# working directory can be restored even if a step fails.
olddir = os.getcwd()
try:
os.chdir(directory)
check_call(['./configure', '--prefix='+tmp_lib_path])
check_call('make')
check_call(['make', 'install'])
finally:
os.chdir(olddir)

# Final destination for the built library; created on demand.
lib_path = os.path.abspath(os.path.join(os.path.dirname(__file__), 'libs', 'unix'))
if not os.path.exists(lib_path):
os.makedirs(lib_path)

if package == 'hunspell':
# The versioned file name is hard-coded for the hunspell 1.3 build output.
shutil.copyfile(
os.path.join(tmp_lib_path, 'lib', 'libhunspell-1.3.so.0.0.0'),
os.path.join(lib_path, 'libhunspell.so'))
# Remove the temporary install prefix.
shutil.rmtree(tmp_lib_path)

# Look up the linker name and library directory for `pkg` via
# get_library_linker_name and record them in the build keyword dict `kw`:
# a truthy name is appended to kw['libraries'], a truthy path to
# kw['library_dirs']. Both lists must already exist in `kw`.
# Returns the linker name, which is falsy when the library was not found, so
# callers can use the result as a success flag.
# NOTE(review): the leading indentation of this listing was lost; whether the
# `linker_path` check is nested under the `linker_name` check cannot be told
# from here -- confirm against the real file.
def append_links(pkg, kw):
linker_name, linker_path = get_library_linker_name(pkg)
if linker_name:
kw['libraries'].append(linker_name)
if linker_path:
kw['library_dirs'].append(linker_path)
return linker_name

def pkgconfig(*packages, **kw):
try:
flag_map = {'-I': 'include_dirs', '-L': 'library_dirs', '-l': 'libraries'}
status, response = commands.getstatusoutput("pkg-config --libs --cflags %s" % ' '.join(packages))
status, response = commands.getstatusoutput("pkg-config --libs --cflags {}".format(' '.join(packages)))
if status != 0:
raise Exception(response)
for token in response.split():
Expand All @@ -122,14 +188,20 @@ def pkgconfig(*packages, **kw):
kw['extra_link_args'] = list(set(kw['extra_link_args']))
except:
kw['include_dirs'] = include_dirs()
kw['library_dirs'] = library_dirs()
libraries = [get_library_linker_name(pkg) for pkg in packages]
try:
while True:
libraries.remove(None)
except ValueError:
pass

kw['libraries'] = libraries
kw['library_dirs'] = []
kw['libraries'] = []

if 'hunspell' in packages and not package_found('hunspell', kw['include_dirs']):
# Prepare for hunspell if it's missing
download_and_extract('http://downloads.sourceforge.net/hunspell/hunspell-1.3.3.tar.gz', 'external')

for pkg in packages:
if not append_links(pkg, kw):
if pkg == 'hunspell' and platform.system() != 'Windows':
build_package(pkg, os.path.join('external', 'hunspell-1.3.3'))
if not append_links(pkg, kw):
print "Couldn't find lib dependency after building: {}".format(pkg)
else:
print "Couldn't find lib dependency: {}".format(pkg)

return kw
28 changes: 14 additions & 14 deletions hunspell/hunspell.pxd
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
cdef extern from "hunspell/hunspell.hxx":
cdef cppclass Hunspell:
Hunspell(const char *affpath, const char *dpath, const char *key = NULL)
Hunspell(const char *affpath, const char *dpath, const char *key = NULL) nogil

# load extra dictionaries (only dic files)
int add_dic(const char * dpath, const char * key = NULL)
int add_dic(const char * dpath, const char * key = NULL) nogil

# spell(word) - spellcheck word
# output: 0 = bad word, not 0 = good word
Expand All @@ -14,7 +14,7 @@ cdef extern from "hunspell/hunspell.hxx":
# SPELL_FORBIDDEN = an explicit forbidden word
# root: root (stem), when input is a word with affix(es)

bint spell(const char * word, int * info = NULL, char ** root = NULL)
bint spell(const char * word, int * info = NULL, char ** root = NULL) nogil

# suggest(suggestions, word) - search suggestions
# input: pointer to an array of strings pointer and the (bad) word
Expand All @@ -23,35 +23,35 @@ cdef extern from "hunspell/hunspell.hxx":
# a newly allocated array of strings (*slts will be NULL when number
# of suggestion equals 0.)

int suggest(char*** slst, const char * word)
int suggest(char*** slst, const char * word) nogil

# deallocate suggestion lists

void free_list(char *** slst, int n)
void free_list(char *** slst, int n) nogil

char * get_dic_encoding()
char * get_dic_encoding() nogil

# morphological functions

# analyze(result, word) - morphological analysis of the word

int analyze(char*** slst, const char * word)
int analyze(char*** slst, const char * word) nogil

# stem(result, word) - stemmer function

int stem(char*** slst, const char * word)
int stem(char*** slst, const char * word) nogil

# stem(result, analysis, n) - get stems from a morph. analysis
# example:
# char ** result, result2;
# int n1 = analyze(&result, "words");
# int n2 = stem(&result2, result, n1);

int stem(char*** slst, char ** morph, int n)
int stem(char*** slst, char ** morph, int n) nogil

# generate(result, word, word2) - morphological generation by example(s)

int generate(char*** slst, const char * word, const char * word2)
int generate(char*** slst, const char * word, const char * word2) nogil

# generate(result, word, desc, n) - generation by morph. description(s)
# example:
Expand All @@ -60,22 +60,22 @@ cdef extern from "hunspell/hunspell.hxx":
# int n = generate(&result, "word", &affix, 1);
# for (int i = 0; i < n; i++) printf("%s\n", result[i]);

int generate(char*** slst, const char * word, char ** desc, int n)
int generate(char*** slst, const char * word, char ** desc, int n) nogil

#
# functions for run-time modification of the dictionary
#

# add word to the run-time dictionary

int add(const char * word)
int add(const char * word) nogil

# add word to the run-time dictionary with affix flags of
# the example (a dictionary word): Hunspell will recognize
# affixed forms of the new word, too.

int add_with_affix(const char * word, const char * example)
int add_with_affix(const char * word, const char * example) nogil

# remove word from the run-time dictionary

int remove(const char * word)
int remove(const char * word) nogil
28 changes: 13 additions & 15 deletions hunspell/hunspell.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ from libc.stdio cimport *
from cython.operator cimport dereference as deref

# Use full path for cimport ONLY!
from hunspell.pthread cimport *
from hunspell.thread cimport *

#//////////////////////////////////////////////////////////////////////////////
# General Utilities
Expand Down Expand Up @@ -80,7 +80,7 @@ cdef struct ThreadWorkerArgs:
# Thread Worker Functions
#//////////////////////////////////////////////////////////////////////////////

cdef void *hunspell_suggest_worker(void *argument):
cdef void *hunspell_suggest_worker(void *argument) nogil:
cdef ThreadWorkerArgs args
cdef int i
args = deref(<ThreadWorkerArgs *>argument)
Expand All @@ -90,7 +90,7 @@ cdef void *hunspell_suggest_worker(void *argument):

return NULL

cdef void *hunspell_stem_worker(void *argument):
cdef void *hunspell_stem_worker(void *argument) nogil:
cdef ThreadWorkerArgs args
cdef int i
args = deref(<ThreadWorkerArgs *>argument)
Expand Down Expand Up @@ -249,13 +249,11 @@ cdef class HunspellWrap(object):
# C realm thread dispatcher
#
cdef int _c_bulk_action(self, basestring action, char **word_array, char ***output_array, int n_words, int *output_counts) except +:
cdef pthread_t *threads
cdef ThreadWorkerArgs *thread_args
# Allocate all memory per thread
cdef thread_t **threads = <thread_t **>calloc(self.n_cpus, sizeof(thread_t *))
cdef ThreadWorkerArgs *thread_args = <ThreadWorkerArgs *>calloc(self.n_cpus, sizeof(ThreadWorkerArgs))
cdef int rc, i, stride

# Allocate all memory per thread
thread_args = <ThreadWorkerArgs *>calloc(self.n_cpus, sizeof(ThreadWorkerArgs))
threads = <pthread_t *>calloc(self.n_cpus, sizeof(pthread_t))
if thread_args is NULL or threads is NULL:
raise MemoryError()

Expand Down Expand Up @@ -288,26 +286,26 @@ cdef class HunspellWrap(object):

# Create thread
if action == "stem":
rc = pthread_create(&threads[i], NULL, hunspell_stem_worker, <void *> &thread_args[i])
threads[i] = thread_create(&hunspell_stem_worker, <void *> &thread_args[i])
else: # suggest
rc = pthread_create(&threads[i], NULL, hunspell_suggest_worker, <void *> &thread_args[i])
if rc:
raise OSError(rc, "Could not create pthread")
threads[i] = thread_create(&hunspell_suggest_worker, <void *> &thread_args[i])
if threads[i] is NULL:
raise OSError("Could not create thread")

# wait for each thread to complete
for i from 0 <= i < self.n_cpus:
# block until thread i completes
rc = pthread_join(threads[i], NULL)
rc = thread_join(threads[i])
if rc:
raise OSError(rc, "Could not join pthread")
raise OSError(rc, "Could not join thread")

# Free Hunspell Dict
del thread_args[i].hspell
return 1
finally:
# Free top level stuff
free(thread_args)
free(threads)
dealloc_threads(threads, self.n_cpus)

# Parse the return of a bulk action
cdef void _parse_bulk_results(self, dict ret_dict, list unknown_words, int *output_counts, char ***output_array) except +:
Expand Down
Loading

0 comments on commit ead13ed

Please sign in to comment.