|
1 | | -import logging |
2 | 1 | import sys |
3 | | -from pathlib import Path |
4 | | - |
5 | | -from clldutils.clilib import ArgumentParser, command, ParserError |
| 2 | +import logging |
| 3 | +import pathlib |
| 4 | +import argparse |
6 | 5 |
|
7 | 6 | from segments import Tokenizer, Profile |
8 | 7 |
|
9 | 8 |
|
10 | | -def _write(args, line): |
11 | | - print('%s' % line) |
12 | | - |
13 | | - |
14 | | -def _read(args): |
15 | | - string = args.args[0] if args.args else sys.stdin.read() |
16 | | - if not isinstance(string, str): |
17 | | - string = string.decode(args.encoding) |
18 | | - return string.strip() |
| 9 | +class ParserError(Exception): |
| 10 | + pass |
19 | 11 |
|
20 | 12 |
|
21 | | -@command() |
22 | 13 | def tokenize(args): |
23 | 14 | """ |
24 | 15 | Tokenize a string (passed as argument or read from stdin) |
25 | 16 |
|
26 | 17 | segments [--profile=PATH/TO/PROFILE] tokenize [STRING] |
27 | 18 | """ |
28 | | - if args.profile and not Path(args.profile).exists(): # pragma: no cover |
| 19 | + if args.profile and not pathlib.Path(args.profile).exists(): # pragma: no cover |
29 | 20 | raise ParserError('--profile must be a path for an existing file') |
30 | | - _write(args, Tokenizer(profile=args.profile)(_read(args), column=args.mapping)) |
| 21 | + print(Tokenizer(profile=args.profile)(_read(args), column=args.mapping)) |
31 | 22 |
|
32 | 23 |
|
33 | | -@command() |
34 | 24 | def profile(args): |
35 | 25 | """ |
36 | 26 | Create an orthography profile for a string (passed as argument or read from stdin) |
37 | 27 |
|
38 | 28 | segments profile [STRING] |
39 | 29 | """ |
40 | | - _write(args, Profile.from_text(_read(args))) |
| 30 | + print(Profile.from_text(_read(args))) |
41 | 31 |
|
42 | 32 |
|
43 | | -def main(): # pragma: no cover |
| 33 | +def _read(args): |
| 34 | + string = args.args[0] if args.args else sys.stdin.read() |
| 35 | + if not isinstance(string, str): |
| 36 | + string = string.decode(args.encoding) |
| 37 | + return string.strip() |
| 38 | + |
| 39 | + |
| 40 | +def main(parsed_args=None): |
| 41 | + commands = {'tokenize': tokenize, 'profile': profile} |
44 | 42 | logging.basicConfig() |
45 | | - parser = ArgumentParser('segments') |
| 43 | + parser = argparse.ArgumentParser( |
| 44 | + description="Main command line interface of the segments package.", |
| 45 | + epilog="Use '%(prog)s help <cmd>' to get help about individual commands.") |
| 46 | + parser.add_argument("--verbosity", help="increase output verbosity") |
| 47 | + parser.add_argument('command', help=' | '.join(commands)) |
| 48 | + parser.add_argument('args', nargs=argparse.REMAINDER) |
46 | 49 | parser.add_argument("--encoding", help='input encoding', default="utf8") |
47 | 50 | parser.add_argument("--profile", help='path to an orthography profile', default=None) |
48 | 51 | parser.add_argument( |
49 | 52 | "--mapping", |
50 | 53 | help='column name in ortho profile to map graphemes', |
51 | 54 | default=Profile.GRAPHEME_COL) |
52 | | - sys.exit(parser.main()) |
| 55 | + |
| 56 | + args = parsed_args or parser.parse_args() |
| 57 | + if args.command == 'help' and len(args.args): |
| 58 | + # As help text for individual commands we simply re-use the docstrings of the |
| 59 | + # callables registered for the command: |
| 60 | + print(commands[args.args[0]].__doc__.strip() |
| 61 | + if args.args[0] in commands else "Invalid command: '{}'".format(args.args[0])) |
| 62 | + else: |
| 63 | + if args.command not in commands: |
| 64 | + print('invalid command') |
| 65 | + parser.print_help() |
| 66 | + sys.exit(64) |
| 67 | + try: |
| 68 | + commands[args.command](args) |
| 69 | + except ParserError as e: |
| 70 | + print(e) |
| 71 | + print(commands[args.command].__doc__.strip()) |
| 72 | + sys.exit(64) |
| 73 | + except Exception as e: # pragma: no cover |
| 74 | + print(e) |
| 75 | + sys.exit(1) |
| 76 | + sys.exit(0) |
53 | 77 |
|
54 | 78 |
|
55 | 79 | if __name__ == '__main__': # pragma: no cover |
|
0 commit comments