-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathgenerator.py
91 lines (78 loc) · 3.2 KB
/
generator.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
import argparse
from collections import defaultdict
from random import choice
def read_cfg(cfg):
    """
    Read a text version of a CFG and return a usable dictionary version of it.

    Each non-blank line of the file holds one production rule written as
    ``LHS:TOKEN,TOKEN,...``.  Several lines may share the same left-hand
    side; each one adds another alternative expansion for it.  Blank lines
    are skipped, and whitespace around the left-hand side and around each
    token is ignored.  Only the first ':' on a line separates the two sides,
    so a token may itself contain ':'.

    :param cfg: name of a text file containing a context-free grammar
    :return grammar: a defaultdict mapping each left-hand side to a list of
        right-hand sides, where each right-hand side is a list of tokens
    :raises ValueError: if a non-blank line has no ':' separator
    """
    grammar = defaultdict(list)
    with open(cfg, 'r') as f:
        for line_number, line in enumerate(f, start=1):
            # tolerate empty lines (e.g. a trailing newline at end of file)
            if not line.strip():
                continue
            lhs, separator, rhs = line.partition(':')
            if not separator:
                raise ValueError(
                    "%s, line %d: expected 'LHS:TOKEN,TOKEN,...' but got %r"
                    % (cfg, line_number, line.rstrip('\n')))
            # strip each token so "S: NP, VP" yields the same keys as "S:NP,VP"
            tokens = [token.strip() for token in rhs.split(',')]
            grammar[lhs.strip()].append(tokens)
    return grammar
def make_sentence_from_grammar(grammar_dict):
    """
    Randomly generate a sentence based on the provided CFG.

    Generation starts from the single symbol ``'S'``.  The token list is
    swept left to right in repeated passes; every token that has at least one
    production in ``grammar_dict`` is replaced in place by the tokens of one
    randomly chosen production.  Generation stops once no token in the list
    has a production.

    A token counts as a non-terminal only if the grammar holds a rule for it,
    not because of its letter case.  An uppercase word such as "I", or a
    symbol with no rule, is therefore kept as-is rather than looping forever.
    Lookups use ``.get``, so a plain dict works and a defaultdict is not
    filled with empty entries for terminals.

    Note that a grammar whose rules always reintroduce an expandable symbol
    never finishes.

    :param grammar_dict: a dictionary representing a context-free grammar,
        mapping a symbol to a list of alternative right-hand sides
    :return output_sentence: a list of words in a sentence
    """
    # start default sentence
    output_sentence = ['S']
    while True:
        # The pass only visits the positions that existed when it began;
        # tokens pushed beyond that range are handled by the next pass.
        for index in range(len(output_sentence)):
            if index >= len(output_sentence):
                # an empty right-hand side shortened the list mid-pass
                break
            rhs_choices = grammar_dict.get(output_sentence[index])
            if not rhs_choices:
                continue
            # replace the non-terminal with a randomly chosen expansion
            output_sentence[index:index + 1] = choice(rhs_choices)
        # finished once nothing left in the sentence can be expanded
        if not any(grammar_dict.get(token) for token in output_sentence):
            return output_sentence
def _format_sentence(words):
    """Join words into one output line: first word capitalized, period at the end."""
    return " ".join([words[0].capitalize()] + list(words[1:])) + ".\n"


if __name__ == "__main__":
    # get arguments from command line
    PARSE = argparse.ArgumentParser(description="""
    Recreation of Yngve's Random Sentence Generator
    """, add_help=True)
    PARSE.add_argument("-c", "--cfg", required=True,
                       help="Text file containing rules of CFG, one per line, "
                            "in format LHS:RHS with comma-separated RHS tokens",
                       dest="cfg")
    PARSE.add_argument("-o", "--output", required=False,
                       default="output.txt",
                       help="Name for output file",
                       dest="output")
    # type=int is required: argparse hands over strings by default and
    # range() below cannot take a string
    PARSE.add_argument("-n", "--number", required=False,
                       type=int, default=100,
                       help="Number of sentences to generate",
                       dest="num")
    ARGS = vars(PARSE.parse_args())

    outfile = ARGS['output']
    num = ARGS['num']

    # read the cfg into a dictionary
    grammar_dictionary = read_cfg(ARGS['cfg'])

    # write generated sentences to output file
    with open(outfile, 'w') as o:
        for i in range(num):
            sentence = make_sentence_from_grammar(grammar_dictionary)
            if not sentence:
                # nothing to write for an empty derivation
                continue
            # one line per sentence; joining the words keeps a one-word
            # sentence from being written twice
            o.write(_format_sentence(sentence))