def levenshtein(a, b, cache):
    """ Returns the levenshtein distance between a and b.

        The distance is the minimum number of single-character insertions,
        deletions and substitutions needed to turn a into b.

        a, b: the two strings to compare
        cache: dict mapping (a, b) tuples to previously computed distances;
               it is consulted first and updated with the result for (a, b)
        returns: the edit distance as a non-negative int

        The table is filled iteratively (two rows at a time) instead of
        recursing one character per call, so long inputs cannot hit the
        interpreter's recursion limit and no string slices are copied.

    >>> cache = {}
    >>> levenshtein('apple', '', cache)
    5
    >>> levenshtein('', 'software', cache)
    8
    >>> levenshtein('cat', 'cat', cache)
    0
    >>> levenshtein('beta', 'pedal', cache)
    3
    >>> levenshtein('battle', 'bet', cache)
    4
    """
    key = (a, b)
    if key in cache:
        return cache[key]

    # Trivial cases: identical inputs, or one side empty (distance is then
    # the length of the non-empty side).
    if a == b:
        return 0
    if not a or not b:
        return len(a) or len(b)

    # previous[j] holds the distance between the prefix of a processed so
    # far and the first j characters of b. Row 0 is "build b[:j] from
    # nothing", i.e. j insertions.
    previous = list(range(len(b) + 1))
    for i, char_a in enumerate(a, 1):
        # Column 0 is "erase the first i characters of a".
        current = [i]
        for j, char_b in enumerate(b, 1):
            substitute = previous[j - 1] + (char_a != char_b)
            insert = current[j - 1] + 1    # insert char_b into a
            delete = previous[j] + 1       # remove char_a from a
            current.append(min(substitute, insert, delete))
        previous = current

    distance = previous[-1]
    cache[key] = distance
    return distance
""" - distance = levenshtein_distance(message.get_text(), goal_text) + distance = levenshtein(message.get_text(), goal_text, cache) if verbose: print "{msg:60}\t[Distance: {dst}]".format(msg=message, dst=distance) return (distance, ) # Length 1 tuple, required by DEAP @@ -120,9 +151,16 @@ def mutate_text(message, prob_ins=0.05, prob_del=0.05, prob_sub=0.05): (legal) character """ + index = random.choice(range(len(message) - 1)) + if random.random() < prob_ins: - # TODO: Implement insertion-type mutation - pass + message.insert(index, random.choice(VALID_CHARS)) + if random.random() < prob_del: + del message[index] + if random.random() < prob_sub: + del message[index] + message.insert(index, random.choice(VALID_CHARS)) + # TODO: Also implement deletion and substitution mutations # HINT: Message objects inherit from list, so they also inherit @@ -170,7 +208,8 @@ def evolve_string(text): # Get configured toolbox and create a population of random Messages toolbox = get_toolbox(text) - pop = toolbox.population(n=300) + # pop = toolbox.population(n=300) + pop = toolbox.population(n=250) # Collect statistics as the EA runs stats = tools.Statistics(lambda ind: ind.fitness.values) diff --git a/results.txt b/results.txt new file mode 100644 index 0000000..cb67a62 --- /dev/null +++ b/results.txt @@ -0,0 +1,3 @@ +I played around with the population size. Interestingly enough, the algorithm achieved the goal within approximately the same number of generations at pop=150 as it did at pop=300, failed to achieve the goal with pop=200, and achieved the goal much more quickly with pop=250. + +There didn't seem to be a whole lot of correlation between the initial population size and the number of generations required to achieve the goal, though if I ran the program enough times, I might see a correlation. \ No newline at end of file