Skip to content

Toolbox submission #11

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
53 changes: 46 additions & 7 deletions evolve_text.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@
from deap import base
from deap import tools


#-----------------------------------------------------------------------------
# Global variables
#-----------------------------------------------------------------------------
Expand All @@ -31,6 +30,7 @@
# Control whether all Messages are printed as they are evaluated
VERBOSE = True

# Memoization table for levenshtein(): evaluate_text passes this same dict on
# every call, so a distance that was already computed is looked up instead of
# being recomputed.
cache = dict()

#-----------------------------------------------------------------------------
# Message object to use in evolutionary algorithm
Expand Down Expand Up @@ -92,16 +92,47 @@ def get_text(self):
# Genetic operators
#-----------------------------------------------------------------------------

# TODO: Implement levenshtein_distance function (see Day 9 in-class exercises)
# HINT: Now would be a great time to implement memoization if you haven't
def levenshtein(a, b, cache=None):
    """ Returns the levenshtein distance between a and b.

    a, b: the two sequences (normally strings) to compare
    cache: optional dict used for memoization. When given, the result is
        looked up under the key (a, b) before any work is done and stored
        under that key afterwards. Pass the same dict across calls to reuse
        earlier results. When omitted, nothing is memoized.

    The distance is computed iteratively, one row of the edit-distance table
    at a time, so long inputs cannot exhaust the interpreter's recursion
    limit and no sliced copies of the inputs are created.

    >>> cache = {}
    >>> levenshtein('apple', '', cache)
    5
    >>> levenshtein('', 'software', cache)
    8
    >>> levenshtein('cat', 'cat', cache)
    0
    >>> levenshtein('beta', 'pedal', cache)
    3
    >>> levenshtein('battle', 'bet', cache)
    4
    >>> levenshtein('kitten', 'sitting')
    3
    """
    key = (a, b)
    if cache is not None and key in cache:
        return cache[key]

    # Trivial cases need no table (and are cheap enough not to memoize).
    if a == b:
        return 0
    if not a or not b:
        return len(a) or len(b)

    # previous[j] is the distance between the already-consumed prefix of a
    # and the first j characters of b. Row 0 is "build b[:j] from nothing".
    previous = list(range(len(b) + 1))
    for i, char_a in enumerate(a, 1):
        current = [i]  # turning a[:i] into '' takes i deletions
        for j, char_b in enumerate(b, 1):
            substitute = previous[j - 1] + (char_a != char_b)  # free on match
            insert = current[j - 1] + 1
            delete = previous[j] + 1
            current.append(min(substitute, insert, delete))
        previous = current

    distance = previous[-1]
    if cache is not None:
        cache[key] = distance
    return distance


def evaluate_text(message, goal_text, verbose=VERBOSE):
    """
    Given a Message and a goal_text string, return the Levenshtein distance
    between the Message and the goal_text as a length 1 tuple.

    message: object whose get_text() result is compared with goal_text
    goal_text: the target string
    verbose: if True, print each Message and its distance as it is evaluated
    """
    # The module-level cache is handed to levenshtein so that a text which
    # has been scored before is looked up rather than recomputed.
    distance = levenshtein(message.get_text(), goal_text, cache)
    if verbose:
        # A single-argument print(...) behaves the same on Python 2 and 3.
        # !s turns the Message into text before the width of 60 is applied.
        print("{msg!s:60}\t[Distance: {dst}]".format(msg=message, dst=distance))
    return (distance, )  # Length 1 tuple, required by DEAP
Expand All @@ -120,9 +151,16 @@ def mutate_text(message, prob_ins=0.05, prob_del=0.05, prob_sub=0.05):
(legal) character
"""

index = random.choice(range(len(message) - 1))

if random.random() < prob_ins:
# TODO: Implement insertion-type mutation
pass
message.insert(index, random.choice(VALID_CHARS))
if random.random() < prob_del:
del message[index]
if random.random() < prob_sub:
del message[index]
message.insert(index, random.choice(VALID_CHARS))


# TODO: Also implement deletion and substitution mutations
# HINT: Message objects inherit from list, so they also inherit
Expand Down Expand Up @@ -170,7 +208,8 @@ def evolve_string(text):

# Get configured toolbox and create a population of random Messages
toolbox = get_toolbox(text)
pop = toolbox.population(n=300)
# pop = toolbox.population(n=300)
pop = toolbox.population(n=250)

# Collect statistics as the EA runs
stats = tools.Statistics(lambda ind: ind.fitness.values)
Expand Down
3 changes: 3 additions & 0 deletions results.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
I played around with the population size. Interestingly enough, the algorithm achieved the goal within approximately the same number of generations at pop=150 as it did at pop=300, failed to achieve the goal with pop=200, and achieved the goal much more quickly with pop=250.

There didn't seem to be much correlation between the initial population size and the number of generations required to achieve the goal, though if I ran the program enough times at each population size, I might see a correlation.