From e7fb43219c734590b44190a14629c3547f3b457e Mon Sep 17 00:00:00 2001 From: Jabokoe Date: Mon, 13 Jul 2015 00:38:37 +0200 Subject: [PATCH] Changed the testcases to unicode strings. The RDD this function will be used on contains unicode strings. This test uses default type strings, causing implementations of removePunctuation that use string.translate() to pass this test. However, string.translate() does not behave well for unicode strings, which causes cryptic errors later down the line (4c). Additionally, positive test results for 4b will confuse students debugging errors at 4c. Changing the testcases to unicode strings better guides students towards using the regular expressions (re) module. --- ML_lab2_word_count_student.ipynb | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/ML_lab2_word_count_student.ipynb b/ML_lab2_word_count_student.ipynb index 8b9d63e..001361c 100644 --- a/ML_lab2_word_count_student.ipynb +++ b/ML_lab2_word_count_student.ipynb @@ -608,9 +608,9 @@ " str: The cleaned up string.\n", " \"\"\"\n", " \n", - "print removePunctuation('Hi, you!')\n", - "print removePunctuation(' No under_score!')\n", - "print removePunctuation(' * Remove punctuation then spaces * ')" + "print removePunctuation(u'Hi, you!')\n", + "print removePunctuation(u' No under_score!')\n", + "print removePunctuation(u' * Remove punctuation then spaces * ')" ] }, { @@ -622,8 +622,8 @@ "outputs": [], "source": [ "# TEST Capitalization and punctuation (4b)\n", - "Test.assertEquals(removePunctuation(\" The Elephant's 4 cats. \"),\n", - " 'the elephants 4 cats',\n", + "Test.assertEquals(removePunctuation(u\" The Elephant's 4 cats. \"),\n", + " u'the elephants 4 cats',\n", " 'incorrect definition for removePunctuation function')" ] },