|
132 | 132 | }, |
133 | 133 | { |
134 | 134 | "cell_type": "code", |
135 | | - "execution_count": null, |
| 135 | + "execution_count": 1, |
136 | 136 | "metadata": { |
137 | 137 | "collapsed": true |
138 | 138 | }, |
139 | | - "outputs": [], |
| 139 | + "outputs": [ |
| 140 | + { |
| 141 | + "ename": "NameError", |
| 142 | + "evalue": "name 'comedians' is not defined", |
| 143 | + "output_type": "error", |
| 144 | + "traceback": [ |
| 145 | + "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", |
| 146 | + "\u001b[1;31mNameError\u001b[0m Traceback (most recent call last)", |
| 147 | + "Cell \u001b[1;32mIn[1], line 3\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[38;5;66;03m# Load pickled files\u001b[39;00m\n\u001b[0;32m 2\u001b[0m data \u001b[38;5;241m=\u001b[39m {}\n\u001b[1;32m----> 3\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m i, c \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28menumerate\u001b[39m(\u001b[43mcomedians\u001b[49m):\n\u001b[0;32m 4\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m \u001b[38;5;28mopen\u001b[39m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtranscripts/\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;241m+\u001b[39m c \u001b[38;5;241m+\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m.txt\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mrb\u001b[39m\u001b[38;5;124m\"\u001b[39m) \u001b[38;5;28;01mas\u001b[39;00m file:\n\u001b[0;32m 5\u001b[0m data[c] \u001b[38;5;241m=\u001b[39m pickle\u001b[38;5;241m.\u001b[39mload(file)\n", |
| 148 | + "\u001b[1;31mNameError\u001b[0m: name 'comedians' is not defined" |
| 149 | + ] |
| 150 | + } |
| 151 | + ], |
140 | 152 | "source": [ |
141 | 153 | "# Load pickled files\n", |
142 | 154 | "data = {}\n", |
|
284 | 296 | "def clean_text_round1(text):\n", |
285 | 297 | " '''Make text lowercase, remove text in square brackets, remove punctuation and remove words containing numbers.'''\n", |
286 | 298 | " text = text.lower()\n", |
287 | | - " text = re.sub('\\[.*?\\]', '', text)\n", |
288 | | - " text = re.sub('[%s]' % re.escape(string.punctuation), '', text)\n", |
289 | | - " text = re.sub('\\w*\\d\\w*', '', text)\n", |
| 299 | + " text = re.sub(r'\\[.*?\\]', '', text)\n", |
| 300 | + " text = re.sub(r'[%s]' % re.escape(string.punctuation), '', text)\n", |
| 301 | + " text = re.sub(r'\\w*\\d\\w*', '', text)\n", |
290 | 302 | " return text\n", |
291 | 303 | "\n", |
292 | 304 | "round1 = lambda x: clean_text_round1(x)" |
|
508 | 520 | "name": "python", |
509 | 521 | "nbconvert_exporter": "python", |
510 | 522 | "pygments_lexer": "ipython3", |
511 | | - "version": "3.6.2" |
| 523 | + "version": "3.12.2" |
512 | 524 | }, |
513 | 525 | "toc": { |
514 | 526 | "nav_menu": {}, |
|
0 commit comments