|
2 | 2 | "cells": [
|
3 | 3 | {
|
4 | 4 | "cell_type": "code",
|
5 |
| - "execution_count": 29, |
| 5 | + "execution_count": 17, |
6 | 6 | "metadata": {
|
7 | 7 | "collapsed": false
|
8 | 8 | },
|
9 | 9 | "outputs": [],
|
10 | 10 | "source": [
|
11 | 11 | "# imports\n",
|
12 |
| - "\n", |
13 |
| - "import urllib2\n", |
14 |
| - "import json" |
| 12 | + "from urllib.request import urlopen\n", |
| 13 | + "import json\n", |
| 14 | + "import re\n", |
| 15 | + "from collections import defaultdict" |
15 | 16 | ]
|
16 | 17 | },
|
17 | 18 | {
|
18 | 19 | "cell_type": "code",
|
19 |
| - "execution_count": 30, |
| 20 | + "execution_count": 9, |
20 | 21 | "metadata": {
|
21 |
| - "collapsed": true |
| 22 | + "collapsed": false |
22 | 23 | },
|
23 | 24 | "outputs": [],
|
24 | 25 | "source": [
|
25 | 26 | "# get an s3 file from Discursive. This file is a sample of ~500 tweets. \n",
|
26 | 27 | "\n",
|
27 |
| - "response = urllib2.urlopen('https://s3-us-west-2.amazonaws.com/discursive/2017/1/10/18/tweets-25.json')\n", |
| 28 | + "response = urlopen('https://s3-us-west-2.amazonaws.com/discursive/2017/1/10/18/tweets-25.json')\n", |
28 | 29 | "html = response.read()\n",
|
29 | 30 | "resp = json.loads(html)"
|
30 | 31 | ]
|
31 | 32 | },
|
32 | 33 | {
|
33 | 34 | "cell_type": "code",
|
34 |
| - "execution_count": 34, |
| 35 | + "execution_count": 11, |
35 | 36 | "metadata": {
|
36 | 37 | "collapsed": false
|
37 | 38 | },
|
|
40 | 41 | "name": "stdout",
|
41 | 42 | "output_type": "stream",
|
42 | 43 | "text": [
|
43 |
| - "[{u'loc': None, u'description': u\"I Fuck Up... Just don't forget you Fuck Up Too.\", u'friends_count': 979, u'created': u'2017-01-10 18:14:08', u'text': u\"RT @LindaSuhler: Can we hear from #MSM here?\\n@MTV's @Ira Madison III Calls Jeff Sessions' Granddaughter 'Prop' Stolen from Toys R Us\\u2026 \", u'hashtags': u'[\"MSM\"]', u'original_name': u'Linda Suhler, Ph.D.', u'original_id': 347627434, u'id_str': u'818883641640177665', u'user_created': u'2012-12-29 17:54:08', u'followers': 3098, u'retweet_count': 0, u'retweet': u'Y', u'name': u'VFL2013'}, {u'loc': u'Lagos, Nigeria', u'description': u'Publisher | Editor | Online Promoter | Web Designer | Blogger | Social Media Manager.. Contact: peretzomasi51@gmail.com', u'friends_count': 128, u'created': u'2017-01-10 18:14:09', u'text': u'Breitbart to launch populist business news site https://t.co/DeLYRbFZDs', u'hashtags': u'[]', u'original_name': None, u'original_id': None, u'id_str': u'818883644039294976', u'user_created': u'2016-09-25 18:53:21', u'followers': 154, u'retweet_count': 0, u'retweet': u'N', u'name': u'PeretzOmasi'}, {u'loc': u'Schloss von Kastenfarbe', u'description': u'Extra Bl\\xf6d\\n\\nIrgendwann wird auch Dich niemand verstehen wollen Ach ja: \\u201eNicht alles was Ich teile ist gleich meine Meinung\\u201c http://www.faz.net/-gqz-7h2fh', u'friends_count': 496, u'created': u'2017-01-10 18:14:09', u'text': u'RT @dushanwegner: F\\xfcr eine #SPD-Abgeordnete? Hahaha. Witzig. \\U0001f602\\U0001f602\\U0001f602 https://t.co/YBoSx9YR5f', u'hashtags': u'[\"SPD\"]', u'original_name': u'Dushan Wegner', u'original_id': 14784064, u'id_str': u'818883645096202241', u'user_created': u'2012-03-11 21:22:57', u'followers': 145, u'retweet_count': 0, u'retweet': u'Y', u'name': u'kricketkrackel'}, {u'loc': None, u'description': None, u'friends_count': 249, u'created': u'2017-01-10 18:14:09', u'text': u'RT @pg80808: @qatarairways \\u2708\\ufe0f Did you know your ads support stories like this on Breitbart? 
10 THINGS MILO HATES ABOUT ISLAM.\\u2026 ', u'hashtags': u'[]', u'original_name': u'ColoradoVsBreitbart', u'original_id': 814872255406620672, u'id_str': u'818883645188358146', u'user_created': u'2014-12-12 22:55:33', u'followers': 76, u'retweet_count': 0, u'retweet': u'Y', u'name': u'awordonplays'}, {u'loc': None, u'description': u'Conservative; luv my doxie...KEEP THE CHANGE!... ONE NATION UNDER GOD!', u'friends_count': 3361, u'created': u'2017-01-10 18:14:10', u'text': u\"#racist #fool #MTV's Ira #Madison III Calls Jeff Sessions' Granddaughter 'Prop' Stolen from Toys R Us - Breitbart https://t.co/z8GR7iN59Y\", u'hashtags': u'[\"racist\", \"fool\", \"MTV\", \"Madison\"]', u'original_name': None, u'original_id': None, u'id_str': u'818883647382192128', u'user_created': u'2011-08-02 13:45:16', u'followers': 3166, u'retweet_count': 0, u'retweet': u'N', u'name': u'Bonacker69'}]\n" |
| 44 | + "[{'description': \"I Fuck Up... Just don't forget you Fuck Up Too.\", 'friends_count': 979, 'text': \"RT @LindaSuhler: Can we hear from #MSM here?\\n@MTV's @Ira Madison III Calls Jeff Sessions' Granddaughter 'Prop' Stolen from Toys R Us… \", 'hashtags': '[\"MSM\"]', 'original_id': 347627434, 'user_created': '2012-12-29 17:54:08', 'retweet': 'Y', 'loc': None, 'name': 'VFL2013', 'created': '2017-01-10 18:14:08', 'original_name': 'Linda Suhler, Ph.D.', 'followers': 3098, 'id_str': '818883641640177665', 'retweet_count': 0}, {'description': 'Publisher | Editor | Online Promoter | Web Designer | Blogger | Social Media Manager.. Contact: peretzomasi51@gmail.com', 'friends_count': 128, 'text': 'Breitbart to launch populist business news site https://t.co/DeLYRbFZDs', 'hashtags': '[]', 'original_id': None, 'user_created': '2016-09-25 18:53:21', 'retweet': 'N', 'loc': 'Lagos, Nigeria', 'name': 'PeretzOmasi', 'created': '2017-01-10 18:14:09', 'original_name': None, 'followers': 154, 'id_str': '818883644039294976', 'retweet_count': 0}, {'description': 'Extra Blöd\\n\\nIrgendwann wird auch Dich niemand verstehen wollen Ach ja: „Nicht alles was Ich teile ist gleich meine Meinung“ http://www.faz.net/-gqz-7h2fh', 'friends_count': 496, 'text': 'RT @dushanwegner: Für eine #SPD-Abgeordnete? Hahaha. Witzig. 😂😂😂 https://t.co/YBoSx9YR5f', 'hashtags': '[\"SPD\"]', 'original_id': 14784064, 'user_created': '2012-03-11 21:22:57', 'retweet': 'Y', 'loc': 'Schloss von Kastenfarbe', 'name': 'kricketkrackel', 'created': '2017-01-10 18:14:09', 'original_name': 'Dushan Wegner', 'followers': 145, 'id_str': '818883645096202241', 'retweet_count': 0}, {'description': None, 'friends_count': 249, 'text': 'RT @pg80808: @qatarairways ✈️ Did you know your ads support stories like this on Breitbart? 
10 THINGS MILO HATES ABOUT ISLAM.… ', 'hashtags': '[]', 'original_id': 814872255406620672, 'user_created': '2014-12-12 22:55:33', 'retweet': 'Y', 'loc': None, 'name': 'awordonplays', 'created': '2017-01-10 18:14:09', 'original_name': 'ColoradoVsBreitbart', 'followers': 76, 'id_str': '818883645188358146', 'retweet_count': 0}, {'description': 'Conservative; luv my doxie...KEEP THE CHANGE!... ONE NATION UNDER GOD!', 'friends_count': 3361, 'text': \"#racist #fool #MTV's Ira #Madison III Calls Jeff Sessions' Granddaughter 'Prop' Stolen from Toys R Us - Breitbart https://t.co/z8GR7iN59Y\", 'hashtags': '[\"racist\", \"fool\", \"MTV\", \"Madison\"]', 'original_id': None, 'user_created': '2011-08-02 13:45:16', 'retweet': 'N', 'loc': None, 'name': 'Bonacker69', 'created': '2017-01-10 18:14:10', 'original_name': None, 'followers': 3166, 'id_str': '818883647382192128', 'retweet_count': 0}]\n" |
44 | 45 | ]
|
45 | 46 | }
|
46 | 47 | ],
|
47 | 48 | "source": [
|
48 | 49 | "# have a look at the data\n",
|
49 | 50 | "\n",
|
50 |
| - "print resp[:5]" |
| 51 | + "print(resp[:5])" |
51 | 52 | ]
|
52 | 53 | },
|
53 | 54 | {
|
54 | 55 | "cell_type": "code",
|
55 |
| - "execution_count": 33, |
| 56 | + "execution_count": 12, |
56 | 57 | "metadata": {
|
57 | 58 | "collapsed": false
|
58 | 59 | },
|
|
74 | 75 | "# start exploring the available fields\n",
|
75 | 76 | "\n",
|
76 | 77 | "for x in resp[:5]:\n",
|
77 |
| - " print x['text']" |
| 78 | + " print(x['text'])" |
| 79 | + ] |
| 80 | + }, |
| 81 | + { |
| 82 | + "cell_type": "code", |
| 83 | + "execution_count": 25, |
| 84 | + "metadata": { |
| 85 | + "collapsed": false |
| 86 | + }, |
| 87 | + "outputs": [], |
| 88 | + "source": [ |
| 89 | + "# count every hashtag that appears in the tweet texts\n", |
| 90 | + "hashtags = defaultdict(int)\n", |
| 91 | + "for tweet in resp:\n", |
| 92 | + "    # '#' + word characters at the start of a token; trailing punctuation\n", |
| 93 | + "    # ('!', ':', '?', quotes, ellipsis) is no longer counted as part of the tag\n", |
| 94 | + "    hts = re.findall(r'(?<!\\S)#\\w+', tweet['text'])\n", |
| 95 | + "    for ht in hts:\n", |
| 96 | + "        hashtags[ht] += 1\n" |
78 | 97 | ]
|
| 98 | + }, |
| 99 | + { |
| 100 | + "cell_type": "code", |
| 101 | + "execution_count": 44, |
| 102 | + "metadata": { |
| 103 | + "collapsed": false |
| 104 | + }, |
| 105 | + "outputs": [ |
| 106 | + { |
| 107 | + "name": "stdout", |
| 108 | + "output_type": "stream", |
| 109 | + "text": [ |
| 110 | + "Hashtag\t\t\t\tCount\n", |
| 111 | + "#MSM \t\t33\n", |
| 112 | + "#MAGA \t\t11\n", |
| 113 | + "#StopSessions \t\t8\n", |
| 114 | + "#FakeNews \t\t8\n", |
| 115 | + "#ConfirmSessions!\t\t7\n", |
| 116 | + "#SupportSessions\t\t7\n", |
| 117 | + "#AmericaFirst \t\t7\n", |
| 118 | + "#Breitbart \t\t6\n", |
| 119 | + "#MakeAmericaGreatAgain’\t\t6\n", |
| 120 | + "#SPD-Abgeordnete?\t\t5\n", |
| 121 | + "#tcot \t\t5\n", |
| 122 | + "#tichy \t\t4\n", |
| 123 | + "#xing \t\t4\n", |
| 124 | + "#Br… \t\t4\n", |
| 125 | + "#Sessions \t\t4\n", |
| 126 | + "#NewYork \t\t4\n", |
| 127 | + "#NYC \t\t4\n", |
| 128 | + "#Real… \t\t3\n", |
| 129 | + "#TCOT \t\t3\n", |
| 130 | + "#PJNET \t\t3\n", |
| 131 | + "#pjnet \t\t3\n", |
| 132 | + "#p2 \t\t3\n", |
| 133 | + "#MakeAmericaGreatAgain'\t\t3\n", |
| 134 | + "#CloserNation \t\t2\n", |
| 135 | + "#worldnews \t\t2\n", |
| 136 | + "#Obamacare \t\t2\n", |
| 137 | + "#PlannedParenthood\t\t2\n", |
| 138 | + "#PJNET🇺🇸… \t\t2\n", |
| 139 | + "#Obama \t\t2\n", |
| 140 | + "#Jerusalem \t\t2\n", |
| 141 | + "#PresidentElectTrump\t\t2\n", |
| 142 | + "#jerusalem \t\t2\n", |
| 143 | + "#TimScott: \t\t2\n", |
| 144 | + "#JeffSessions \t\t2\n", |
| 145 | + "#AttorneyGeneral\t\t2\n", |
| 146 | + "#Racist \t\t2\n", |
| 147 | + "#breitbart \t\t2\n", |
| 148 | + "#LiberalHate \t\t2\n", |
| 149 | + "#EverythingIsRacial\t\t2\n", |
| 150 | + "#racist \t\t1\n", |
| 151 | + "#fool \t\t1\n", |
| 152 | + "#MTV's \t\t1\n", |
| 153 | + "#Madison \t\t1\n", |
| 154 | + "#Libtards \t\t1\n", |
| 155 | + "#Liberals \t\t1\n", |
| 156 | + "#Racism \t\t1\n", |
| 157 | + "#Racist… \t\t1\n", |
| 158 | + "#AI \t\t1\n", |
| 159 | + "#IA \t\t1\n", |
| 160 | + "#machinelearning\t\t1\n", |
| 161 | + "#AfD: \t\t1\n", |
| 162 | + "#TRUMP \t\t1\n", |
| 163 | + "#VETS \t\t1\n", |
| 164 | + "#OATHKEEPERS \t\t1\n", |
| 165 | + "#CONSTITUTION \t\t1\n", |
| 166 | + "#2A \t\t1\n", |
| 167 | + "#1A \t\t1\n", |
| 168 | + "#ORPUW \t\t1\n", |
| 169 | + "#NRA \t\t1\n", |
| 170 | + "#TLOT \t\t1\n", |
| 171 | + "#INFOWARS \t\t1\n", |
| 172 | + "#DrudgeReport… \t\t1\n", |
| 173 | + "#Resist \t\t1\n", |
| 174 | + "#preach \t\t1\n", |
| 175 | + "#Bannon \t\t1\n", |
| 176 | + "#Germany \t\t1\n", |
| 177 | + "#France \t\t1\n", |
| 178 | + "#ConfirmationHearings\t\t1\n", |
| 179 | + "#JeffSessions' \t\t1\n", |
| 180 | + "#TheResistance \t\t1\n", |
| 181 | + "#ResistTrump \t\t1\n", |
| 182 | + "#Fascism \t\t1\n", |
| 183 | + "#NYT \t\t1\n", |
| 184 | + "#NYT=FakeNews \t\t1\n", |
| 185 | + "#NowPlaying \t\t1\n", |
| 186 | + "#woodpile \t\t1\n", |
| 187 | + "#p… \t\t1\n", |
| 188 | + "#populism\" \t\t1\n", |
| 189 | + "#media \t\t1\n", |
| 190 | + "#SPD-Abgeordnete\t\t1\n", |
| 191 | + "#squirrel \t\t1\n", |
| 192 | + "#beta \t\t1\n", |
| 193 | + "#bitches \t\t1\n", |
| 194 | + "#cuck… \t\t1\n", |
| 195 | + "#liberal \t\t1\n", |
| 196 | + "#leftists \t\t1\n", |
| 197 | + "#bannon \t\t1\n", |
| 198 | + "#manafort \t\t1\n", |
| 199 | + "#kushner \t\t1\n", |
| 200 | + "#putin \t\t1\n", |
| 201 | + "#PublicDiplomacy\t\t1\n", |
| 202 | + "#dortmund \t\t1\n", |
| 203 | + "#TuesdayMotivation\t\t1\n", |
| 204 | + "#Fakenews \t\t1\n", |
| 205 | + "#verfassungsschutz\t\t1\n", |
| 206 | + "#APT28 \t\t1\n", |
| 207 | + "#ACA… \t\t1\n", |
| 208 | + "#DumpKelloggs \t\t1\n", |
| 209 | + "#RIPDNC \t\t1\n", |
| 210 | + "#MerylStreep \t\t1\n", |
| 211 | + "#Awesome...coming\t\t1\n", |
| 212 | + "#TGDN \t\t1\n", |
| 213 | + "#CCOT \t\t1\n", |
| 214 | + "#diversity \t\t1\n", |
| 215 | + "#StopFundingHate\t\t1\n" |
| 216 | + ] |
| 217 | + } |
| 218 | + ], |
| 219 | + "source": [ |
| 220 | + "# print the hashtags, most frequent first, in two aligned columns\n", |
| 221 | + "width = max([len(ht) for ht in hashtags] + [len(\"Hashtag\")])  # widest tag sets the column\n", |
| 222 | + "print(\"{:{w}s}  {}\".format(\"Hashtag\", \"Count\", w=width))\n", |
| 223 | + "for ht in sorted(hashtags, key=hashtags.get, reverse=True):\n", |
| 224 | + "    print(\"{:{w}s}  {}\".format(ht, hashtags[ht], w=width))" |
| 225 | + ] |
| 226 | + }, |
| 227 | + { |
| 228 | + "cell_type": "code", |
| 229 | + "execution_count": null, |
| 230 | + "metadata": { |
| 231 | + "collapsed": true |
| 232 | + }, |
| 233 | + "outputs": [], |
| 234 | + "source": [] |
79 | 235 | }
|
80 | 236 | ],
|
81 | 237 | "metadata": {
|
82 | 238 | "kernelspec": {
|
83 |
| - "display_name": "Python [Root]", |
| 239 | + "display_name": "Python 3", |
84 | 240 | "language": "python",
|
85 |
| - "name": "Python [Root]" |
| 241 | + "name": "python3" |
86 | 242 | },
|
87 | 243 | "language_info": {
|
88 | 244 | "codemirror_mode": {
|
89 | 245 | "name": "ipython",
|
90 |
| - "version": 2 |
| 246 | + "version": 3 |
91 | 247 | },
|
92 | 248 | "file_extension": ".py",
|
93 | 249 | "mimetype": "text/x-python",
|
94 | 250 | "name": "python",
|
95 | 251 | "nbconvert_exporter": "python",
|
96 |
| - "pygments_lexer": "ipython2", |
97 |
| - "version": "2.7.12" |
| 252 | + "pygments_lexer": "ipython3", |
| 253 | + "version": "3.6.3" |
98 | 254 | }
|
99 | 255 | },
|
100 | 256 | "nbformat": 4,
|
|
0 commit comments