Skip to content

Commit 6aecc3e

Browse files
committed
ran clean data notebook
1 parent e7e02e6 commit 6aecc3e

1 file changed

Lines changed: 43 additions & 43 deletions

File tree

redditStreaming/src/notebooks/read_clean_delta_s3.ipynb

Lines changed: 43 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -161,13 +161,13 @@
161161
"output_type": "stream",
162162
"text": [
163163
"Reading from: s3://reddit-streaming-stevenhurwitt-2/technology_clean\n",
164-
"✓ Successfully read technology_clean: 3,354 records\n",
164+
"✓ Successfully read technology_clean: 3,393 records\n",
165165
"Reading from: s3://reddit-streaming-stevenhurwitt-2/ProgrammerHumor_clean\n",
166-
"✓ Successfully read ProgrammerHumor_clean: 2,350 records\n",
166+
"✓ Successfully read ProgrammerHumor_clean: 2,367 records\n",
167167
"Reading from: s3://reddit-streaming-stevenhurwitt-2/news_clean\n",
168-
"✓ Successfully read news_clean: 2,186 records\n",
168+
"✓ Successfully read news_clean: 2,229 records\n",
169169
"Reading from: s3://reddit-streaming-stevenhurwitt-2/worldnews_clean\n",
170-
"✓ Successfully read worldnews_clean: 4,990 records\n",
170+
"✓ Successfully read worldnews_clean: 5,110 records\n",
171171
"\n",
172172
"Total tables loaded: 4\n"
173173
]
@@ -439,15 +439,15 @@
439439
},
440440
{
441441
"cell_type": "code",
442-
"execution_count": 5,
442+
"execution_count": 4,
443443
"id": "6f6a236f",
444444
"metadata": {},
445445
"outputs": [
446446
{
447447
"name": "stdout",
448448
"output_type": "stream",
449449
"text": [
450-
"Total records across all subreddits: 12,880\n",
450+
"Total records across all subreddits: 13,099\n",
451451
"\n",
452452
"Most recent posts across all subreddits:\n",
453453
"shape: (50, 6)\n",
@@ -456,27 +456,27 @@
456456
"│ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n",
457457
"│ str ┆ datetime[μs] ┆ str ┆ i32 ┆ str ┆ i32 │\n",
458458
"╞════════════╪═════════════════╪══════════════════════╪═══════╪═════════════════════╪══════════════╡\n",
459-
"news ┆ +48007-03-05How the world has ┆ 4 ┆ GroundbreakingArm17 ┆ 0 │\n",
460-
"│ ┆ 21:58:03.044864 ┆ reacted to U… ┆ ┆ 3 ┆ │\n",
461-
"│ worldnews ┆ +48003-02-13How the world has ┆ 1 ┆ GroundbreakingArm17 ┆ 1\n",
462-
"│ ┆ 10:24:43.044864 ┆ reacted to U… ┆ ┆ 3 ┆ │\n",
463-
"worldnews ┆ +47966-08-13South Korea ┆ 1 ┆ Little-Chemical5006 ┆ 1\n",
464-
"│ ┆ 02:24:43.044864 ┆ president calls on… ┆ ┆ ┆ │\n",
465-
"│ worldnews ┆ +47962-07-23Blasts continue in ┆ 1 ┆ Cybertronian1512 ┆ 1 │\n",
466-
"│ ┆ 14:51:23.044864 ┆ Kabul amid … ┆ ┆ ┆ │\n",
467-
"│ worldnews ┆ +47958-07-03State of Palestine1PestoBolloElemento0\n",
468-
"│ ┆ 03:18:03.044864 ┆ strongly co… ┆ ┆ ┆ │\n",
459+
"worldnews ┆ +50757-04-02Tehran set to launch ┆ 4 ┆ TheNational_News ┆ 0 │\n",
460+
"│ ┆ 08:38:03.044864 ┆ 'fire and ┆ ┆ ┆ │\n",
461+
"│ worldnews ┆ +50753-03-12Cyber front opens ┆ 1 ┆ Dex_Stlap ┆ 0\n",
462+
"│ ┆ 21:04:43.044864 ┆ after US-Isr… ┆ ┆ ┆ │\n",
463+
"news ┆ +50736-12-20Israel and Hezbollah ┆ 1 ┆ Frosty-Ad-4538 ┆ 0\n",
464+
"│ ┆ 22:51:23.044864 ┆ in Lebano… ┆ ┆ ┆ │\n",
465+
"│ worldnews ┆ +50728-11-09Former Iranian ┆ 1 ┆ dkmegg22 ┆ 1 │\n",
466+
"│ ┆ 23:44:43.044864 ┆ President Ahmad… ┆ ┆ ┆ │\n",
467+
"│ worldnews ┆ +50720-09-30Watch: Israel uses2WhoAreYouTalkinTwo1\n",
468+
"│ ┆ 00:38:03.044864 ┆ Iron Beam t… ┆ ┆ ┆ │\n",
469469
"│ … ┆ … ┆ … ┆ … ┆ … ┆ … │\n",
470-
"│ technology ┆ +47406-11-12Warning: Facebook ┆ 1gdelacalle ┆ 0 │\n",
471-
"│ ┆ 15:44:43.044864 ┆ Ads for Free… ┆ ┆ ┆ │\n",
472-
"│ technology ┆ +47394-09-11WA lawmakers ┆ 1 ┆ esporx ┆ 0\n",
473-
"│ ┆ 05:04:43.044864 ┆ advancing bill re ┆ ┆ │\n",
474-
"│ worldnews ┆ +47394-09-11Australia 'did not402Expensive-Horse5538 ┆ 113\n",
475-
"│ ┆ 05:04:43.044864 ┆ participate… ┆ ┆ ┆ │\n",
476-
"technology ┆ +47354-02-18DeepSeek to release ┆ 1 ┆ kharkovchanin ┆ 0 │\n",
477-
"│ ┆ 09:31:23.044864 ┆ long-await… ┆ ┆ ┆ │\n",
478-
"news ┆ +47354-02-18WA lawmakers ┆ 0netizenbane ┆ 0 │\n",
479-
"│ ┆ 09:31:23.044864 ┆ advancing bill re… ┆ ┆ ┆ │\n",
470+
"│ technology ┆ +49978-06-20Every Car Made After ┆ 2DonkeyFuel ┆ 0 │\n",
471+
"│ ┆ 21:58:03.044864 ┆ 2008 Has … ┆ ┆ ┆ │\n",
472+
"│ technology ┆ +49974-05-31Given Open AI’s most ┆ 1 ┆ Downtown-Elevator96 ┆ 1\n",
473+
"│ ┆ 10:24:43.044864 ┆ recent ro ┆ 8 ┆ │\n",
474+
"│ worldnews ┆ +49954-02-18Oil price expected157Kagedeah ┆ 39 \n",
475+
"│ ┆ 00:38:03.044864 ┆ to surge af… ┆ ┆ ┆ │\n",
476+
"news ┆ +49950-01-28We will also bring ┆ 2 ┆ kharkovchanin ┆ 0 │\n",
477+
"│ ┆ 13:04:43.044864 ┆ experts fro… ┆ ┆ ┆ │\n",
478+
"worldnews ┆ +49950-01-28We will also bring ┆ 3kharkovchanin ┆ 0 │\n",
479+
"│ ┆ 13:04:43.044864 ┆ experts fro… ┆ ┆ ┆ │\n",
480480
"└────────────┴─────────────────┴──────────────────────┴───────┴─────────────────────┴──────────────┘\n"
481481
]
482482
}
@@ -528,7 +528,7 @@
528528
},
529529
{
530530
"cell_type": "code",
531-
"execution_count": 6,
531+
"execution_count": 5,
532532
"id": "b79523a3",
533533
"metadata": {},
534534
"outputs": [
@@ -539,10 +539,10 @@
539539
"Summary by subreddit:\n",
540540
"Subreddit Records Avg Score Avg Comments\n",
541541
"=================================================================\n",
542-
"technology 3,354 2.42 0.37\n",
543-
"ProgrammerHumor 2,350 4.36 0.37\n",
544-
"news 2,186 55.11 5.11\n",
545-
"worldnews 4,990 5.70 1.15\n"
542+
"technology 3,393 2.40 0.37\n",
543+
"ProgrammerHumor 2,367 4.41 0.38\n",
544+
"news 2,229 54.39 5.08\n",
545+
"worldnews 5,110 6.03 1.19\n"
546546
]
547547
}
548548
],
@@ -581,7 +581,7 @@
581581
},
582582
{
583583
"cell_type": "code",
584-
"execution_count": 7,
584+
"execution_count": 6,
585585
"id": "df825cf6",
586586
"metadata": {},
587587
"outputs": [
@@ -595,7 +595,7 @@
595595
" white-space: pre-wrap;\n",
596596
"}\n",
597597
"</style>\n",
598-
"<small>shape: (20, 3)</small><table border=\"1\" class=\"dataframe\"><thead><tr><th>subreddit</th><th>date</th><th>post_count</th></tr><tr><td>str</td><td>date</td><td>u32</td></tr></thead><tbody><tr><td>&quot;news&quot;</td><td>2026-03-01</td><td>15</td></tr><tr><td>&quot;technology&quot;</td><td>2026-03-01</td><td>3</td></tr><tr><td>&quot;ProgrammerHumor&quot;</td><td>2026-03-01</td><td>3</td></tr><tr><td>&quot;worldnews&quot;</td><td>2026-03-01</td><td>20</td></tr><tr><td>&quot;technology&quot;</td><td>2026-02-28</td><td>59</td></tr><tr><td>&hellip;</td><td>&hellip;</td><td>&hellip;</td></tr><tr><td>&quot;ProgrammerHumor&quot;</td><td>2026-02-26</td><td>31</td></tr><tr><td>&quot;news&quot;</td><td>2026-02-25</td><td>32</td></tr><tr><td>&quot;technology&quot;</td><td>2026-02-25</td><td>63</td></tr><tr><td>&quot;worldnews&quot;</td><td>2026-02-25</td><td>95</td></tr><tr><td>&quot;ProgrammerHumor&quot;</td><td>2026-02-25</td><td>36</td></tr></tbody></table></div>"
598+
"<small>shape: (20, 3)</small><table border=\"1\" class=\"dataframe\"><thead><tr><th>subreddit</th><th>date</th><th>post_count</th></tr><tr><td>str</td><td>date</td><td>u32</td></tr></thead><tbody><tr><td>&quot;worldnews&quot;</td><td>2026-03-02</td><td>22</td></tr><tr><td>&quot;news&quot;</td><td>2026-03-02</td><td>3</td></tr><tr><td>&quot;technology&quot;</td><td>2026-03-02</td><td>4</td></tr><tr><td>&quot;ProgrammerHumor&quot;</td><td>2026-03-02</td><td>2</td></tr><tr><td>&quot;worldnews&quot;</td><td>2026-03-01</td><td>118</td></tr><tr><td>&hellip;</td><td>&hellip;</td><td>&hellip;</td></tr><tr><td>&quot;ProgrammerHumor&quot;</td><td>2026-02-27</td><td>40</td></tr><tr><td>&quot;news&quot;</td><td>2026-02-26</td><td>39</td></tr><tr><td>&quot;worldnews&quot;</td><td>2026-02-26</td><td>101</td></tr><tr><td>&quot;technology&quot;</td><td>2026-02-26</td><td>70</td></tr><tr><td>&quot;ProgrammerHumor&quot;</td><td>2026-02-26</td><td>31</td></tr></tbody></table></div>"
599599
],
600600
"text/plain": [
601601
"shape: (20, 3)\n",
@@ -604,21 +604,21 @@
604604
"│ --- ┆ --- ┆ --- │\n",
605605
"│ str ┆ date ┆ u32 │\n",
606606
"╞═════════════════╪════════════╪════════════╡\n",
607-
"news ┆ 2026-03-0115\n",
608-
"technology ┆ 2026-03-01 ┆ 3 │\n",
609-
"ProgrammerHumor ┆ 2026-03-013\n",
610-
"worldnews ┆ 2026-03-0120\n",
611-
"technology ┆ 2026-02-2859 \n",
607+
"worldnews ┆ 2026-03-0222\n",
608+
"news ┆ 2026-03-02 ┆ 3 │\n",
609+
"technology ┆ 2026-03-024\n",
610+
"ProgrammerHumor ┆ 2026-03-022 \n",
611+
"worldnews ┆ 2026-03-01118\n",
612612
"│ … ┆ … ┆ … │\n",
613+
"│ ProgrammerHumor ┆ 2026-02-27 ┆ 40 │\n",
614+
"│ news ┆ 2026-02-26 ┆ 39 │\n",
615+
"│ worldnews ┆ 2026-02-26 ┆ 101 │\n",
616+
"│ technology ┆ 2026-02-26 ┆ 70 │\n",
613617
"│ ProgrammerHumor ┆ 2026-02-26 ┆ 31 │\n",
614-
"│ news ┆ 2026-02-25 ┆ 32 │\n",
615-
"│ technology ┆ 2026-02-25 ┆ 63 │\n",
616-
"│ worldnews ┆ 2026-02-25 ┆ 95 │\n",
617-
"│ ProgrammerHumor ┆ 2026-02-25 ┆ 36 │\n",
618618
"└─────────────────┴────────────┴────────────┘"
619619
]
620620
},
621-
"execution_count": 7,
621+
"execution_count": 6,
622622
"metadata": {},
623623
"output_type": "execute_result"
624624
}

0 commit comments

Comments
 (0)