WeberLab-UW · gshann2 · Mar 11, 2023
diff --git a/Selenium_git.ipynb b/Selenium_git.ipynb
@@ -0,0 +1,153 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 19,
+   "id": "aa3b92cf",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Lets us talk to servers on the web\n",
+    "import requests\n",
+    "\n",
+    "# Parsing HTML magic\n",
+    "from bs4 import BeautifulSoup\n",
+    "\n",
+    "# For data manipulation\n",
+    "import pandas as pd\n",
+    "\n",
+    "# Will be helful for converting between timestamps\n",
+    "from datetime import datetime\n",
+    "\n",
+    "# We want to sleep from time-to-time to avoid overwhelming another server\n",
+    "import time\n",
+    "\n",
+    "# We'll need to parse some strings, so we'll write some regular expressions\n",
+    "import re\n",
+    "\n",
+    "from urllib.parse import quote, unquote\n",
+    "import json\n",
+    "import re"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "f5226b24",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Our interface to a real-life web browser... won't import until you install!\n",
+    "import selenium.webdriver\n",
+    "from selenium import webdriver\n",
+    "from selenium.webdriver.common.keys import Keys\n",
+    "from selenium.webdriver.common.by import By"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "id": "89d9dcdd",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#the instance of Safari WebDriver is created.\n",
+    "driver = webdriver.Safari()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "id": "edba6a46",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#The driver.get method will navigate to a search results page.\n",
+    "driver.get(\"https://github.com/search?q=university+of+washington&type=users\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "id": "8d4a3bea",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#Soup time!\n",
+    "raw = driver.page_source.encode('utf-8')\n",
+    "\n",
+    "soup = BeautifulSoup(raw)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 31,
+   "id": "ab9d6457",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#Single out the html block thingy that we need from each search result\n",
+    "users = soup.body.find_all('a', attrs={'class':'color-fg-muted'})\n",
+    "#turn it into a list\n",
+    "user_list = list(users)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 29,
+   "id": "1b943129",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "['geohackweek', 'PythonNut', 'shangzhenyang', 'liyi14', 'nsteinme', 'yizhongw', 'mwillsey', 'alishahusain', 'yihozhang', 'ICESAT-2HackWeek']\n"
+     ]
+    }
+   ],
+   "source": [
+    "#Grab those usernames and put them in a new list (Honestly I don't know why this code works -- I initially wrote\n",
+    "#it to just turn the soup elements into strings)\n",
+    "usernames = []\n",
+    "for user in user_list:\n",
+    "    user = str(user.text)\n",
+    "    usernames.append(user)\n",
+    "    \n",
+    "print(usernames)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "id": "1ed31e21",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#all done!\n",
+    "driver.quit()"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.9.13"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}