diff --git a/Selenium_git.ipynb b/Selenium_git.ipynb new file mode 100644 index 0000000..c8b1a21 --- /dev/null +++ b/Selenium_git.ipynb @@ -0,0 +1,153 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 19, + "id": "aa3b92cf", + "metadata": {}, + "outputs": [], + "source": [ + "# Lets us talk to servers on the web\n", + "import requests\n", + "\n", + "# Parsing HTML magic\n", + "from bs4 import BeautifulSoup\n", + "\n", + "# For data manipulation\n", + "import pandas as pd\n", + "\n", + "# Will be helful for converting between timestamps\n", + "from datetime import datetime\n", + "\n", + "# We want to sleep from time-to-time to avoid overwhelming another server\n", + "import time\n", + "\n", + "# We'll need to parse some strings, so we'll write some regular expressions\n", + "import re\n", + "\n", + "from urllib.parse import quote, unquote\n", + "import json\n", + "import re" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "f5226b24", + "metadata": {}, + "outputs": [], + "source": [ + "# Our interface to a real-life web browser... won't import until you install!\n", + "import selenium.webdriver\n", + "from selenium import webdriver\n", + "from selenium.webdriver.common.keys import Keys\n", + "from selenium.webdriver.common.by import By" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "89d9dcdd", + "metadata": {}, + "outputs": [], + "source": [ + "#the instance of Safari WebDriver is created.\n", + "driver = webdriver.Safari()" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "edba6a46", + "metadata": {}, + "outputs": [], + "source": [ + "#The driver.get method will navigate to a search results page.\n", + "driver.get(\"https://github.com/search?q=university+of+washington&type=users\")" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "8d4a3bea", + "metadata": {}, + "outputs": [], + "source": [ + "#Soup time!\n", + "raw = driver.page_source.encode('utf-8')\n", + "\n", + "soup = BeautifulSoup(raw)" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "id": "ab9d6457", + "metadata": {}, + "outputs": [], + "source": [ + "#Single out the html block thingy that we need from each search result\n", + "users = soup.body.find_all('a', attrs={'class':'color-fg-muted'})\n", + "#turn it into a list\n", + "user_list = list(users)" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "id": "1b943129", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['geohackweek', 'PythonNut', 'shangzhenyang', 'liyi14', 'nsteinme', 'yizhongw', 'mwillsey', 'alishahusain', 'yihozhang', 'ICESAT-2HackWeek']\n" + ] + } + ], + "source": [ + "#Grab those usernames and put them in a new list (Honestly I don't know why this code works -- I initially wrote\n", + "#it to just turn the soup elements into strings)\n", + "usernames = []\n", + "for user in user_list:\n", + " user = str(user.text)\n", + " usernames.append(user)\n", + " \n", + "print(usernames)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "1ed31e21", + "metadata": {}, + "outputs": [], + "source": [ + "#all done!\n", + "driver.quit()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.13" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}