Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
153 changes: 153 additions & 0 deletions Selenium_git.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,153 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 19,
"id": "aa3b92cf",
"metadata": {},
"outputs": [],
"source": [
"# Lets us talk to servers on the web\n",
"import requests\n",
"\n",
"# Parsing HTML magic\n",
"from bs4 import BeautifulSoup\n",
"\n",
"# For data manipulation\n",
"import pandas as pd\n",
"\n",
"# Will be helful for converting between timestamps\n",
"from datetime import datetime\n",
"\n",
"# We want to sleep from time-to-time to avoid overwhelming another server\n",
"import time\n",
"\n",
"# We'll need to parse some strings, so we'll write some regular expressions\n",
"import re\n",
"\n",
"from urllib.parse import quote, unquote\n",
"import json\n",
"import re"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "f5226b24",
"metadata": {},
"outputs": [],
"source": [
"# Our interface to a real-life web browser... won't import until you install!\n",
"import selenium.webdriver\n",
"from selenium import webdriver\n",
"from selenium.webdriver.common.keys import Keys\n",
"from selenium.webdriver.common.by import By"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "89d9dcdd",
"metadata": {},
"outputs": [],
"source": [
"#the instance of Safari WebDriver is created.\n",
"driver = webdriver.Safari()"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "edba6a46",
"metadata": {},
"outputs": [],
"source": [
"#The driver.get method will navigate to a search results page.\n",
"driver.get(\"https://github.com/search?q=university+of+washington&type=users\")"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "8d4a3bea",
"metadata": {},
"outputs": [],
"source": [
"#Soup time!\n",
"raw = driver.page_source.encode('utf-8')\n",
"\n",
"soup = BeautifulSoup(raw)"
]
},
{
"cell_type": "code",
"execution_count": 31,
"id": "ab9d6457",
"metadata": {},
"outputs": [],
"source": [
"#Single out the html block thingy that we need from each search result\n",
"users = soup.body.find_all('a', attrs={'class':'color-fg-muted'})\n",
"#turn it into a list\n",
"user_list = list(users)"
]
},
{
"cell_type": "code",
"execution_count": 29,
"id": "1b943129",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"['geohackweek', 'PythonNut', 'shangzhenyang', 'liyi14', 'nsteinme', 'yizhongw', 'mwillsey', 'alishahusain', 'yihozhang', 'ICESAT-2HackWeek']\n"
]
}
],
"source": [
"#Grab those usernames and put them in a new list (Honestly I don't know why this code works -- I initially wrote\n",
"#it to just turn the soup elements into strings)\n",
"usernames = []\n",
"for user in user_list:\n",
" user = str(user.text)\n",
" usernames.append(user)\n",
" \n",
"print(usernames)"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "1ed31e21",
"metadata": {},
"outputs": [],
"source": [
"#all done!\n",
"driver.quit()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.13"
}
},
"nbformat": 4,
"nbformat_minor": 5
}