diff --git a/semester-3/big-data/20231123-selenium/.idea/.gitignore b/semester-3/big-data/20231123-selenium/.idea/.gitignore new file mode 100644 index 0000000..13566b8 --- /dev/null +++ b/semester-3/big-data/20231123-selenium/.idea/.gitignore @@ -0,0 +1,8 @@ +# Default ignored files +/shelf/ +/workspace.xml +# Editor-based HTTP Client requests +/httpRequests/ +# Datasource local storage ignored files +/dataSources/ +/dataSources.local.xml diff --git a/semester-3/big-data/20231123-selenium/.idea/20231123-selenium.iml b/semester-3/big-data/20231123-selenium/.idea/20231123-selenium.iml new file mode 100644 index 0000000..94de8f6 --- /dev/null +++ b/semester-3/big-data/20231123-selenium/.idea/20231123-selenium.iml @@ -0,0 +1,8 @@ + + + + + + + + \ No newline at end of file diff --git a/semester-3/big-data/20231123-selenium/.idea/inspectionProfiles/profiles_settings.xml b/semester-3/big-data/20231123-selenium/.idea/inspectionProfiles/profiles_settings.xml new file mode 100644 index 0000000..105ce2d --- /dev/null +++ b/semester-3/big-data/20231123-selenium/.idea/inspectionProfiles/profiles_settings.xml @@ -0,0 +1,6 @@ + + + + \ No newline at end of file diff --git a/semester-3/big-data/20231123-selenium/.idea/misc.xml b/semester-3/big-data/20231123-selenium/.idea/misc.xml new file mode 100644 index 0000000..79d85ca --- /dev/null +++ b/semester-3/big-data/20231123-selenium/.idea/misc.xml @@ -0,0 +1,7 @@ + + + + + + \ No newline at end of file diff --git a/semester-3/big-data/20231123-selenium/.idea/modules.xml b/semester-3/big-data/20231123-selenium/.idea/modules.xml new file mode 100644 index 0000000..1def397 --- /dev/null +++ b/semester-3/big-data/20231123-selenium/.idea/modules.xml @@ -0,0 +1,8 @@ + + + + + + + + \ No newline at end of file diff --git a/semester-3/big-data/20231123-selenium/.idea/vcs.xml b/semester-3/big-data/20231123-selenium/.idea/vcs.xml new file mode 100644 index 0000000..c2365ab --- /dev/null +++ 
# (This line opened with the tail of the preceding diff entry for
# .idea/vcs.xml, an IDE settings file that carries no script content.)

# ---------------------------------------------------------------------------
# semester-3/big-data/20231123-selenium/main.py
#
# Search Google for "Selenium" and open every result link in its own tab.
# ---------------------------------------------------------------------------
import random

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from webdriver_manager.chrome import ChromeDriverManager

options = Options()
# NOTE(review): "detach" is the ChromeDriver option meant to leave the browser
# open once the script ends - confirm against the ChromeDriver docs.
options.add_experimental_option("detach", True)
options.page_load_strategy = 'eager'

driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)
driver.get("https://www.google.com")
# Applies to the element lookups below.
driver.implicitly_wait(5)

# Remember the search window so we can come back to it after opening each tab.
original_window = driver.current_window_handle

# Select the search bar by its aria-label '搜尋'.
# NOTE(review): this label is locale-specific; the selector presumably only
# matches when Google serves the Traditional Chinese UI - verify.
search_bar_el = driver.find_element(By.CSS_SELECTOR, "textarea[aria-label='搜尋']")
search_bar_el.send_keys("Selenium")
search_bar_el.submit()

# Open a new tab for every result.
result_els = driver.find_elements(By.CSS_SELECTOR, "[jsname=UWckNb]")
random.shuffle(result_els)  # prevent being detected
result_tabs = []  # window handles of the tabs opened below
for result_el in result_els:
    result_title = result_el.find_element(By.CSS_SELECTOR, "h3").text
    print("RESULT", result_title)

    # Read the link while the results window is still the active context.
    result_url = result_el.get_attribute("href")

    # new_window() expects a type hint ("tab" or "window"), not a page title.
    driver.switch_to.new_window("tab")
    driver.get(result_url)
    result_tabs.append(driver.current_window_handle)

    # Go back to the results window before touching the next result element.
    driver.switch_to.window(original_window)

# ---------------------------------------------------------------------------
# semester-3/big-data/20231123-selenium/practice-1.py
# (opening imports of that script; its remaining lines follow this section)
# ---------------------------------------------------------------------------
import argparse
import logging
import time

import loguru
from loguru import logger
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from webdriver_manager.chrome import ChromeDriverManager

# Command-line interface:
#   --headless  run Chrome with the '--headless' argument
#   --wait N    seconds to sleep before quitting (default: 5)
parser = argparse.ArgumentParser()
parser.add_argument("--headless", action="store_true")
parser.add_argument("--wait", type=int, default=5)
args = parser.parse_args()

options = Options()
logger.debug("init: setting eager mode")
options.page_load_strategy = 'eager'

# background
if args.headless:
    logger.info("arg: headless mode enabled")
    options.add_argument('--headless')

logger.debug("init: setting webdriver")
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)

# Everything after the driver exists runs under try/finally so the browser is
# always shut down, even when one of the steps below raises.
try:
    logger.debug("init: setting implicit wait to 5 seconds")
    driver.implicitly_wait(5)

    logger.debug("op: open page")
    driver.get("https://ic.nkust.edu.tw")

    # Dump basic session/page information.
    logger.info("info: name: {}", driver.name)
    logger.info("info: title: {}", driver.title)
    logger.info("info: current_url: {}", driver.current_url)
    logger.info("info: session_id: {}", driver.session_id)
    logger.info("info: capabilities: {}", driver.capabilities)
    # Only the first 400 characters of the page source, with newlines removed.
    logger.info("info: src: {}", driver.page_source.replace('\n', '')[:400])

    logger.debug("op: move window to (10, 10)")
    driver.set_window_position(10, 10)

    logger.debug("op: resize window to 500x500")
    driver.set_window_size(500, 500)

    logger.debug("op: wait for {} seconds", args.wait)
    time.sleep(args.wait)
finally:
    # Use the loguru logger like every other message in this script; the
    # stdlib root logger is never configured here, so logging.debug() would
    # drop this message.
    logger.debug("op: bye!")
    driver.quit()