Skip to content

Commit

Permalink
Add python version of the program
Browse files Browse the repository at this point in the history
  • Loading branch information
chesshacker committed Jan 4, 2023
1 parent 8c3e7ac commit 6430029
Show file tree
Hide file tree
Showing 3 changed files with 151 additions and 3 deletions.
18 changes: 15 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -32,9 +32,21 @@ set -o allexport; source secrets.env; set +o allexport
-before 2019-12-28T00:00:00Z
```

Note that this initial version runs each request to YouTube Data API in series,
and this example will run several hundred queries, which could take about a
minute or two to complete.
Note that this program runs each request to the YouTube Data API in series, and
runs two queries for every 50 videos. If your search matches a lot of videos, it
could take a minute or two to complete.

I wrote a Python version of this program. You can run:

```
pip install -r requirements.txt
set -o allexport; source secrets.env; set +o allexport
python youtube-video-list.py \
--channel UCdoadna9HFHsxXWhafhNvKw \
--after 2019-12-01T00:00:00Z \
--before 2019-12-28T00:00:00Z
```


[YouTube Data API Overview]: https://developers.google.com/youtube/v3/getting-started
[AWS Events]: https://www.youtube.com/channel/UCdoadna9HFHsxXWhafhNvKw/videos
Expand Down
23 changes: 23 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
cachetools==5.2.0
certifi==2022.12.7
charset-normalizer==2.1.1
google-api-core==2.11.0
google-api-python-client==2.70.0
google-auth==2.15.0
google-auth-httplib2==0.1.0
google-auth-oauthlib==0.8.0
googleapis-common-protos==1.57.0
httplib2==0.21.0
idna==3.4
more-itertools==9.0.0
oauthlib==3.2.2
protobuf==4.21.12
pyasn1==0.4.8
pyasn1-modules==0.2.8
pyparsing==3.0.9
requests==2.28.1
requests-oauthlib==1.3.1
rsa==4.9
six==1.16.0
uritemplate==4.1.1
urllib3==1.26.13
113 changes: 113 additions & 0 deletions youtube-video-list.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
import csv
import os
import argparse
import sys

from more_itertools import grouper
from googleapiclient.discovery import build


# Maximum number of results the YouTube Data API returns per request.
MAX_RESULTS = 50


def get_video_ids(youtube, channel_id, published_after=None, published_before=None):
    """
    Collect the IDs of all videos on a channel, optionally bounded by date.

    Args:
        youtube: A YouTube Data API client resource.
        channel_id: ID of the channel whose videos should be listed.
        published_after: Optional RFC 3339 lower bound on publish date.
        published_before: Optional RFC 3339 upper bound on publish date.

    Returns:
        A list of video ID strings, newest first (API order="date").
    """
    collected = []
    page_token = ""

    # Walk every page of search results; the API hands back a
    # nextPageToken until the final page.
    while True:
        response = youtube.search().list(
            part="id",
            channelId=channel_id,
            type="video",
            order="date",
            publishedAfter=published_after,
            publishedBefore=published_before,
            maxResults=MAX_RESULTS,
            pageToken=page_token
        ).execute()

        for item in response["items"]:
            collected.append(item["id"]["videoId"])

        page_token = response.get("nextPageToken")
        if not page_token:
            return collected


def get_video_data(youtube, video_ids):
    """
    Get the view count, title, and video URL for a list of video IDs.

    Args:
        youtube: A YouTube Data API client resource.
        video_ids: List of video ID strings to look up.

    Returns:
        A list of dicts with "Views", "Title", and "URL" keys, sorted by
        view count in descending order.
    """
    video_data = []

    # Query videos.list in batches of MAX_RESULTS ids (the API limit per
    # call). Plain slicing replaces more_itertools.grouper, which padded the
    # final chunk with None and forced an extra filtering pass.
    for start in range(0, len(video_ids), MAX_RESULTS):
        chunk = video_ids[start:start + MAX_RESULTS]
        videos_list = youtube.videos().list(
            id=",".join(chunk),
            part="snippet,statistics"
        ).execute()

        # Iterate through the video data and add it to the list
        for video in videos_list["items"]:
            video_data.append({
                "Views": video["statistics"]["viewCount"],
                "Title": video["snippet"]["title"],
                "URL": f"https://www.youtube.com/watch?v={video['id']}"
            })

    # Sort by view count (the API returns it as a numeric string) descending.
    video_data.sort(key=lambda x: int(x["Views"]), reverse=True)

    return video_data


def write_csv(video_data):
    """
    Write the video data to STDOUT as CSV.

    Args:
        video_data: List of dicts with "Views", "Title", and "URL" keys.
    """
    # Write directly to sys.stdout rather than using it as a context manager:
    # `with sys.stdout as f:` closes stdout when the block exits, which breaks
    # any later writes and can raise errors at interpreter shutdown.
    writer = csv.DictWriter(sys.stdout, fieldnames=["Views", "Title", "URL"])
    writer.writeheader()
    writer.writerows(video_data)


def get_inputs():
    """
    Get inputs from command line and environment variables.

    Returns:
        A dict with "api_key" (read from the APIKEY environment variable)
        and "channel", "after", "before" (from the command-line flags).
    """
    parser = argparse.ArgumentParser(
        description="Get YouTube video data for a channel between two dates "
                    "and write it out in CSV format.")
    # --channel is required: without it search.list would be called with
    # channelId=None and fail with an unhelpful API error.
    parser.add_argument(
        "--channel", required=True,
        help="The ID of the YouTube channel to search.")
    parser.add_argument(
        "--after", default=None,
        help="The published-after date to search from, in the format YYYY-MM-DDTHH:MM:SSZ.")
    parser.add_argument(
        "--before", default=None,
        help="The published-before date to search up to, in the format YYYY-MM-DDTHH:MM:SSZ.")
    args = parser.parse_args()

    return {
        "api_key": os.environ.get("APIKEY"),
        "channel": args.channel,
        "after": args.after,
        "before": args.before,
    }


def main():
    """Fetch, rank by views, and print video stats for the requested channel."""
    config = get_inputs()
    client = build("youtube", "v3", developerKey=config["api_key"])
    ids = get_video_ids(
        client, config["channel"], config["after"], config["before"])
    write_csv(get_video_data(client, ids))


if __name__ == "__main__":
    main()

0 comments on commit 6430029

Please sign in to comment.