Skip to content

Commit

Permalink
Add python version of the program
Browse files Browse the repository at this point in the history
  • Loading branch information
chesshacker committed Jan 4, 2023
1 parent 8c3e7ac commit 6430029
Show file tree
Hide file tree
Showing 3 changed files with 151 additions and 3 deletions.
18 changes: 15 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -32,9 +32,21 @@ set -o allexport; source secrets.env; set +o allexport
-before 2019-12-28T00:00:00Z
```

Note that this initial version runs each request to YouTube Data API in series,
and this example will run several hundred queries, which could take about a
minute or two to complete.
Note that this program runs each request to the YouTube Data API in series, and
runs two queries for every 50 videos. If your search matches a lot of videos, it
could take a minute or two to complete.

I wrote a Python version of this program. You can run:

```
pip install -r requirements.txt
set -o allexport; source secrets.env; set +o allexport
python youtube-video-list.py \
--channel UCdoadna9HFHsxXWhafhNvKw \
--after 2019-12-01T00:00:00Z \
--before 2019-12-28T00:00:00Z
```


[YouTube Data API Overview]: https://developers.google.com/youtube/v3/getting-started
[AWS Events]: https://www.youtube.com/channel/UCdoadna9HFHsxXWhafhNvKw/videos
Expand Down
23 changes: 23 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
cachetools==5.2.0
certifi==2022.12.7
charset-normalizer==2.1.1
google-api-core==2.11.0
google-api-python-client==2.70.0
google-auth==2.15.0
google-auth-httplib2==0.1.0
google-auth-oauthlib==0.8.0
googleapis-common-protos==1.57.0
httplib2==0.21.0
idna==3.4
more-itertools==9.0.0
oauthlib==3.2.2
protobuf==4.21.12
pyasn1==0.4.8
pyasn1-modules==0.2.8
pyparsing==3.0.9
requests==2.28.1
requests-oauthlib==1.3.1
rsa==4.9
six==1.16.0
uritemplate==4.1.1
urllib3==1.26.13
113 changes: 113 additions & 0 deletions youtube-video-list.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
import csv
import os
import argparse
import sys

from more_itertools import grouper
from googleapiclient.discovery import build


# Maximum number of results the YouTube Data API returns per request.
MAX_RESULTS = 50


def get_video_ids(youtube, channel_id, published_after=None, published_before=None):
    """
    Collect the IDs of all videos on a channel, optionally bounded by date.

    Args:
        youtube: A YouTube Data API client resource.
        channel_id: ID of the channel whose videos should be listed.
        published_after: Optional RFC 3339 lower bound on publish date.
        published_before: Optional RFC 3339 upper bound on publish date.

    Returns:
        A list of video ID strings, newest first (API order="date").
    """
    collected = []
    page_token = ""

    # Walk every page of search results; the API hands back a
    # nextPageToken until the final page.
    while True:
        response = youtube.search().list(
            part="id",
            channelId=channel_id,
            type="video",
            order="date",
            publishedAfter=published_after,
            publishedBefore=published_before,
            maxResults=MAX_RESULTS,
            pageToken=page_token
        ).execute()

        for item in response["items"]:
            collected.append(item["id"]["videoId"])

        page_token = response.get("nextPageToken")
        if not page_token:
            return collected


def get_video_data(youtube, video_ids):
    """
    Get the view count, title, and video URL for a list of video IDs.

    Args:
        youtube: A YouTube Data API client resource.
        video_ids: List of video ID strings to look up.

    Returns:
        A list of dicts with "Views", "Title", and "URL" keys, sorted by
        view count in descending order.
    """
    video_data = []

    # Query videos.list in batches of MAX_RESULTS ids (the API limit per
    # call). Plain slicing replaces more_itertools.grouper, which padded the
    # final chunk with None and forced an extra filtering pass.
    for start in range(0, len(video_ids), MAX_RESULTS):
        chunk = video_ids[start:start + MAX_RESULTS]
        videos_list = youtube.videos().list(
            id=",".join(chunk),
            part="snippet,statistics"
        ).execute()

        # Iterate through the video data and add it to the list
        for video in videos_list["items"]:
            video_data.append({
                "Views": video["statistics"]["viewCount"],
                "Title": video["snippet"]["title"],
                "URL": f"https://www.youtube.com/watch?v={video['id']}"
            })

    # Sort by view count (the API returns it as a numeric string) descending.
    video_data.sort(key=lambda x: int(x["Views"]), reverse=True)

    return video_data


def write_csv(video_data):
    """
    Write the video data to STDOUT as CSV.

    Args:
        video_data: List of dicts with "Views", "Title", and "URL" keys.
    """
    # Write directly to sys.stdout rather than using it as a context manager:
    # `with sys.stdout as f:` closes stdout when the block exits, which breaks
    # any later writes and can raise errors at interpreter shutdown.
    writer = csv.DictWriter(sys.stdout, fieldnames=["Views", "Title", "URL"])
    writer.writeheader()
    writer.writerows(video_data)


def get_inputs():
    """
    Get inputs from command line and environment variables.

    Returns:
        A dict with "api_key" (read from the APIKEY environment variable)
        and "channel", "after", "before" (from the command-line flags).
    """
    parser = argparse.ArgumentParser(
        description="Get YouTube video data for a channel between two dates "
                    "and write it out in CSV format.")
    # --channel is required: without it search.list would be called with
    # channelId=None and fail with an unhelpful API error.
    parser.add_argument(
        "--channel", required=True,
        help="The ID of the YouTube channel to search.")
    parser.add_argument(
        "--after", default=None,
        help="The published-after date to search from, in the format YYYY-MM-DDTHH:MM:SSZ.")
    parser.add_argument(
        "--before", default=None,
        help="The published-before date to search up to, in the format YYYY-MM-DDTHH:MM:SSZ.")
    args = parser.parse_args()

    return {
        "api_key": os.environ.get("APIKEY"),
        "channel": args.channel,
        "after": args.after,
        "before": args.before,
    }


def main():
    """Fetch, rank by views, and print video stats for the requested channel."""
    config = get_inputs()
    client = build("youtube", "v3", developerKey=config["api_key"])
    ids = get_video_ids(
        client, config["channel"], config["after"], config["before"])
    write_csv(get_video_data(client, ids))


if __name__ == "__main__":
    main()

0 comments on commit 6430029

Please sign in to comment.