Skip to content

Commit b7d8320

Browse files
committed
handle github repos
Signed-off-by: Isaac Milarsky <[email protected]>
1 parent 440a41f commit b7d8320

File tree

1 file changed

+48
-9
lines changed

1 file changed

+48
-9
lines changed

codejson_index_generator/parsers.py

Lines changed: 48 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -4,6 +4,7 @@
44
import argparse
55
import os
66
import requests
7+
import re
78

89
from time import sleep, mktime, gmtime, time, localtime
910
from typing import Dict, Optional
@@ -54,7 +55,32 @@ def hit_endpoint(url,token,method='GET'):
5455
return response_json
5556

5657

58+
def get_repo_owner_and_name(repo_http_url):
    """Extract the owner and repository name from a GitHub HTTP(S) URL.

    A trailing ``.git`` suffix and a trailing slash are accepted and are not
    included in the returned repository name.

    Args:
        repo_http_url: GitHub repository url, e.g.
            ``https://github.com/<owner>/<repo>`` or
            ``https://github.com/<owner>/<repo>.git``.

    Returns:
        Tuple of owner and repo. Or a tuple of None and None if the url is
        invalid (including when it is not a string).
    """
    if not isinstance(repo_http_url, str):
        return None, None

    # Regular expression to parse a GitHub URL into two groups
    # The first group contains the owner of the github repo extracted from the url
    # The second group contains the name of the github repo extracted from the url
    # 'But what is a regular expression?' ----> https://docs.python.org/3/howto/regex.html
    #
    # The repo group is lazy (+?) so that an optional, literal ".git" suffix is
    # matched by the non-capturing group instead of being swallowed into the
    # repository name. Spaces are not part of either character class.
    regex = r"https?://github\.com/([A-Za-z0-9_-]+)/([A-Za-z0-9_.-]+?)(?:\.git)?/?$"
    result = re.search(regex, repo_http_url.strip())

    if not result:
        return None, None

    owner, repo = result.groups()

    return owner, repo
5884

5985

6086

@@ -71,19 +97,29 @@ def __init__(self, agency: str, version: str, token: Optional[str] = None,):
7197
"releases": []
7298
}
7399

74-
def get_code_json(self, repo: str) -> Optional[Dict]:
100+
self.token = token
101+
102+
def get_code_json_github(self, repo: str) -> Optional[Dict]:
    """Fetch and parse the ``code.json`` file of a GitHub repository.

    The file is requested from the GitHub contents endpoint for the
    repository; the ``content`` field of the response is base64-decoded and
    parsed as JSON.

    Args:
        repo: HTTP(S) url of the GitHub repository.

    Returns:
        The parsed ``code.json`` as a dict, or None when the url cannot be
        parsed, the request fails, the response carries no ``content``
        field, or the content is not valid base64/JSON.
    """
    try:
        owner, name = get_repo_owner_and_name(repo)
        if not owner or not name:
            # Avoid querying ".../repos/None/None/..." for an unparseable url.
            print(f"GitHub Error: could not parse owner and repository from '{repo}'")
            return None

        code_json_endpoint = f"https://api.github.com/repos/{owner}/{name}/contents/code.json"
        content_dict = hit_endpoint(code_json_endpoint, self.token)
    except Exception as e:
        # Only some exception types carry a ``data`` mapping; fall back to the
        # exception text so the handler itself can never raise AttributeError.
        data = getattr(e, 'data', None)
        if isinstance(data, dict):
            message = data.get('message', 'No message available')
        else:
            message = str(e) or 'No message available'
        print(f"GitHub Error: {message}")
        return None

    if not isinstance(content_dict, dict) or 'content' not in content_dict:
        # NOTE(review): presumably an API error payload (e.g. file not found)
        # with a 'message' key -- confirm against hit_endpoint's behaviour.
        message = 'No message available'
        if isinstance(content_dict, dict):
            message = content_dict.get('message', message)
        print(f"GitHub Error: {message}")
        return None

    try:
        decoded_content = base64.b64decode(content_dict['content'])
        return json.loads(decoded_content)
    except (json.JSONDecodeError, ValueError, TypeError) as e:
        print(f"JSON Error: {str(e)}")
        return None
117+
118+
def get_code_json(self, repo: str) -> Optional[Dict]:
    """Return the parsed ``code.json`` for a repository url.

    Only urls containing the substring ``github`` are handled; they are
    delegated to ``get_code_json_github``. Any other url yields None.

    Args:
        repo: Url of the repository.

    Returns:
        Whatever ``get_code_json_github`` returns for GitHub urls,
        otherwise None.
    """
    # Guard clause: nothing to do for hosts other than GitHub.
    if 'github' not in repo:
        return None

    return self.get_code_json_github(repo)
87123

88124
def save_code_json(self, repo: str, output_path: str) -> Optional[str]:
89125

@@ -107,16 +143,19 @@ def update_index(self, index: Dict, code_json: Dict, org_name: str, repo_name: s
107143

108144
index['releases'].append(baseline)
109145

110-
def get_org_repos(self, org_name: str) -> list[Organization]:
146+
def get_org_repos(self, org_name: str) -> list[Dict]:
    """List the repositories of a GitHub organization.

    The organization repositories endpoint is paginated, so pages are
    requested one after another until a short or empty page is returned.
    Exceptions raised by ``hit_endpoint`` propagate to the caller.

    Args:
        org_name: Name (login) of the GitHub organization.

    Returns:
        A list with one dict per repository, as returned by the API.
    """
    org_endpoint = f"https://api.github.com/orgs/{org_name}/repos"
    print(f"\nProcessing organization: {org_name}")

    per_page = 100  # largest page size the endpoint accepts
    repo_list = []
    page = 1
    while True:
        batch = hit_endpoint(f"{org_endpoint}?per_page={per_page}&page={page}", self.token)

        if not isinstance(batch, list):
            # NOTE(review): presumably an API error payload rather than a
            # repository listing -- confirm against hit_endpoint's behaviour.
            print(f"Unexpected response while listing repositories for {org_name}: {batch}")
            break

        repo_list.extend(batch)

        # A page that is not full is the last one.
        if len(batch) < per_page:
            break
        page += 1

    total_repos = len(repo_list)
    print(f"Found {total_repos} public repositories")

    return repo_list
121160

122161
def save_organization_files(self, org_name: str, codeJSONPath) -> None:

0 commit comments

Comments
 (0)