Skip to content

Commit

Permalink
Fix Twitter URL information grabbing. Related to vitorfs#241 and EncontrosDigitais#26
Browse files Browse the repository at this point in the history

Twitter's response has no <head> or <title> tags
  • Loading branch information
Vamoss committed Jul 9, 2021
1 parent 5f6d84c commit 212d268
Showing 1 changed file with 11 additions and 4 deletions.
15 changes: 11 additions & 4 deletions bootcamp/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,11 @@ def get_metadata(url):
url = f"http://{parsed_url.path}"

try:
response = requests.get(url, timeout=0.9)
headers = {
"user-agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.103 Safari/537.36",
"content-type":"text"
}
response = requests.get(url, timeout=0.9, headers=headers)
response.raise_for_status()

except requests.exceptions.ConnectionError:
Expand All @@ -150,12 +154,15 @@ def get_metadata(url):
)

soup = bs4.BeautifulSoup(response.content)
ogs = soup.html.head.find_all(property=re.compile(r"^og"))
data = {og.get("property")[3:]: og.get("content") for og in ogs}
data = {}
if soup.html.head:
ogs = soup.html.head.find_all(property=re.compile(r"^og"))
data = {og.get("property")[3:]: og.get("content") for og in ogs}

if not data.get("url"):
data["url"] = url

if not data.get("title"):
if not data.get("title") and soup.html.title:
data["title"] = soup.html.title.text

if not data.get("image"):
Expand Down

0 comments on commit 212d268

Please sign in to comment.