Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
bbpb >= 1.4.1
beautifulsoup4 >= 4
dnslib < 1
netaddr >= 1.3.0
networkx == 3.*
Expand Down
55 changes: 53 additions & 2 deletions unfurl/parsers/parse_shortlink.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@
import json
import os

from bs4 import BeautifulSoup


shortlink_edge = {
'color': {
Expand All @@ -39,6 +41,22 @@ def expand_bitly_url(bitlink_id, api_key):
else:
return {}

def parse_linkedin_slink_url(shortcode):
    """Expand a LinkedIn shortlink by scraping the target from its slink page.

    Fetches ``https://www.linkedin.com/slink?code=<shortcode>``, parses the
    returned HTML, and reads the ``href`` of the first element matching the
    CSS selector ``main a.artdeco-button``.

    Args:
        shortcode: The LinkedIn shortlink code (the value of the ``code``
            query parameter, or the path of a ``lnkd.in`` URL without the
            leading slash).

    Returns:
        The destination URL as a string, or an empty dict when the page has
        no matching link or the link has no (non-empty) ``href``. Callers
        only test the result for truthiness.
    """
    r = requests.get(url=f'https://www.linkedin.com/slink?code={shortcode}')
    soup = BeautifulSoup(r.content, 'html.parser')
    link = soup.select_one("main a.artdeco-button")

    # select_one() returns None when nothing matches (e.g. an error page or
    # changed markup); treat that as "could not expand" rather than crashing
    # with an AttributeError on None.
    if link is None:
        return {}

    href = link.get('href')
    if href:
        return href
    return {}


def expand_vdg_url(shortcode):
    """Expand a v.gd shortlink using the v.gd lookup API.

    Args:
        shortcode: The v.gd short URL identifier to look up; it is sent as
            the ``shorturl`` query parameter.

    Returns:
        The value of the ``url`` field of the JSON response (``None`` if the
        field is absent), or an empty dict when the request does not return
        HTTP 200 or the body is not a JSON object. Callers only test the
        result for truthiness.
    """
    # Ref: https://v.gd/apilookupreference.php
    r = requests.get(url='https://v.gd/forward.php', params={'shorturl': shortcode, 'format': 'json'})
    if r.status_code != 200:
        return {}

    # A 200 response is not guaranteed to carry valid JSON (e.g. an HTML
    # error or rate-limit page); requests raises a ValueError subclass then.
    try:
        payload = r.json()
    except ValueError:
        return {}

    # Only a JSON object can hold the 'url' field; anything else is a miss.
    if not isinstance(payload, dict):
        return {}
    return payload.get('url')


def expand_url_via_redirect_header(base_url, shortcode):
r = requests.get(f'{base_url}{shortcode.rstrip("/")}', allow_redirects=False)
Expand All @@ -60,7 +78,7 @@ def run(unfurl, node):
# this works.
if node.data_type == 'url.query.pair' and node.key == 'code':
if 'linkedin.com' in preceding_domain:
expanded_url = expand_url_via_redirect_header('https://www.linkedin.com/slink?code=', node.value)
expanded_url = parse_linkedin_slink_url(node.value)
if expanded_url:
unfurl.add_to_queue(
data_type='url', key=None, value=expanded_url,
Expand All @@ -84,6 +102,25 @@ def run(unfurl, node):
if node.data_type != 'url.path':
return

if 'lnkd.in' == preceding_domain:
expanded_url = parse_linkedin_slink_url(node.value[1:])
if expanded_url:
unfurl.add_to_queue(
data_type='url', key=None, value=expanded_url,
label=f'Expanded URL: {expanded_url}',
hover='Expanded URL, retrieved from linkedin.com via redirect page',
parent_id=node.node_id, incoming_edge_config=shortlink_edge)
return

if 'v.gd' == preceding_domain:
expanded_url = expand_vdg_url(node.value[1:])
if expanded_url:
unfurl.add_to_queue(
data_type='url', key=None, value=expanded_url,
label=f'Expanded URL: {expanded_url}',
hover='Expanded URL, retrieved from v.gd via their API',
parent_id=node.node_id, incoming_edge_config=shortlink_edge)

bitly_domains = ['bit.ly', 'bitly.com', 'j.mp']
if any(bitly_domain in unfurl.find_preceding_domain(node) for bitly_domain in bitly_domains):
expanded_info = expand_bitly_url(node.value[1:], unfurl.api_keys.get('bitly', os.environ.get('bitly')))
Expand Down Expand Up @@ -127,11 +164,13 @@ def run(unfurl, node):
{'domain': 'ift.tt', 'base_url': 'https://ift.tt/'},
{'domain': 'is.gd', 'base_url': 'https://is.gd/'},
{'domain': 'lc.chat', 'base_url': 'https://lc.chat/'},
{'domain': 'lnkd.in', 'base_url': 'https://www.linkedin.com/slink?code='},
{'domain': 'nyti.ms', 'base_url': 'https://nyti.ms/'},
{'domain': 'okt.to', 'base_url': 'https://okt.to/'},
{'domain': 'ow.ly', 'base_url': 'http://ow.ly/'},
{'domain': 'reut.rs', 'base_url': 'https://reut.rs/'},
{'domain': 'rb.gy', 'base_url': 'https://rb.gy/'},
{'domain': 'sansurl.com', 'base_url': 'https://sansurl.com/'},
{'domain': 's.id', 'base_url': 'https://s.id/'},
{'domain': 'snip.ly', 'base_url': 'https://snip.ly/'},
{'domain': 't.co', 'base_url': 'https://t.co/'},
{'domain': 't.ly', 'base_url': 'https://t.ly/'},
Expand All @@ -153,6 +192,18 @@ def run(unfurl, node):
parent_id=node.node_id, incoming_edge_config=shortlink_edge)
return

# Guess that any domain + tld that is less than eight characters is a link shortener, and try to
# expand it via a 301/302 Location header.
if len(preceding_domain) < 8:
expanded_url = expand_url_via_redirect_header(f'https://{preceding_domain}/', node.value[1:])
if expanded_url:
unfurl.add_to_queue(
data_type='url', key=None, value=expanded_url,
label=f'Expanded URL: {expanded_url}',
hover=f'Expanded URL, retrieved from {preceding_domain} via "Location" header',
parent_id=node.node_id, incoming_edge_config=shortlink_edge)
return

# Get the list of "known" URL shortener domains from MISP; many of these seem to be deprecated.
# Try to expand the shortlink via a 301/302 Location header; if the site uses something like a meta refresh,
# this won't work.
Expand Down
40 changes: 20 additions & 20 deletions unfurl/tests/unit/test_shortlink.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,26 +2,26 @@
import unittest


class TestBitly(unittest.TestCase):

# def test_linkedin_shortlink(self):
# """ Test a LinkedIn shortlink; these work a little different than the rest"""
#
# test = Unfurl(remote_lookups=True)
# test.add_to_queue(data_type='url', key=None, value='https://lnkd.in/fDJnJ64')
# test.parse_queue()
#
# # test number of nodes
# self.assertEqual(len(test.nodes.keys()), 18)
# self.assertEqual(test.total_nodes, 18)
#
# self.assertEqual(test.nodes[4].value, '/fDJnJ64')
# self.assertEqual(test.nodes[11].value, 'thisweekin4n6.com')
# self.assertEqual(test.nodes[18].key, 4)
#
# # is processing finished empty
# self.assertTrue(test.queue.empty())
# self.assertEqual(len(test.edges), 0)
class TestShortLinks(unittest.TestCase):

def test_linkedin_shortlink(self):
""" Test a LinkedIn shortlink; these work a little different from the rest"""

test = Unfurl(remote_lookups=True)
test.add_to_queue(data_type='url', key=None, value='https://lnkd.in/fDJnJ64')
test.parse_queue()

# test number of nodes
self.assertEqual(len(test.nodes.keys()), 19)
self.assertEqual(test.total_nodes, 19)

self.assertEqual(test.nodes[4].value, '/fDJnJ64')
self.assertEqual(test.nodes[12].value, 'thisweekin4n6.com')
self.assertEqual(test.nodes[19].key, 4)

# is processing finished empty
self.assertTrue(test.queue.empty())
self.assertEqual(len(test.edges), 0)

def test_twitter_shortlink(self):
""" Test a Twitter shortlink; these use 301 redirects like most shortlinks"""
Expand Down
Loading