Skip to content

Commit

Permalink
Merge pull request #20 from jaysonsantos/speed-improvements
Browse files Browse the repository at this point in the history
Make the code faster by compiling regex on global context and reusing named collection.
  • Loading branch information
retr0h authored Jan 21, 2019
2 parents 16c4cef + d470a9d commit 249795e
Showing 1 changed file with 44 additions and 46 deletions.
90 changes: 44 additions & 46 deletions giturlparse/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,43 @@
import collections
import re

Parsed = collections.namedtuple('Parsed', [
'pathname',
'protocols',
'protocol',
'href',
'resource',
'user',
'port',
'name',
'owner',
])

POSSIBLE_REGEXES = (
re.compile(r'^(?P<protocol>https?|git|ssh|rsync)\://'
r'(?:(?P<user>.+)@)*'
r'(?P<resource>[a-z0-9_.-]*)'
r'[:/]*'
r'(?P<port>[\d]+){0,1}'
r'(?P<pathname>\/(?P<owner>.+)/(?P<name>.+).git)'),
re.compile(r'(git\+)?'
r'((?P<protocol>\w+)://)'
r'((?P<user>\w+)@)?'
r'((?P<resource>[\w\.\-]+))'
r'(:(?P<port>\d+))?'
r'(?P<pathname>(\/(?P<owner>\w+)/)?'
r'(\/?(?P<name>[\w\-]+)(\.git)?)?)'),
re.compile(r'^(?:(?P<user>.+)@)*'
r'(?P<resource>[a-z0-9_.-]*)[:/]*'
r'(?P<port>[\d]+){0,1}'
r'[:](?P<pathname>\/?(?P<owner>.+)/(?P<name>.+).git)'),
re.compile(r'((?P<user>\w+)@)?'
r'((?P<resource>[\w\.\-]+))'
r'[\:\/]{1,2}'
r'(?P<pathname>((?P<owner>\w+)/)?'
r'((?P<name>[\w\-]+)(\.git)?)?)'),
)


class ParserError(Exception):
""" Error raised when a URL can't be parsed. """
Expand All @@ -37,19 +74,6 @@ class Parser(object):
def __init__(self, url):
self._url = url

def get_parsed(self):
return collections.namedtuple('Parsed', [
'pathname',
'protocols',
'protocol',
'href',
'resource',
'user',
'port',
'name',
'owner',
])

def parse(self):
"""
Parses a GIT URL and returns an object. Raises an exception on invalid
Expand All @@ -69,47 +93,21 @@ def parse(self):
'name': None,
'owner': None,
}
regexes = [
(r'^(?P<protocol>https?|git|ssh|rsync)\://'
r'(?:(?P<user>.+)@)*'
r'(?P<resource>[a-z0-9_.-]*)'
r'[:/]*'
r'(?P<port>[\d]+){0,1}'
r'(?P<pathname>\/(?P<owner>.+)/(?P<name>.+).git)'),
(r'(git\+)?'
r'((?P<protocol>\w+)://)'
r'((?P<user>\w+)@)?'
r'((?P<resource>[\w\.\-]+))'
r'(:(?P<port>\d+))?'
r'(?P<pathname>(\/(?P<owner>\w+)/)?'
r'(\/?(?P<name>[\w\-]+)(\.git)?)?)'),
(r'^(?:(?P<user>.+)@)*'
r'(?P<resource>[a-z0-9_.-]*)[:/]*'
r'(?P<port>[\d]+){0,1}'
r'[:](?P<pathname>\/?(?P<owner>.+)/(?P<name>.+).git)'),
(r'((?P<user>\w+)@)?'
r'((?P<resource>[\w\.\-]+))'
r'[\:\/]{1,2}'
r'(?P<pathname>((?P<owner>\w+)/)?'
r'((?P<name>[\w\-]+)(\.git)?)?)'),
]
for regex in regexes:
if re.search(regex, self._url):
m = re.search(regex, self._url)
d.update(m.groupdict())
for regex in POSSIBLE_REGEXES:
match = regex.search(self._url)
if match:
d.update(match.groupdict())
break
else:
msg = "Invalid URL '{}'".format(self._url)
raise ParserError(msg)

p = self.get_parsed()

return p(**d)
return Parsed(**d)

def _get_protocols(self):
try:
index = self._url.index('://')

return self._url[0:index].split('+')
except ValueError:
return []

return self._url[:index].split('+')

0 comments on commit 249795e

Please sign in to comment.