From eba77f467b75e31415e3a2a2b858cd83c90f79e1 Mon Sep 17 00:00:00 2001 From: shrikrishna <15bcs044@smvdu.ac.in> Date: Fri, 23 Mar 2018 12:40:24 +0530 Subject: [PATCH] Execute one regex at a time Closes https://github.com/retr0h/git-url-parse/issues/5 --- giturlparse/parser.py | 49 ++++++++++++++++--------------------------- 1 file changed, 18 insertions(+), 31 deletions(-) diff --git a/giturlparse/parser.py b/giturlparse/parser.py index 36ae2bc..951e3ba 100644 --- a/giturlparse/parser.py +++ b/giturlparse/parser.py @@ -61,7 +61,7 @@ def parse(self): d = { 'pathname': None, 'protocols': self._get_protocols(), - 'protocol': None, + 'protocol': 'ssh', 'href': self._url, 'resource': None, 'user': None, @@ -69,36 +69,23 @@ def parse(self): 'name': None, 'owner': None, } - - regexp = (r'^(https?|git|ssh|rsync)\://' - '(?:(.+)@)*' - '([a-z0-9_.-]*)' - '[:/]*' - '([\d]+){0,1}' - '(/(.+)/(.+).git)') - m1 = re.search(regexp, self._url) - - regexp = (r'^(?:(.+)@)*' - '([a-z0-9_.-]*)[:/]*' - '([\d]+){0,1}' - '([:/](.+)/(.+).git)') - m2 = re.search(regexp, self._url) - - if m1: - d['pathname'] = m1.group(5) - d['protocol'] = m1.group(1) - d['resource'] = m1.group(3) - d['user'] = m1.group(2) - d['port'] = m1.group(4) - d['name'] = m1.group(7) - d['owner'] = m1.group(6) - elif m2: - d['pathname'] = re.sub(r'^:', '', m2.group(4)) - d['protocol'] = 'ssh' - d['resource'] = m2.group(2) - d['user'] = m2.group(1) - d['name'] = m2.group(6) - d['owner'] = m2.group(5) + regexes = [ + (r'^(?P<protocol>https?|git|ssh|rsync)\://' + '(?:(?P<user>.+)@)*' + '(?P<resource>[a-z0-9_.-]*)' + '[:/]*' + '(?P<port>[\d]+){0,1}' + '(?P<pathname>\/(?P<owner>.+)/(?P<name>.+).git)'), + (r'^(?:(?P<user>.+)@)*' + '(?P<resource>[a-z0-9_.-]*)[:/]*' + '(?P<port>[\d]+){0,1}' + '[:](?P<pathname>\/?(?P<owner>.+)/(?P<name>.+).git)') + ] + for regex in regexes: + if re.search(regex, self._url): + m = re.search(regex, self._url) + d.update( m.groupdict() ) + break else: msg = "Invalid URL '{}'".format(self._url) raise ParserError(msg)