xgoogle: follow Google's current result markup and simplify page fetching.

browser.py  get_page() no longer goes through PoolHTTPHandler; it uses a
            plain urllib2 opener with a fixed 'Mozilla/5.0' User-agent.
            The `data` argument is still url-encoded and handed to
            opener.open(), so callers that POST form data keep working.
            NOTE(review): self.headers is no longer sent with the request;
            confirm that is intended.
search.py   Result containers are looked up as <div class="g"> instead of
            <li class="g">.  A result with no description is kept: a
            missing description becomes "" and only a missing title or
            URL drops the result.

The "index" lines carry the blob ids of the patch as first submitted; the
post-image ids will differ for the corrected content.

diff --git a/xgoogle/browser.py b/xgoogle/browser.py
index c7d2618..1afb369 100755
--- a/xgoogle/browser.py
+++ b/xgoogle/browser.py
@@ -81,12 +81,12 @@ def __init__(self, user_agent=BROWSERS[0], debug=False, use_pool=False):
         self.debug = debug
 
     def get_page(self, url, data=None):
-        handlers = [PoolHTTPHandler]
-        opener = urllib2.build_opener(*handlers)
+        opener = urllib2.build_opener()
+        opener.addheaders = [('User-agent', 'Mozilla/5.0')]
         if data: data = urllib.urlencode(data)
-        request = urllib2.Request(url, data, self.headers)
+
         try:
-            response = opener.open(request)
+            response = opener.open(url, data)  # data=None -> GET, encoded form data -> POST
             return response.read()
         except (urllib2.HTTPError, urllib2.URLError), e:
             raise BrowserError(url, str(e))
diff --git a/xgoogle/search.py b/xgoogle/search.py
index 98b681e..1b0a534 100755
--- a/xgoogle/search.py
+++ b/xgoogle/search.py
@@ -205,7 +205,6 @@ def _get_results_page(self):
             page = self.browser.get_page(safe_url)
         except BrowserError, e:
             raise SearchError, "Failed getting %s: %s" % (e.url, e.error)
-
         return BeautifulSoup(page)
 
     def _extract_info(self, soup):
@@ -226,7 +225,7 @@ def _extract_info(self, soup):
         return {'from': int(matches.group(1)), 'to': int(matches.group(2)), 'total': int(matches.group(3))}
 
     def _extract_results(self, soup):
-        results = soup.findAll('li', {'class': 'g'})
+        results = soup.findAll('div', {'class': 'g'})
         ret_res = []
         for result in results:
             eres = self._extract_result(result)
@@ -237,7 +236,8 @@ def _extract_results(self, soup):
     def _extract_result(self, result):
         title, url = self._extract_title_url(result)
         desc = self._extract_description(result)
-        if not title or not url or not desc:
+        desc = desc or ""  # description is optional: map a missing one to ""
+        if not title or not url:
             return None
         return SearchResult(title, url, desc)
 