Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 38 additions & 0 deletions google/modules/standard_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ def __init__(self):
self.page = None # Results page this one was on
self.index = None # What index on this page it was on
self.number_of_results = None # The total number of results the query returned
self.knowledge_result = None # Result of the knowledge-panel scraping

def __repr__(self):
name = self._limit_str_size(self.name, 55)
Expand Down Expand Up @@ -76,6 +77,9 @@ def search(query, pages=1, lang='en', area='com', ncr=False, void=True, time_per
results_div = soup.find("div", attrs={"id": "resultStats"})
number_of_results = _get_number_of_results(results_div)

knowledge_panel = soup.find("div",
attrs={"class": "knowledge-panel"})

j = 0
for li in divs:
res = GoogleResult()
Expand All @@ -90,6 +94,8 @@ def search(query, pages=1, lang='en', area='com', ncr=False, void=True, time_per
res.thumb = _get_thumb()
res.cached = _get_cached(li)
res.number_of_results = number_of_results
if knowledge_panel:
res.knowledge_result = _get_knowledge_panel(knowledge_panel)

if void is True:
if res.description is None:
Expand Down Expand Up @@ -196,6 +202,38 @@ def _get_description(li):
return None


def _get_knowledge_panel(panel):
"""Return the knowledge panel results of a google search."""
header = panel.find("div", attrs={"class": "kp-header"})
subheader = header.find("div", attrs={"class": "kp-hc"})
name, desc = "", ""
if subheader:
name_divs = subheader.findAll("div", attrs={"class": "kno-ecr-pt"})
for x in name_divs:
if x.find("span"):
name += x.find("span").text + " "
desc_divs = subheader.findAll("div", attrs={"class": "vk_gy"})
for x in desc_divs:
if x.findAll("span"):
for span in x.findAll("span"):
desc += span.text + " "

else:
name_desc = header.findAll("div", attrs={"class": "kno-fb-ctx"})

name_tag = [x for x in name_desc if "kno-ecr-pt" in x.attrs["class"]]
for x in name_tag:
if x.find("span"):
if x.find("span").text:
name += x.find("span").text + " "
desc_tag = [x for x in name_desc if "kno-ecr-pt" not in x.attrs["class"]]
for x in desc_tag:
if x.find("span"):
if x.find("span").text:
desc += x.find("span").text + " "
return "{}; {}".format(name.strip(), desc.strip())


def _get_thumb():
    """Return the link to a thumbnail of the website.

    Not implemented: the body shown here is a bare ``pass``, so a call
    evaluates to ``None``.
    """
    pass
Expand Down
2 changes: 1 addition & 1 deletion google/modules/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ def _get_search_url(query, page=0, per_page=10, lang='en', area='com', ncr=False
# google instant)

params = {
'nl': lang,
'hl': lang,
'q': query.encode('utf8'),
'start': page * per_page,
'num': per_page
Expand Down