From aebd137729a6975c50592157ec6be95f757e0510 Mon Sep 17 00:00:00 2001 From: Engjell Avdiu Date: Mon, 30 Oct 2023 20:28:22 +0100 Subject: [PATCH 1/4] Add support for fetching sponsored Google search results This commit introduces the ability to fetch sponsored search results from Google. - Extended the `SearchResult` class to include a `is_sponsored` boolean field. - Modified the `search` function to accept a new parameter `sponsored` that toggles whether or not to include sponsored results. - Added additional parsing logic to identify and include sponsored results in the output. Note: Ensure compliance with Google's terms of service when using this feature. --- googlesearch/__init__.py | 54 +++++++++++++++++++++++++++++----------- 1 file changed, 39 insertions(+), 15 deletions(-) diff --git a/googlesearch/__init__.py b/googlesearch/__init__.py index 74e6564..4ea8eaa 100644 --- a/googlesearch/__init__.py +++ b/googlesearch/__init__.py @@ -14,30 +14,31 @@ def _req(term, results, lang, start, proxies, timeout): }, params={ "q": term, - "num": results + 2, # Prevents multiple requests + "num": results, "hl": lang, "start": start, }, proxies=proxies, timeout=timeout, ) + resp.raise_for_status() return resp - class SearchResult: - def __init__(self, url, title, description): + def __init__(self, url, title, description, is_sponsored): self.url = url self.title = title self.description = description + self.is_sponsored = is_sponsored def __repr__(self): - return f"SearchResult(url={self.url}, title={self.title}, description={self.description})" - - -def search(term, num_results=10, lang="en", proxy=None, advanced=False, sleep_interval=0, timeout=5): - """Search the Google search engine""" - + if self.is_sponsored: + return f"SearchResult(url={self.url}, title={self.title}, description={self.description}, is_sponsored={self.is_sponsored})" + else: + return f"SearchResult(url={self.url}, title={self.title}, description={self.description})" + +def search(term, 
sponsored=False, num_results=10, lang="en", proxy=None, advanced=False, sleep_interval=0, timeout=5): escaped_term = urllib.parse.quote_plus(term) # make 'site:xxx.xxx.xxx ' works. # Proxy @@ -52,29 +53,52 @@ def search(term, num_results=10, lang="en", proxy=None, advanced=False, sleep_in start = 0 while start < num_results: # Send request - resp = _req(escaped_term, num_results - start, + resp = _req(escaped_term,num_results - start, lang, start, proxies, timeout) # Parse soup = BeautifulSoup(resp.text, "html.parser") + + # Check for sponsored results + if sponsored: + sponsored_block = soup.find_all("div", attrs={"class": "vdQmEd"}) + if len(sponsored_block) == 0: + start += 1 + for sponsored_result in sponsored_block: + link = sponsored_result.find("a", href=True,attrs={"class":"sVXRqc"}).get("href") + title = sponsored_result.find("span", attrs={"class":"OSrXXb"}) + description_box = sponsored_result.find(lambda tag: tag.name == 'span' and not tag.has_attr('class')) + + if description_box: + description = description_box.text + if link and title and description: + start += 1 + if advanced: + yield SearchResult(link, title.text, description, True) + else: + yield link + + # Check for not sponsored results result_block = soup.find_all("div", attrs={"class": "g"}) - if len(result_block) ==0: + if len(result_block) == 0: start += 1 for result in result_block: # Find link, title, description - link = result.find("a", href=True) + link = result.find("a", href=True).get("href") title = result.find("h3") description_box = result.find( "div", {"style": "-webkit-line-clamp:2"}) + if description_box: description = description_box.text if link and title and description: start += 1 if advanced: - yield SearchResult(link["href"], title.text, description) + yield SearchResult(link, title.text, description, False) else: - yield link["href"] + yield link + sleep(sleep_interval) if start == 0: - return [] + return [] \ No newline at end of file From 
fce19265a963a7f0695ee89cb9a4e66baa857a3d Mon Sep 17 00:00:00 2001 From: Engjell Avdiu Date: Mon, 30 Oct 2023 20:30:17 +0100 Subject: [PATCH 2/4] Add to_df function to convert search results to DataFrame This commit adds a utility function `to_df` that converts the search results to a Pandas DataFrame for easier manipulation and analysis. - Created `to_df` function that takes an iterable of `SearchResult` objects. - The function enumerates through the search results, extracting `url`, `title`, and `description`, and optionally `is_sponsored` if present. - Returns a Pandas DataFrame containing these details. This feature enhances data manipulation capabilities, making it easier to process and analyze search results. --- googlesearch/__init__.py | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/googlesearch/__init__.py b/googlesearch/__init__.py index 4ea8eaa..500112d 100644 --- a/googlesearch/__init__.py +++ b/googlesearch/__init__.py @@ -4,7 +4,7 @@ from requests import get from .user_agents import get_useragent import urllib - +import pandas as pd def _req(term, results, lang, start, proxies, timeout): resp = get( @@ -25,6 +25,23 @@ def _req(term, results, lang, start, proxies, timeout): resp.raise_for_status() return resp +def to_df(search_results): + data = [] + for index, result in enumerate(search_results): + result_data = { + 'index': index + 1, + 'url': result.url, + 'title': result.title, + 'description': result.description + } + if hasattr(result, 'is_sponsored'): + result_data['is_sponsored'] = result.is_sponsored + data.append(result_data) + + df = pd.DataFrame(data) + + return df + class SearchResult: def __init__(self, url, title, description, is_sponsored): self.url = url From 90076a88caf0a65d4ae304e0e125e3a6f9edd605 Mon Sep 17 00:00:00 2001 From: Engjell Avdiu Date: Mon, 30 Oct 2023 20:35:53 +0100 Subject: [PATCH 3/4] Update README to include new features and usage This commit updates the README to reflect the 
recent changes and new features added to the library: - Added section on fetching sponsored results using the `sponsored` parameter. - Included information about the `to_df` function for converting search results to a Pandas DataFrame. The update aims to provide users with a comprehensive guide to using the latest version of the library. --- README.md | 36 ++++++++++++++++++++++++++++++++++-- 1 file changed, 34 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index e43f1bc..2333671 100644 --- a/README.md +++ b/README.md @@ -1,13 +1,17 @@ # googlesearch -googlesearch is a Python library for searching Google, easily. googlesearch uses requests and BeautifulSoup4 to scrape Google. + +googlesearch is a Python library for searching Google, easily. googlesearch uses requests and BeautifulSoup4 to scrape Google. ## Installation + To install, run the following command: + ```bash python3 -m pip install googlesearch-python ``` ## Usage + To get results for a search term, simply use the search function in googlesearch. For example, to get results for "Google" in Google, just run the following program: ```python from googlesearch import search @@ -15,17 +19,23 @@ search("Google") ``` ## Additional options + googlesearch supports a few additional options. By default, googlesearch returns 10 results. This can be changed. To get a 100 results on Google for example, run the following program. + ```python from googlesearch import search search("Google", num_results=100) ``` + In addition, you can change the language google searches in. 
For example, to get results in French run the following program: + ```python from googlesearch import search search("Google", lang="fr") ``` + To extract more information, such as the description or the result URL, use an advanced search: + ```python from googlesearch import search search("Google", advanced=True) @@ -35,8 +45,30 @@ search("Google", advanced=True) # - url # - description ``` + +To also fetch sponsored results along with general search results, use the sponsored parameter: + +```python +from googlesearch import search +search("Google", sponsored=True, advanced=True) +# Returns a list of SearchResult +# Properties: +# - title +# - url +# - description +# - is_sponsored +``` + +To convert the search results to a Pandas DataFrame, use the to_df function: + +```python +from googlesearch import to_df +df = to_df(advanced_results) +``` + If requesting more than 100 results, googlesearch will send multiple requests to go through the pages. To increase the time between these requests, use `sleep_interval`: + ```python from googlesearch import search search("Google", sleep_interval=5, num_results=200) -``` \ No newline at end of file +``` From 6b20c0191d0ec280f6385d90dfbbbbbf2b72f6ed Mon Sep 17 00:00:00 2001 From: Engjell Avdiu Date: Mon, 9 Nov 2023 19:51:56 +0100 Subject: [PATCH 4/4] add pandas to requirements --- requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements.txt b/requirements.txt index 56399db..db44518 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,2 +1,3 @@ beautifulsoup4>=4.9 requests>=2.20 +pandas