v0.5 Update

SimplySecurity · Nov 21, 2015 · cb1515b · cb1515b
1 parent 66a2661
commit cb1515b
Show file tree

Hide file tree

Showing 5 changed files with 174 additions and 22 deletions.
diff --git a/Common/SimplyEmail.ini b/Common/SimplyEmail.ini
@@ -79,3 +79,9 @@ QueryStart: 1
 [RedditPostSearch]
 QueryLimit: 200
 QueryStart: 0
+
+# Settings for Google PDF Search
+[GooglePDFSearch]
+StartQuantity: 0
+QueryLimit: 50
+QueryStart: 0
diff --git a/Common/TaskController.py b/Common/TaskController.py
@@ -36,7 +36,7 @@ def __init__(self):
         self.ConsumerList = []
         self.HtmlList = []
         self.Tasks = []
-        self.version = "0.4"
+        self.version = "0.5"
         self.ResultsList = []
 
     def ConfigSectionMap(section):

diff --git a/Modules/GooglePDFSearch.py b/Modules/GooglePDFSearch.py
@@ -0,0 +1,139 @@
+ #!/usr/bin/env python
+
+# Class will have the following properties:
+# 1) name / description
+# 2) main name called "ClassName"
+# 3) execute function (calls everything it needs)
+# 4) places the findings into a queue
+import re
+import requests
+import urlparse
+import os
+import configparser
+import requests
+import time
+from Helpers import helpers
+from Helpers import Parser
+from BeautifulSoup import BeautifulSoup
+from pdfminer.pdfinterp import PDFResourceManager, PDFPageInterpreter
+from pdfminer.converter import TextConverter
+from pdfminer.layout import LAParams
+from pdfminer.pdfpage import PDFPage
+from cStringIO import StringIO
+
+class ClassName:
+
+    def __init__(self, Domain, verbose=False):
+        self.name = "Google PDF Search for Emails"
+        self.description = "Uses google Dorking to search for emails"
+        config = configparser.ConfigParser()
+        try:
+            config.read('Common/SimplyEmail.ini')
+            self.Domain = Domain
+            self.Quanity = int(config['GooglePDFSearch']['StartQuantity'])
+            self.UserAgent = {
+                'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36'}
+            self.Limit = int(config['GooglePDFSearch']['QueryLimit'])
+            self.Counter = int(config['GooglePDFSearch']['QueryStart'])
+            self.verbose = verbose
+            self.urlList = []
+            self.Text = ""
+        except:
+            print helpers.color("[*] Major Settings for GooglePDFSearch are missing, EXITING!\n", warning=True)
+
+    def execute(self):
+        self.search()
+        FinalOutput, HtmlResults = self.get_emails()
+        return FinalOutput, HtmlResults
+
+
+    def convert_pdf_to_txt(self, path):
+        rsrcmgr = PDFResourceManager()
+        retstr = StringIO()
+        codec = 'utf-8'
+        laparams = LAParams()
+        device = TextConverter(rsrcmgr, retstr, codec=codec, laparams=laparams)
+        fp = file(path, 'rb')
+        interpreter = PDFPageInterpreter(rsrcmgr, device)
+        password = ""
+        maxpages = 0
+        caching = True
+        pagenos=set()
+
+        for page in PDFPage.get_pages(fp, pagenos, maxpages=maxpages, password=password,caching=caching, check_extractable=True):
+            interpreter.process_page(page)
+
+        text = retstr.getvalue()
+
+        fp.close()
+        device.close()
+        retstr.close()
+        return text
+
+
+    def download_file(self, url):
+        local_filename = url.split('/')[-1]
+        # NOTE the stream=True parameter
+        r = requests.get(url, stream=True)
+        with open(local_filename, 'wb') as f:
+            for chunk in r.iter_content(chunk_size=1024): 
+                if chunk: # filter out keep-alive new chunks
+                    f.write(chunk)
+                    #f.flush() commented by recommendation from J.F.Sebastian
+        return local_filename
+
+
+    def search(self):
+        while self.Counter <= self.Limit and self.Counter <= 100:
+            time.sleep(1)
+            if self.verbose:
+                p = '[*] Google PDF Search on page: ' + str(self.Counter)
+                print helpers.color(p, firewall=True)
+            try:
+                urly = "https://www.google.com/search?q=site:" + self.Domain + "+filetype:pdf&start=" + str(self.Counter)
+            except Exception as e:
+                error = "[!] Major issue with Google Search:" + str(e)
+                print helpers.color(error, warning=True)
+            try:
+                r = requests.get(urly)
+            except Exception as e:
+                error = "[!] Fail during Request to Google (Check Connection):" + \
+                    str(e)
+                print helpers.color(error, warning=True)
+            RawHtml = r.content
+            soup = BeautifulSoup(RawHtml)
+            for a in soup.findAll('a'):
+                  try:
+                    # https://stackoverflow.com/questions/21934004/not-getting-proper-links-
+                    # from-google-search-results-using-mechanize-and-beautifu/22155412#22155412?
+                    # newreg=01f0ed80771f4dfaa269b15268b3f9a9
+                    l = urlparse.parse_qs(urlparse.urlparse(a['href']).query)['q'][0]
+                    if l.startswith('http') or l.startswith('www'):
+                      if "webcache.googleusercontent.com" not in l:
+                        self.urlList.append(l)
+                  except:
+                    pass
+            self.Counter += 10
+        # now download the required files
+        try:
+          for url in self.urlList:
+            if self.verbose:
+              p = '[*] Google PDF search downloading: ' + str(url)
+              print helpers.color(p, firewall=True)
+            try:
+              FileName = self.download_file(url)
+              self.Text += self.convert_pdf_to_txt(FileName)
+              os.remove(FileName)
+            except Exception as e:
+              pass
+        except:
+          print helpers.color("[*] No PDF's to download from google!\n", firewall=true)
+
+
+    def get_emails(self):
+        Parse = Parser.Parser(self.Text)
+        Parse.genericClean()
+        Parse.urlClean()
+        FinalOutput = Parse.GrepFindEmails()
+        HtmlResults = Parse.BuildResults(FinalOutput,self.name)
+        return FinalOutput, HtmlResults
diff --git a/README.md b/README.md
@@ -35,7 +35,7 @@ root@kali:~/Desktop/SimplyEmail# ./Setup.sh
 ### Standard Help
 ```
  ============================================================
- Curent Version: 0.4 | Website: CyberSyndicates.com
+ Curent Version: 0.5 | Website: CyberSyndicates.com
  ============================================================
  Twitter: @real_slacker007 |  Twitter: @Killswitch_gui
  ============================================================
@@ -45,7 +45,7 @@ root@kali:~/Desktop/SimplyEmail# ./Setup.sh
 /$$$$$$  $$$$$$$$/ _____  ____   ______ $$/$$ |
 $$ \__$$/$$ |__   /     \/    \ /      \/  $$ |
 $$      \$$    |  $$$$$$ $$$$  |$$$$$$  $$ $$ |
- $$$$$$  $$$$$/   $$ | $$ | $$ |/     $$ $$ $$ |
+ $$$$$$  $$$$$/   $$ | $$ | $$ |/    $$ $$ $$ |
 /  \__$$ $$ |_____$$ | $$ | $$ /$$$$$$$ $$ $$ |
 $$    $$/$$       $$ | $$ | $$ $$    $$ $$ $$ |
  $$$$$$/ $$$$$$$$/$$/  $$/  $$/ $$$$$$$/$$/$$/
@@ -84,7 +84,7 @@ This will run ALL modules that are have API Key placed in the SimpleEmail.ini fi
 root@vapt-kali:~/Desktop/SimplyEmail# ./SimplyEmail.py -l
 
  ============================================================
- Curent Version: 0.4 | Website: CyberSyndicates.com
+ Curent Version: 0.5 | Website: CyberSyndicates.com
  ============================================================
  Twitter: @real_slacker007 |  Twitter: @Killswitch_gui
  ============================================================
@@ -94,28 +94,30 @@ root@vapt-kali:~/Desktop/SimplyEmail# ./SimplyEmail.py -l
 /$$$$$$  $$$$$$$$/ _____  ____   ______ $$/$$ |
 $$ \__$$/$$ |__   /     \/    \ /      \/  $$ |
 $$      \$$    |  $$$$$$ $$$$  |$$$$$$  $$ $$ |
- $$$$$$  $$$$$/   $$ | $$ | $$ |/     $$ $$ $$ |
+ $$$$$$  $$$$$/   $$ | $$ | $$ |/    $$ $$ $$ |
 /  \__$$ $$ |_____$$ | $$ | $$ /$$$$$$$ $$ $$ |
 $$    $$/$$       $$ | $$ | $$ $$    $$ $$ $$ |
  $$$$$$/ $$$$$$$$/$$/  $$/  $$/ $$$$$$$/$$/$$/
 
 ------------------------------------------------------------
  [*] Available Modules are:
 
-	1)	Modules/HtmlScrape.py   
-	2)	Modules/GitHubUserSearch.py
-	3)	Modules/Whoisolgy.py    
-	4)	Modules/CanaryBinSearch.py
-	5)	Modules/YahooSearch.py  
-	6)	Modules/GitHubCodeSearch.py
-	7)	Modules/AskSearch.py    
-	8)	Modules/EmailHunter.py  
-	9)	Modules/WhoisAPISearch.py
-	10)	Modules/SearchPGP.py    
-	11)	Modules/GoogleSearch.py 
-	12)	Modules/GitHubGistSearch.py
-	13)	Modules/OnionStagram.py 
-	14)	Modules/FlickrSearch.py 
+  1)  Modules/GooglePDFSearch.py
+  2)  Modules/HtmlScrape.py   
+  3)  Modules/GitHubUserSearch.py
+  4)  Modules/Whoisolgy.py    
+  5)  Modules/CanaryBinSearch.py
+  6)  Modules/YahooSearch.py  
+  7)  Modules/GitHubCodeSearch.py
+  8)  Modules/OnionStagram.py 
+  9)  Modules/AskSearch.py    
+  10) Modules/EmailHunter.py  
+  11) Modules/WhoisAPISearch.py
+  12) Modules/SearchPGP.py    
+  13) Modules/GoogleSearch.py 
+  14) Modules/GitHubGistSearch.py
+  15) Modules/RedditPostSearch.py
+  16) Modules/FlickrSearch.py 
 ```
 ## Understanding Reporting Options:
 One of the most frustrating aspects of Pen-testing is the tools' ability
@@ -180,13 +182,18 @@ As I mentioned before a powerful function that I wanted to integrate was the abi
 - shinichiro.hamaji--at--gmail.com
 
 ##Build Log:
-####Changelog (Current v0.4):
+####Changelog (Current v0.5):
 ```
+===================================
+Modules Added in v0.5
+-----------------------------
+(x) Reddit Post Search added
+(x) Google PDF Search added
+
 ===================================
 Modules Added in v0.4
 -----------------------------
 (x) GitHubUser added
-(x) Reddit Post Search added
 
 Issues Fixed in v0.4:
 -----------------------------
@@ -257,7 +264,6 @@ Modules Under Dev:
 ( ) StartPage Search (can help with captcha issues)
 ( ) GitHub User Search
 ( ) Searching SEC Data
-( ) PDFMiner 
 ( ) Exalead Search - Port from theHarvester
 ( ) PwnBin Search 
 ( ) PasteBin Searches 

diff --git a/Setup.sh b/Setup.sh
@@ -43,6 +43,7 @@ func_install_requests(){
   sudo pip install --upgrade requests 
   sudo pip install configparser --upgrade
   sudo pip install BeautifulSoup --upgrade
+  sudo pip install pdfminer --upgrade
   chmod 755 SimplyEmail.py
 
 }