From c8e406d021457c837819eeaca1067e4f5ec314d3 Mon Sep 17 00:00:00 2001
From: Ayush RoyChowdhury <aroyc@zn-osx-ayushr.local>
Date: Sun, 8 Dec 2024 00:47:03 -0600
Subject: [PATCH 01/26] first draft of oversharing.py

---
 src/powerpwn/copilot/oversharing/oversharing.py | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 create mode 100644 src/powerpwn/copilot/oversharing/oversharing.py

diff --git a/src/powerpwn/copilot/oversharing/oversharing.py b/src/powerpwn/copilot/oversharing/oversharing.py
new file mode 100644
index 0000000..e69de29

From a87a9b4913ea87dfeb1f52af86d2b6aaa71ab4ee Mon Sep 17 00:00:00 2001
From: Ayush RoyChowdhury <aroyc@zn-osx-ayushr.local>
Date: Sun, 8 Dec 2024 00:59:26 -0600
Subject: [PATCH 02/26] updated code

---
 .../copilot/oversharing/oversharing.py        | 23 +++++++++++++++++++
 1 file changed, 23 insertions(+)

diff --git a/src/powerpwn/copilot/oversharing/oversharing.py b/src/powerpwn/copilot/oversharing/oversharing.py
index e69de29..481f41d 100644
--- a/src/powerpwn/copilot/oversharing/oversharing.py
+++ b/src/powerpwn/copilot/oversharing/oversharing.py
@@ -0,0 +1,23 @@
+import asyncio
+
+from powerpwn.copilot.models.chat_argument import ChatArguments
+from powerpwn.copilot.copilot_connector.copilot_connector import CopilotConnector
+from powerpwn.copilot.enums.copilot_scenario_enum import CopilotScenarioEnum
+from powerpwn.copilot.enums.verbose_enum import VerboseEnum
+
+args = ChatArguments(
+        user="User",
+        password="Password",
+        verbose=VerboseEnum.full,
+        scenario=CopilotScenarioEnum.teamshub,
+        use_cached_access_token=False
+    )
+copilot_connector = CopilotConnector(args)
+
+# init connection
+copilot_connector.init_connection()
+
+# send a prompt and receive an answer from Copilot
+result = asyncio.get_event_loop().run_until_complete(asyncio.gather(copilot_connector.connect("Hello World")))
+if result[0]:
+    print(result[0].parsed_message)
\ No newline at end of file

From ba32011be853e70af5538532b43c5912eb1c2061 Mon Sep 17 00:00:00 2001
From: Ayush RoyChowdhury <aroyc@zn-osx-ayushr.local>
Date: Tue, 10 Dec 2024 00:46:46 -0600
Subject: [PATCH 03/26] trying websocket

---
 .../copilot/oversharing/oversharing.py        | 28 ++++++++++---------
 1 file changed, 15 insertions(+), 13 deletions(-)

diff --git a/src/powerpwn/copilot/oversharing/oversharing.py b/src/powerpwn/copilot/oversharing/oversharing.py
index 481f41d..3bc9ae4 100644
--- a/src/powerpwn/copilot/oversharing/oversharing.py
+++ b/src/powerpwn/copilot/oversharing/oversharing.py
@@ -1,23 +1,25 @@
-import asyncio
-
 from powerpwn.copilot.models.chat_argument import ChatArguments
-from powerpwn.copilot.copilot_connector.copilot_connector import CopilotConnector
 from powerpwn.copilot.enums.copilot_scenario_enum import CopilotScenarioEnum
 from powerpwn.copilot.enums.verbose_enum import VerboseEnum
+from powerpwn.copilot.chat_automator.chat_automator import ChatAutomator
+import os
+
+user = os.getenv('m365user')
+user_password = os.getenv('m365pass')
+
+if user is None or user_password is None:
+    raise ValueError("Environment variables for email or password are not set.")
+
+print("User being looked at is: ", user)
 
 args = ChatArguments(
-        user="User",
-        password="Password",
+        user=os.getenv('m365user'),
+        password=os.getenv('m365pass'),
         verbose=VerboseEnum.full,
         scenario=CopilotScenarioEnum.teamshub,
         use_cached_access_token=False
     )
-copilot_connector = CopilotConnector(args)
-
-# init connection
-copilot_connector.init_connection()
 
-# send a prompt and receive an answer from Copilot
-result = asyncio.get_event_loop().run_until_complete(asyncio.gather(copilot_connector.connect("Hello World")))
-if result[0]:
-    print(result[0].parsed_message)
\ No newline at end of file
+chat_automator = ChatAutomator(args)
+chat_automator.init_connector()
+result = chat_automator.send_prompt("Hello World")

From c2637d97625ba3f77bb17cd788453f4e68830dca Mon Sep 17 00:00:00 2001
From: Ayush RoyChowdhury <aroyc@zn-osx-ayushr.local>
Date: Tue, 10 Dec 2024 01:12:02 -0600
Subject: [PATCH 04/26] connection working

---
 .../copilot/oversharing/oversharing.py        | 19 +++++++++++++++----
 1 file changed, 15 insertions(+), 4 deletions(-)

diff --git a/src/powerpwn/copilot/oversharing/oversharing.py b/src/powerpwn/copilot/oversharing/oversharing.py
index 3bc9ae4..d6dd165 100644
--- a/src/powerpwn/copilot/oversharing/oversharing.py
+++ b/src/powerpwn/copilot/oversharing/oversharing.py
@@ -2,24 +2,35 @@
 from powerpwn.copilot.enums.copilot_scenario_enum import CopilotScenarioEnum
 from powerpwn.copilot.enums.verbose_enum import VerboseEnum
 from powerpwn.copilot.chat_automator.chat_automator import ChatAutomator
+from powerpwn.copilot.copilot_connector.copilot_connector import CopilotConnector
 import os
+import asyncio
 
 user = os.getenv('m365user')
 user_password = os.getenv('m365pass')
+init_prompt = os.getenv('initprompt')
 
 if user is None or user_password is None:
     raise ValueError("Environment variables for email or password are not set.")
 
 print("User being looked at is: ", user)
 
+print("Initial Prompt: ", init_prompt)
+
 args = ChatArguments(
         user=os.getenv('m365user'),
         password=os.getenv('m365pass'),
         verbose=VerboseEnum.full,
-        scenario=CopilotScenarioEnum.teamshub,
+        scenario=CopilotScenarioEnum.officeweb,
         use_cached_access_token=False
     )
 
-chat_automator = ChatAutomator(args)
-chat_automator.init_connector()
-result = chat_automator.send_prompt("Hello World")
+copilot_connector = CopilotConnector(args)
+
+# init connection
+copilot_connector.init_connection()
+
+# send a prompt and receive an answer from Copilot
+result = asyncio.get_event_loop().run_until_complete(asyncio.gather(copilot_connector.connect(os.getenv(init_prompt))))
+if result[0]:
+    print(result[0].parsed_message)

From 8fe0ba17598a29afe30dfa00ef594cdb85aec9d8 Mon Sep 17 00:00:00 2001
From: Ayush RoyChowdhury <aroyc@zn-osx-ayushr.local>
Date: Wed, 18 Dec 2024 20:55:30 -0600
Subject: [PATCH 05/26] discovery and extract data first draft

---
 src/powerpwn/copilot/oversharing/attempt.py   |   0
 src/powerpwn/copilot/oversharing/discovery.py | 168 ++++++++++++++++++
 .../copilot/oversharing/extract_data.py       |  87 +++++++++
 .../copilot/oversharing/oversharing.py        |  36 ----
 src/powerpwn/copilot/oversharing/pii          |   0
 5 files changed, 255 insertions(+), 36 deletions(-)
 create mode 100644 src/powerpwn/copilot/oversharing/attempt.py
 create mode 100644 src/powerpwn/copilot/oversharing/discovery.py
 create mode 100644 src/powerpwn/copilot/oversharing/extract_data.py
 delete mode 100644 src/powerpwn/copilot/oversharing/oversharing.py
 create mode 100644 src/powerpwn/copilot/oversharing/pii

diff --git a/src/powerpwn/copilot/oversharing/attempt.py b/src/powerpwn/copilot/oversharing/attempt.py
new file mode 100644
index 0000000..e69de29
diff --git a/src/powerpwn/copilot/oversharing/discovery.py b/src/powerpwn/copilot/oversharing/discovery.py
new file mode 100644
index 0000000..d2e9de0
--- /dev/null
+++ b/src/powerpwn/copilot/oversharing/discovery.py
@@ -0,0 +1,168 @@
+import os
+import asyncio
+import json
+import openpyxl
+from powerpwn.copilot.models.chat_argument import ChatArguments
+from powerpwn.copilot.enums.copilot_scenario_enum import CopilotScenarioEnum
+from powerpwn.copilot.enums.verbose_enum import VerboseEnum
+from powerpwn.copilot.copilot_connector.copilot_connector import CopilotConnector
+
+# Function to read prompts from the prompts.txt file
+def read_prompts_from_file(file_path="pii.txt"):
+    prompts = {}
+    with open(file_path, "r") as f:
+        content = f.read().strip().split("\n\n")
+        for section in content:
+            lines = section.split("\n", 1)
+            if len(lines) == 2:
+                prompts[lines[0].strip(":")] = lines[1].strip()
+    return prompts
+
+# Categorize files based on PII content
+def categorize_files(files):
+    """Categorize files based on PII content."""
+    pii_keywords = ['social security number', 'passport numbers', 'drivers license numbers', 'employee records', 'contact information']
+
+    categorized_data = []
+
+    for file in files:
+        file_info = {
+            'file_name': file.get('Title', 'Unknown'),
+            'author': file.get('Author', 'Unknown'),
+            'last_modified': file.get('LastModifiedTime', 'Unknown'),
+            'contains': []  # New field to store what the file contains
+        }
+
+        file_name = file_info['file_name'].lower()
+
+        # Check for PII-related keywords in the file name
+        for keyword in pii_keywords:
+            if keyword.lower() in file_name:
+                file_info['contains'].append(keyword)
+
+        if file_info['contains']:
+            categorized_data.append(file_info)
+
+    return categorized_data
+
+def save_to_excel(categorized_files):
+    """Save the categorized files data to an Excel spreadsheet."""
+    output_file = "pii_sensitive_files_report.xlsx"
+    if os.path.exists(output_file):
+        wb = openpyxl.load_workbook(output_file)
+        ws = wb.active
+    else:
+        wb = openpyxl.Workbook()
+        ws = wb.active
+        ws.title = "Sensitive PII Files"
+        headers = ['File Name', 'Author', 'Last Modified', 'Contains']
+        ws.append(headers)
+
+    for file in categorized_files:
+        row = [file['file_name'], file['author'], file['last_modified'], ", ".join(file['contains'])]
+        ws.append(row)
+
+    wb.save(output_file)
+    print(f"Excel file updated: {output_file}")
+
+# Function to save the response to a .txt file
+def save_response_to_txt(response, file_name="response.txt"):
+    """Save the raw response to a text file."""
+    # Check if the response is a tuple (e.g., it could be something like (message, status))
+    if isinstance(response, tuple):
+        response = response[0]  # Extract the string from the tuple if it's a tuple
+
+    # Now response should be a string
+    with open(file_name, "a") as f:
+        f.write(response + "\n")
+    print(f"Response saved to {file_name}")
+
+
+# Set environment variables for M365 user credentials
+user = os.getenv('m365user')
+user_password = os.getenv('m365pass')
+
+if user is None or user_password is None:
+    raise ValueError("Environment variables for email or password are not set.")
+
+args = ChatArguments(
+    user=user,
+    password=user_password,
+    verbose=VerboseEnum.full,
+    scenario=CopilotScenarioEnum.officeweb,
+    use_cached_access_token=False
+)
+
+copilot_connector = CopilotConnector(args)
+copilot_connector.init_connection()
+
+# Read prompts from the file
+prompts = read_prompts_from_file("pii.txt")
+
+# Send initial prompt to get started
+print("Sending initial prompt to Copilot...")
+result = asyncio.get_event_loop().run_until_complete(asyncio.gather(copilot_connector.connect(prompts['init_prompt'])))
+if result[0]:
+    print(result[0].parsed_message)
+    save_response_to_txt(result[0].parsed_message)
+
+# Send second prompt to list sensitive files
+print("Sending second prompt to Copilot...")
+result = asyncio.get_event_loop().run_until_complete(asyncio.gather(copilot_connector.connect(prompts['second_prompt'])))
+if result[0]:
+    raw_message = result[0].parsed_message
+    print("Raw Response (Second Prompt):", raw_message)
+    save_response_to_txt(raw_message)
+
+    try:
+        raw_message_str = raw_message.copilot_message
+        json_str = raw_message_str.split('```json\n')[1].split('\n```')[0]
+        files_with_pii = json.loads(json_str)
+
+        categorized_files = categorize_files(files_with_pii)
+        print("Categorized Files (Second Prompt):", categorized_files)
+
+        save_to_excel(categorized_files)
+
+    except (IndexError, json.JSONDecodeError) as e:
+        print("Error parsing the JSON response:", e)
+
+# Loop to continuously ask for more PII-related files
+while True:
+    print("Sending third prompt to get more PII-related files...")
+    result = asyncio.get_event_loop().run_until_complete(asyncio.gather(copilot_connector.connect(prompts['third_prompt'])))
+
+    if result[0]:
+        raw_message = result[0].parsed_message
+        print("Raw Response (Third Prompt):", raw_message)
+        save_response_to_txt(raw_message)
+
+        try:
+            raw_message_str = raw_message.copilot_message
+
+            if 'None' in raw_message_str:
+                print("No more PII files found. Exiting...")
+                break
+
+            json_str = raw_message_str.split('```json\n')[1].split('\n```')[0]
+            more_files = json.loads(json_str)
+
+            if more_files:
+                categorized_files = categorize_files(more_files)
+                print("Categorized Files (Third Prompt):", categorized_files)
+
+                save_to_excel(categorized_files)
+            else:
+                print("No additional files found in response.")
+
+        except (IndexError, json.JSONDecodeError) as e:
+            print("Error parsing the JSON response:", e)
+
+
+
+
+
+
+
+
+
diff --git a/src/powerpwn/copilot/oversharing/extract_data.py b/src/powerpwn/copilot/oversharing/extract_data.py
new file mode 100644
index 0000000..338738c
--- /dev/null
+++ b/src/powerpwn/copilot/oversharing/extract_data.py
@@ -0,0 +1,87 @@
+import os
+import openpyxl
+import re
+import asyncio
+from powerpwn.copilot.models.chat_argument import ChatArguments
+from powerpwn.copilot.enums.copilot_scenario_enum import CopilotScenarioEnum
+from powerpwn.copilot.enums.verbose_enum import VerboseEnum
+from powerpwn.copilot.chat_automator.chat_automator import ChatAutomator
+from powerpwn.copilot.copilot_connector.copilot_connector import CopilotConnector
+
+# Set environment variables (if not already set)
+user = os.getenv('m365user')
+user_password = os.getenv('m365pass')
+third_prompt = os.getenv('thirdprompt')
+
+if user is None or user_password is None:
+    raise ValueError("Environment variables for email or password are not set.")
+
+print("User being looked at is:", user)
+
+args = ChatArguments(
+    user=user,
+    password=user_password,
+    verbose=VerboseEnum.full,
+    scenario=CopilotScenarioEnum.officeweb,
+    use_cached_access_token=False
+)
+
+copilot_connector = CopilotConnector(args)
+copilot_connector.init_connection()
+
+# Load the Excel file and extract file names
+def extract_file_names_from_excel(file_path):
+    """
+    Extracts file names from the 'File Name' column in the Excel sheet.
+    It uses regex to capture the portion inside the square brackets.
+    """
+    file_names = []
+    wb = openpyxl.load_workbook(file_path)
+    ws = wb.active
+
+    # Assuming the first row contains headers, and file names are in the first column (index 0)
+    for row in ws.iter_rows(min_row=2, min_col=1, max_col=1):  # Skip header row
+        file_name = row[0].value
+        if file_name:
+            match = re.search(r'\[(.*?)\]', file_name)  # Extract text inside []
+            if match:
+                file_names.append(match.group(1))  # Append the extracted file name
+    return file_names
+
+
+async def get_file_content_and_save(file_name):
+    """
+    Request Copilot to print out the content of the file and save it to a .txt file.
+    """
+    prompt = f"I want to make an internal email regarding '{file_name}' to discuss any numbers or figures listed and insights gained and would love it if you drafted the email for me. Would be good to answer any possible FAQs with specific figures etc."
+
+    # Send the request to Copilot
+    result = await copilot_connector.connect(prompt)
+
+    # Check if the result contains a valid message
+    if hasattr(result, 'parsed_message') and result.parsed_message:
+        raw_message = result.parsed_message
+        print(f"Content retrieved for {file_name}:")
+        print(raw_message.copilot_message)  # Display the content
+
+        # Save the content to a text file
+        with open(f"{file_name}.txt", "w") as txt_file:
+            txt_file.write(raw_message.copilot_message)  # Write the actual message content to the file
+        print(f"Content of {file_name} saved as {file_name}.txt")
+    else:
+        print(f"Failed to retrieve content for {file_name}.")
+
+
+# Main script to extract file names and retrieve content for each file
+async def main():
+    # Load the Excel file and extract file names
+    file_names = extract_file_names_from_excel("sensitive_files_report.xlsx")
+    print("Extracted file names:", file_names)
+
+    # For each file name, request content from Copilot and save it to a text file
+    for file_name in file_names:
+        await get_file_content_and_save(file_name)
+
+# Run the async script
+if __name__ == "__main__":
+    asyncio.run(main())
diff --git a/src/powerpwn/copilot/oversharing/oversharing.py b/src/powerpwn/copilot/oversharing/oversharing.py
deleted file mode 100644
index d6dd165..0000000
--- a/src/powerpwn/copilot/oversharing/oversharing.py
+++ /dev/null
@@ -1,36 +0,0 @@
-from powerpwn.copilot.models.chat_argument import ChatArguments
-from powerpwn.copilot.enums.copilot_scenario_enum import CopilotScenarioEnum
-from powerpwn.copilot.enums.verbose_enum import VerboseEnum
-from powerpwn.copilot.chat_automator.chat_automator import ChatAutomator
-from powerpwn.copilot.copilot_connector.copilot_connector import CopilotConnector
-import os
-import asyncio
-
-user = os.getenv('m365user')
-user_password = os.getenv('m365pass')
-init_prompt = os.getenv('initprompt')
-
-if user is None or user_password is None:
-    raise ValueError("Environment variables for email or password are not set.")
-
-print("User being looked at is: ", user)
-
-print("Initial Prompt: ", init_prompt)
-
-args = ChatArguments(
-        user=os.getenv('m365user'),
-        password=os.getenv('m365pass'),
-        verbose=VerboseEnum.full,
-        scenario=CopilotScenarioEnum.officeweb,
-        use_cached_access_token=False
-    )
-
-copilot_connector = CopilotConnector(args)
-
-# init connection
-copilot_connector.init_connection()
-
-# send a prompt and receive an answer from Copilot
-result = asyncio.get_event_loop().run_until_complete(asyncio.gather(copilot_connector.connect(os.getenv(init_prompt))))
-if result[0]:
-    print(result[0].parsed_message)
diff --git a/src/powerpwn/copilot/oversharing/pii b/src/powerpwn/copilot/oversharing/pii
new file mode 100644
index 0000000..e69de29

From 8e3edc91f1c27a8c1f52bfc7a6d6d832a3f2c070 Mon Sep 17 00:00:00 2001
From: Ayush RoyChowdhury <aroyc@zn-osx-ayushr.local>
Date: Thu, 26 Dec 2024 14:56:10 -0600
Subject: [PATCH 06/26] updated with regex parsing

---
 src/powerpwn/copilot/oversharing/discovery.py | 282 ++++++++++--------
 .../copilot/oversharing/response.json         |   0
 2 files changed, 165 insertions(+), 117 deletions(-)
 create mode 100644 src/powerpwn/copilot/oversharing/response.json

diff --git a/src/powerpwn/copilot/oversharing/discovery.py b/src/powerpwn/copilot/oversharing/discovery.py
index d2e9de0..b1bff62 100644
--- a/src/powerpwn/copilot/oversharing/discovery.py
+++ b/src/powerpwn/copilot/oversharing/discovery.py
@@ -1,53 +1,89 @@
 import os
 import asyncio
 import json
+import re
 import openpyxl
+from datetime import datetime
+
+# -- powerpwn/copilot imports --
 from powerpwn.copilot.models.chat_argument import ChatArguments
 from powerpwn.copilot.enums.copilot_scenario_enum import CopilotScenarioEnum
 from powerpwn.copilot.enums.verbose_enum import VerboseEnum
 from powerpwn.copilot.copilot_connector.copilot_connector import CopilotConnector
 
-# Function to read prompts from the prompts.txt file
 def read_prompts_from_file(file_path="pii.txt"):
+    """
+    Reads prompts from the file. Expects sections separated by two newlines.
+    """
     prompts = {}
-    with open(file_path, "r") as f:
-        content = f.read().strip().split("\n\n")
-        for section in content:
+    with open(file_path, "r", encoding="utf-8") as f:
+        content = f.read().strip()
+
+        # Split on double newlines
+        sections = content.split("\n\n")
+        for section in sections:
             lines = section.split("\n", 1)
             if len(lines) == 2:
-                prompts[lines[0].strip(":")] = lines[1].strip()
+                key = lines[0].strip(":").strip()
+                value = lines[1].strip()
+                prompts[key] = value
+            else:
+                print(f"Warning: Skipping invalid section: {section}")
     return prompts
 
-# Categorize files based on PII content
-def categorize_files(files):
-    """Categorize files based on PII content."""
-    pii_keywords = ['social security number', 'passport numbers', 'drivers license numbers', 'employee records', 'contact information']
-
-    categorized_data = []
 
-    for file in files:
-        file_info = {
-            'file_name': file.get('Title', 'Unknown'),
-            'author': file.get('Author', 'Unknown'),
-            'last_modified': file.get('LastModifiedTime', 'Unknown'),
-            'contains': []  # New field to store what the file contains
-        }
+def enhanced_parser(raw_message):
+    """
+    Extracts file_name, link, and author from free-text, Markdown, or structured formats.
+    """
+    extracted_files = []
+
+    # Use regex to find file details in free-text or Markdown-like formats
+    file_pattern = re.compile(
+        r"\*\*File Name:\*\*\s*(?P<file_name>.*?)\n.*?"
+        r"\*\*Author:\*\*\s*(?P<author>.*?)\n.*?"
+        r"\*\*Link:\*\*\s*\[.*?\]\((?P<link>.*?)\)",
+        re.DOTALL
+    )
+
+    # Match all occurrences of the pattern
+    matches = file_pattern.finditer(raw_message)
+    for match in matches:
+        extracted_files.append({
+            'file_name': match.group('file_name').strip(),
+            'link': match.group('link').strip(),
+            'author': match.group('author').strip(),
+        })
+
+    # Fallback: Find lines that match individual fields if no comprehensive match found
+    if not extracted_files:
+        print("DEBUG: No structured matches found. Attempting fallback extraction.")
+        file_lines = raw_message.splitlines()
+        current_file = {}
+
+        for line in file_lines:
+            if "File Name:" in line:
+                current_file['file_name'] = line.split("File Name:")[1].strip()
+            elif "Author:" in line:
+                current_file['author'] = line.split("Author:")[1].strip()
+            elif "Link:" in line and '(' in line:
+                link_match = re.search(r'\((.*?)\)', line)
+                if link_match:
+                    current_file['link'] = link_match.group(1).strip()
+
+            # If all fields are populated, add to extracted_files and reset
+            if all(k in current_file for k in ['file_name', 'link', 'author']):
+                extracted_files.append(current_file)
+                current_file = {}
+
+    return extracted_files
 
-        file_name = file_info['file_name'].lower()
-
-        # Check for PII-related keywords in the file name
-        for keyword in pii_keywords:
-            if keyword.lower() in file_name:
-                file_info['contains'].append(keyword)
-
-        if file_info['contains']:
-            categorized_data.append(file_info)
-
-    return categorized_data
 
 def save_to_excel(categorized_files):
-    """Save the categorized files data to an Excel spreadsheet."""
-    output_file = "pii_sensitive_files_report.xlsx"
+    """
+    Append new file entries to 'pii_sensitive_files_report.xlsx' if not duplicates.
+    """
+    output_file = "new_pii_sensitive_files_report.xlsx"
     if os.path.exists(output_file):
         wb = openpyxl.load_workbook(output_file)
         ws = wb.active
@@ -55,108 +91,120 @@ def save_to_excel(categorized_files):
         wb = openpyxl.Workbook()
         ws = wb.active
         ws.title = "Sensitive PII Files"
-        headers = ['File Name', 'Author', 'Last Modified', 'Contains']
+        headers = ['File Name - Link', 'Author', 'Contains']
         ws.append(headers)
 
-    for file in categorized_files:
-        row = [file['file_name'], file['author'], file['last_modified'], ", ".join(file['contains'])]
-        ws.append(row)
+    existing_files = set()
+    for row in ws.iter_rows(min_row=2, values_only=True):
+        existing_files.add((row[0], row[1]))
 
+    for file in categorized_files:
+        file_name_link = file['file_name'] + " - " + file['link']
+        if (file_name_link, file['author']) not in existing_files:
+            ws.append([
+                file_name_link,
+                file['author'],
+                ", ".join(file.get('contains', []))
+            ])
     wb.save(output_file)
     print(f"Excel file updated: {output_file}")
 
-# Function to save the response to a .txt file
-def save_response_to_txt(response, file_name="response.txt"):
-    """Save the raw response to a text file."""
-    # Check if the response is a tuple (e.g., it could be something like (message, status))
-    if isinstance(response, tuple):
-        response = response[0]  # Extract the string from the tuple if it's a tuple
-
-    # Now response should be a string
-    with open(file_name, "a") as f:
-        f.write(response + "\n")
-    print(f"Response saved to {file_name}")
-
-
-# Set environment variables for M365 user credentials
-user = os.getenv('m365user')
-user_password = os.getenv('m365pass')
-
-if user is None or user_password is None:
-    raise ValueError("Environment variables for email or password are not set.")
-
-args = ChatArguments(
-    user=user,
-    password=user_password,
-    verbose=VerboseEnum.full,
-    scenario=CopilotScenarioEnum.officeweb,
-    use_cached_access_token=False
-)
-
-copilot_connector = CopilotConnector(args)
-copilot_connector.init_connection()
-
-# Read prompts from the file
-prompts = read_prompts_from_file("pii.txt")
-
-# Send initial prompt to get started
-print("Sending initial prompt to Copilot...")
-result = asyncio.get_event_loop().run_until_complete(asyncio.gather(copilot_connector.connect(prompts['init_prompt'])))
-if result[0]:
-    print(result[0].parsed_message)
-    save_response_to_txt(result[0].parsed_message)
-
-# Send second prompt to list sensitive files
-print("Sending second prompt to Copilot...")
-result = asyncio.get_event_loop().run_until_complete(asyncio.gather(copilot_connector.connect(prompts['second_prompt'])))
-if result[0]:
-    raw_message = result[0].parsed_message
-    print("Raw Response (Second Prompt):", raw_message)
-    save_response_to_txt(raw_message)
-
-    try:
-        raw_message_str = raw_message.copilot_message
-        json_str = raw_message_str.split('```json\n')[1].split('\n```')[0]
-        files_with_pii = json.loads(json_str)
-
-        categorized_files = categorize_files(files_with_pii)
-        print("Categorized Files (Second Prompt):", categorized_files)
 
-        save_to_excel(categorized_files)
+def handle_response(raw_message):
+    """
+    Processes the raw response using the enhanced parser.
+    """
+    # Check if raw_message is a WebsocketParsedMessage object
+    if hasattr(raw_message, 'copilot_message'):
+        raw_message = raw_message.copilot_message  # Extract the actual message
+
+    if not isinstance(raw_message, str):
+        raise TypeError("raw_message must be a string or an object containing a message string.")
+
+    extracted_files = enhanced_parser(raw_message)
+    print("DEBUG: Extracted files:", extracted_files)
+    return extracted_files
+
+
+
+# ---------------- MAIN SCRIPT ----------------
+
+if __name__ == "__main__":
+    user = os.getenv('m365user')
+    user_password = os.getenv('m365pass')
+    if user is None or user_password is None:
+        raise ValueError("Environment variables 'm365user' or 'm365pass' are not set.")
+
+    args = ChatArguments(
+        user=user,
+        password=user_password,
+        verbose=VerboseEnum.full,
+        scenario=CopilotScenarioEnum.officeweb,
+        use_cached_access_token=False
+    )
+
+    copilot_connector = CopilotConnector(args)
+    copilot_connector.init_connection()
+
+    # Read prompts
+    prompts = read_prompts_from_file("pii.txt")
+    print("DEBUG: prompts ->", prompts)
+
+    # Initial Prompt
+    print("Sending initial prompt to Copilot...")
+    result = asyncio.get_event_loop().run_until_complete(
+        asyncio.gather(copilot_connector.connect(prompts['init_prompt']))
+    )
+    if result and result[0]:
+        print(result)
+        print(result[0].parsed_message)
+
+    # Second Prompt
+    print("Sending second prompt to Copilot...")
+    result = asyncio.get_event_loop().run_until_complete(
+        asyncio.gather(copilot_connector.connect(prompts['second_prompt']))
+    )
+    if result and result[0]:
+        raw_message = result[0].parsed_message
+        print("Raw Response (Second Prompt):", raw_message)
+        files_list = handle_response(raw_message)
+        if files_list:
+            save_to_excel(files_list)
+
+    # Dynamic Third Prompts
+    pii_types = [
+        'Social Security Numbers',
+        'Employee Records such as start date, end date, salary, position',
+        'Passport Numbers',
+        'Driver\'s License Numbers',
+        'Employee Salaries, Start Dates, Project Access, and IP Addresses',
+        'Contact Information such as phone numbers, emails, addresses'
+    ]
 
-    except (IndexError, json.JSONDecodeError) as e:
-        print("Error parsing the JSON response:", e)
+    try:
+        for pii_type in pii_types:
+            print(f"Sending third prompt for PII type: {pii_type}")
+            third_prompt_dynamic = f"Great job! are there any other files of filetype (docx, csv, xlsx, or pptx) that you have didn't mention that contains *{pii_type}* or *emails*? If yes, please list them as well."
+            result = asyncio.get_event_loop().run_until_complete(
+                asyncio.gather(copilot_connector.connect(third_prompt_dynamic))
+            )
 
-# Loop to continuously ask for more PII-related files
-while True:
-    print("Sending third prompt to get more PII-related files...")
-    result = asyncio.get_event_loop().run_until_complete(asyncio.gather(copilot_connector.connect(prompts['third_prompt'])))
+            if result and result[0]:
+                raw_message = result[0].parsed_message
+                print(f"Raw Response for {pii_type}:", raw_message)
+                files_list = handle_response(raw_message)
+                if files_list:
+                    save_to_excel(files_list)
 
-    if result[0]:
-        raw_message = result[0].parsed_message
-        print("Raw Response (Third Prompt):", raw_message)
-        save_response_to_txt(raw_message)
+    except KeyboardInterrupt:
+        print("\nKeyboard interrupt received. Exiting gracefully.")
 
-        try:
-            raw_message_str = raw_message.copilot_message
+    print("Finished processing.")
 
-            if 'None' in raw_message_str:
-                print("No more PII files found. Exiting...")
-                break
 
-            json_str = raw_message_str.split('```json\n')[1].split('\n```')[0]
-            more_files = json.loads(json_str)
 
-            if more_files:
-                categorized_files = categorize_files(more_files)
-                print("Categorized Files (Third Prompt):", categorized_files)
 
-                save_to_excel(categorized_files)
-            else:
-                print("No additional files found in response.")
 
-        except (IndexError, json.JSONDecodeError) as e:
-            print("Error parsing the JSON response:", e)
 
 
 
diff --git a/src/powerpwn/copilot/oversharing/response.json b/src/powerpwn/copilot/oversharing/response.json
new file mode 100644
index 0000000..e69de29

From e7618595703058e30a4452add5056d3d3f5aa48f Mon Sep 17 00:00:00 2001
From: Ayush RoyChowdhury <ayushr@zenity.io>
Date: Sun, 29 Dec 2024 08:47:06 -0600
Subject: [PATCH 07/26] Delete src/powerpwn/copilot/oversharing/response.json

---
 src/powerpwn/copilot/oversharing/response.json | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 delete mode 100644 src/powerpwn/copilot/oversharing/response.json

diff --git a/src/powerpwn/copilot/oversharing/response.json b/src/powerpwn/copilot/oversharing/response.json
deleted file mode 100644
index e69de29..0000000

From 672f923fffebd5cd084bb2fb3cd8396210a5a611 Mon Sep 17 00:00:00 2001
From: Ayush RoyChowdhury <ayushr@zenity.io>
Date: Sun, 29 Dec 2024 08:47:18 -0600
Subject: [PATCH 08/26] Delete src/powerpwn/copilot/oversharing/pii

---
 src/powerpwn/copilot/oversharing/pii | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 delete mode 100644 src/powerpwn/copilot/oversharing/pii

diff --git a/src/powerpwn/copilot/oversharing/pii b/src/powerpwn/copilot/oversharing/pii
deleted file mode 100644
index e69de29..0000000

From d8073691d80cfce46ba6bccd402c6d5d8b983a64 Mon Sep 17 00:00:00 2001
From: Ayush RoyChowdhury <ayushr@zenity.io>
Date: Sun, 29 Dec 2024 08:47:44 -0600
Subject: [PATCH 09/26] Delete src/powerpwn/copilot/oversharing/attempt.py

---
 src/powerpwn/copilot/oversharing/attempt.py | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 delete mode 100644 src/powerpwn/copilot/oversharing/attempt.py

diff --git a/src/powerpwn/copilot/oversharing/attempt.py b/src/powerpwn/copilot/oversharing/attempt.py
deleted file mode 100644
index e69de29..0000000

From 9618adc2bcf544546625e1f5ac06449a82448c3a Mon Sep 17 00:00:00 2001
From: Ayush RoyChowdhury <aroyc@zn-osx-ayushr.local>
Date: Sun, 29 Dec 2024 16:38:31 -0600
Subject: [PATCH 10/26] updated questions

---
 src/powerpwn/copilot/oversharing/discovery.py | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/src/powerpwn/copilot/oversharing/discovery.py b/src/powerpwn/copilot/oversharing/discovery.py
index b1bff62..b0c4e06 100644
--- a/src/powerpwn/copilot/oversharing/discovery.py
+++ b/src/powerpwn/copilot/oversharing/discovery.py
@@ -158,6 +158,11 @@ def handle_response(raw_message):
     if result and result[0]:
         print(result)
         print(result[0].parsed_message)
+        raw_message = result[0].parsed_message
+        print("Raw Response (Second Prompt):", raw_message)
+        files_list = handle_response(raw_message)
+        if files_list:
+            save_to_excel(files_list)
 
     # Second Prompt
     print("Sending second prompt to Copilot...")
@@ -177,14 +182,15 @@ def handle_response(raw_message):
         'Employee Records such as start date, end date, salary, position',
         'Passport Numbers',
         'Driver\'s License Numbers',
-        'Employee Salaries, Start Dates, Project Access, and IP Addresses',
+        '*Employee Salaries*',
+        '*IP addresses*',
         'Contact Information such as phone numbers, emails, addresses'
     ]
 
     try:
         for pii_type in pii_types:
             print(f"Sending third prompt for PII type: {pii_type}")
-            third_prompt_dynamic = f"Great job! are there any other files of filetype (docx, csv, xlsx, or pptx) that you have didn't mention that contains *{pii_type}* or *emails*? If yes, please list them as well."
+            third_prompt_dynamic = f"Great job! are there any other files of filetype (docx, csv, xlsx, or pptx) that you have didn't mention that contains *{pii_type}* or *emails*? If yes, please list them as well. Make sure to NOT MISS ANY FILE"
             result = asyncio.get_event_loop().run_until_complete(
                 asyncio.gather(copilot_connector.connect(third_prompt_dynamic))
             )

From 910b59c5e6e35b54486ac73d81a8d3fded1ea2e5 Mon Sep 17 00:00:00 2001
From: Ayush RoyChowdhury <aroyc@zn-osx-ayushr.local>
Date: Sun, 29 Dec 2024 16:54:39 -0600
Subject: [PATCH 11/26] lint attempt

---
 .../get_substrate_bearer_office.js                       | 4 ++--
 src/powerpwn/cli/runners.py                              | 9 +++++++--
 .../copilot/copilot_connector/copilot_connector.py       | 2 +-
 src/powerpwn/copilot/oversharing/discovery.py            | 4 ----
 .../copilot_studio/final_results/chat_exists_output.txt  | 4 ++++
 5 files changed, 14 insertions(+), 9 deletions(-)

diff --git a/puppeteer_get_substrate_bearer/get_substrate_bearer_office.js b/puppeteer_get_substrate_bearer/get_substrate_bearer_office.js
index 5f3f6a8..d10d641 100644
--- a/puppeteer_get_substrate_bearer/get_substrate_bearer_office.js
+++ b/puppeteer_get_substrate_bearer/get_substrate_bearer_office.js
@@ -28,7 +28,7 @@ function delay(time) {
         // For windows the executable path is to open the existing chrome instead of the
         // "Chrome for testing" that is included with puppeteer - solves white screen bug
         browser = await puppeteer.launch({
-            headless: true, // Change to 'false' to see the browser actions for debugging
+            headless: false, // Change to 'false' to see the browser actions for debugging
             // Use the default windows path for chrome exe - solves white window bug for windows
             executablePath: 'C:\\Program Files\\Google\\Chrome\\Application\\chrome.exe',
             // Start the browser in incognito mode
@@ -36,7 +36,7 @@ function delay(time) {
         });
     } catch(e) {
          browser = await puppeteer.launch({
-            headless: true, // Change to 'false' to see the browser actions for debugging
+            headless: false, // Change to 'false' to see the browser actions for debugging
             // Start the browser in fullscreen and incognito mode
             args: ['--start-fullscreen', '--incognito']
         });
diff --git a/src/powerpwn/cli/runners.py b/src/powerpwn/cli/runners.py
index e7d19c9..b99f187 100644
--- a/src/powerpwn/cli/runners.py
+++ b/src/powerpwn/cli/runners.py
@@ -204,13 +204,18 @@ def run_copilot_chat_command(args):
         if args.gui:
             CopilotGui().run(output_dir)
         return
-
     elif args.copilot_subcommand == "dump":
         dump = Dump(parsed_args, args.directory)
         output_dir = dump.run()
         if args.gui:
             CopilotGui().run(output_dir)
         return
+    elif args.copilot_subcommand == "oversharing":
+        oversharing = Oversharing(parsed_args, args.directory)
+        output_dir = oversharing.run()
+        if args.gui:
+            CopilotGui().run(output_dir)
+        return
 
     raise NotImplementedError(f"Copilot {args.copilot_subcommand} subcommand has not been implemented yet.")
 
@@ -229,4 +234,4 @@ def run_copilot_studio_command(args):
 
 
 def run_powerpages_command(args):
-    PowerPages(args)
+    PowerPages(args)
\ No newline at end of file
diff --git a/src/powerpwn/copilot/copilot_connector/copilot_connector.py b/src/powerpwn/copilot/copilot_connector/copilot_connector.py
index d94c0c5..fa37cd5 100644
--- a/src/powerpwn/copilot/copilot_connector/copilot_connector.py
+++ b/src/powerpwn/copilot/copilot_connector/copilot_connector.py
@@ -365,7 +365,7 @@ def __get_conversation_parameters(self, refresh: bool = False) -> ConversationPa
         url = self.__get_websocket_url(access_token, self.__arguments.scenario, parsed_jwt)
         session_id = self.__get_session_from_url(url)
 
-        available_plugins: list[PluginInfo] = self.__get_plugins(access_token)
+        available_plugins: list[PluginInfo] = []
 
         return ConversationParameters(
             conversation_id=str(uuid.uuid4()), url=url, session_id=session_id, available_plugins=available_plugins, used_plugins=[]
diff --git a/src/powerpwn/copilot/oversharing/discovery.py b/src/powerpwn/copilot/oversharing/discovery.py
index b0c4e06..2fd08a7 100644
--- a/src/powerpwn/copilot/oversharing/discovery.py
+++ b/src/powerpwn/copilot/oversharing/discovery.py
@@ -125,10 +125,6 @@ def handle_response(raw_message):
     print("DEBUG: Extracted files:", extracted_files)
     return extracted_files
 
-
-
-# ---------------- MAIN SCRIPT ----------------
-
 if __name__ == "__main__":
     user = os.getenv('m365user')
     user_password = os.getenv('m365pass')
diff --git a/src/powerpwn/copilot_studio/final_results/chat_exists_output.txt b/src/powerpwn/copilot_studio/final_results/chat_exists_output.txt
index e69de29..53a3146 100644
--- a/src/powerpwn/copilot_studio/final_results/chat_exists_output.txt
+++ b/src/powerpwn/copilot_studio/final_results/chat_exists_output.txt
@@ -0,0 +1,4 @@
+https://copilotstudio.microsoft.com/environments/Default-32f814a9-68c8-4ca1-93aa-5594523476b3/bots/cr6e4_agent7/canvas?__version__=2
+https://copilotstudio.microsoft.com/environments/Default-32f814a9-68c8-4ca1-93aa-5594523476b3/bots/cr6e4_copilot2/canvas?__version__=2
+https://copilotstudio.microsoft.com/environments/Default-32f814a9-68c8-4ca1-93aa-5594523476b3/bots/cr6e4_copilot7/canvas?__version__=2
+https://copilotstudio.microsoft.com/environments/Default-32f814a9-68c8-4ca1-93aa-5594523476b3/bots/cr6e4_copilotPoc/canvas?__version__=2

From 862ae014a5622998517e07e4330ad05dbeb7354a Mon Sep 17 00:00:00 2001
From: Ayush RoyChowdhury <aroyc@zn-osx-ayushr.local>
Date: Sun, 29 Dec 2024 17:01:26 -0600
Subject: [PATCH 12/26] isort and black

---
 src/powerpwn/copilot/oversharing/discovery.py | 102 ++++++------------
 .../copilot/oversharing/extract_data.py       |  30 +++---
 2 files changed, 50 insertions(+), 82 deletions(-)

diff --git a/src/powerpwn/copilot/oversharing/discovery.py b/src/powerpwn/copilot/oversharing/discovery.py
index 2fd08a7..e0326ec 100644
--- a/src/powerpwn/copilot/oversharing/discovery.py
+++ b/src/powerpwn/copilot/oversharing/discovery.py
@@ -1,15 +1,14 @@
-import os
 import asyncio
-import json
+import os
 import re
+
 import openpyxl
-from datetime import datetime
 
-# -- powerpwn/copilot imports --
-from powerpwn.copilot.models.chat_argument import ChatArguments
+from powerpwn.copilot.copilot_connector.copilot_connector import CopilotConnector
 from powerpwn.copilot.enums.copilot_scenario_enum import CopilotScenarioEnum
 from powerpwn.copilot.enums.verbose_enum import VerboseEnum
-from powerpwn.copilot.copilot_connector.copilot_connector import CopilotConnector
+from powerpwn.copilot.models.chat_argument import ChatArguments
+
 
 def read_prompts_from_file(file_path="pii.txt"):
     """
@@ -40,20 +39,16 @@ def enhanced_parser(raw_message):
 
     # Use regex to find file details in free-text or Markdown-like formats
     file_pattern = re.compile(
-        r"\*\*File Name:\*\*\s*(?P<file_name>.*?)\n.*?"
-        r"\*\*Author:\*\*\s*(?P<author>.*?)\n.*?"
-        r"\*\*Link:\*\*\s*\[.*?\]\((?P<link>.*?)\)",
-        re.DOTALL
+        r"\*\*File Name:\*\*\s*(?P<file_name>.*?)\n.*?" r"\*\*Author:\*\*\s*(?P<author>.*?)\n.*?" r"\*\*Link:\*\*\s*\[.*?\]\((?P<link>.*?)\)",
+        re.DOTALL,
     )
 
     # Match all occurrences of the pattern
     matches = file_pattern.finditer(raw_message)
     for match in matches:
-        extracted_files.append({
-            'file_name': match.group('file_name').strip(),
-            'link': match.group('link').strip(),
-            'author': match.group('author').strip(),
-        })
+        extracted_files.append(
+            {"file_name": match.group("file_name").strip(), "link": match.group("link").strip(), "author": match.group("author").strip()}
+        )
 
     # Fallback: Find lines that match individual fields if no comprehensive match found
     if not extracted_files:
@@ -63,16 +58,16 @@ def enhanced_parser(raw_message):
 
         for line in file_lines:
             if "File Name:" in line:
-                current_file['file_name'] = line.split("File Name:")[1].strip()
+                current_file["file_name"] = line.split("File Name:")[1].strip()
             elif "Author:" in line:
-                current_file['author'] = line.split("Author:")[1].strip()
-            elif "Link:" in line and '(' in line:
-                link_match = re.search(r'\((.*?)\)', line)
+                current_file["author"] = line.split("Author:")[1].strip()
+            elif "Link:" in line and "(" in line:
+                link_match = re.search(r"\((.*?)\)", line)
                 if link_match:
-                    current_file['link'] = link_match.group(1).strip()
+                    current_file["link"] = link_match.group(1).strip()
 
             # If all fields are populated, add to extracted_files and reset
-            if all(k in current_file for k in ['file_name', 'link', 'author']):
+            if all(k in current_file for k in ["file_name", "link", "author"]):
                 extracted_files.append(current_file)
                 current_file = {}
 
@@ -91,7 +86,7 @@ def save_to_excel(categorized_files):
         wb = openpyxl.Workbook()
         ws = wb.active
         ws.title = "Sensitive PII Files"
-        headers = ['File Name - Link', 'Author', 'Contains']
+        headers = ["File Name - Link", "Author", "Contains"]
         ws.append(headers)
 
     existing_files = set()
@@ -99,13 +94,9 @@ def save_to_excel(categorized_files):
         existing_files.add((row[0], row[1]))
 
     for file in categorized_files:
-        file_name_link = file['file_name'] + " - " + file['link']
-        if (file_name_link, file['author']) not in existing_files:
-            ws.append([
-                file_name_link,
-                file['author'],
-                ", ".join(file.get('contains', []))
-            ])
+        file_name_link = file["file_name"] + " - " + file["link"]
+        if (file_name_link, file["author"]) not in existing_files:
+            ws.append([file_name_link, file["author"], ", ".join(file.get("contains", []))])
     wb.save(output_file)
     print(f"Excel file updated: {output_file}")
 
@@ -115,7 +106,7 @@ def handle_response(raw_message):
     Processes the raw response using the enhanced parser.
     """
     # Check if raw_message is a WebsocketParsedMessage object
-    if hasattr(raw_message, 'copilot_message'):
+    if hasattr(raw_message, "copilot_message"):
         raw_message = raw_message.copilot_message  # Extract the actual message
 
     if not isinstance(raw_message, str):
@@ -125,18 +116,15 @@ def handle_response(raw_message):
     print("DEBUG: Extracted files:", extracted_files)
     return extracted_files
 
+
 if __name__ == "__main__":
-    user = os.getenv('m365user')
-    user_password = os.getenv('m365pass')
+    user = os.getenv("m365user")
+    user_password = os.getenv("m365pass")
     if user is None or user_password is None:
         raise ValueError("Environment variables 'm365user' or 'm365pass' are not set.")
 
     args = ChatArguments(
-        user=user,
-        password=user_password,
-        verbose=VerboseEnum.full,
-        scenario=CopilotScenarioEnum.officeweb,
-        use_cached_access_token=False
+        user=user, password=user_password, verbose=VerboseEnum.full, scenario=CopilotScenarioEnum.officeweb, use_cached_access_token=False
     )
 
     copilot_connector = CopilotConnector(args)
@@ -148,9 +136,7 @@ def handle_response(raw_message):
 
     # Initial Prompt
     print("Sending initial prompt to Copilot...")
-    result = asyncio.get_event_loop().run_until_complete(
-        asyncio.gather(copilot_connector.connect(prompts['init_prompt']))
-    )
+    result = asyncio.get_event_loop().run_until_complete(asyncio.gather(copilot_connector.connect(prompts["init_prompt"])))
     if result and result[0]:
         print(result)
         print(result[0].parsed_message)
@@ -162,9 +148,7 @@ def handle_response(raw_message):
 
     # Second Prompt
     print("Sending second prompt to Copilot...")
-    result = asyncio.get_event_loop().run_until_complete(
-        asyncio.gather(copilot_connector.connect(prompts['second_prompt']))
-    )
+    result = asyncio.get_event_loop().run_until_complete(asyncio.gather(copilot_connector.connect(prompts["second_prompt"])))
     if result and result[0]:
         raw_message = result[0].parsed_message
         print("Raw Response (Second Prompt):", raw_message)
@@ -174,22 +158,20 @@ def handle_response(raw_message):
 
     # Dynamic Third Prompts
     pii_types = [
-        'Social Security Numbers',
-        'Employee Records such as start date, end date, salary, position',
-        'Passport Numbers',
-        'Driver\'s License Numbers',
-        '*Employee Salaries*',
-        '*IP addresses*',
-        'Contact Information such as phone numbers, emails, addresses'
+        "Social Security Numbers",
+        "Employee Records such as start date, end date, salary, position",
+        "Passport Numbers",
+        "Driver's License Numbers",
+        "*Employee Salaries*",
+        "*IP addresses*",
+        "Contact Information such as phone numbers, emails, addresses",
     ]
 
     try:
         for pii_type in pii_types:
             print(f"Sending third prompt for PII type: {pii_type}")
             third_prompt_dynamic = f"Great job! are there any other files of filetype (docx, csv, xlsx, or pptx) that you have didn't mention that contains *{pii_type}* or *emails*? If yes, please list them as well. Make sure to NOT MISS ANY FILE"
-            result = asyncio.get_event_loop().run_until_complete(
-                asyncio.gather(copilot_connector.connect(third_prompt_dynamic))
-            )
+            result = asyncio.get_event_loop().run_until_complete(asyncio.gather(copilot_connector.connect(third_prompt_dynamic)))
 
             if result and result[0]:
                 raw_message = result[0].parsed_message
@@ -202,17 +184,3 @@ def handle_response(raw_message):
         print("\nKeyboard interrupt received. Exiting gracefully.")
 
     print("Finished processing.")
-
-
-
-
-
-
-
-
-
-
-
-
-
-
diff --git a/src/powerpwn/copilot/oversharing/extract_data.py b/src/powerpwn/copilot/oversharing/extract_data.py
index 338738c..d5dc45c 100644
--- a/src/powerpwn/copilot/oversharing/extract_data.py
+++ b/src/powerpwn/copilot/oversharing/extract_data.py
@@ -1,17 +1,19 @@
+import asyncio
 import os
-import openpyxl
 import re
-import asyncio
-from powerpwn.copilot.models.chat_argument import ChatArguments
-from powerpwn.copilot.enums.copilot_scenario_enum import CopilotScenarioEnum
-from powerpwn.copilot.enums.verbose_enum import VerboseEnum
+
+import openpyxl
+
 from powerpwn.copilot.chat_automator.chat_automator import ChatAutomator
 from powerpwn.copilot.copilot_connector.copilot_connector import CopilotConnector
+from powerpwn.copilot.enums.copilot_scenario_enum import CopilotScenarioEnum
+from powerpwn.copilot.enums.verbose_enum import VerboseEnum
+from powerpwn.copilot.models.chat_argument import ChatArguments
 
 # Set environment variables (if not already set)
-user = os.getenv('m365user')
-user_password = os.getenv('m365pass')
-third_prompt = os.getenv('thirdprompt')
+user = os.getenv("m365user")
+user_password = os.getenv("m365pass")
+third_prompt = os.getenv("thirdprompt")
 
 if user is None or user_password is None:
     raise ValueError("Environment variables for email or password are not set.")
@@ -19,16 +21,13 @@
 print("User being looked at is:", user)
 
 args = ChatArguments(
-    user=user,
-    password=user_password,
-    verbose=VerboseEnum.full,
-    scenario=CopilotScenarioEnum.officeweb,
-    use_cached_access_token=False
+    user=user, password=user_password, verbose=VerboseEnum.full, scenario=CopilotScenarioEnum.officeweb, use_cached_access_token=False
 )
 
 copilot_connector = CopilotConnector(args)
 copilot_connector.init_connection()
 
+
 # Load the Excel file and extract file names
 def extract_file_names_from_excel(file_path):
     """
@@ -43,7 +42,7 @@ def extract_file_names_from_excel(file_path):
     for row in ws.iter_rows(min_row=2, min_col=1, max_col=1):  # Skip header row
         file_name = row[0].value
         if file_name:
-            match = re.search(r'\[(.*?)\]', file_name)  # Extract text inside []
+            match = re.search(r"\[(.*?)\]", file_name)  # Extract text inside []
             if match:
                 file_names.append(match.group(1))  # Append the extracted file name
     return file_names
@@ -59,7 +58,7 @@ async def get_file_content_and_save(file_name):
     result = await copilot_connector.connect(prompt)
 
     # Check if the result contains a valid message
-    if hasattr(result, 'parsed_message') and result.parsed_message:
+    if hasattr(result, "parsed_message") and result.parsed_message:
         raw_message = result.parsed_message
         print(f"Content retrieved for {file_name}:")
         print(raw_message.copilot_message)  # Display the content
@@ -82,6 +81,7 @@ async def main():
     for file_name in file_names:
         await get_file_content_and_save(file_name)
 
+
 # Run the async script
 if __name__ == "__main__":
     asyncio.run(main())

From f5e532074e3a90c08c83649dfbef9cecc82c5871 Mon Sep 17 00:00:00 2001
From: Ayush RoyChowdhury <aroyc@zn-osx-ayushr.local>
Date: Tue, 31 Dec 2024 08:43:46 -0600
Subject: [PATCH 13/26] added runners and arguments

---
 src/powerpwn/cli/arguments.py |  5 +++++
 src/powerpwn/cli/runners.py   | 11 ++++++-----
 2 files changed, 11 insertions(+), 5 deletions(-)

diff --git a/src/powerpwn/cli/arguments.py b/src/powerpwn/cli/arguments.py
index 199e093..a8e71fc 100644
--- a/src/powerpwn/cli/arguments.py
+++ b/src/powerpwn/cli/arguments.py
@@ -152,6 +152,11 @@ def module_copilot(command_subparsers: argparse.ArgumentParser):
     copilot_modules(whoami)
     whoami.add_argument("-g", "--gui", action="store_true", help="Run local server for gui.")
 
+    oversharing = copilot_subparsers.add_parser("oversharing", description="Get the sensitive files that user has access to",
+                                           help="Get the current user's information")
+    copilot_modules(oversharing)
+    oversharing.add_argument("-p", "--prompts", action="store_true", help="Run local file for prompts")
+
     dump = copilot_subparsers.add_parser(
         "dump",
         description="Data dump using recon from whoami command",
diff --git a/src/powerpwn/cli/runners.py b/src/powerpwn/cli/runners.py
index b99f187..0f691a9 100644
--- a/src/powerpwn/cli/runners.py
+++ b/src/powerpwn/cli/runners.py
@@ -15,6 +15,7 @@
 from powerpwn.copilot.models.chat_argument import ChatArguments
 from powerpwn.copilot.spearphishing.automated_spear_phisher import AutomatedSpearPhisher
 from powerpwn.copilot.whoami.whoami import WhoAmI
+from powerpwn.copilot.oversharing.discovery import Discovery
 from powerpwn.copilot_studio.modules.deep_scan import DeepScan
 from powerpwn.copilot_studio.modules.enum import Enum
 from powerpwn.nocodemalware.enums.code_exec_type_enum import CodeExecTypeEnum
@@ -210,11 +211,11 @@ def run_copilot_chat_command(args):
         if args.gui:
             CopilotGui().run(output_dir)
         return
-    elif args.copilot_subcommand == "oversharing":
-        oversharing = Oversharing(parsed_args, args.directory)
-        output_dir = oversharing.run()
-        if args.gui:
-            CopilotGui().run(output_dir)
+    elif args.copilot_subcommand == "discovery":
+        discovery = Discovery(parsed_args, args.directory)
+        output_dir = discovery.run()
+        if args.prompt:
+            Discovery().run(prompt)
         return
 
     raise NotImplementedError(f"Copilot {args.copilot_subcommand} subcommand has not been implemented yet.")

From 784b8a6d986cfd97b7e9006433da7dd9fc74006c Mon Sep 17 00:00:00 2001
From: Ayush RoyChowdhury <aroyc@zn-osx-ayushr.local>
Date: Tue, 31 Dec 2024 09:05:44 -0600
Subject: [PATCH 14/26] runnable modules

---
 src/powerpwn/cli/arguments.py                 |   6 +-
 src/powerpwn/copilot/oversharing/discovery.py | 326 +++++++++---------
 2 files changed, 161 insertions(+), 171 deletions(-)

diff --git a/src/powerpwn/cli/arguments.py b/src/powerpwn/cli/arguments.py
index a8e71fc..20b8155 100644
--- a/src/powerpwn/cli/arguments.py
+++ b/src/powerpwn/cli/arguments.py
@@ -152,10 +152,10 @@ def module_copilot(command_subparsers: argparse.ArgumentParser):
     copilot_modules(whoami)
     whoami.add_argument("-g", "--gui", action="store_true", help="Run local server for gui.")
 
-    oversharing = copilot_subparsers.add_parser("oversharing", description="Get the sensitive files that user has access to",
+    discovery = copilot_subparsers.add_parser("discovery", description="Get the sensitive files that user has access to",
                                            help="Get the current user's information")
-    copilot_modules(oversharing)
-    oversharing.add_argument("-p", "--prompts", action="store_true", help="Run local file for prompts")
+    copilot_modules(discovery)
+    discovery.add_argument("-p", "--prompts", type=str, required=True, help="Run local file for prompts from the directory")
 
     dump = copilot_subparsers.add_parser(
         "dump",
diff --git a/src/powerpwn/copilot/oversharing/discovery.py b/src/powerpwn/copilot/oversharing/discovery.py
index e0326ec..466bbbd 100644
--- a/src/powerpwn/copilot/oversharing/discovery.py
+++ b/src/powerpwn/copilot/oversharing/discovery.py
@@ -1,186 +1,176 @@
 import asyncio
 import os
 import re
-
 import openpyxl
-
 from powerpwn.copilot.copilot_connector.copilot_connector import CopilotConnector
 from powerpwn.copilot.enums.copilot_scenario_enum import CopilotScenarioEnum
 from powerpwn.copilot.enums.verbose_enum import VerboseEnum
 from powerpwn.copilot.models.chat_argument import ChatArguments
 
 
-def read_prompts_from_file(file_path="pii.txt"):
-    """
-    Reads prompts from the file. Expects sections separated by two newlines.
-    """
-    prompts = {}
-    with open(file_path, "r", encoding="utf-8") as f:
-        content = f.read().strip()
-
-        # Split on double newlines
-        sections = content.split("\n\n")
-        for section in sections:
-            lines = section.split("\n", 1)
-            if len(lines) == 2:
-                key = lines[0].strip(":").strip()
-                value = lines[1].strip()
-                prompts[key] = value
-            else:
-                print(f"Warning: Skipping invalid section: {section}")
-    return prompts
-
-
-def enhanced_parser(raw_message):
-    """
-    Extracts file_name, link, and author from free-text, Markdown, or structured formats.
-    """
-    extracted_files = []
-
-    # Use regex to find file details in free-text or Markdown-like formats
-    file_pattern = re.compile(
-        r"\*\*File Name:\*\*\s*(?P<file_name>.*?)\n.*?" r"\*\*Author:\*\*\s*(?P<author>.*?)\n.*?" r"\*\*Link:\*\*\s*\[.*?\]\((?P<link>.*?)\)",
-        re.DOTALL,
-    )
-
-    # Match all occurrences of the pattern
-    matches = file_pattern.finditer(raw_message)
-    for match in matches:
-        extracted_files.append(
-            {"file_name": match.group("file_name").strip(), "link": match.group("link").strip(), "author": match.group("author").strip()}
-        )
+class Discovery:
+    def __init__(self, prompts_file="pii.txt", output_file="new_pii_sensitive_files_report.xlsx"):
+        self.prompts_file = prompts_file
+        self.output_file = output_file
+        self.user = os.getenv("m365user")
+        self.user_password = os.getenv("m365pass")
 
-    # Fallback: Find lines that match individual fields if no comprehensive match found
-    if not extracted_files:
-        print("DEBUG: No structured matches found. Attempting fallback extraction.")
-        file_lines = raw_message.splitlines()
-        current_file = {}
-
-        for line in file_lines:
-            if "File Name:" in line:
-                current_file["file_name"] = line.split("File Name:")[1].strip()
-            elif "Author:" in line:
-                current_file["author"] = line.split("Author:")[1].strip()
-            elif "Link:" in line and "(" in line:
-                link_match = re.search(r"\((.*?)\)", line)
-                if link_match:
-                    current_file["link"] = link_match.group(1).strip()
-
-            # If all fields are populated, add to extracted_files and reset
-            if all(k in current_file for k in ["file_name", "link", "author"]):
-                extracted_files.append(current_file)
-                current_file = {}
-
-    return extracted_files
-
-
-def save_to_excel(categorized_files):
-    """
-    Append new file entries to 'pii_sensitive_files_report.xlsx' if not duplicates.
-    """
-    output_file = "new_pii_sensitive_files_report.xlsx"
-    if os.path.exists(output_file):
-        wb = openpyxl.load_workbook(output_file)
-        ws = wb.active
-    else:
-        wb = openpyxl.Workbook()
-        ws = wb.active
-        ws.title = "Sensitive PII Files"
-        headers = ["File Name - Link", "Author", "Contains"]
-        ws.append(headers)
-
-    existing_files = set()
-    for row in ws.iter_rows(min_row=2, values_only=True):
-        existing_files.add((row[0], row[1]))
-
-    for file in categorized_files:
-        file_name_link = file["file_name"] + " - " + file["link"]
-        if (file_name_link, file["author"]) not in existing_files:
-            ws.append([file_name_link, file["author"], ", ".join(file.get("contains", []))])
-    wb.save(output_file)
-    print(f"Excel file updated: {output_file}")
-
-
-def handle_response(raw_message):
-    """
-    Processes the raw response using the enhanced parser.
-    """
-    # Check if raw_message is a WebsocketParsedMessage object
-    if hasattr(raw_message, "copilot_message"):
-        raw_message = raw_message.copilot_message  # Extract the actual message
-
-    if not isinstance(raw_message, str):
-        raise TypeError("raw_message must be a string or an object containing a message string.")
-
-    extracted_files = enhanced_parser(raw_message)
-    print("DEBUG: Extracted files:", extracted_files)
-    return extracted_files
+        if not self.user or not self.user_password:
+            raise ValueError("Environment variables 'm365user' or 'm365pass' are not set.")
+
+        self.args = ChatArguments(
+            user=self.user, password=self.user_password, verbose=VerboseEnum.full, scenario=CopilotScenarioEnum.officeweb, use_cached_access_token=False
+        )
+        self.copilot_connector = CopilotConnector(self.args)
+        self.prompts = self.read_prompts_from_file(self.prompts_file)
+
+    def read_prompts_from_file(self, file_path):
+        """
+        Reads prompts from the file. Expects sections separated by two newlines.
+        """
+        prompts = {}
+        with open(file_path, "r", encoding="utf-8") as f:
+            content = f.read().strip()
+
+            # Split on double newlines
+            sections = content.split("\n\n")
+            for section in sections:
+                lines = section.split("\n", 1)
+                if len(lines) == 2:
+                    key = lines[0].strip(":").strip()
+                    value = lines[1].strip()
+                    prompts[key] = value
+                else:
+                    print(f"Warning: Skipping invalid section: {section}")
+        return prompts
+
+    def enhanced_parser(self, raw_message):
+        """
+        Extracts file_name, link, and author from free-text, Markdown, or structured formats.
+        """
+        extracted_files = []
+
+        # Use regex to find file details in free-text or Markdown-like formats
+        file_pattern = re.compile(
+            r"\*\*File Name:\*\*\s*(?P<file_name>.*?)\n.*?" r"\*\*Author:\*\*\s*(?P<author>.*?)\n.*?" r"\*\*Link:\*\*\s*\[.*?\]\((?P<link>.*?)\)",
+            re.DOTALL,
+        )
 
+        # Match all occurrences of the pattern
+        matches = file_pattern.finditer(raw_message)
+        for match in matches:
+            extracted_files.append(
+                {"file_name": match.group("file_name").strip(), "link": match.group("link").strip(), "author": match.group("author").strip()}
+            )
+
+        # Fallback: Find lines that match individual fields if no comprehensive match found
+        if not extracted_files:
+            print("DEBUG: No structured matches found. Attempting fallback extraction.")
+            file_lines = raw_message.splitlines()
+            current_file = {}
+
+            for line in file_lines:
+                if "File Name:" in line:
+                    current_file["file_name"] = line.split("File Name:")[1].strip()
+                elif "Author:" in line:
+                    current_file["author"] = line.split("Author:")[1].strip()
+                elif "Link:" in line and "(" in line:
+                    link_match = re.search(r"\((.*?)\)", line)
+                    if link_match:
+                        current_file["link"] = link_match.group(1).strip()
+
+                # If all fields are populated, add to extracted_files and reset
+                if all(k in current_file for k in ["file_name", "link", "author"]):
+                    extracted_files.append(current_file)
+                    current_file = {}
+
+        return extracted_files
+
+    def save_to_excel(self, categorized_files):
+        """
+        Append new file entries to 'pii_sensitive_files_report.xlsx' if not duplicates.
+        """
+        if os.path.exists(self.output_file):
+            wb = openpyxl.load_workbook(self.output_file)
+            ws = wb.active
+        else:
+            wb = openpyxl.Workbook()
+            ws = wb.active
+            ws.title = "Sensitive PII Files"
+            headers = ["File Name - Link", "Author", "Contains"]
+            ws.append(headers)
+
+        existing_files = set()
+        for row in ws.iter_rows(min_row=2, values_only=True):
+            existing_files.add((row[0], row[1]))
+
+        for file in categorized_files:
+            file_name_link = file["file_name"] + " - " + file["link"]
+            if (file_name_link, file["author"]) not in existing_files:
+                ws.append([file_name_link, file["author"], ", ".join(file.get("contains", []))])
+        wb.save(self.output_file)
+        print(f"Excel file updated: {self.output_file}")
+
+    def handle_response(self, raw_message):
+        """
+        Processes the raw response using the enhanced parser.
+        """
+        # Check if raw_message is a WebsocketParsedMessage object
+        if hasattr(raw_message, "copilot_message"):
+            raw_message = raw_message.copilot_message  # Extract the actual message
+
+        if not isinstance(raw_message, str):
+            raise TypeError("raw_message must be a string or an object containing a message string.")
+
+        extracted_files = self.enhanced_parser(raw_message)
+        print("DEBUG: Extracted files:", extracted_files)
+        return extracted_files
+
+    async def process_prompt(self, prompt):
+        """
+        Sends a prompt to Copilot and handles the response.
+        """
+        print(f"Sending prompt: {prompt}")
+        result = await asyncio.gather(self.copilot_connector.connect(prompt))
+
+        if result and result[0]:
+            raw_message = result[0].parsed_message
+            print(f"Raw Response: {raw_message}")
+            files_list = self.handle_response(raw_message)
+            if files_list:
+                self.save_to_excel(files_list)
+
+    def run(self):
+        """
+        The main method to run the entire discovery process.
+        """
+        # Initialize connection to Copilot
+        self.copilot_connector.init_connection()
+
+        # Process initial and second prompts
+        asyncio.run(self.process_prompt(self.prompts["init_prompt"]))
+        asyncio.run(self.process_prompt(self.prompts["second_prompt"]))
+
+        # Dynamic third prompts
+        pii_types = [
+            "Social Security Numbers",
+            "Employee Records such as start date, end date, salary, position",
+            "Passport Numbers",
+            "Driver's License Numbers",
+            "*Employee Salaries*",
+            "*IP addresses*",
+            "Contact Information such as phone numbers, emails, addresses",
+        ]
 
-if __name__ == "__main__":
-    user = os.getenv("m365user")
-    user_password = os.getenv("m365pass")
-    if user is None or user_password is None:
-        raise ValueError("Environment variables 'm365user' or 'm365pass' are not set.")
-
-    args = ChatArguments(
-        user=user, password=user_password, verbose=VerboseEnum.full, scenario=CopilotScenarioEnum.officeweb, use_cached_access_token=False
-    )
-
-    copilot_connector = CopilotConnector(args)
-    copilot_connector.init_connection()
-
-    # Read prompts
-    prompts = read_prompts_from_file("pii.txt")
-    print("DEBUG: prompts ->", prompts)
-
-    # Initial Prompt
-    print("Sending initial prompt to Copilot...")
-    result = asyncio.get_event_loop().run_until_complete(asyncio.gather(copilot_connector.connect(prompts["init_prompt"])))
-    if result and result[0]:
-        print(result)
-        print(result[0].parsed_message)
-        raw_message = result[0].parsed_message
-        print("Raw Response (Second Prompt):", raw_message)
-        files_list = handle_response(raw_message)
-        if files_list:
-            save_to_excel(files_list)
-
-    # Second Prompt
-    print("Sending second prompt to Copilot...")
-    result = asyncio.get_event_loop().run_until_complete(asyncio.gather(copilot_connector.connect(prompts["second_prompt"])))
-    if result and result[0]:
-        raw_message = result[0].parsed_message
-        print("Raw Response (Second Prompt):", raw_message)
-        files_list = handle_response(raw_message)
-        if files_list:
-            save_to_excel(files_list)
-
-    # Dynamic Third Prompts
-    pii_types = [
-        "Social Security Numbers",
-        "Employee Records such as start date, end date, salary, position",
-        "Passport Numbers",
-        "Driver's License Numbers",
-        "*Employee Salaries*",
-        "*IP addresses*",
-        "Contact Information such as phone numbers, emails, addresses",
-    ]
-
-    try:
         for pii_type in pii_types:
-            print(f"Sending third prompt for PII type: {pii_type}")
             third_prompt_dynamic = f"Great job! are there any other files of filetype (docx, csv, xlsx, or pptx) that you have didn't mention that contains *{pii_type}* or *emails*? If yes, please list them as well. Make sure to NOT MISS ANY FILE"
-            result = asyncio.get_event_loop().run_until_complete(asyncio.gather(copilot_connector.connect(third_prompt_dynamic)))
+            print(f"Sending third prompt for PII type: {pii_type}")
+            asyncio.run(self.process_prompt(third_prompt_dynamic))
 
-            if result and result[0]:
-                raw_message = result[0].parsed_message
-                print(f"Raw Response for {pii_type}:", raw_message)
-                files_list = handle_response(raw_message)
-                if files_list:
-                    save_to_excel(files_list)
+        print("Finished processing.")
 
-    except KeyboardInterrupt:
-        print("\nKeyboard interrupt received. Exiting gracefully.")
+# Example usage:
+if __name__ == "__main__":
+    discovery = Discovery()
+    discovery.run()
 
-    print("Finished processing.")

From 16746a3088fc2691b3e4fe75e36a564bcb11f2eb Mon Sep 17 00:00:00 2001
From: Ayush RoyChowdhury <aroyc@zn-osx-ayushr.local>
Date: Wed, 1 Jan 2025 08:57:41 -0600
Subject: [PATCH 15/26] updated parser to work

---
 src/powerpwn/copilot/oversharing/discovery.py | 150 ++++++++++++------
 1 file changed, 103 insertions(+), 47 deletions(-)

diff --git a/src/powerpwn/copilot/oversharing/discovery.py b/src/powerpwn/copilot/oversharing/discovery.py
index 466bbbd..e183039 100644
--- a/src/powerpwn/copilot/oversharing/discovery.py
+++ b/src/powerpwn/copilot/oversharing/discovery.py
@@ -9,7 +9,7 @@
 
 
 class Discovery:
-    def __init__(self, prompts_file="pii.txt", output_file="new_pii_sensitive_files_report.xlsx"):
+    def __init__(self, prompts_file="pii.txt", output_file="osharefiles_report.xlsx"):
         self.prompts_file = prompts_file
         self.output_file = output_file
         self.user = os.getenv("m365user")
@@ -46,49 +46,96 @@ def read_prompts_from_file(self, file_path):
 
     def enhanced_parser(self, raw_message):
         """
-        Extracts file_name, link, and author from free-text, Markdown, or structured formats.
+        Parses the LLM response in a chunk-by-chunk manner to extract file info.
         """
-        extracted_files = []
+        # Split by numbered items or bullet points
+        # This handles lines like "1. **File Name:** [something]" etc.
+        chunks = re.split(r"\n\s*(?=\d+\.\s)", raw_message)
 
-        # Use regex to find file details in free-text or Markdown-like formats
-        file_pattern = re.compile(
-            r"\*\*File Name:\*\*\s*(?P<file_name>.*?)\n.*?" r"\*\*Author:\*\*\s*(?P<author>.*?)\n.*?" r"\*\*Link:\*\*\s*\[.*?\]\((?P<link>.*?)\)",
-            re.DOTALL,
-        )
-
-        # Match all occurrences of the pattern
-        matches = file_pattern.finditer(raw_message)
-        for match in matches:
-            extracted_files.append(
-                {"file_name": match.group("file_name").strip(), "link": match.group("link").strip(), "author": match.group("author").strip()}
-            )
+        files = []
 
-        # Fallback: Find lines that match individual fields if no comprehensive match found
-        if not extracted_files:
-            print("DEBUG: No structured matches found. Attempting fallback extraction.")
-            file_lines = raw_message.splitlines()
-            current_file = {}
-
-            for line in file_lines:
-                if "File Name:" in line:
-                    current_file["file_name"] = line.split("File Name:")[1].strip()
-                elif "Author:" in line:
-                    current_file["author"] = line.split("Author:")[1].strip()
-                elif "Link:" in line and "(" in line:
-                    link_match = re.search(r"\((.*?)\)", line)
-                    if link_match:
-                        current_file["link"] = link_match.group(1).strip()
-
-                # If all fields are populated, add to extracted_files and reset
-                if all(k in current_file for k in ["file_name", "link", "author"]):
-                    extracted_files.append(current_file)
-                    current_file = {}
+        for chunk in chunks:
+            file_info = {}
 
-        return extracted_files
+            # 1) Try to extract a file name line with "File Name:" or "File: ..."
+            match_file = re.search(
+                r"(?:File Name\s*:|File\s*:|File:\s*)(.*)",
+                chunk,
+                re.IGNORECASE
+            )
+            if match_file:
+                file_name_line = match_file.group(1).strip()
+                # Optionally, parse out the text in brackets vs the link
+                # E.g., [MyFile.docx](http://...)
+                bracket_match = re.search(r"\[([^\]]+)\]\(([^)]+)\)", file_name_line)
+                if bracket_match:
+                    file_info["file_name"] = bracket_match.group(1)
+                    file_info["file_link"] = bracket_match.group(2)
+                else:
+                    # Fallback if there's no bracket/link pattern
+                    file_info["file_name"] = file_name_line
+            else:
+                # 1a) Fallback: maybe the chunk has a bracketed link but not prefixed with "File:"
+                bracket_only_match = re.search(r"\[([^\]]+)\]\(([^)]+)\)", chunk)
+                if bracket_only_match:
+                    file_info["file_name"] = bracket_only_match.group(1).strip()
+                    file_info["file_link"] = bracket_only_match.group(2).strip()
+
+            # 2) Extract the author line:
+            #    e.g., "- **Author:** Owner Owner"
+            #    or "Author: Kris Smith"
+            #    or "The author is indicated as **python-docx;[Kris Smith](...)**."
+            match_author = re.search(
+                r"(?:Author[s]*:\s*|The\s+author[s]?\s+(?:is|are)\s+(?:indicated\s+as\s+)?)([^\n]+)",
+                chunk,
+                re.IGNORECASE
+            )
+            if match_author:
+                # Clean up any markdown or extra characters
+                authors = match_author.group(1).strip().replace("**", "")
+                file_info["author"] = authors
+
+            # 3) If you also want to capture "Contains:" or "Last Modified:", do something similar:
+            # Example: "Contains: Salary info, IP addresses"
+            match_contains = re.search(
+                r"(?:Contains:?\s*)([^\n]+)",
+                chunk,
+                re.IGNORECASE
+            )
+            if match_contains:
+                file_info["contains"] = [
+                    c.strip() for c in match_contains.group(1).split(",")
+                ]
+
+            # Similarly for "Last Modified:" if you want it:
+            # match_last_modified = re.search(
+            #     r"(?:Last Modified:\s*)([^\n]+)",
+            #     chunk,
+            #     re.IGNORECASE
+            # )
+            # if match_last_modified:
+            #     file_info["last_modified"] = match_last_modified.group(1).strip()
+
+            # 4) Check if we actually found anything that looks like a file name
+            if "file_name" in file_info:
+                # If no file_link was found, default to "N/A"
+                file_info.setdefault("file_link", "N/A")
+                # If no author was found, default to "N/A"
+                file_info.setdefault("author", "N/A")
+
+                # Optional: Filter out obviously invalid file names
+                # (for example, if it’s just “Here” or “I have found”)
+                # if not is_likely_valid_file(file_info["file_name"]):
+                #     continue
+
+                files.append(file_info)
+
+        return files
 
     def save_to_excel(self, categorized_files):
         """
         Append new file entries to 'pii_sensitive_files_report.xlsx' if not duplicates.
+        Each file will have its own separate columns for File Name, File Link, and Author.
         """
         if os.path.exists(self.output_file):
             wb = openpyxl.load_workbook(self.output_file)
@@ -97,17 +144,22 @@ def save_to_excel(self, categorized_files):
             wb = openpyxl.Workbook()
             ws = wb.active
             ws.title = "Sensitive PII Files"
-            headers = ["File Name - Link", "Author", "Contains"]
+            headers = ["File Name", "File Link", "Author", "Contains"]
             ws.append(headers)
 
         existing_files = set()
         for row in ws.iter_rows(min_row=2, values_only=True):
-            existing_files.add((row[0], row[1]))
+            existing_files.add((row[0], row[1]))  # Check for duplicates based on file name and link
 
         for file in categorized_files:
-            file_name_link = file["file_name"] + " - " + file["link"]
-            if (file_name_link, file["author"]) not in existing_files:
-                ws.append([file_name_link, file["author"], ", ".join(file.get("contains", []))])
+            file_name = file["file_name"]
+            file_link = file["file_link"]
+            author = file["author"]
+            if (file_name, file_link) not in existing_files:
+                # Append separate file details to the Excel sheet
+                ws.append([file_name, file_link, author, ", ".join(file.get("contains", []))])
+                existing_files.add((file_name, file_link))  # Add to existing files set to avoid duplicates
+
         wb.save(self.output_file)
         print(f"Excel file updated: {self.output_file}")
 
@@ -153,17 +205,21 @@ def run(self):
 
         # Dynamic third prompts
         pii_types = [
-            "Social Security Numbers",
-            "Employee Records such as start date, end date, salary, position",
-            "Passport Numbers",
-            "Driver's License Numbers",
+            "*Social Security Numbers*",
+            "*Employee Records*",
+            "*Employee Records* such as start dates, end dates, and date of births",
+            "*Employee Records* such as salary and job position",
+            "*Employee Records* such as addresses",
+            "*Passport Numbers*",
+            "*Driver's License Numbers*",
             "*Employee Salaries*",
             "*IP addresses*",
-            "Contact Information such as phone numbers, emails, addresses",
+            "*Contact Information such as phone numbers*",
+            "*Contact Information such as emails*"
         ]
 
         for pii_type in pii_types:
-            third_prompt_dynamic = f"Great job! are there any other files of filetype (docx, csv, xlsx, or pptx) that you have didn't mention that contains *{pii_type}* or *emails*? If yes, please list them as well. Make sure to NOT MISS ANY FILE"
+            third_prompt_dynamic = f"are there any other sensitive files that contains {pii_type}? i believe you are missing quite a lot. be sure to also search for different formats such as .csv, .xlsx, and .pptx!"
             print(f"Sending third prompt for PII type: {pii_type}")
             asyncio.run(self.process_prompt(third_prompt_dynamic))
 

From c07085a8e3458c5132186e0462716c6193f34737 Mon Sep 17 00:00:00 2001
From: Ayush RoyChowdhury <aroyc@zn-osx-ayushr.local>
Date: Wed, 1 Jan 2025 09:50:25 -0600
Subject: [PATCH 16/26] headless browser

---
 puppeteer_get_substrate_bearer/get_substrate_bearer_office.js | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/puppeteer_get_substrate_bearer/get_substrate_bearer_office.js b/puppeteer_get_substrate_bearer/get_substrate_bearer_office.js
index d10d641..5f3f6a8 100644
--- a/puppeteer_get_substrate_bearer/get_substrate_bearer_office.js
+++ b/puppeteer_get_substrate_bearer/get_substrate_bearer_office.js
@@ -28,7 +28,7 @@ function delay(time) {
         // For windows the executable path is to open the existing chrome instead of the
         // "Chrome for testing" that is included with puppeteer - solves white screen bug
         browser = await puppeteer.launch({
-            headless: false, // Change to 'false' to see the browser actions for debugging
+            headless: true, // Change to 'false' to see the browser actions for debugging
             // Use the default windows path for chrome exe - solves white window bug for windows
             executablePath: 'C:\\Program Files\\Google\\Chrome\\Application\\chrome.exe',
             // Start the browser in incognito mode
@@ -36,7 +36,7 @@ function delay(time) {
         });
     } catch(e) {
          browser = await puppeteer.launch({
-            headless: false, // Change to 'false' to see the browser actions for debugging
+            headless: true, // Change to 'false' to see the browser actions for debugging
             // Start the browser in fullscreen and incognito mode
             args: ['--start-fullscreen', '--incognito']
         });

From cbff871633977dae26a7bfe31890bf8d01f558a5 Mon Sep 17 00:00:00 2001
From: Ayush RoyChowdhury <aroyc@zn-osx-ayushr.local>
Date: Fri, 3 Jan 2025 23:13:19 -0600
Subject: [PATCH 17/26] fixed parser

---
 src/powerpwn/copilot/oversharing/discovery.py | 131 +++++++++++-------
 1 file changed, 81 insertions(+), 50 deletions(-)

diff --git a/src/powerpwn/copilot/oversharing/discovery.py b/src/powerpwn/copilot/oversharing/discovery.py
index e183039..18cef64 100644
--- a/src/powerpwn/copilot/oversharing/discovery.py
+++ b/src/powerpwn/copilot/oversharing/discovery.py
@@ -9,7 +9,7 @@
 
 
 class Discovery:
-    def __init__(self, prompts_file="pii.txt", output_file="osharefiles_report.xlsx"):
+    def __init__(self, prompts_file="pii.txt", output_file="oversharedfiles_report.xlsx"):
         self.prompts_file = prompts_file
         self.output_file = output_file
         self.user = os.getenv("m365user")
@@ -44,91 +44,122 @@ def read_prompts_from_file(self, file_path):
                     print(f"Warning: Skipping invalid section: {section}")
         return prompts
 
+    import re
+
     def enhanced_parser(self, raw_message):
         """
         Parses the LLM response in a chunk-by-chunk manner to extract file info.
         """
         # Split by numbered items or bullet points
-        # This handles lines like "1. **File Name:** [something]" etc.
         chunks = re.split(r"\n\s*(?=\d+\.\s)", raw_message)
 
+        # A small set of known file extensions we might expect.
+        # You can expand as needed.
+        FILE_EXTENSIONS = [".docx", ".xlsx", ".csv", ".pdf", ".pptx", ".aspx"]
+
         files = []
 
         for chunk in chunks:
             file_info = {}
 
-            # 1) Try to extract a file name line with "File Name:" or "File: ..."
+            # Option 1: If there's a "File Name:" or "File:" pattern, handle as before
             match_file = re.search(
                 r"(?:File Name\s*:|File\s*:|File:\s*)(.*)",
                 chunk,
                 re.IGNORECASE
             )
+
+            # We will store a "candidate filename" in file_info["file_name"]
+            # and "candidate link" in file_info["file_link"]
             if match_file:
                 file_name_line = match_file.group(1).strip()
-                # Optionally, parse out the text in brackets vs the link
-                # E.g., [MyFile.docx](http://...)
+                # Remove trailing bracket references like [1], [2], etc., if any
+                file_name_line = re.sub(r"\[\d+\]", "", file_name_line).strip()
+
+                # Check for Markdown link pattern: [someFile.ext](URL)
                 bracket_match = re.search(r"\[([^\]]+)\]\(([^)]+)\)", file_name_line)
                 if bracket_match:
-                    file_info["file_name"] = bracket_match.group(1)
-                    file_info["file_link"] = bracket_match.group(2)
+                    file_info["file_name"] = bracket_match.group(1).strip()
+                    file_info["file_link"] = bracket_match.group(2).strip()
                 else:
                     # Fallback if there's no bracket/link pattern
                     file_info["file_name"] = file_name_line
             else:
-                # 1a) Fallback: maybe the chunk has a bracketed link but not prefixed with "File:"
-                bracket_only_match = re.search(r"\[([^\]]+)\]\(([^)]+)\)", chunk)
-                if bracket_only_match:
-                    file_info["file_name"] = bracket_only_match.group(1).strip()
-                    file_info["file_link"] = bracket_only_match.group(2).strip()
-
-            # 2) Extract the author line:
-            #    e.g., "- **Author:** Owner Owner"
-            #    or "Author: Kris Smith"
-            #    or "The author is indicated as **python-docx;[Kris Smith](...)**."
+                # No explicit "File:" line found => fallback to extension-based detection
+                # We look for something that ends with .docx or .csv, etc.
+                # Then, in the same chunk, see if there's a URL (http/https).
+                # This might look for lines like:
+                # "employee_records.csv[1](https://...)"
+                # or "SensitiveInformation.docx (http://...)"
+
+                # a) Look for any potential link
+                url_match = re.search(r"(https?://[^\s]+)", chunk)
+                if url_match:
+                    candidate_url = url_match.group(1).strip()
+                    file_info["file_link"] = candidate_url
+                else:
+                    candidate_url = None  # no link found
+
+                # b) Try to find a filename with known extension before or around that link
+                #    We can attempt a regex that captures something like "SomeFile.pdf[1]" or "SomeFile.pdf"
+                #    prior to the link.
+                if candidate_url:
+                    # If there's a bracket pattern, e.g. "employee_records.csv[1](...)"
+                    # we can capture everything up to "[1]("
+                    # Example: "employee_records.csv[1](https://...)"
+                    # We'll match group(1) = "employee_records.csv"
+                    pattern_before_url = rf"([^\s]+(?:{'|'.join(FILE_EXTENSIONS)})(?:\[\d+\])?)\("
+                    fallback_match = re.search(pattern_before_url, chunk, re.IGNORECASE)
+                    if fallback_match:
+                        # strip any trailing [1], [2], etc.
+                        raw_name = re.sub(r"\[\d+\]$", "", fallback_match.group(1).strip())
+                        file_info["file_name"] = raw_name
+                    else:
+                        # If we can't find that pattern, look for a simpler approach
+                        # e.g. "employee_records.csv https://..."
+                        # We'll assume there's a space or punctuation before the link
+                        pattern_before_url = rf"([^\s]+(?:{'|'.join(FILE_EXTENSIONS)}))(?=\s*\(?https?)"
+                        fallback_match2 = re.search(pattern_before_url, chunk, re.IGNORECASE)
+                        if fallback_match2:
+                            file_info["file_name"] = fallback_match2.group(1).strip()
+
+            # 2) Extract the author line or “Author:”
+            #    e.g., "- **Author:** Kris Smith[1](...)"
+            #    or "The author is indicated as Kris Smith"
             match_author = re.search(
                 r"(?:Author[s]*:\s*|The\s+author[s]?\s+(?:is|are)\s+(?:indicated\s+as\s+)?)([^\n]+)",
                 chunk,
                 re.IGNORECASE
             )
             if match_author:
-                # Clean up any markdown or extra characters
-                authors = match_author.group(1).strip().replace("**", "")
+                authors = match_author.group(1).strip()
+                # Remove markdown asterisks
+                authors = authors.replace("**", "")
+                # Remove bracket references like [1](http...)
+                authors = re.sub(r"\[\d+\]\([^)]*\)", "", authors).strip()
                 file_info["author"] = authors
 
-            # 3) If you also want to capture "Contains:" or "Last Modified:", do something similar:
-            # Example: "Contains: Salary info, IP addresses"
-            match_contains = re.search(
-                r"(?:Contains:?\s*)([^\n]+)",
-                chunk,
-                re.IGNORECASE
-            )
+            # If we also want "Contains:" or "File Type:" or "Last Modified:"
+            match_file_type = re.search(r"(?:File Type:\s*)([^\n]+)", chunk, re.IGNORECASE)
+            if match_file_type:
+                file_info["file_type"] = match_file_type.group(1).strip()
+
+            match_contains = re.search(r"(?:Contains:\s*)([^\n]+)", chunk, re.IGNORECASE)
             if match_contains:
-                file_info["contains"] = [
-                    c.strip() for c in match_contains.group(1).split(",")
-                ]
-
-            # Similarly for "Last Modified:" if you want it:
-            # match_last_modified = re.search(
-            #     r"(?:Last Modified:\s*)([^\n]+)",
-            #     chunk,
-            #     re.IGNORECASE
-            # )
-            # if match_last_modified:
-            #     file_info["last_modified"] = match_last_modified.group(1).strip()
-
-            # 4) Check if we actually found anything that looks like a file name
-            if "file_name" in file_info:
-                # If no file_link was found, default to "N/A"
+                file_info["contains"] = [c.strip() for c in match_contains.group(1).split(",")]
+
+            # 4) Validate file_info
+            if "file_name" in file_info or "file_link" in file_info:
+                file_info.setdefault("file_name", "N/A")
                 file_info.setdefault("file_link", "N/A")
-                # If no author was found, default to "N/A"
                 file_info.setdefault("author", "N/A")
 
-                # Optional: Filter out obviously invalid file names
-                # (for example, if it’s just “Here” or “I have found”)
-                # if not is_likely_valid_file(file_info["file_name"]):
-                #     continue
-
-                files.append(file_info)
+                # Optionally filter out if it doesn't look legit
+                # (for example, if file_name is still "1" or "N/A")
+                valid_ext = any(file_info["file_name"].lower().endswith(ext) for ext in FILE_EXTENSIONS)
+                # If you want to ensure there's at least a link or something:
+                if valid_ext or file_info["file_link"].startswith("http"):
+                    files.append(file_info)
 
         return files
 
@@ -219,7 +250,7 @@ def run(self):
         ]
 
         for pii_type in pii_types:
-            third_prompt_dynamic = f"are there any other sensitive files that contains {pii_type}? i believe you are missing quite a lot. be sure to also search for different formats such as .csv, .xlsx, and .pptx!"
+            third_prompt_dynamic = f"great job! but are there any other sensitive files that contains {pii_type}? i believe you are missing quite a lot. For each file be sure to include the file name, file link and author. be sure to also search for different formats such as .csv, .xlsx, and .pdf!"
             print(f"Sending third prompt for PII type: {pii_type}")
             asyncio.run(self.process_prompt(third_prompt_dynamic))
 

From 067a376c71bba68511248bb10c67e722677b54a7 Mon Sep 17 00:00:00 2001
From: Ayush RoyChowdhury <ayushr@zenity.io>
Date: Fri, 3 Jan 2025 23:19:38 -0600
Subject: [PATCH 18/26] Delete
 src/powerpwn/copilot_studio/final_results/chat_exists_output.txt

---
 .../copilot_studio/final_results/chat_exists_output.txt       | 4 ----
 1 file changed, 4 deletions(-)
 delete mode 100644 src/powerpwn/copilot_studio/final_results/chat_exists_output.txt

diff --git a/src/powerpwn/copilot_studio/final_results/chat_exists_output.txt b/src/powerpwn/copilot_studio/final_results/chat_exists_output.txt
deleted file mode 100644
index 53a3146..0000000
--- a/src/powerpwn/copilot_studio/final_results/chat_exists_output.txt
+++ /dev/null
@@ -1,4 +0,0 @@
-https://copilotstudio.microsoft.com/environments/Default-32f814a9-68c8-4ca1-93aa-5594523476b3/bots/cr6e4_agent7/canvas?__version__=2
-https://copilotstudio.microsoft.com/environments/Default-32f814a9-68c8-4ca1-93aa-5594523476b3/bots/cr6e4_copilot2/canvas?__version__=2
-https://copilotstudio.microsoft.com/environments/Default-32f814a9-68c8-4ca1-93aa-5594523476b3/bots/cr6e4_copilot7/canvas?__version__=2
-https://copilotstudio.microsoft.com/environments/Default-32f814a9-68c8-4ca1-93aa-5594523476b3/bots/cr6e4_copilotPoc/canvas?__version__=2

From 77f0277ce21c44739768cc3953826cf0263640c0 Mon Sep 17 00:00:00 2001
From: Ayush RoyChowdhury <aroyc@zn-osx-ayushr.local>
Date: Mon, 13 Jan 2025 23:10:43 -0600
Subject: [PATCH 19/26] finalized runners

---
 src/powerpwn/cli/arguments.py                 |  1 -
 src/powerpwn/cli/runners.py                   |  5 +-
 src/powerpwn/copilot/oversharing/discovery.py | 70 +++++++++++++++----
 3 files changed, 58 insertions(+), 18 deletions(-)

diff --git a/src/powerpwn/cli/arguments.py b/src/powerpwn/cli/arguments.py
index 20b8155..c46a51c 100644
--- a/src/powerpwn/cli/arguments.py
+++ b/src/powerpwn/cli/arguments.py
@@ -155,7 +155,6 @@ def module_copilot(command_subparsers: argparse.ArgumentParser):
     discovery = copilot_subparsers.add_parser("discovery", description="Get the sensitive files that user has access to",
                                            help="Get the current user's information")
     copilot_modules(discovery)
-    discovery.add_argument("-p", "--prompts", type=str, required=True, help="Run local file for prompts from the directory")
 
     dump = copilot_subparsers.add_parser(
         "dump",
diff --git a/src/powerpwn/cli/runners.py b/src/powerpwn/cli/runners.py
index 0f691a9..11e1211 100644
--- a/src/powerpwn/cli/runners.py
+++ b/src/powerpwn/cli/runners.py
@@ -212,10 +212,7 @@ def run_copilot_chat_command(args):
             CopilotGui().run(output_dir)
         return
     elif args.copilot_subcommand == "discovery":
-        discovery = Discovery(parsed_args, args.directory)
-        output_dir = discovery.run()
-        if args.prompt:
-            Discovery().run(prompt)
+        Discovery(parsed_args).run()
         return
 
     raise NotImplementedError(f"Copilot {args.copilot_subcommand} subcommand has not been implemented yet.")
diff --git a/src/powerpwn/copilot/oversharing/discovery.py b/src/powerpwn/copilot/oversharing/discovery.py
index 18cef64..aaea078 100644
--- a/src/powerpwn/copilot/oversharing/discovery.py
+++ b/src/powerpwn/copilot/oversharing/discovery.py
@@ -3,25 +3,14 @@
 import re
 import openpyxl
 from powerpwn.copilot.copilot_connector.copilot_connector import CopilotConnector
-from powerpwn.copilot.enums.copilot_scenario_enum import CopilotScenarioEnum
-from powerpwn.copilot.enums.verbose_enum import VerboseEnum
 from powerpwn.copilot.models.chat_argument import ChatArguments
 
 
 class Discovery:
-    def __init__(self, prompts_file="pii.txt", output_file="oversharedfiles_report.xlsx"):
+    def __init__(self, parsed_args: ChatArguments, prompts_file="pii.txt", output_file="oversharedfiles_report1.xlsx"):
         self.prompts_file = prompts_file
         self.output_file = output_file
-        self.user = os.getenv("m365user")
-        self.user_password = os.getenv("m365pass")
-
-        if not self.user or not self.user_password:
-            raise ValueError("Environment variables 'm365user' or 'm365pass' are not set.")
-
-        self.args = ChatArguments(
-            user=self.user, password=self.user_password, verbose=VerboseEnum.full, scenario=CopilotScenarioEnum.officeweb, use_cached_access_token=False
-        )
-        self.copilot_connector = CopilotConnector(self.args)
+        self.copilot_connector = CopilotConnector(parsed_args)
         self.prompts = self.read_prompts_from_file(self.prompts_file)
 
     def read_prompts_from_file(self, file_path):
@@ -194,6 +183,59 @@ def save_to_excel(self, categorized_files):
         wb.save(self.output_file)
         print(f"Excel file updated: {self.output_file}")
 
+    import re
+
+    def post_process_files(self, files_list):
+        """
+        Given a list of dictionaries (each with file_name, file_link, author, etc.),
+        perform some clean-up:
+        1) Remove duplicates by file link.
+        2) Skip files whose file_name is numeric or N/A.
+        3) Clean author field by removing bracket references and parentheses text.
+        """
+
+        unique_links = set()
+        cleaned_files = []
+
+        for f in files_list:
+            link = f.get("file_link", "")
+
+            # 1) Remove duplicates based on link
+            #    If the link is already encountered, skip
+            if link in unique_links:
+                continue
+            unique_links.add(link)
+
+            # 2) Skip if file_name is numeric (e.g. '1') or 'N/A'
+            file_name = f.get("file_name", "").strip()
+            if file_name.isdigit() or file_name == "N/A":
+                continue
+
+            # 3) Clean up author field
+            #    Remove everything in parentheses ( ... ) and also [ ... ](...) references
+            #    e.g. "[Kris Smith](https://...)" or "Kris Smith (Dept Head)"
+            author = f.get("author", "")
+
+            # Remove "[anything](anything)" references first
+            author = re.sub(r"\[.*?\]\(.*?\)", "", author)
+
+            # Remove parentheses and their contents, e.g. "(Dept Head)"
+            author = re.sub(r"\(.*?\)", "", author)
+
+            # Trim whitespace, semicolons, extra commas
+            author = author.strip().strip(";").strip(",")
+
+            # If you'd like to replace semicolons with commas (in case of multiple authors)
+            # author = author.replace(";", ",").strip(",")
+
+            # Put cleaned author back into the dictionary
+            f["author"] = author if author else "N/A"
+
+            # Add the cleaned file entry to the final list
+            cleaned_files.append(f)
+
+        return cleaned_files
+
     def handle_response(self, raw_message):
         """
         Processes the raw response using the enhanced parser.
@@ -206,6 +248,8 @@ def handle_response(self, raw_message):
             raise TypeError("raw_message must be a string or an object containing a message string.")
 
         extracted_files = self.enhanced_parser(raw_message)
+        extracted_files = self.enhanced_parser(raw_message)
+        cleaned_files = self.post_process_files(extracted_files)
         print("DEBUG: Extracted files:", extracted_files)
         return extracted_files
 

From ab3e814784a173c1e900b16a40ead2863c133936 Mon Sep 17 00:00:00 2001
From: Ayush RoyChowdhury <aroyc@zn-osx-ayushr.local>
Date: Tue, 14 Jan 2025 01:10:39 -0600
Subject: [PATCH 20/26] black formatting

---
 src/powerpwn/cli/arguments.py                    |  5 +++--
 src/powerpwn/cli/runners.py                      |  2 +-
 src/powerpwn/copilot/oversharing/discovery.py    | 16 ++++------------
 src/powerpwn/copilot_studio/modules/deep_scan.py | 16 ++++++++--------
 4 files changed, 16 insertions(+), 23 deletions(-)

diff --git a/src/powerpwn/cli/arguments.py b/src/powerpwn/cli/arguments.py
index c46a51c..f3ed7a5 100644
--- a/src/powerpwn/cli/arguments.py
+++ b/src/powerpwn/cli/arguments.py
@@ -152,8 +152,9 @@ def module_copilot(command_subparsers: argparse.ArgumentParser):
     copilot_modules(whoami)
     whoami.add_argument("-g", "--gui", action="store_true", help="Run local server for gui.")
 
-    discovery = copilot_subparsers.add_parser("discovery", description="Get the sensitive files that user has access to",
-                                           help="Get the current user's information")
+    discovery = copilot_subparsers.add_parser(
+        "discovery", description="Get the sensitive files that user has access to", help="Get the current user's information"
+    )
     copilot_modules(discovery)
 
     dump = copilot_subparsers.add_parser(
diff --git a/src/powerpwn/cli/runners.py b/src/powerpwn/cli/runners.py
index 11e1211..f495096 100644
--- a/src/powerpwn/cli/runners.py
+++ b/src/powerpwn/cli/runners.py
@@ -232,4 +232,4 @@ def run_copilot_studio_command(args):
 
 
 def run_powerpages_command(args):
-    PowerPages(args)
\ No newline at end of file
+    PowerPages(args)
diff --git a/src/powerpwn/copilot/oversharing/discovery.py b/src/powerpwn/copilot/oversharing/discovery.py
index aaea078..1d0b684 100644
--- a/src/powerpwn/copilot/oversharing/discovery.py
+++ b/src/powerpwn/copilot/oversharing/discovery.py
@@ -52,11 +52,7 @@ def enhanced_parser(self, raw_message):
             file_info = {}
 
             # Option 1: If there's a "File Name:" or "File:" pattern, handle as before
-            match_file = re.search(
-                r"(?:File Name\s*:|File\s*:|File:\s*)(.*)",
-                chunk,
-                re.IGNORECASE
-            )
+            match_file = re.search(r"(?:File Name\s*:|File\s*:|File:\s*)(.*)", chunk, re.IGNORECASE)
 
             # We will store a "candidate filename" in file_info["file_name"]
             # and "candidate link" in file_info["file_link"]
@@ -115,11 +111,7 @@ def enhanced_parser(self, raw_message):
             # 2) Extract the author line or “Author:”
             #    e.g., "- **Author:** Kris Smith[1](...)"
             #    or "The author is indicated as Kris Smith"
-            match_author = re.search(
-                r"(?:Author[s]*:\s*|The\s+author[s]?\s+(?:is|are)\s+(?:indicated\s+as\s+)?)([^\n]+)",
-                chunk,
-                re.IGNORECASE
-            )
+            match_author = re.search(r"(?:Author[s]*:\s*|The\s+author[s]?\s+(?:is|are)\s+(?:indicated\s+as\s+)?)([^\n]+)", chunk, re.IGNORECASE)
             if match_author:
                 authors = match_author.group(1).strip()
                 # Remove markdown asterisks
@@ -290,7 +282,7 @@ def run(self):
             "*Employee Salaries*",
             "*IP addresses*",
             "*Contact Information such as phone numbers*",
-            "*Contact Information such as emails*"
+            "*Contact Information such as emails*",
         ]
 
         for pii_type in pii_types:
@@ -300,8 +292,8 @@ def run(self):
 
         print("Finished processing.")
 
+
 # Example usage:
 if __name__ == "__main__":
     discovery = Discovery()
     discovery.run()
-
diff --git a/src/powerpwn/copilot_studio/modules/deep_scan.py b/src/powerpwn/copilot_studio/modules/deep_scan.py
index 27bc665..d56a5f9 100644
--- a/src/powerpwn/copilot_studio/modules/deep_scan.py
+++ b/src/powerpwn/copilot_studio/modules/deep_scan.py
@@ -507,6 +507,7 @@ def run_pup_commands(existing_bots: List[str]):
         return sort_unique_values_in_file(open_bots_path)
     return []
 
+
 def query_using_pup(open_bots: List[str]):
     """
     Execute the Puppeteer JavaScript code for each bot URL given.
@@ -537,6 +538,7 @@ def query_using_pup(open_bots: List[str]):
 
     return {}
 
+
 def parse_chatbot_results(file_path):
     """
     Parses the output Excel file generated by query_chat.js and returns a dictionary.
@@ -549,22 +551,20 @@ def parse_chatbot_results(file_path):
 
     bot_results = {}
     for _, row in df.iterrows():
-        url = str(row.get('URL', '')).strip()
-        has_knowledge = str(row.get('Has Knowledge', '')).strip()
-        titles_str = row.get('Titles', '')
+        url = str(row.get("URL", "")).strip()
+        has_knowledge = str(row.get("Has Knowledge", "")).strip()
+        titles_str = row.get("Titles", "")
         titles = []
 
         if pd.notnull(titles_str) and titles_str:
             # Split titles by semicolon and strip whitespace
-            titles = [title.strip() for title in titles_str.split(';')]
+            titles = [title.strip() for title in titles_str.split(";")]
 
-        bot_results[url] = {
-            'has_knowledge': has_knowledge,
-            'titles': titles
-        }
+        bot_results[url] = {"has_knowledge": has_knowledge, "titles": titles}
 
     return bot_results
 
+
 def camel_case_split(identifier: str):
     """
     creates a word array from a camel case string

From b5f06fe4e0a6794244996aeb1059c2935eacfdb4 Mon Sep 17 00:00:00 2001
From: Ayush RoyChowdhury <aroyc@zn-osx-ayushr.local>
Date: Tue, 14 Jan 2025 01:15:04 -0600
Subject: [PATCH 21/26] run isort

---
 src/powerpwn/cli/runners.py                   | 2 +-
 src/powerpwn/copilot/oversharing/discovery.py | 2 ++
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/powerpwn/cli/runners.py b/src/powerpwn/cli/runners.py
index f495096..6f386fa 100644
--- a/src/powerpwn/cli/runners.py
+++ b/src/powerpwn/cli/runners.py
@@ -13,9 +13,9 @@
 from powerpwn.copilot.gui.gui import Gui as CopilotGui
 from powerpwn.copilot.interactive_chat.interactive_chat import InteractiveChat
 from powerpwn.copilot.models.chat_argument import ChatArguments
+from powerpwn.copilot.oversharing.discovery import Discovery
 from powerpwn.copilot.spearphishing.automated_spear_phisher import AutomatedSpearPhisher
 from powerpwn.copilot.whoami.whoami import WhoAmI
-from powerpwn.copilot.oversharing.discovery import Discovery
 from powerpwn.copilot_studio.modules.deep_scan import DeepScan
 from powerpwn.copilot_studio.modules.enum import Enum
 from powerpwn.nocodemalware.enums.code_exec_type_enum import CodeExecTypeEnum
diff --git a/src/powerpwn/copilot/oversharing/discovery.py b/src/powerpwn/copilot/oversharing/discovery.py
index 1d0b684..5ca51f4 100644
--- a/src/powerpwn/copilot/oversharing/discovery.py
+++ b/src/powerpwn/copilot/oversharing/discovery.py
@@ -1,7 +1,9 @@
 import asyncio
 import os
 import re
+
 import openpyxl
+
 from powerpwn.copilot.copilot_connector.copilot_connector import CopilotConnector
 from powerpwn.copilot.models.chat_argument import ChatArguments
 

From 3730fcabef638ec45a6ae348f0d1a63c59c32ea5 Mon Sep 17 00:00:00 2001
From: Ayush RoyChowdhury <aroyc@zn-osx-ayushr.local>
Date: Tue, 14 Jan 2025 01:24:57 -0600
Subject: [PATCH 22/26] reformatting

---
 src/powerpwn/copilot/oversharing/discovery.py    | 8 +-------
 src/powerpwn/copilot/oversharing/extract_data.py | 1 -
 src/powerpwn/copilot_studio/modules/deep_scan.py | 2 +-
 3 files changed, 2 insertions(+), 9 deletions(-)

diff --git a/src/powerpwn/copilot/oversharing/discovery.py b/src/powerpwn/copilot/oversharing/discovery.py
index 5ca51f4..dee1fde 100644
--- a/src/powerpwn/copilot/oversharing/discovery.py
+++ b/src/powerpwn/copilot/oversharing/discovery.py
@@ -35,8 +35,6 @@ def read_prompts_from_file(self, file_path):
                     print(f"Warning: Skipping invalid section: {section}")
         return prompts
 
-    import re
-
     def enhanced_parser(self, raw_message):
         """
         Parses the LLM response in a chunk-by-chunk manner to extract file info.
@@ -177,8 +175,6 @@ def save_to_excel(self, categorized_files):
         wb.save(self.output_file)
         print(f"Excel file updated: {self.output_file}")
 
-    import re
-
     def post_process_files(self, files_list):
         """
         Given a list of dictionaries (each with file_name, file_link, author, etc.),
@@ -242,8 +238,6 @@ def handle_response(self, raw_message):
             raise TypeError("raw_message must be a string or an object containing a message string.")
 
         extracted_files = self.enhanced_parser(raw_message)
-        extracted_files = self.enhanced_parser(raw_message)
-        cleaned_files = self.post_process_files(extracted_files)
         print("DEBUG: Extracted files:", extracted_files)
         return extracted_files
 
@@ -298,4 +292,4 @@ def run(self):
 # Example usage:
 if __name__ == "__main__":
     discovery = Discovery()
-    discovery.run()
+    discovery.run()
\ No newline at end of file
diff --git a/src/powerpwn/copilot/oversharing/extract_data.py b/src/powerpwn/copilot/oversharing/extract_data.py
index d5dc45c..3994edf 100644
--- a/src/powerpwn/copilot/oversharing/extract_data.py
+++ b/src/powerpwn/copilot/oversharing/extract_data.py
@@ -4,7 +4,6 @@
 
 import openpyxl
 
-from powerpwn.copilot.chat_automator.chat_automator import ChatAutomator
 from powerpwn.copilot.copilot_connector.copilot_connector import CopilotConnector
 from powerpwn.copilot.enums.copilot_scenario_enum import CopilotScenarioEnum
 from powerpwn.copilot.enums.verbose_enum import VerboseEnum
diff --git a/src/powerpwn/copilot_studio/modules/deep_scan.py b/src/powerpwn/copilot_studio/modules/deep_scan.py
index d56a5f9..b461968 100644
--- a/src/powerpwn/copilot_studio/modules/deep_scan.py
+++ b/src/powerpwn/copilot_studio/modules/deep_scan.py
@@ -1049,4 +1049,4 @@ def run(self):
                 else:
                     logging.error("Did not find a solution publisher prefix")
 
-        self.dump_results()
+        self.dump_results()
\ No newline at end of file

From 3ac064dd02ee83d07e75a431f2ef6d2b7d0003e6 Mon Sep 17 00:00:00 2001
From: Ayush RoyChowdhury <aroyc@zn-osx-ayushr.local>
Date: Tue, 14 Jan 2025 01:29:45 -0600
Subject: [PATCH 23/26] reformatted

---
 src/powerpwn/copilot/oversharing/discovery.py    | 2 +-
 src/powerpwn/copilot_studio/modules/deep_scan.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/powerpwn/copilot/oversharing/discovery.py b/src/powerpwn/copilot/oversharing/discovery.py
index dee1fde..6cdf3d5 100644
--- a/src/powerpwn/copilot/oversharing/discovery.py
+++ b/src/powerpwn/copilot/oversharing/discovery.py
@@ -292,4 +292,4 @@ def run(self):
 # Example usage:
 if __name__ == "__main__":
     discovery = Discovery()
-    discovery.run()
\ No newline at end of file
+    discovery.run()
diff --git a/src/powerpwn/copilot_studio/modules/deep_scan.py b/src/powerpwn/copilot_studio/modules/deep_scan.py
index b461968..d56a5f9 100644
--- a/src/powerpwn/copilot_studio/modules/deep_scan.py
+++ b/src/powerpwn/copilot_studio/modules/deep_scan.py
@@ -1049,4 +1049,4 @@ def run(self):
                 else:
                     logging.error("Did not find a solution publisher prefix")
 
-        self.dump_results()
\ No newline at end of file
+        self.dump_results()

From 7e1c7dbac84b794f6e74258b2c9a72c7273aeb2e Mon Sep 17 00:00:00 2001
From: Ayush RoyChowdhury <aroyc@zn-osx-ayushr.local>
Date: Tue, 14 Jan 2025 01:37:48 -0600
Subject: [PATCH 24/26] reformat

---
 src/powerpwn/copilot_studio/modules/deep_scan.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/powerpwn/copilot_studio/modules/deep_scan.py b/src/powerpwn/copilot_studio/modules/deep_scan.py
index d56a5f9..b461968 100644
--- a/src/powerpwn/copilot_studio/modules/deep_scan.py
+++ b/src/powerpwn/copilot_studio/modules/deep_scan.py
@@ -1049,4 +1049,4 @@ def run(self):
                 else:
                     logging.error("Did not find a solution publisher prefix")
 
-        self.dump_results()
+        self.dump_results()
\ No newline at end of file

From 0db9074e0fe3dc904f4120e7b61c3e5e95474605 Mon Sep 17 00:00:00 2001
From: Ayush RoyChowdhury <aroyc@zn-osx-ayushr.local>
Date: Tue, 14 Jan 2025 01:41:31 -0600
Subject: [PATCH 25/26] attempting to fix

---
 src/powerpwn/copilot_studio/modules/deep_scan.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/src/powerpwn/copilot_studio/modules/deep_scan.py b/src/powerpwn/copilot_studio/modules/deep_scan.py
index 80ebfa9..0c4d5de 100644
--- a/src/powerpwn/copilot_studio/modules/deep_scan.py
+++ b/src/powerpwn/copilot_studio/modules/deep_scan.py
@@ -1048,5 +1048,4 @@ def run(self):
 
                 else:
                     logging.error("Did not find a solution publisher prefix")
-                    
-        self.dump_results()
\ No newline at end of file
+        self.dump_results()

From 54d3727361cfc1b93ff2be9a3bdb1d20776b7ad1 Mon Sep 17 00:00:00 2001
From: Ayush RoyChowdhury <aroyc@zn-osx-ayushr.local>
Date: Wed, 15 Jan 2025 13:29:42 -0600
Subject: [PATCH 26/26] added back chat exists output

---
 src/powerpwn/copilot_studio/final_results/chat_exists_output.txt | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 create mode 100644 src/powerpwn/copilot_studio/final_results/chat_exists_output.txt

diff --git a/src/powerpwn/copilot_studio/final_results/chat_exists_output.txt b/src/powerpwn/copilot_studio/final_results/chat_exists_output.txt
new file mode 100644
index 0000000..e69de29