class DownloadWorker(threading.Thread):
    """Thread that pulls Drive file records off a queue and downloads them.

    Every queue item is expected to be a mapping with ``"id"`` and ``"name"``
    keys. A file is downloaded through the Drive v3 API when its extension is
    in ``SUPPORTED_EXTENSIONS`` and no file of that name already exists in
    ``DOWNLOAD_DIR``. The thread returns once the queue has stayed empty for
    ``QUEUE_TIMEOUT`` seconds.

    Args:
        que: Queue of file records; ``task_done()`` is called once per item.
        credentials: Object exposing ``authorize(http)``, used to build the
            authorised HTTP client for each download.
        *args, **kwargs: Forwarded unchanged to ``threading.Thread``.
    """

    # Seconds to wait for a new queue item before the worker shuts down.
    QUEUE_TIMEOUT = 3

    # Directory that receives the downloaded files.
    DOWNLOAD_DIR = os.path.join("Data", "raw")

    # Extensions (lower-case, with the leading dot) the worker will download.
    SUPPORTED_EXTENSIONS = frozenset({
        ".csv", ".doc", ".docx", ".epub", ".eml", ".gif", ".jpg", ".jpeg",
        ".json", ".html", ".htm", ".mp3", ".msg", ".odt", ".ogg", ".pdf",
        ".png", ".pptx", ".ps", ".rtf", ".tiff", ".tif", ".txt", ".wav",
        ".xlsx", ".xls",
    })

    def __init__(self, que, credentials, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.que = que
        self.credentials = credentials

    def run(self):
        """Process queue items until the queue stays empty for the timeout.

        ``task_done()`` is called for every item taken from the queue, even
        when the item is malformed or skipped, so ``que.join()`` cannot hang
        on a record this worker has consumed.
        """
        while True:
            try:
                record = self.que.get(timeout=self.QUEUE_TIMEOUT)
            except queue.Empty:
                return

            try:
                try:
                    file_id = record["id"]
                    file_name = record["name"]
                except (KeyError, TypeError) as exc:
                    # Best effort: report the bad record and keep draining.
                    print(f"Skipping malformed queue item {record!r}: {exc!r}")
                    continue

                # Avoid downloading files that already exist locally.
                if self.validFile(file_name) and not os.path.exists(
                    self._local_path(file_name)
                ):
                    print(f"{file_name} downloading")
                    self.download_file(file_id, file_name)
            finally:
                # Always notify que.join(), including on the `continue` above.
                self.que.task_done()

    def validFile(self, file_name):
        """Return True when *file_name* has a supported extension.

        The comparison is case-insensitive, so ``"A.PDF"`` is accepted.
        """
        ext = os.path.splitext(os.path.basename(file_name))[1]
        return ext.lower() in self.SUPPORTED_EXTENSIONS

    def _local_path(self, file_name):
        """Return the path inside ``DOWNLOAD_DIR`` used to store *file_name*."""
        # The name comes straight from the queue record and is not validated
        # anywhere else; keeping only the last path component stops absolute
        # names or ".." segments from escaping DOWNLOAD_DIR.
        return os.path.join(self.DOWNLOAD_DIR, os.path.basename(file_name))

    def download_file(self, file_id, output_file):
        """Download the Drive file *file_id* into ``DOWNLOAD_DIR``.

        The data is written to a ``.part`` file and renamed only after the
        last chunk arrives, so an interrupted download never leaves a
        truncated file that ``run()`` would later treat as already present.
        Failures are printed rather than raised (best effort per file).

        Args:
            file_id: Drive identifier passed to ``files().get_media``.
            output_file: Name to store the file under (last path component).
        """
        target = self._local_path(output_file)
        partial = target + ".part"
        try:
            os.makedirs(self.DOWNLOAD_DIR, exist_ok=True)
            http = self.credentials.authorize(httplib2.Http())
            service = discovery.build("drive", "v3", http=http)
            request = service.files().get_media(fileId=file_id)
            # The context manager closes the handle even if a chunk fails.
            with open(partial, "wb") as fh:
                downloader = MediaIoBaseDownload(fh, request)
                done = False
                while not done:
                    status, done = downloader.next_chunk()
                    print("Download %d%%." % int(status.progress() * 100))
            os.replace(partial, target)
        except Exception as e:
            print(e)
            # Drop whatever was written so the next run retries this file.
            try:
                os.remove(partial)
            except OSError:
                pass
# Put the project root (the parent of this tests directory) first on the
# import path.
_TESTS_DIR = os.path.dirname(__file__)
sys.path.insert(0, os.path.abspath(os.path.join(_TESTS_DIR, '..')))


def pytest_configure(config):
    """
    Register the custom markers used by the test suite.

    Adds one ``markers`` ini line each for ``unit`` and ``integration``.

    Args:
        config: The pytest config object passed to this hook.
    """
    marker_lines = (
        "unit: mark a test as a unit test for specific component.",
        "integration: mark a test as an integration test.",
    )
    for marker_line in marker_lines:
        config.addinivalue_line("markers", marker_line)


@pytest.fixture(scope='session')
def project_root():
    """
    Session-scoped fixture giving the project root directory.

    Returns:
        str: Absolute path of the directory one level above this file's
        directory.
    """
    tests_dir = os.path.dirname(__file__)
    parent_dir = os.path.join(tests_dir, '..')
    return os.path.abspath(parent_dir)


@pytest.fixture(scope='function')
def temp_directory(tmp_path):
    """
    Function-scoped fixture exposing pytest's ``tmp_path`` under another name.

    Args:
        tmp_path: Built-in pytest temporary-directory fixture.

    Returns:
        The ``tmp_path`` value, unchanged.
    """
    return tmp_path
+""" \ No newline at end of file diff --git a/tests/unit/__init__.py b/tests/unit/__init__.py new file mode 100644 index 0000000..ec13540 --- /dev/null +++ b/tests/unit/__init__.py @@ -0,0 +1,5 @@ +""" +Unit tests package for Google Drive Search Application. + +This file makes the unit tests directory a proper Python package. +""" \ No newline at end of file diff --git a/tests/unit/__pycache__/__init__.cpython-312.pyc b/tests/unit/__pycache__/__init__.cpython-312.pyc new file mode 100644 index 0000000..71140b8 Binary files /dev/null and b/tests/unit/__pycache__/__init__.cpython-312.pyc differ diff --git a/tests/unit/__pycache__/test_download_worker.cpython-312-pytest-8.3.5.pyc b/tests/unit/__pycache__/test_download_worker.cpython-312-pytest-8.3.5.pyc new file mode 100644 index 0000000..31be7f1 Binary files /dev/null and b/tests/unit/__pycache__/test_download_worker.cpython-312-pytest-8.3.5.pyc differ diff --git a/tests/unit/__pycache__/test_indexer_worker.cpython-312-pytest-8.3.5.pyc b/tests/unit/__pycache__/test_indexer_worker.cpython-312-pytest-8.3.5.pyc new file mode 100644 index 0000000..e9736e9 Binary files /dev/null and b/tests/unit/__pycache__/test_indexer_worker.cpython-312-pytest-8.3.5.pyc differ diff --git a/tests/unit/__pycache__/test_text_extract_worker.cpython-312-pytest-8.3.5.pyc b/tests/unit/__pycache__/test_text_extract_worker.cpython-312-pytest-8.3.5.pyc new file mode 100644 index 0000000..357070e Binary files /dev/null and b/tests/unit/__pycache__/test_text_extract_worker.cpython-312-pytest-8.3.5.pyc differ diff --git a/tests/unit/test_download_worker.py b/tests/unit/test_download_worker.py new file mode 100644 index 0000000..0c85293 --- /dev/null +++ b/tests/unit/test_download_worker.py @@ -0,0 +1,18 @@ +""" +Unit tests for the DownloadWorker module. + +This module contains tests to validate the functionality of the DownloadWorker. 
# --- tests/unit/test_download_worker.py ---
@pytest.mark.unit
def test_download_worker_initialization():
    """
    Test the initialization of the DownloadWorker.

    Ensures that the worker stores the queue and credentials it is given.
    """
    import queue

    # NOTE(review): assumes WorkerThreads.DownloadWorker has the same
    # (que, credentials, *args, **kwargs) constructor as the DownloadWorker
    # class added under src/worker_threads/ in this change - confirm the
    # import path still resolves to that class.
    que = queue.Queue()
    credentials = object()  # only stored by __init__, never called here
    worker = DownloadWorker(que, credentials)
    assert worker.que is que
    assert worker.credentials is credentials


# --- tests/unit/test_indexer_worker.py ---
@pytest.mark.unit
def test_indexer_worker_initialization():
    """
    Test the initialization of the IndexerWorker.

    Ensures that the IndexerWorker can be created without errors.
    """
    # NOTE(review): IndexerWorker's constructor is not visible in this
    # change - confirm that it really accepts no arguments.
    worker = IndexerWorker()
    assert worker is not None


# --- tests/unit/test_text_extract_worker.py ---
@pytest.mark.unit
def test_text_extract_worker_initialization():
    """
    Test the initialization of the TextExtractWorker.

    Ensures that the TextExtractWorker can be created without errors.
    """
    # NOTE(review): TextExtractWorker's constructor is not visible in this
    # change - confirm that it really accepts no arguments.
    worker = TextExtractWorker()
    assert worker is not None