Skip to content
Draft
Binary file not shown.
Binary file not shown.
Binary file not shown.
59 changes: 59 additions & 0 deletions src/worker_threads/download_worker.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
import threading
import queue
import os
from apiclient.http import MediaIoBaseDownload, MediaFileUpload
from apiclient import discovery
from oauth2client.file import Storage
import io
import httplib2


class DownloadWorker(threading.Thread):
    """Worker thread that downloads queued Google Drive files into ``Data/raw``.

    Each queue item is a mapping with at least the keys ``"id"`` (Drive file
    id) and ``"name"`` (file name used for the local copy). The thread exits
    once the queue has stayed empty for three seconds.
    """

    # File extensions accepted for download. Stored lower-case; candidates are
    # lower-cased before the lookup so "REPORT.PDF" is treated like "report.pdf".
    _SUPPORTED_EXTENSIONS = frozenset({
        ".csv", ".doc", ".docx", ".epub", ".eml", ".gif", ".jpg", ".jpeg",
        ".json", ".html", ".htm", ".mp3", ".msg", ".odt", ".ogg", ".pdf",
        ".png", ".pptx", ".ps", ".rtf", ".tiff", ".tif", ".txt", ".wav",
        ".xlsx", ".xls",
    })

    def __init__(self, que, credentials, *args, **kwargs):
        """Create the worker.

        Args:
            que: Queue of file descriptors to download; must support
                ``get(timeout=...)`` and ``task_done()``.
            credentials: Object whose ``authorize(http)`` method returns an
                authorized HTTP client (used by ``download_file``).
            *args: Forwarded to ``threading.Thread``.
            **kwargs: Forwarded to ``threading.Thread``.
        """
        # Initialise the Thread machinery first, then attach our own state.
        super().__init__(*args, **kwargs)
        self.que = que
        self.credentials = credentials

    def run(self):
        """Consume the queue until it has been empty for three seconds."""
        while True:
            try:
                # Give up (and end the thread) after 3s without new work.
                _file = self.que.get(timeout=3)
            except queue.Empty:
                return

            try:
                file_id = _file["id"]
                file_name = _file["name"]

                # Skip files that are already present locally or unsupported.
                target = os.path.join("Data", "raw", file_name)
                if not os.path.exists(target) and self.validFile(file_name):
                    print(f"{file_name} downloading")
                    self.download_file(file_id, file_name)
            finally:
                # Always balance the get() above; otherwise que.join() would
                # block forever if processing this item raised.
                self.que.task_done()

    def validFile(self, file_name):
        """Return True if ``file_name`` has a supported extension.

        The comparison is case-insensitive.
        """
        _, ext = os.path.splitext(os.path.basename(file_name))
        return ext.lower() in self._SUPPORTED_EXTENSIONS

    def download_file(self, file_id, output_file):
        """Download Drive file ``file_id`` to ``Data/raw/<output_file>``.

        Errors are printed rather than raised (best-effort download). If the
        transfer fails after the local file was opened, the incomplete file is
        removed so a later existence check does not mistake it for a finished
        download.

        Args:
            file_id: Drive id of the file to fetch.
            output_file: File name to write inside ``Data/raw``.
        """
        target = os.path.join("Data", "raw", output_file)
        incomplete = False
        try:
            http = self.credentials.authorize(httplib2.Http())
            service = discovery.build("drive", "v3", http=http)
            request = service.files().get_media(fileId=file_id)
            # "with" guarantees the handle is closed even if a chunk fails.
            with open(target, "wb") as fh:
                incomplete = True
                downloader = MediaIoBaseDownload(fh, request)
                done = False
                while not done:
                    status, done = downloader.next_chunk()
                    print("Download %d%%." % int(status.progress() * 100))
            incomplete = False
        except Exception as e:
            print(e)
            if incomplete:
                try:
                    os.remove(target)
                except OSError:
                    # Nothing more we can do; keep the best-effort contract.
                    pass
5 changes: 5 additions & 0 deletions tests/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
"""
Test package for Google Drive Search Application.

This file makes the tests directory a proper Python package.
"""
Binary file added tests/__pycache__/__init__.cpython-312.pyc
Binary file not shown.
Binary file not shown.
51 changes: 51 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
"""
Pytest configuration and global fixtures for the Google Drive Search Application.

This module provides shared fixtures, configuration, and utility functions
for testing across different test modules.
"""

import os
import sys
import pytest

# Put the parent of this file's directory (the project root) at the front of
# sys.path so project modules are importable from the tests.
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))

def pytest_configure(config):
    """
    Register the project's custom markers with pytest.

    Args:
        config: Pytest configuration object; every marker description is
            appended to its ``markers`` ini setting.
    """
    marker_descriptions = (
        "unit: mark a test as a unit test for specific component.",
        "integration: mark a test as an integration test.",
    )
    for description in marker_descriptions:
        config.addinivalue_line("markers", description)

@pytest.fixture(scope='session')
def project_root():
    """
    Session-scoped fixture giving the directory one level above this file.

    Returns:
        str: Absolute path of the parent of this file's directory.
    """
    this_dir = os.path.dirname(__file__)
    parent_dir = os.path.join(this_dir, '..')
    return os.path.abspath(parent_dir)

@pytest.fixture(scope='function')
def temp_directory(tmp_path):
    """
    Expose pytest's built-in ``tmp_path`` under the name ``temp_directory``.

    Args:
        tmp_path: Built-in pytest fixture supplying a temporary directory.

    Returns:
        The same object that ``tmp_path`` provided, unchanged.
    """
    per_test_dir = tmp_path
    return per_test_dir
5 changes: 5 additions & 0 deletions tests/integration/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
"""
Integration tests package for Google Drive Search Application.

This file makes the integration tests directory a proper Python package.
"""
5 changes: 5 additions & 0 deletions tests/mocks/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
"""
Mocks package for Google Drive Search Application tests.

This file makes the mocks directory a proper Python package.
"""
5 changes: 5 additions & 0 deletions tests/unit/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
"""
Unit tests package for Google Drive Search Application.

This file makes the unit tests directory a proper Python package.
"""
Binary file added tests/unit/__pycache__/__init__.cpython-312.pyc
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
18 changes: 18 additions & 0 deletions tests/unit/test_download_worker.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
"""
Unit tests for the DownloadWorker module.

This module contains tests to validate the functionality of the DownloadWorker.
"""

import pytest
from WorkerThreads.DownloadWorker import DownloadWorker

@pytest.mark.unit
def test_download_worker_initialization():
    """
    Test the initialization of the DownloadWorker.

    The worker's constructor takes a work queue and a credentials object, so
    both are supplied here; calling it with no arguments raises ``TypeError``.
    Verifies that the arguments are stored and that construction alone does
    not start the thread.

    NOTE(review): assumes ``WorkerThreads.DownloadWorker.DownloadWorker`` has
    the ``(que, credentials, *args, **kwargs)`` signature and the ``que`` /
    ``credentials`` attributes of the download worker class - confirm.
    """
    # Local import keeps this test self-contained without touching the
    # module-level import block.
    import queue

    task_queue = queue.Queue()
    credentials = object()  # opaque stand-in; never used during construction

    worker = DownloadWorker(task_queue, credentials)

    assert worker.que is task_queue
    assert worker.credentials is credentials
    assert not worker.is_alive()
18 changes: 18 additions & 0 deletions tests/unit/test_indexer_worker.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
"""
Unit tests for the IndexerWorker module.

This module contains tests to validate the functionality of the IndexerWorker.
"""

import pytest
from WorkerThreads.IndexerWorker import IndexerWorker

@pytest.mark.unit
def test_indexer_worker_initialization():
    """
    Smoke-test that an ``IndexerWorker`` can be constructed with no arguments.

    NOTE(review): the constructor signature is not visible from this module;
    if it requires arguments this call raises ``TypeError`` - confirm against
    the class definition.
    """
    assert IndexerWorker() is not None
18 changes: 18 additions & 0 deletions tests/unit/test_text_extract_worker.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
"""
Unit tests for the TextExtractWorker module.

This module contains tests to validate the functionality of the TextExtractWorker.
"""

import pytest
from WorkerThreads.TextExtractWorker import TextExtractWorker

@pytest.mark.unit
def test_text_extract_worker_initialization():
    """
    Smoke-test that a ``TextExtractWorker`` can be constructed with no arguments.

    NOTE(review): the constructor signature is not visible from this module;
    if it requires arguments this call raises ``TypeError`` - confirm against
    the class definition.
    """
    assert TextExtractWorker() is not None