Skip to content

Commit

Permalink
WIP
Browse files Browse the repository at this point in the history
  • Loading branch information
habedi committed Oct 20, 2024
1 parent a838a55 commit b04253b
Show file tree
Hide file tree
Showing 3 changed files with 30 additions and 68 deletions.
28 changes: 28 additions & 0 deletions mongo_analyser/program.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
import argparse

import mongo_analyser.extractor as data_extractor


def main():
    """Entry point for the ``mongo_analyser`` command-line tool.

    Reads the subcommand name from the command line and hands control to
    the ``main()`` of the module that implements it. Anything that follows
    the subcommand is not interpreted here; it is left in ``sys.argv`` for
    the subcommand to parse. If the subcommand is not recognised, the help
    text is printed instead.
    """
    # Local import: only needed to re-point ``sys.argv`` at the subcommand.
    import sys

    parser = argparse.ArgumentParser(
        description=(
            'Usage: mongo_analyser <command> [<args>]\n'
            '\n'
            'Commands:\n'
            '  analyse_schema   Analyse MongoDB schema and generate metadata\n'
            '  extract_data     Extract data from MongoDB and save to JSON file\n'
        ),
        # The default formatter re-wraps the description into one paragraph,
        # which would flatten the command list; keep the line breaks as-is.
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )

    parser.add_argument('command', help='Subcommand to run')

    # parse_known_args() rather than parse_args(): the options that follow
    # the command belong to the subcommand, and parse_args() would abort
    # with "unrecognized arguments" before the subcommand ever ran.
    args, remaining = parser.parse_known_args()

    # Argument vector as the subcommand should see it: the command name is
    # folded into the program name so it is not mistaken for an argument.
    # NOTE(review): assumes the subcommand's main() reads its options from
    # sys.argv -- confirm against mongo_analyser.extractor.
    forwarded_argv = [sys.argv[0] + ' ' + args.command] + remaining

    # Dispatch based on the subcommand
    if args.command == 'analyse_schema':
        # NOTE(review): this runs the extractor, exactly like 'extract_data'.
        # It presumably should call a schema-analyser entry point instead,
        # but no such module is imported in this file -- confirm and rewire.
        sys.argv = forwarded_argv
        data_extractor.main()
    elif args.command == 'extract_data':
        sys.argv = forwarded_argv
        data_extractor.main()
    else:
        parser.print_help()


if __name__ == "__main__":
    main()
68 changes: 0 additions & 68 deletions tests/test_extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,71 +64,3 @@ def test_convert_to_json_compatible():
assert result["int_field"] == 123
assert result["date_field"] == "2024-01-01T00:00:00+00:00"
assert result["str_field"] == "test"

# def test_extract_data(mocker):
# # Arrange
# mock_client = mocker.patch("pymongo.MongoClient", autospec=True)
# mock_collection = mock_client.return_value.__getitem__.return_value.__getitem__.return_value
# mock_cursor = MagicMock()
# mock_cursor.limit.return_value = iter([{"field": "value"}])
# mock_collection.find.return_value = mock_cursor
#
# mock_gzip_open = mocker.patch("gzip.open", mocker.mock_open())
# mock_json_dump = mocker.patch("json.dump")
#
# timezone = pytz.timezone("UTC")
# schema = {"field": {"type": "str"}}
# output_file = "output.json.gz"
#
# # Act
# DataExtractor.extract_data("mongodb://localhost:27017", "test_db",
# "test_collection",
# schema, output_file, timezone, 1000, 100)
#
# # Assert
# mock_client.assert_called_once_with("mongodb://localhost:27017")
# mock_cursor.limit.assert_called_once_with(1)
# mock_gzip_open.assert_called_once_with(output_file, 'wt', encoding="utf-8")
# mock_json_dump.assert_called_once()


# def test_extract_data_no_limit(mocker):
# # Arrange
# mock_client = mocker.patch("pymongo.MongoClient", autospec=True)
# mock_collection = mock_client.return_value.__getitem__.return_value.__getitem__.return_value
# mock_cursor = MagicMock()
# mock_cursor.return_value = iter([{"field": "value"}])
# mock_collection.find.return_value = mock_cursor
#
# mock_gzip_open = mocker.patch("gzip.open", mocker.mock_open())
# mock_json_dump = mocker.patch("json.dump")
# mock_timezone = mocker.MagicMock()
#
# schema = {"field": {"type": "str"}}
# output_file = "output.json.gz"
#
# # Act
# DataExtractor.extract_data("mongodb://localhost:27017", "test_db",
# "test_collection",
# schema, output_file, mock_timezone, 1000, -1)
#
# # Assert
# mock_client.assert_called_once_with("mongodb://localhost:27017")
# mock_cursor.assert_called_once()
# mock_gzip_open.assert_called_once_with(output_file, 'wt', encoding="utf-8")
# mock_json_dump.assert_called_once()


# def test_extract_data_connection_failure(mocker):
#
# # Arrange
# # mock_client = mocker.patch("pymongo.MongoClient", side_effect=ConnectionFailure)
# timezone = pytz.timezone("UTC")
# schema = {"field": {"type": "str"}}
# output_file = "output.json.gz"
#
# # Act & Assert
# with pytest.raises(ConnectionFailure):
# DataExtractor.extract_data("mongodb://localhost:27017", "test_db",
# "test_collection",
# schema, output_file, timezone, 1000, 100)
2 changes: 2 additions & 0 deletions tests/test_shared.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ def test_handle_binary_with_known_subtype(mocker):


def test_handle_binary_with_unknown_subtype(mocker):
"""Test for handling binary data with an unknown subtype."""
# Arrange
mock_value = mocker.Mock()
mock_value.subtype = 99 # Unknown subtype
Expand All @@ -84,6 +85,7 @@ def test_handle_binary_with_unknown_subtype(mocker):


def test_handle_binary_in_array(mocker):
"""Test for handling binary data in an array."""
# Arrange
mock_value = mocker.Mock()
mock_value.subtype = 4 # Known subtype 'binary<UUID>'
Expand Down

0 comments on commit b04253b

Please sign in to comment.