diff --git a/docs/_static/related-files-tab.png b/docs/_static/related-files-tab.png new file mode 100644 index 000000000..9675ab063 Binary files /dev/null and b/docs/_static/related-files-tab.png differ diff --git a/docs/user-guide/2-Storing-malware-samples.rst b/docs/user-guide/2-Storing-malware-samples.rst index 897093452..99151acd4 100644 --- a/docs/user-guide/2-Storing-malware-samples.rst +++ b/docs/user-guide/2-Storing-malware-samples.rst @@ -98,3 +98,23 @@ Using that feature we can chain together various files extracted during the anal - two memory dumps with unpacked code Finally, from both of these dumps we got a malware configuration. + + +Related files +------------- + +Sometimes we want to add the file that is related to the object, but not containing any malware i.e. pdf report. Every object of type **file** has designated tab for such purpose. + +.. image:: ../_static/related-files-tab.png + :target: ../_static/related_files_tab + :alt: related files tab + +In this tab you can: + +- view list of all related files associated with this object +- upload new related file +- delete related file +- download single related file +- download .zip file containing every related file + +Keep in mind that those actions might require special capabilities. For more details, please read :ref:`9. Sharing objects with other collaborators` diff --git a/docs/user-guide/7-Lucene-search.rst b/docs/user-guide/7-Lucene-search.rst index a26daa6f6..e3e8df741 100644 --- a/docs/user-guide/7-Lucene-search.rst +++ b/docs/user-guide/7-Lucene-search.rst @@ -549,3 +549,43 @@ Afterwards, you can see your newly added query as another black-coloured badge. .. image:: ../_static/7dXJkSH.png :target: ../_static/7dXJkSH.png :alt: + +Related file field (\ ``related.:``\ ) +------------------------------------------------------------ + +You can query objects by their related files. There are 4 ways to do it: + +* ``related.name:`` - query by related file's name + +.. code-block:: python + + related.name:"name.txt" + + +This field accepts wildcards. + +* ``related.size:`` - query by related file's size + +.. code-block:: python + + related.size:"<5kb" + + +* ``related.sha256:`` - query by related file's sha256 + +.. code-block:: python + related.sha256:"2ed91d820157c0530ffbae54122d998e0de6d958f266b682f7c528942f770470" + + +* ``related.count:`` - query by number of files related to the object + +.. code-block:: python + + # get files, which have at least 0 related files and no more than 2 + related.count:[0 TO 2] + # get files, which have at least 2 related files + related.count:">=2" + # get files, which have more than 2 related files + related.count:">2" + # get files, which have exactly 1 related file + related.count:"1" diff --git a/docs/user-guide/9-Sharing-objects.rst b/docs/user-guide/9-Sharing-objects.rst index abd39ee4e..68403f4b7 100644 --- a/docs/user-guide/9-Sharing-objects.rst +++ b/docs/user-guide/9-Sharing-objects.rst @@ -155,22 +155,22 @@ By default, ``admin`` private group has enabled all capabilities. All other grou Each capability has its own name and scope: -* +* **manage_users - Managing users and groups (system administration)** Allows to access all users and groups in MWDB. Rules described in *Who is who?* don't apply to users with that permission. Enables user to create new user accounts, new groups and change their capabilities and membership. Allows to manage attribute keys, define new ones, delete and set the group permissions for them. 
-* +* **share_queried_objects - Query for all objects in system** That one is a bit tricky and will be possibly deprecated. MWDB will automatically share object and all descendants with group if member directly accessed it via identifier (knows the hash e.g. have direct link to the object). It can be used for bot accounts, so they have access only to these objects that are intended to be processed by them. Internally, we abandoned that idea, so that capability may not be stable. -* +* **access_all_objects - Has access to all new uploaded objects into system** Capability used by ``everything`` group, useful when you want to make additional "everything" that is separate from the original one. Keep in mind that it applies only to the **uploads made during the capability was enabled**\ , so if you want the new group to be truly "everything", you may need to share the old objects manually. -* +* **sharing_with_all - Can share objects with all groups in system** Implies the access to the list of all group names, but without access to the membership information and management features. Allows to share object with arbitrary group in MWDB. It also allows the user to view full history of sharing an object (if the user has access to the object). @@ -180,27 +180,27 @@ Each capability has its own name and scope: Can view who uploaded object and filter by uploader. Without this capability users can filter by / see only users in their workspaces. -* +* **adding_tags - Can add tags** Allows to tag objects. This feature is disabled by default, as you may want to have only tags from automated analyses. -* +* **removing_tags - Can remove tags** Allows to remove tags. Tag doesn't have "owner", so user will be able to remove all tags from the object. -* +* **adding_comments - Can add comments** Allows to add comments to the objects. Keep in mind that comments are public. -* +* **removing_comments - Can remove (all) comments** Allows to remove **all** comments, not only these authored by the user. -* +* **adding_parents - Can add parents** Allows to add new relationships by specifying object parent during upload or adding new relationship between existing objects. @@ -215,22 +215,22 @@ Each capability has its own name and scope: Enables upload of files. Enabled by default for ``registered`` group. -* +* **adding_configs - Can upload configs** Enables upload of configurations. Configurations are intended to be uploaded by automated systems or trusted entities that follow the conventions. -* +* **adding_blobs - Can upload text blobs** Enables upload of blobs. Blobs may have similar meaning as configurations in terms of user roles. -* +* **reading_all_attributes - Has access to all attributes of object (including hidden)** With that capability, you can read all the attributes, even if you don't have ``read`` permission for that attribute key. It allows to list hidden attribute values. -* +* **adding_all_attributes - Can add all attributes to object** Enables group to add all the attributes, even if it doesn't have ``set`` permission for that attribute key. @@ -240,12 +240,12 @@ Each capability has its own name and scope: Allows to remove attribute from object. To remove attribute, you need to have ``set`` permission for key. Combined with ``adding_all_attributes``\ , allows to remove all attributes. -* +* **unlimited_requests - API requests are not rate-limited for this group** Disables rate limiting for users from that group, if rate limiting feature is enabled. 
-* +* **removing_objects - Can remove objects** Can remove all accessible objects from the MWDB. May be quite destructive, we suggest to keep that capability enabled only for ``admin`` account. @@ -260,7 +260,7 @@ Each capability has its own name and scope: Allows to use personalization features like favorites or quick queries. -* +* **karton_assign - Can assign existing analysis to the object** Allows to assign Karton analysis to the object by setting ``karton`` attribute or using dedicated API. @@ -269,6 +269,21 @@ Each capability has its own name and scope: **karton_reanalyze - Can resubmit any object for analysis** Can manually resubmit object to Karton. +* + **access_related_files - Can view and download RelatedFiles** + + Allows to view list of RelatedFiles and download them. + +* + **adding_related_files - Can upload new RelatedFiles** + + Allows to upload new RelatedFiles. + +* + **removing_related_files - removing_related_files** + + Allows to remove existing RelatedFiles. + User capabilities are the sum of all group capabilities. If you want to enable capability system-wide (e.g. enable all users to add tags), enable that capability for ``registered`` group or ``public`` group if you want to include guests. diff --git a/mwdb/app.py b/mwdb/app.py index c68d80fd5..05e06d638 100755 --- a/mwdb/app.py +++ b/mwdb/app.py @@ -44,6 +44,9 @@ FileDownloadZipResource, FileItemResource, FileResource, + RelatedFileItemResource, + RelatedFileResource, + RelatedFileZipDownloadResource, ) from mwdb.resources.group import GroupListResource, GroupMemberResource, GroupResource from mwdb.resources.karton import KartonAnalysisResource, KartonObjectResource @@ -263,6 +266,22 @@ def require_auth(): api.add_resource(FileDownloadResource, "/file//download") api.add_resource(FileDownloadZipResource, "/file//download/zip") +# RelatedFiles endpoints +api.add_resource( + RelatedFileResource, + "///related_file", +) +api.add_resource( + RelatedFileItemResource, + "//" + "/related_file/", +) +api.add_resource( + RelatedFileZipDownloadResource, + "//" + "/related_file/zip", +) + # Config endpoints api.add_resource(ConfigResource, "/config") api.add_resource(ConfigStatsResource, "/config/stats") diff --git a/mwdb/core/capabilities.py b/mwdb/core/capabilities.py index a426bf42e..d0b5f5450 100644 --- a/mwdb/core/capabilities.py +++ b/mwdb/core/capabilities.py @@ -47,6 +47,12 @@ class Capabilities(object): karton_reanalyze = "karton_reanalyze" # Can remove Karton analysis from the object karton_unassign = "karton_unassign" + # Can view and download RelatedFiles + access_related_files = "access_related_files" + # Can upload new RelatedFiles + adding_related_files = "adding_related_files" + # Can remove existing RelatedFiles + removing_related_files = "removing_related_files" @classmethod def all(cls): diff --git a/mwdb/core/file_util.py b/mwdb/core/file_util.py new file mode 100644 index 000000000..bd8320d4e --- /dev/null +++ b/mwdb/core/file_util.py @@ -0,0 +1,102 @@ +import io +import os +import shutil +import tempfile + +from mwdb.core.config import StorageProviderType, app_config +from mwdb.core.util import get_s3_client + + +def write_to_storage(file_stream, file_object): + file_stream.seek(0, os.SEEK_SET) + if app_config.mwdb.storage_provider == StorageProviderType.S3: + get_s3_client( + app_config.mwdb.s3_storage_endpoint, + app_config.mwdb.s3_storage_access_key, + app_config.mwdb.s3_storage_secret_key, + app_config.mwdb.s3_storage_region_name, + app_config.mwdb.s3_storage_secure, + 
app_config.mwdb.s3_storage_iam_auth, + ).put_object( + Bucket=app_config.mwdb.s3_storage_bucket_name, + Key=file_object._calculate_path(), + Body=file_stream, + ) + elif app_config.mwdb.storage_provider == StorageProviderType.DISK: + with open(file_object._calculate_path(), "wb") as f: + shutil.copyfileobj(file_stream, f) + else: + raise RuntimeError( + f"StorageProvider {app_config.mwdb.storage_provider} " f"is not supported" + ) + + +def get_from_storage(file_object): + if app_config.mwdb.storage_provider == StorageProviderType.S3: + # Stream coming from Boto3 get_object is not buffered and not seekable. + # We need to download it to the temporary file first. + stream = tempfile.TemporaryFile(mode="w+b") + try: + get_s3_client( + app_config.mwdb.s3_storage_endpoint, + app_config.mwdb.s3_storage_access_key, + app_config.mwdb.s3_storage_secret_key, + app_config.mwdb.s3_storage_region_name, + app_config.mwdb.s3_storage_secure, + app_config.mwdb.s3_storage_iam_auth, + ).download_fileobj( + Bucket=app_config.mwdb.s3_storage_bucket_name, + Key=file_object._calculate_path(), + Fileobj=stream, + ) + stream.seek(0, io.SEEK_SET) + return stream + except Exception: + stream.close() + raise + elif app_config.mwdb.storage_provider == StorageProviderType.DISK: + return open(file_object._calculate_path(), "rb") + else: + raise RuntimeError( + f"StorageProvider {app_config.mwdb.storage_provider} is not supported" + ) + + +def delete_from_storage(file_object): + if app_config.mwdb.storage_provider == StorageProviderType.S3: + get_s3_client( + app_config.mwdb.s3_storage_endpoint, + app_config.mwdb.s3_storage_access_key, + app_config.mwdb.s3_storage_secret_key, + app_config.mwdb.s3_storage_region_name, + app_config.mwdb.s3_storage_secure, + app_config.mwdb.s3_storage_iam_auth, + ).delete_object( + Bucket=app_config.mwdb.s3_storage_bucket_name, + Key=file_object._calculate_path(), + ) + elif app_config.mwdb.storage_provider == StorageProviderType.DISK: + os.remove(file_object._calculate_path()) + else: + raise RuntimeError( + f"StorageProvider {app_config.mwdb.storage_provider} " f"is not supported" + ) + + +def iterate_buffer(file_object, chunk_size=1024 * 256): + """ + Iterates over bytes in the file contents + """ + fh = file_object.open() + try: + if hasattr(fh, "stream"): + yield from fh.stream(chunk_size) + else: + while True: + chunk = fh.read(chunk_size) + if chunk: + yield chunk + else: + return + finally: + fh.close() diff --git a/mwdb/core/search/fields.py b/mwdb/core/search/fields.py index 82d410a97..99e11cd79 100644 --- a/mwdb/core/search/fields.py +++ b/mwdb/core/search/fields.py @@ -19,6 +19,7 @@ Member, Object, ObjectPermission, + RelatedFile, TextBlob, User, db, @@ -766,3 +767,85 @@ def _get_condition( else: condition = or_((self.column == value), File.alt_names.any(value)) return condition + + +class RelatedField(BaseField): + accepts_range = True + accepts_subfields = True + accepts_wildcards = True + + def count_condition(self, expression): + def parse_count_value(value): + try: + value = int(value) + if value < 0: + raise ValueError + except ValueError: + raise UnsupportedGrammarException( + "Field related.count accepts statements with " + "only correct integer values" + ) + return value + + if isinstance(expression, Range): + low_value = expression.low.value + high_value = expression.high.value + + if low_value != "*": + low_value = parse_count_value(low_value) + if high_value != "*": + high_value = parse_count_value(high_value) + + low_condition = ( + func.count() >= low_value + if 
expression.include_low + else func.count() > low_value + ) + high_condition = ( + func.count() <= high_value + if expression.include_high + else func.count() < high_value + ) + + if high_value == "*" and low_value == "*": + condition = True + elif high_value == "*": + condition = low_condition + elif low_value == "*": + condition = high_condition + else: + condition = and_(low_condition, high_condition) + else: + upload_value = parse_count_value(expression.value) + condition = func.count() == upload_value + + related = ( + db.session.query(RelatedFile.object_id) + .group_by(RelatedFile.object_id) + .having(condition) + ).all() + found_ids = [r[0] for r in related] + return RelatedFile.object_id.in_(found_ids) + + def _get_condition( + self, expression: Expression, subfields: List[Tuple[str, int]] + ) -> Any: + if len(subfields) <= 1: # [('related', 0)] + raise FieldNotQueryableException("One subfield is required") + if len(subfields) > 2: + raise FieldNotQueryableException(f"Too many subfields: {len(subfields)}") + + key = subfields[1][0] + + if key == "name": + field = StringField(RelatedFile.file_name) + elif key == "size": + field = SizeField(RelatedFile.file_size) + elif key == "sha256": + field = StringField(RelatedFile.sha256) + elif key == "count": + return self.column.any(self.count_condition(expression)) + else: + raise FieldNotQueryableException(f"No such subfield: {key}") + + return self.column.any(field._get_condition(expression, subfields)) diff --git a/mwdb/core/search/mappings.py b/mwdb/core/search/mappings.py index 27e8ce623..700597eba 100644 --- a/mwdb/core/search/mappings.py +++ b/mwdb/core/search/mappings.py @@ -23,6 +23,7 @@ JSONField, ListField, MultiField, + RelatedField, RelationField, ShareField, SharerField, @@ -58,6 +59,7 @@ "karton": UUIDField(Object.analyses, KartonAnalysis.id), "comment_author": CommentAuthorField(Object.comment_authors, User.login), "upload_count": UploadCountField(Object.upload_count), + "related": RelatedField(Object.related_files), }, File.__name__: { "name": FileNameField(File.file_name), diff --git a/mwdb/model/__init__.py b/mwdb/model/__init__.py index 2d6323e44..443f33ffc 100644 --- a/mwdb/model/__init__.py +++ b/mwdb/model/__init__.py @@ -47,7 +47,7 @@ def after_cursor_execute(conn, cursor, statement, parameters, context, executema from .blob import TextBlob # noqa: E402 from .comment import Comment # noqa: E402 from .config import Config, StaticConfig # noqa: E402 -from .file import File # noqa: E402 +from .file import File, RelatedFile # noqa: E402 from .group import Group, Member # noqa: E402 from .karton import KartonAnalysis, karton_object # noqa: E402 from .oauth import OpenIDProvider, OpenIDUserIdentity # noqa: E402 @@ -76,6 +76,7 @@ def after_cursor_execute(conn, cursor, statement, parameters, context, executema "OpenIDProvider", "OpenIDUserIdentity", "relation", + "RelatedFile", "QuickQuery", "Tag", "User", diff --git a/mwdb/model/file.py b/mwdb/model/file.py index a6914cbe8..32a27ec4e 100644 --- a/mwdb/model/file.py +++ b/mwdb/model/file.py @@ -6,22 +6,22 @@ import pyzipper from Cryptodome.Util.strxor import strxor_c -from sqlalchemy import or_ +from flask import g +from sqlalchemy import not_, or_ from sqlalchemy.dialects.postgresql.array import ARRAY from sqlalchemy.ext.mutable import MutableList from werkzeug.utils import secure_filename from mwdb.core.auth import AuthScope, generate_token, verify_token from mwdb.core.config import StorageProviderType, app_config -from mwdb.core.karton import send_file_to_karton -from 
mwdb.core.util import ( - calc_crc32, - calc_hash, - calc_magic, - calc_ssdeep, - get_fd_path, - get_s3_client, +from mwdb.core.file_util import ( + delete_from_storage, + get_from_storage, + iterate_buffer, + write_to_storage, ) +from mwdb.core.karton import send_file_to_karton +from mwdb.core.util import calc_crc32, calc_hash, calc_magic, calc_ssdeep, get_fd_path from . import db from .object import Object @@ -126,28 +126,7 @@ def get_or_create( file_obj.alt_names.append(original_filename) if is_new: - file_stream.seek(0, os.SEEK_SET) - if app_config.mwdb.storage_provider == StorageProviderType.S3: - get_s3_client( - app_config.mwdb.s3_storage_endpoint, - app_config.mwdb.s3_storage_access_key, - app_config.mwdb.s3_storage_secret_key, - app_config.mwdb.s3_storage_region_name, - app_config.mwdb.s3_storage_secure, - app_config.mwdb.s3_storage_iam_auth, - ).put_object( - Bucket=app_config.mwdb.s3_storage_bucket_name, - Key=file_obj._calculate_path(), - Body=file_stream, - ) - elif app_config.mwdb.storage_provider == StorageProviderType.DISK: - with open(file_obj._calculate_path(), "wb") as f: - shutil.copyfileobj(file_stream, f) - else: - raise RuntimeError( - f"StorageProvider {app_config.mwdb.storage_provider} " - f"is not supported" - ) + write_to_storage(file_stream, file_obj) file_obj.upload_stream = file_stream return file_obj, is_new @@ -223,34 +202,7 @@ def open(self): stream = os.fdopen(dupfd, "rb") stream.seek(0, os.SEEK_SET) return stream - if app_config.mwdb.storage_provider == StorageProviderType.S3: - # Stream coming from Boto3 get_object is not buffered and not seekable. - # We need to download it to the temporary file first. - stream = tempfile.TemporaryFile(mode="w+b") - try: - get_s3_client( - app_config.mwdb.s3_storage_endpoint, - app_config.mwdb.s3_storage_access_key, - app_config.mwdb.s3_storage_secret_key, - app_config.mwdb.s3_storage_region_name, - app_config.mwdb.s3_storage_secure, - app_config.mwdb.s3_storage_iam_auth, - ).download_fileobj( - Bucket=app_config.mwdb.s3_storage_bucket_name, - Key=self._calculate_path(), - Fileobj=stream, - ) - stream.seek(0, io.SEEK_SET) - return stream - except Exception: - stream.close() - raise - elif app_config.mwdb.storage_provider == StorageProviderType.DISK: - return open(self._calculate_path(), "rb") - else: - raise RuntimeError( - f"StorageProvider {app_config.mwdb.storage_provider} is not supported" - ) + return get_from_storage(self) def read(self): """ @@ -355,3 +307,206 @@ def get_by_download_token(download_token): def _send_to_karton(self): return send_file_to_karton(self) + + +class RelatedFile(db.Model): + __tablename__ = "related_file" + + id = db.Column(db.Integer, primary_key=True) + object_id = db.Column( + db.Integer, + db.ForeignKey("object.id", ondelete="CASCADE"), + nullable=False, + ) + file_name = db.Column(db.String, nullable=False) + file_size = db.Column(db.Integer, nullable=False) + sha256 = db.Column(db.String, nullable=False) + + related_object = db.relationship( + "Object", + back_populates="related_files", + lazy=True, + ) + + def _calculate_path(self): + if app_config.mwdb.storage_provider == StorageProviderType.DISK: + # upload_path = ( + # "related_files" + # if app_config.mwdb.related_files_folder == "" + # else app_config.mwdb.related_files_folder + "/related_files" + # ) + upload_path = "/app/uploads/related_files" + elif app_config.mwdb.storage_provider == StorageProviderType.S3: + upload_path = "related_files/" + else: + raise RuntimeError( + f"StorageProvider {app_config.mwdb.storage_provider} 
is not supported" + ) + + sample_sha256 = self.sha256.lower() + + if app_config.mwdb.hash_pathing: + # example: related_files/9/f/8/6/9f86d0818... + upload_path = os.path.join(upload_path, *list(sample_sha256)[0:4]) + + if app_config.mwdb.storage_provider == StorageProviderType.DISK: + upload_path = os.path.abspath(upload_path) + os.makedirs(upload_path, mode=0o755, exist_ok=True) + + return os.path.join(upload_path, sample_sha256) + + @classmethod + def create( + cls, + file_name, + file_stream, + main_obj_dhash, + ): + file_stream.seek(0, os.SEEK_END) + file_size = file_stream.tell() + if file_size == 0: + raise EmptyFileError + + sha256 = calc_hash(file_stream, hashlib.sha256(), lambda h: h.hexdigest()) + + main_obj = ( + db.session.query(Object).filter(Object.dhash == main_obj_dhash).first() + ) + # If main file doesn't exist or no access + if main_obj is None or not main_obj.has_explicit_access(g.auth_user): + raise ValueError( + "There is no object with this sha256 or you don't have access" + ) + + is_new = True + new_related_file = ( + db.session.query(RelatedFile).filter(RelatedFile.sha256 == sha256).first() + ) + # If RelatedFile already exists + if new_related_file is not None: + is_new = False + new_related_file = ( + db.session.query(RelatedFile) + .filter(RelatedFile.sha256 == sha256) + .filter(RelatedFile.object_id == main_obj.id) + .first() + ) + # If RelatedFile related to main_obj already exists + if new_related_file is not None: + raise FileExistsError("Related file with this sha256 already exists") + + new_related_file = RelatedFile( + object_id=main_obj.id, + file_name=secure_filename(file_name), + file_size=file_size, + sha256=sha256, + ) + + if is_new: + write_to_storage(file_stream, new_related_file) + + db.session.add(new_related_file) + db.session.commit() + + @classmethod + def access(cls, main_obj_identifier, identifier): + main_obj = ( + db.session.query(Object).filter(Object.dhash == main_obj_identifier).first() + ) + if main_obj is None: + raise ValueError( + "There is no object with this sha256 or you don't have access" + ) + if not main_obj.has_explicit_access(g.auth_user): + raise ValueError( + "There is no object with this sha256 or you don't have access" + ) + + related_file = ( + db.session.query(RelatedFile) + .filter(RelatedFile.sha256 == identifier) + .filter(RelatedFile.object_id == main_obj.id) + .first() + ) + # Empty list - no such RelatedFile + if related_file is None: + raise ValueError( + "There is no object with this sha256 or you don't have access" + ) + + return related_file + + @classmethod + def delete(cls, main_obj_identifier, identifier): + main_obj = ( + db.session.query(Object).filter(Object.dhash == main_obj_identifier).first() + ) + if main_obj is None: + raise ValueError( + "There is no object with this sha256 or you don't have access" + ) + if not main_obj.has_explicit_access(g.auth_user): + raise ValueError( + "There is no object with this sha256 or you don't have access" + ) + + related_file_obj = ( + db.session.query(RelatedFile) + .filter(RelatedFile.sha256 == identifier) + .filter(RelatedFile.object_id == main_obj.id) + .first() + ) + + if related_file_obj is None: + raise ValueError( + "There is no object with this sha256 or you don't have access" + ) + + is_last = False + other_related_file_obj = ( + db.session.query(RelatedFile) + .filter(RelatedFile.sha256 == identifier) + .filter(not_(RelatedFile.object_id == main_obj.id)) + .first() + ) + if other_related_file_obj is None: + is_last = True + + if is_last: + 
delete_from_storage(related_file_obj) + + db.session.delete(related_file_obj) + db.session.commit() + return + + def open(self): + """ + Opens the related file stream with contents. + """ + return get_from_storage(self) + + def iterate(self, chunk_size=1024 * 256): + """ + Iterates over bytes in the file contents + """ + return iterate_buffer(self, chunk_size) + + @classmethod + def zip_all(cls, main_obj_identifier): + main_obj = ( + db.session.query(Object).filter(Object.dhash == main_obj_identifier).first() + ) + if main_obj is None: + raise ValueError( + "There is no object with this sha256 or you don't have access" + ) + related_files = ( + db.session.query(RelatedFile).filter(RelatedFile.object_id == main_obj.id) + ).all() + + with tempfile.NamedTemporaryFile() as temp_file: + with open(temp_file.name, "rb") as reader: + with pyzipper.ZipFile(temp_file, "w") as zip_file: + for rf in related_files: + zip_file.write(rf._calculate_path(), arcname=rf.file_name) + return reader.read() diff --git a/mwdb/model/migrations/versions/02f584212ea5_assign_new_capabilities_related_files.py b/mwdb/model/migrations/versions/02f584212ea5_assign_new_capabilities_related_files.py new file mode 100644 index 000000000..5da6d05bf --- /dev/null +++ b/mwdb/model/migrations/versions/02f584212ea5_assign_new_capabilities_related_files.py @@ -0,0 +1,60 @@ +"""assign new capabilities for related files + +Revision ID: 02f584212ea5 +Revises: 3c610b0ddebc +Create Date: 2022-12-14 12:03:22.613573 + +""" +import sqlalchemy as sa +from alembic import op + +# revision identifiers, used by Alembic. +revision = "02f584212ea5" +down_revision = "3c610b0ddebc" +branch_labels = None +depends_on = None + + +def upgrade(): + op.execute( + """ + UPDATE public.group + SET capabilities = array_append(capabilities, 'access_related_files') + WHERE name='public' OR array_position(capabilities, 'manage_users') IS NOT NULL; + """ + ) + op.execute( + """ + UPDATE public.group + SET capabilities = array_append(capabilities, 'adding_related_files') + WHERE name='registered' OR array_position(capabilities, 'manage_users') IS NOT NULL; + """ + ) + op.execute( + """ + UPDATE public.group + SET capabilities = array_append(capabilities, 'removing_related_files') + WHERE array_position(capabilities, 'manage_users') IS NOT NULL; + """ + ) + + +def downgrade(): + op.execute( + """ + UPDATE public.group + SET capabilities = array_remove(capabilities, 'access_related_files'); + """ + ) + op.execute( + """ + UPDATE public.group + SET capabilities = array_remove(capabilities, 'adding_related_files'); + """ + ) + op.execute( + """ + UPDATE public.group + SET capabilities = array_remove(capabilities, 'removing_related_files'); + """ + ) diff --git a/mwdb/model/migrations/versions/3c610b0ddebc_add_related_files_table.py b/mwdb/model/migrations/versions/3c610b0ddebc_add_related_files_table.py new file mode 100644 index 000000000..3286fb65b --- /dev/null +++ b/mwdb/model/migrations/versions/3c610b0ddebc_add_related_files_table.py @@ -0,0 +1,35 @@ +"""add related_files table + +Revision ID: 3c610b0ddebc +Revises: bd93d1497694 +Create Date: 2022-12-12 09:02:40.406370 + +""" +import sqlalchemy as sa +from alembic import op + +# revision identifiers, used by Alembic. 
+revision = "3c610b0ddebc" +down_revision = "bd93d1497694" +branch_labels = None +depends_on = None + + +def upgrade(): + op.create_table( + "related_file", + sa.Column("id", sa.Integer(), autoincrement=True, nullable=False), + sa.Column("object_id", sa.Integer(), nullable=False), + sa.Column("file_name", sa.String(), nullable=False), + sa.Column("file_size", sa.Integer, nullable=False), + sa.Column("sha256", sa.String(length=64), nullable=False), + sa.ForeignKeyConstraint( + ["object_id"], + ["object.id"], + ), + sa.PrimaryKeyConstraint("id"), + ) + + +def downgrade(): + op.drop_table("related_file") diff --git a/mwdb/model/object.py b/mwdb/model/object.py index 0db0e879f..d89f6a2cd 100644 --- a/mwdb/model/object.py +++ b/mwdb/model/object.py @@ -314,6 +314,12 @@ class Object(db.Model): lazy="joined", cascade="save-update, merge, delete", ) + related_files = db.relationship( + "RelatedFile", + back_populates="related_object", + lazy=True, + cascade="save-update, merge, delete", + ) followers = db.relationship( "User", secondary=favorites, back_populates="favorites", lazy="joined" diff --git a/mwdb/resources/file.py b/mwdb/resources/file.py index 9500670e5..c8f3fddbe 100644 --- a/mwdb/resources/file.py +++ b/mwdb/resources/file.py @@ -5,7 +5,7 @@ from mwdb.core.capabilities import Capabilities from mwdb.core.plugins import hooks from mwdb.core.rate_limit import rate_limited_resource -from mwdb.model import File +from mwdb.model import File, Object, RelatedFile, db from mwdb.model.file import EmptyFileError from mwdb.model.object import ObjectTypeConflictError from mwdb.schema.file import ( @@ -14,6 +14,7 @@ FileItemResponseSchema, FileLegacyCreateRequestSchema, FileListResponseSchema, + RelatedFileResponseSchema, ) from . import load_schema, requires_authorization, requires_capabilities @@ -613,3 +614,324 @@ def post(self, identifier): download_token = file.generate_download_token() schema = FileDownloadTokenResponseSchema() return schema.dump({"token": download_token}) + + +@rate_limited_resource +class RelatedFileResource(Resource): + def get(self, type, main_obj_identifier): + """ + --- + summary: Get list of related files + description: | + Returns list of related files for an object specified by sha256 + security: + - bearerAuth: [] + tags: + - related_file + parameters: + - in: path + name: type + schema: + type: string + enum: [file, config, blob, object] + description: Type of object + - in: path + name: main_obj_identifier + schema: + type: string + description: Main object identifier (SHA256) + required: true + responses: + 200: + description: List of related files + content: + application/json: + schema: RelatedFileResponseSchema + 404: + description: | + There is no object with provided sha256 + or user doesn't have access to it + 503: + description: | + Request canceled due to database statement timeout. 
+ """ + main_object = ( + db.session.query(Object) + .filter(Object.dhash == main_obj_identifier) + .filter(g.auth_user.has_access_to_object(Object.id)) + ).first() + + if main_object is None: + raise NotFound( + "There is no object with provided sha256 or you don't have access to it" + ) + + if g.auth_user.has_rights(Capabilities.access_related_files): + related_files = ( + db.session.query(RelatedFile) + .filter(RelatedFile.object_id == main_object.id) + .all() + ) + else: + related_files = [] + + schema = RelatedFileResponseSchema() + return schema.dump({"related_files": related_files}) + + @requires_authorization + @requires_capabilities(Capabilities.adding_related_files) + def post(self, type, main_obj_identifier): + """ + --- + summary: Upload related file + description: | + Uploads a new related file. + + Requires `adding_related_files` capability. + security: + - bearerAuth: [] + tags: + - related_file + parameters: + - in: path + name: type + schema: + type: string + enum: [file, config, blob, object] + description: Type of object + - in: path + name: main_obj_identifier + schema: + type: string + description: Main object identifier (SHA256) + required: true + requestBody: + required: true + content: + multipart/form-data: + schema: + type: object + properties: + file: + type: string + format: binary + description: Related file contents to be uploaded + required: + - file + responses: + 200: + description: OK + 400: + description: Related file is empty + 403: + description: When user doesn't have `adding_related_files` capability + 404: + description: | + There is no object with provided sha256 + or user doesn't have access to it + 409: + description: Such related file already exists + 503: + description: | + Request canceled due to database statement timeout. + """ + try: + RelatedFile.create( + request.files["file"].filename, + request.files["file"].stream, + main_obj_identifier, + ) + except EmptyFileError: + raise BadRequest("Related file cannot be empty") + except ValueError: + raise NotFound( + "There is no object with provided sha256 or you don't have access to it" + ) + except FileExistsError: + raise Conflict("Such related file already exists") + + return Response("OK") + + +class RelatedFileItemResource(Resource): + @requires_authorization + @requires_capabilities(Capabilities.access_related_files) + def get(self, type, main_obj_identifier, identifier): + """ + --- + summary: Download related file + description: | + Returns related file contents. + + Requires `access_related_files` capability. + security: + - bearerAuth: [] + tags: + - related_file + parameters: + - in: path + name: type + schema: + type: string + enum: [file, config, blob, object] + description: Type of object + - in: path + name: main_obj_identifier + required: true + schema: + type: string + description: Main object identifier (SHA256) + - in: path + name: identifier + required: true + schema: + type: string + description: Related file identifier (SHA256) + responses: + 200: + description: Related file contents + content: + application/octet-stream: + schema: + type: string + format: binary + 403: + description: When user doesn't have `access_related_files` capability + 404: + description: | + When related file doesn't exist + or user doesn't have access to it. + 503: + description: | + Request canceled due to database statement timeout. 
+ """ + + if not g.auth_user: + raise Unauthorized("Not authenticated.") + + try: + related_file_obj = RelatedFile.access(main_obj_identifier, identifier) + except ValueError: + raise NotFound("Related file not found") + + return Response( + related_file_obj.iterate(), + content_type="application/octet-stream", + headers={ + "Content-disposition": f"attachment; filename={related_file_obj.sha256}" + }, + ) + + @requires_authorization + @requires_capabilities(Capabilities.removing_related_files) + def delete(self, type, main_obj_identifier, identifier): + """ + --- + summary: Delete related file + description: | + Removes a related file from the database. + + Requires `removing_related_files` capability. + security: + - bearerAuth: [] + tags: + - related_file + parameters: + - in: path + name: type + schema: + type: string + enum: [file, config, blob, object] + description: Type of object + - in: path + name: main_obj_identifier + required: true + schema: + type: string + description: Main object identifier (SHA256) + - in: path + name: identifier + required: true + schema: + type: string + description: Related file identifier (SHA256) + responses: + 200: + description: When related file was deleted + 403: + description: When user doesn't have `removing_related_files` capability + 404: + description: | + When related file doesn't exist + or user doesn't have access to it. + 503: + description: | + Request canceled due to database statement timeout. + """ + try: + RelatedFile.delete(main_obj_identifier, identifier) + except ValueError: + raise NotFound( + "There is no file with provided sha256 or you don't have access to it" + ) + + return Response("OK") + + +class RelatedFileZipDownloadResource(Resource): + def get(self, type, main_obj_identifier): + """ + --- + summary: Download every related file for a provided main object + description: | + Returns zipped related file contents. + + Requires `access_related_files` capability. + security: + - bearerAuth: [] + tags: + - related_file + parameters: + - in: path + name: type + schema: + type: string + enum: [file, config, blob, object] + description: Type of object + - in: path + name: main_obj_identifier + required: true + schema: + type: string + description: Main object identifier (SHA256) + responses: + 200: + description: Contents of related file + content: + application/octet-stream: + schema: + type: string + format: binary + 403: + description: When user doesn't have `access_related_files` capability + 404: + description: | + There is no object with provided sha256 + or user doesn't have access to it + 503: + description: | + Request canceled due to database statement timeout. 
+ """ + try: + zipped_related_files = RelatedFile.zip_all(main_obj_identifier) + except ValueError: + raise NotFound( + "There is no object with provided sha256 or you don't have access to it" + ) + + zip_file_name = "related_files_" + main_obj_identifier + ".zip" + return Response( + zipped_related_files, + content_type="application/octet-stream", + headers={"Content-disposition": f"attachment; filename={zip_file_name}"}, + ) diff --git a/mwdb/schema/file.py b/mwdb/schema/file.py index 8825135ce..b0a9114f3 100644 --- a/mwdb/schema/file.py +++ b/mwdb/schema/file.py @@ -73,3 +73,15 @@ class FileItemResponseSchema(ObjectItemResponseSchema): class FileDownloadTokenResponseSchema(Schema): token = fields.Str(required=True, allow_none=False) + + +class RelatedFileItemResponseSchema(Schema): + file_name = fields.Str(required=True, allow_none=False) + file_size = fields.Int(required=True, allow_none=False) + sha256 = fields.Str(required=True, allow_none=False) + + +class RelatedFileResponseSchema(Schema): + related_files = fields.Nested( + RelatedFileItemResponseSchema, many=True, required=True, allow_none=False + ) diff --git a/mwdb/web/src/commons/api/index.jsx b/mwdb/web/src/commons/api/index.jsx index f5d0d6487..114329e07 100644 --- a/mwdb/web/src/commons/api/index.jsx +++ b/mwdb/web/src/commons/api/index.jsx @@ -444,6 +444,32 @@ function uploadFile(file, parent, upload_as, attributes, fileUploadTimeout) { return axios.post(`/file`, formData, { timeout: fileUploadTimeout }); } +function uploadRelatedFile(file, mainFileDhash, type = "object") { + let formData = new FormData(); + formData.append("file", file); + return axios.post(`/${type}/${mainFileDhash}/related_file`, formData); +} + +function downloadRelatedFile(mainFileDhash, id, type = "object") { + return axios.get(`/${type}/${mainFileDhash}/related_file/${id}`, { + responseType: "arraybuffer", + responseEncoding: "binary", + }); +} + +function deleteRelatedFile(mainFileDhash, id, type = "object") { + return axios.delete(`/${type}/${mainFileDhash}/related_file/${id}`); +} + +function getListOfRelatedFiles(mainFileDhash, type = "object") { + return axios.get(`/${type}/${mainFileDhash}/related_file`); +} + +function getZippedRelatedFilesLink(mainFileDhash, type = "object") { + const baseURL = getApiForEnvironment(); + return `${baseURL}/${type}/${mainFileDhash}/related_file/zip`; +} + function getRemoteNames() { return axios.get("/remote"); } @@ -627,6 +653,11 @@ export const api = { requestFileDownloadLink, requestZipFileDownloadLink, uploadFile, + uploadRelatedFile, + downloadRelatedFile, + deleteRelatedFile, + getListOfRelatedFiles, + getZippedRelatedFilesLink, getRemoteNames, pushObjectRemote, pullObjectRemote, diff --git a/mwdb/web/src/commons/auth/capabilities.jsx b/mwdb/web/src/commons/auth/capabilities.jsx index 822fd8239..f7dac5b6b 100644 --- a/mwdb/web/src/commons/auth/capabilities.jsx +++ b/mwdb/web/src/commons/auth/capabilities.jsx @@ -25,6 +25,9 @@ export const Capability = { kartonAssign: "karton_assign", kartonReanalyze: "karton_reanalyze", removingKarton: "karton_unassign", + accessRelatedFiles: "access_related_files", + addingRelatedFiles: "adding_related_files", + removingRelatedFiles: "removing_related_files", }; export let capabilitiesList = { @@ -59,6 +62,9 @@ export let capabilitiesList = { "Can assign existing analysis to the object (required by karton-mwdb-reporter)", [Capability.kartonReanalyze]: "Can resubmit any object for analysis", [Capability.removingKarton]: "Can remove analysis from object", + 
[Capability.accessRelatedFiles]: "Can view and download related files", + [Capability.addingRelatedFiles]: "Can upload new related files", + [Capability.removingRelatedFiles]: "Can remove existing related files", }; afterPluginsLoaded(() => { diff --git a/mwdb/web/src/components/ShowObject/Views/RelatedFilesTab.jsx b/mwdb/web/src/components/ShowObject/Views/RelatedFilesTab.jsx new file mode 100644 index 000000000..89041d668 --- /dev/null +++ b/mwdb/web/src/components/ShowObject/Views/RelatedFilesTab.jsx @@ -0,0 +1,271 @@ +import React, { useCallback, useContext, useEffect, useState } from "react"; +import { Link } from "react-router-dom"; +import { toast } from "react-toastify"; + +import { FontAwesomeIcon } from "@fortawesome/react-fontawesome"; +import { + faPlus, + faExternalLinkSquare, + faDownload, + faTrash, + faArchive, +} from "@fortawesome/free-solid-svg-icons"; + +import { APIContext } from "@mwdb-web/commons/api"; +import { ObjectContext } from "@mwdb-web/commons/context"; +import { + ObjectAction, + ObjectTab, + ConfirmationModal, + getErrorMessage, +} from "@mwdb-web/commons/ui"; +import { humanFileSize, downloadData } from "@mwdb-web/commons/helpers"; +import ReactModal from "react-modal"; + +async function updateRelatedFiles(api, context) { + const { updateObjectData } = context; + try { + let response = await api.getListOfRelatedFiles(context.object.sha256); + updateObjectData({ + related_files: response.data.related_files, + }); + } catch (error) { + toast(getErrorMessage(error), { + type: "error", + }); + } +} + +function RelatedFileItem({ file_name, file_size, sha256 }) { + const api = useContext(APIContext); + const context = useContext(ObjectContext); + const [isConfirmationModalOpen, setConfirmationModalOpen] = useState(null); + const linkStyle = { + display: "inline-block", + padding: "8px", + }; + const tdStyle = { + padding: "10px 20px 10px 20px", + width: "80%", + borderRight: "none", + borderLeft: "none", + }; + + return ( + + + {file_name} +
+ {humanFileSize(file_size)} + + + { + let content = await api.downloadRelatedFile( + context.object.sha256, + sha256 + ); + downloadData( + content.data, + file_name, + "application/octet-stream" + ); + }} + > + + + setConfirmationModalOpen(true)} + > + + + { + setConfirmationModalOpen(false); + }} + onConfirm={async () => { + try { + await api.deleteRelatedFile( + context.object.sha256, + sha256 + ); + toast("Related file deleted successfully", { + type: "success", + }); + } catch (error) { + toast(getErrorMessage(error), { + type: "error", + }); + } + updateRelatedFiles(api, context); + setConfirmationModalOpen(false); + }} + message={`Are you sure you want to delete this related file?`} + buttonStyle="btn-success" + confirmText="Yes" + /> + + + ); +} + +function ShowRelatedFiles() { + const api = useContext(APIContext); + const context = useContext(ObjectContext); + const { updateObjectData } = context; + + const getRelatedFiles = useCallback(updateRelatedFiles, [ + api, + updateObjectData, + context.object.sha256, + ]); + + // JS throws a warning "Line ***: React Hook useEffect has missing dependencies: 'api' and 'context'" + // Those dependencies are skipped on purpose + // To disable this warning I used 'eslint-disable-next-line' + useEffect(() => { + getRelatedFiles(api, context); + // eslint-disable-next-line + }, [getRelatedFiles]); + + if (!context.object.related_files) { + return
Loading...
; + } + if (context.object.related_files.length === 0) { + return
Nothing to show here
; + } + + return ( + + + + + + {context.object.related_files.map((related_file) => ( + + ))} +
+ File + + Actions +
+ ); +} + +export default function RelatedFilesTab() { + const [showModal, setShowModal] = useState(); + const [file, setFile] = useState(null); + const context = useContext(ObjectContext); + const api = useContext(APIContext); + + const modalStyle = { + content: { + top: "50%", + left: "50%", + right: "auto", + bottom: "auto", + marginRight: "-50%", + transform: "translate(-50%, -50%)", + }, + }; + + async function handleSubmit() { + try { + await api.uploadRelatedFile(file, context.object.sha256); + updateRelatedFiles(api, context); + toast("Related file uploaded successfully", { + type: "success", + }); + } catch (error) { + toast(getErrorMessage(error), { + type: "error", + }); + } + } + + async function handleDownloadAll() { + window.location.href = await api.getZippedRelatedFilesLink( + context.object.sha256 + ); + } + + return ( +
+ { + setShowModal(true); + }} + />, + , + ]} + /> + { + setShowModal(false); + }} + style={modalStyle} + > +
{ + handleSubmit(); + setShowModal(false); + }} + > + setFile(event.target.files[0])} + /> + +
+
+
+ ); +} diff --git a/mwdb/web/src/components/ShowObject/index.jsx b/mwdb/web/src/components/ShowObject/index.jsx index e8be8f093..afb75a817 100644 --- a/mwdb/web/src/components/ShowObject/index.jsx +++ b/mwdb/web/src/components/ShowObject/index.jsx @@ -1,6 +1,7 @@ export { default as ShowObject } from "./ShowObject"; export { default as RelationsTab } from "./Views/RelationsTab"; +export { default as RelatedFilesTab } from "./Views/RelatedFilesTab"; export { default as LatestConfigTab } from "./Views/LatestConfigTab"; export { default as DownloadAction } from "./Actions/DownloadAction"; diff --git a/mwdb/web/src/components/ShowSample.jsx b/mwdb/web/src/components/ShowSample.jsx index 75cef20b3..077f43f5c 100644 --- a/mwdb/web/src/components/ShowSample.jsx +++ b/mwdb/web/src/components/ShowSample.jsx @@ -8,6 +8,7 @@ import { useTabContext, LatestConfigTab, RelationsTab, + RelatedFilesTab, DownloadAction, ZipAction, FavoriteAction, @@ -349,6 +350,7 @@ export default function ShowSample(props) { , ]} /> + ); diff --git a/tests/backend/test_permissions.py b/tests/backend/test_permissions.py index 5ddfbc4ae..cbe0ac059 100644 --- a/tests/backend/test_permissions.py +++ b/tests/backend/test_permissions.py @@ -239,3 +239,84 @@ def test_removing_object_with_comments(admin_session): with ShouldRaise(status_code=404): admin_session.get_sample(sample.dhash) + + +def test_adding_related_files(admin_session): + testCase = RelationTestCase(admin_session) + + Alice = testCase.new_user("Alice") + Bob = testCase.new_user("Bob", capabilities=["adding_related_files"]) + + SampleA = testCase.new_sample("SampleA") + SampleA.create(Alice, upload_as="public") + SampleB = testCase.new_sample("SampleB") + + # Alice doesn't have capability + with ShouldRaise(status_code=403): + Alice.session.add_related_file(SampleA.dhash, "RelatedFileA") + + # Everything works fine + Bob.session.add_related_file(SampleA.dhash, "RelatedFileA") + assert len( testCase.session.get_related_files(SampleA.dhash)['related_files'] ) == 1 + + # Bob can't add related files to objects not accessible for him + with ShouldRaise(status_code=404): + Bob.session.add_related_file(SampleB.dhash, "RelatedFileB") + + +def test_removing_related_files(admin_session): + testCase = RelationTestCase(admin_session) + + Alice = testCase.new_user("Alice") + Bob = testCase.new_user("Bob", capabilities=["removing_related_files"]) + + SampleA = testCase.new_sample("SampleA") + SampleA.create(Alice, upload_as="public") + SampleB = testCase.new_sample("SampleB") + + related_file_content = rand_string() + related_file_dhash = calc_sha256(related_file_content) + + # Related files added by admin session + testCase.session.add_related_file(SampleA.dhash, "RelatedFile", related_file_content) + testCase.session.add_related_file(SampleB.dhash, "RelatedFile", related_file_content) + + assert len( testCase.session.get_related_files(SampleA.dhash)['related_files'] ) == 1 + assert len( testCase.session.get_related_files(SampleB.dhash)['related_files'] ) == 1 + + # Alice doesn't have capability + with ShouldRaise(status_code=403): + Alice.session.remove_related_file(SampleA.dhash, related_file_dhash) + + # Bob can't remove related files for objects not accessible for him + with ShouldRaise(status_code=404): + Bob.session.remove_related_file(SampleB.dhash, related_file_dhash) + + # Everything works file + Bob.session.remove_related_file(SampleA.dhash, related_file_dhash) + + assert len( testCase.session.get_related_files(SampleA.dhash)['related_files'] ) == 0 + assert len( 
testCase.session.get_related_files(SampleB.dhash)['related_files'] ) == 1 + + +def test_accessing_related_files(admin_session): + testCase = RelationTestCase(admin_session) + + Alice = testCase.new_user("Alice") + Bob = testCase.new_user("Bob", capabilities=["access_related_files"]) + + SampleA = testCase.new_sample("SampleA") + SampleA.create(Alice, upload_as="public") + SampleB = testCase.new_sample("SampleB") + + testCase.session.add_related_file(SampleA.dhash, "RelatedFile") + + # Alice doesn't have capability + assert len( Alice.session.get_related_files(SampleA.dhash)['related_files'] ) == 0 + + # Everything works fine + assert len( Bob.session.get_related_files(SampleA.dhash)['related_files'] ) == 1 + + # Bob can't access related files for objects not accessible for him + with ShouldRaise(status_code=404): + Bob.session.get_related_files(SampleB.dhash) diff --git a/tests/backend/test_search.py b/tests/backend/test_search.py index bc8d872b4..e63a65f4e 100644 --- a/tests/backend/test_search.py +++ b/tests/backend/test_search.py @@ -2,7 +2,7 @@ import datetime from .relations import * -from .utils import base62uuid +from .utils import base62uuid, calc_sha256 from .utils import ShouldRaise from .utils import rand_string import random @@ -706,3 +706,85 @@ def test_search_multi(admin_session): wildcard_hash = samples[0].get("sha512")[:-100] + "*" with ShouldRaise(status_code=400): found_objs = test.search(f'file.multi:"{wildcard_hash}"') + + +def test_related_files_search(admin_session): + test = admin_session + + filename = base62uuid() + + # File A + file_content = base62uuid() + FileA = test.add_sample(filename, file_content) + + test.add_related_file(FileA["sha256"], "related.txt", "b"*1024) + test.add_related_file(FileA["sha256"], "related.mp3") + test.add_related_file(FileA["sha256"], "related.png") + + # File 2 + file_content = base62uuid() + FileB = test.add_sample(filename, file_content) + + test.add_related_file(FileB["sha256"], "related.txt", "b"*2048) + test.add_related_file(FileB["sha256"], "related.mp3") + test.add_related_file(FileB["sha256"], "related.jpg") + test.add_related_file(FileB["sha256"], "related.csv") + + # Related file name + results = [ + result["id"] for result in + test.search(f'related.name:"*.png" AND file.name:"{filename}"') + ] + assert sorted(results) == sorted([FileA["sha256"]]) + results = [ + result["id"] for result in + test.search(f'related.name:"related.mp3" AND file.name:"{filename}"') + ] + assert sorted(results) == sorted([FileA["sha256"], FileB["sha256"]]) + + # Related file size + results = [ + result["id"] for result in + test.search(f'related.size:">2000" AND file.name:"{filename}"') + ] + assert sorted(results) == sorted([FileB["sha256"]]) + results = [ + result["id"] for result in + test.search(f'related.size:"<=2048" AND file.name:"{filename}"') + ] + assert sorted(results) == sorted([FileA["sha256"], FileB["sha256"]]) + results = [ + result["id"] for result in + test.search(f'related.size:[1000 TO 1100] AND file.name:"{filename}"') + ] + assert sorted(results) == sorted([FileA["sha256"]]) + + # Related file sha256 + sha256 = calc_sha256("b"*1024) + results = [ + result["id"] for result in + test.search(f'related.sha256:"{sha256}" AND file.name:"{filename}"') + ] + assert sorted(results) == sorted([FileA["sha256"]]) + + # Related file count + results = [ + result["id"] for result in + test.search(f'related.count:"3" AND file.name:"{filename}"') + ] + assert sorted(results) == sorted([FileA["sha256"]]) + results = [ + result["id"] for 
result in + test.search(f'related.count:">3" AND file.name:"{filename}"') + ] + assert sorted(results) == sorted([FileB["sha256"]]) + results = [ + result["id"] for result in + test.search(f'related.count:"<=4" AND file.name:"{filename}"') + ] + assert sorted(results) == sorted([FileA["sha256"], FileB["sha256"]]) + results = [ + result["id"] for result in + test.search(f'related.count:[1 TO 3] AND file.name:"{filename}"') + ] + assert sorted(results) == sorted([FileA["sha256"]]) diff --git a/tests/backend/utils.py b/tests/backend/utils.py index 844404b2b..b095d53b3 100644 --- a/tests/backend/utils.py +++ b/tests/backend/utils.py @@ -5,6 +5,7 @@ import string import time import uuid +import hashlib import baseconv import requests @@ -19,6 +20,10 @@ def base62uuid(): return converter.encode(uuid4_as_int) +def calc_sha256(content): + return hashlib.sha256(bytes(str(content), "utf-8")).hexdigest() + + def rand_string(size=20): return "".join(random.choices(string.ascii_lowercase + string.digits, k=size)) @@ -489,3 +494,25 @@ def unassign_analysis_from_object(self, identifier, analysis_id): res = self.session.delete(self.mwdb_url + "/object/" + identifier + "/karton/" + analysis_id) res.raise_for_status() return res.json() + + def get_related_files(self, identifier): + res = self.session.get(self.mwdb_url + "/object/" + identifier + "/related_file") + res.raise_for_status() + return res.json() + + def add_related_file(self, main_obj_identifier, filename=None, content=None): + if filename is None: + filename = str(uuid.uuid4()) + + if content is None: + content = str(uuid.uuid4()) + + res = self.session.post( + self.mwdb_url + "/object/" + main_obj_identifier + "/related_file", + files={"file": (filename, content)}, + ) + res.raise_for_status() + + def remove_related_file(self, main_obj_identifier, identifier): + res = self.session.delete(self.mwdb_url + "/object/" + main_obj_identifier + "/related_file/" + identifier) + res.raise_for_status()
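

A quick way to exercise the new related-file endpoints introduced by this change is a small REST client script. The sketch below is illustrative only: it assumes a reachable MWDB API, an API token for an account holding the ``access_related_files`` and ``adding_related_files`` capabilities, and placeholder values for ``MWDB_URL``, ``API_TOKEN`` and ``OBJECT_SHA256``. The routes it calls mirror the ones registered in ``mwdb/app.py`` and documented in ``mwdb/resources/file.py`` above.

.. code-block:: python

    import requests

    # Illustrative placeholders -- substitute your own MWDB instance URL,
    # API token and the SHA256 hash of an object you have access to.
    MWDB_URL = "http://localhost/api"
    API_TOKEN = "<api token>"
    OBJECT_SHA256 = "<sha256 of the main object>"

    session = requests.Session()
    session.headers["Authorization"] = f"Bearer {API_TOKEN}"

    # Upload a related file (requires the adding_related_files capability)
    with open("report.pdf", "rb") as f:
        session.post(
            f"{MWDB_URL}/object/{OBJECT_SHA256}/related_file",
            files={"file": ("report.pdf", f)},
        ).raise_for_status()

    # List related files: each entry carries file_name, file_size and sha256
    res = session.get(f"{MWDB_URL}/object/{OBJECT_SHA256}/related_file")
    res.raise_for_status()
    related_files = res.json()["related_files"]
    for entry in related_files:
        print(entry["file_name"], entry["file_size"], entry["sha256"])

    # Download a single related file by its SHA256
    # (requires the access_related_files capability)
    first = related_files[0]
    res = session.get(
        f"{MWDB_URL}/object/{OBJECT_SHA256}/related_file/{first['sha256']}"
    )
    res.raise_for_status()
    with open(first["file_name"], "wb") as out:
        out.write(res.content)

    # Download every related file of the object as a single .zip archive
    res = session.get(f"{MWDB_URL}/object/{OBJECT_SHA256}/related_file/zip")
    res.raise_for_status()
    with open(f"related_files_{OBJECT_SHA256}.zip", "wb") as out:
        out.write(res.content)

The same operations are exposed in the web UI through the new Related files tab, which relies on the ``uploadRelatedFile``, ``getListOfRelatedFiles``, ``downloadRelatedFile``, ``deleteRelatedFile`` and ``getZippedRelatedFilesLink`` helpers added to ``mwdb/web/src/commons/api/index.jsx``.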