diff --git a/ingest/Snakefile b/ingest/Snakefile index 89aed647ef..3929a5ed4e 100644 --- a/ingest/Snakefile +++ b/ingest/Snakefile @@ -310,7 +310,7 @@ rule prepare_metadata: sequence_hashes="results/sequence_hashes.ndjson", config="results/config.yaml", output: - metadata="results/metadata_post_prepare.json", + metadata="results/metadata_post_prepare.ndjson", params: log_level=LOG_LEVEL, shell: @@ -328,11 +328,11 @@ rule prepare_metadata: rule group_segments: input: script="scripts/group_segments.py", - metadata="results/metadata_post_prepare.json", + metadata="results/metadata_post_prepare.ndjson", sequences="results/sequences.ndjson", config="results/config.yaml", output: - metadata="results/metadata_post_group.json", + metadata="results/metadata_post_group.ndjson", sequences="results/sequences_post_group.ndjson", params: log_level=LOG_LEVEL, @@ -368,9 +368,9 @@ rule get_previous_submissions: # By delaying the start of the script script="scripts/call_loculus.py", prepped_metadata=( - "results/metadata_post_group.json" + "results/metadata_post_group.ndjson" if SEGMENTED - else "results/metadata_post_prepare.json" + else "results/metadata_post_prepare.ndjson" ), config="results/config.yaml", output: @@ -395,9 +395,9 @@ rule compare_hashes: config="results/config.yaml", old_hashes="results/previous_submissions.json", metadata=( - "results/metadata_post_group.json" + "results/metadata_post_group.ndjson" if SEGMENTED - else "results/metadata_post_prepare.json" + else "results/metadata_post_prepare.ndjson" ), output: to_submit="results/to_submit.json", @@ -431,9 +431,9 @@ rule prepare_files: script="scripts/prepare_files.py", config="results/config.yaml", metadata=( - "results/metadata_post_group.json" + "results/metadata_post_group.ndjson" if SEGMENTED - else "results/metadata_post_prepare.json" + else "results/metadata_post_prepare.ndjson" ), sequences=( "results/sequences_post_group.ndjson" diff --git a/ingest/scripts/call_loculus.py b/ingest/scripts/call_loculus.py 
index 2a1191cded..25a568db8f 100644 --- a/ingest/scripts/call_loculus.py +++ b/ingest/scripts/call_loculus.py @@ -411,8 +411,6 @@ def get_submitted(config: Config): logger.info(f"Backend has status of: {len(statuses)} sequence entries from ingest") logger.info(f"Ingest has submitted: {len(entries)} sequence entries to ingest") - logger.debug(entries) - logger.debug(statuses) for entry in entries: loculus_accession = entry["accession"] submitter = entry["submitter"] diff --git a/ingest/scripts/compare_hashes.py b/ingest/scripts/compare_hashes.py index 60174564e6..7062062f50 100644 --- a/ingest/scripts/compare_hashes.py +++ b/ingest/scripts/compare_hashes.py @@ -6,6 +6,7 @@ from typing import Any import click +import orjsonl import requests import yaml @@ -171,7 +172,6 @@ def main( config.debug_hashes = True submitted: dict = json.load(open(old_hashes, encoding="utf-8")) - new_metadata = json.load(open(metadata, encoding="utf-8")) update_manager = SequenceUpdateManager( submit=[], @@ -184,7 +184,9 @@ def main( config=config, ) - for fasta_id, record in new_metadata.items(): + for field in orjsonl.stream(metadata): + fasta_id = field["id"] + record = field["metadata"] if not config.segmented: insdc_accession_base = record["insdcAccessionBase"] if not insdc_accession_base: diff --git a/ingest/scripts/group_segments.py b/ingest/scripts/group_segments.py index dcfc427c37..ddc1499b81 100644 --- a/ingest/scripts/group_segments.py +++ b/ingest/scripts/group_segments.py @@ -1,6 +1,7 @@ """Script to group segments together into sequence entries prior to submission to Loculus -Example output for a single isolate with 3 segments: -"KJ682796.1.L/KJ682809.1.M/KJ682819.1.S": { +Example ndjson output for a single isolate with 3 segments: +{"id": "KJ682796.1.L/KJ682809.1.M/KJ682819.1.S", +"metadata": { "ncbiReleaseDate": "2014-07-06T00:00:00Z", "ncbiSourceDb": "GenBank", "authors": "D. Goedhals, F.J. Burt, J.T. Bester, R. 
Swanepoel", @@ -15,7 +16,7 @@ "hash_S": "f716ed13dca9c8a033d46da2f3dc2ff1", "hash": "ce7056d0bd7e3d6d3eca38f56b9d10f8", "submissionId": "KJ682796.1.L/KJ682809.1.M/KJ682819.1.S" -},""" +}}""" import hashlib import json @@ -100,9 +101,11 @@ def main( segments = config.nucleotide_sequences number_of_segments = len(segments) - with open(input_metadata, encoding="utf-8") as file: - segment_metadata: dict[str, dict[str, str]] = json.load(file) - number_of_segmented_records = len(segment_metadata.keys()) + number_of_segmented_records = 0 + segment_metadata: dict[str, dict[str, str]] = {} + for record in orjsonl.stream(input_metadata): + segment_metadata[record["id"]] = record["metadata"] + number_of_segmented_records += 1 logger.info(f"Found {number_of_segmented_records} individual segments in metadata file") # Group segments according to isolate, collection date and isolate specific values @@ -174,7 +177,7 @@ def main( number_of_groups = len(grouped_accessions) group_lower_bound = number_of_segmented_records // number_of_segments group_upper_bound = number_of_segmented_records - logging.info(f"Total of {number_of_groups} groups left after merging") + logger.info(f"Total of {number_of_groups} groups left after merging") if number_of_groups < group_lower_bound: raise ValueError( { @@ -192,11 +195,11 @@ def main( } ) - # Add segment specific metadata for the segments - metadata: dict[str, dict[str, str]] = {} # Map from original accession to the new concatenated accession fasta_id_map: dict[Accession, Accession] = {} + count = 0 + for group in grouped_accessions: # Create key by concatenating all accession numbers with their segments # e.g. 
AF1234_S/AF1235_M/AF1236_L @@ -241,12 +244,10 @@ def main( json.dumps(filtered_record, sort_keys=True).encode(), usedforsecurity=False ).hexdigest() - metadata[joint_key] = row + orjsonl.append(output_metadata, {"id": joint_key, "metadata": row}) + count += 1 - Path(output_metadata).write_text( - json.dumps(metadata, indent=4, sort_keys=True), encoding="utf-8" - ) - logging.info(f"Wrote grouped metadata for {len(metadata)} sequences") + logger.info(f"Wrote grouped metadata for {count} sequences") count = 0 count_ignored = 0 @@ -265,8 +266,8 @@ def main( }, ) count += 1 - logging.info(f"Wrote {count} sequences") - logging.info(f"Ignored {count_ignored} sequences as not found in {input_seq}") + logger.info(f"Wrote {count} sequences") + logger.info(f"Ignored {count_ignored} sequences as not found in {input_seq}") if __name__ == "__main__": diff --git a/ingest/scripts/prepare_files.py b/ingest/scripts/prepare_files.py index ac28131285..d1b20e413b 100644 --- a/ingest/scripts/prepare_files.py +++ b/ingest/scripts/prepare_files.py @@ -1,6 +1,7 @@ import csv import json import logging +import os import sys from dataclasses import dataclass from pathlib import Path @@ -101,65 +102,81 @@ def main( relevant_config = {key: full_config[key] for key in Config.__annotations__} config = Config(**relevant_config) - metadata = json.load(open(metadata_path, encoding="utf-8")) to_submit = json.load(open(to_submit_path, encoding="utf-8")) to_revise = json.load(open(to_revise_path, encoding="utf-8")) to_revoke = json.load(open(to_revoke_path, encoding="utf-8")) - metadata_submit = [] - metadata_revise = [] - metadata_submit_prior_to_revoke = [] # Only for multi-segmented case, sequences are revoked - # due to grouping changes and the newly grouped segments must be submitted as new sequences submit_ids = set() revise_ids = set() submit_prior_to_revoke_ids = set() - for fasta_id in to_submit: - metadata_submit.append(metadata[fasta_id]) - submit_ids.update(ids_to_add(fasta_id, config)) + 
def write_to_tsv_stream(data, filename, columns_list=None): + # Check if the file exists + file_exists = os.path.exists(filename) - for fasta_id, loculus_accession in to_revise.items(): - revise_record = metadata[fasta_id] - revise_record["accession"] = loculus_accession - metadata_revise.append(revise_record) - revise_ids.update(ids_to_add(fasta_id, config)) + with open(filename, "a", newline="", encoding="utf-8") as output_file: + keys = columns_list or data.keys() + dict_writer = csv.DictWriter(output_file, keys, delimiter="\t") - found_seq_to_revoke = False - for fasta_id in to_revoke: - metadata_submit_prior_to_revoke.append(metadata[fasta_id]) - submit_prior_to_revoke_ids.update(ids_to_add(fasta_id, config)) + # Write the header only if the file doesn't already exist + if not file_exists: + dict_writer.writeheader() - if found_seq_to_revoke: - revocation_notification(config, to_revoke) + dict_writer.writerow(data) - def write_to_tsv(data, filename): - if not data: - Path(filename).touch() - return - keys = data[0].keys() - with open(filename, "w", newline="", encoding="utf-8") as output_file: - dict_writer = csv.DictWriter(output_file, keys, delimiter="\t") - dict_writer.writeheader() - dict_writer.writerows(data) + columns_list, found_seq_to_revoke = None, False + for field in orjsonl.stream(metadata_path): + fasta_id = field["id"] + record = field["metadata"] + if not columns_list: + columns_list = record.keys() + + if fasta_id in to_submit: + write_to_tsv_stream(record, metadata_submit_path, columns_list) + submit_ids.update(ids_to_add(fasta_id, config)) + continue + + if fasta_id in to_revise: + record["accession"] = to_revise[fasta_id] + write_to_tsv_stream(record, metadata_revise_path, [*columns_list, "accession"]) + revise_ids.update(ids_to_add(fasta_id, config)) + continue + + # NOTE: found_seq_to_revoke is initialised once before the loop; resetting it per record would discard earlier revocations + if fasta_id in to_revoke: + submit_prior_to_revoke_ids.update(ids_to_add(fasta_id, config)) + write_to_tsv_stream(record, metadata_submit_prior_to_revoke_path, columns_list) + 
found_seq_to_revoke = True - write_to_tsv(metadata_submit, metadata_submit_path) - write_to_tsv(metadata_revise, metadata_revise_path) - write_to_tsv(metadata_submit_prior_to_revoke, metadata_submit_prior_to_revoke_path) + if found_seq_to_revoke: + revocation_notification(config, to_revoke) - def stream_filter_to_fasta(input, output, keep): + def stream_filter_to_fasta(input, output, output_metadata, keep): if len(keep) == 0: Path(output).touch() + Path(output_metadata).touch() return with open(output, "w", encoding="utf-8") as output_file: for record in orjsonl.stream(input): if record["id"] in keep: output_file.write(f">{record['id']}\n{record['sequence']}\n") - stream_filter_to_fasta(input=sequences_path, output=sequences_submit_path, keep=submit_ids) - stream_filter_to_fasta(input=sequences_path, output=sequences_revise_path, keep=revise_ids) + stream_filter_to_fasta( + input=sequences_path, + output=sequences_submit_path, + output_metadata=metadata_submit_path, + keep=submit_ids, + ) + stream_filter_to_fasta( + input=sequences_path, + output=sequences_revise_path, + output_metadata=metadata_revise_path, + keep=revise_ids, + ) stream_filter_to_fasta( input=sequences_path, output=sequences_submit_prior_to_revoke_path, + output_metadata=metadata_submit_prior_to_revoke_path, keep=submit_prior_to_revoke_ids, ) diff --git a/ingest/scripts/prepare_metadata.py b/ingest/scripts/prepare_metadata.py index 2f0bfe3e64..170b05bdfa 100644 --- a/ingest/scripts/prepare_metadata.py +++ b/ingest/scripts/prepare_metadata.py @@ -10,7 +10,6 @@ import json import logging from dataclasses import dataclass -from pathlib import Path import click import orjsonl @@ -143,11 +142,9 @@ def main( record["hash"] = hashlib.md5(prehash.encode(), usedforsecurity=False).hexdigest() - meta_dict = {rec[fasta_id_field]: rec for rec in metadata} + orjsonl.append(output, {"id": record[fasta_id_field], "metadata": record}) - Path(output).write_text(json.dumps(meta_dict, indent=4, sort_keys=True), 
encoding="utf-8") - - logging.info(f"Saved metadata for {len(metadata)} sequences") + logger.info(f"Saved metadata for {len(metadata)} sequences") if __name__ == "__main__": diff --git a/ingest/tests/expected_output_cchf/metadata_post_prepare.json b/ingest/tests/expected_output_cchf/metadata_post_prepare.json deleted file mode 100644 index 9a57b572e4..0000000000 --- a/ingest/tests/expected_output_cchf/metadata_post_prepare.json +++ /dev/null @@ -1,152 +0,0 @@ -{ - "KX013462.1": { - "authorAffiliations": "Chumakov Institute of Poliomyelitis and Viral Encephalitides", - "authors": "Lukashev, A. N.; Klimentov, A. S.; Smirnova, S. E.; Dzagurova, T. K.; Drexler, J. F.; Gmyl, A. P.", - "bioprojectAccession": "", - "biosampleAccession": "", - "geoLocAdmin1": "Astrakhan", - "geoLocCountry": "Russia", - "hash": "15c5b9d511b1c6c37a3ff43280f3f617", - "hostNameScientific": "Ixodoidea", - "hostTaxonId": "297308", - "insdcAccessionBase": "KX013462", - "insdcAccessionFull": "KX013462.1", - "insdcVersion": "1", - "isLabHost": "", - "ncbiReleaseDate": "2016-12-07T00:00:00Z", - "ncbiSourceDb": "GenBank", - "ncbiUpdateDate": "2016-12-07T00:00:00Z", - "ncbiVirusName": "Orthonairovirus haemorrhagiae", - "ncbiVirusTaxId": "3052518", - "sampleCollectionDate": "1989", - "segment": "L", - "specimenCollectorSampleId": "K229_194", - "insdcRawReadsAccession": "", - "submissionId": "KX013462.1" - }, - "KX013463.1": { - "authorAffiliations": "Chumakov Institute of Poliomyelitis and Viral Encephalitides", - "authors": "Lukashev, A. N.; Klimentov, A. S.; Smirnova, S. E.; Dzagurova, T. K.; Drexler, J. F.; Gmyl, A. 
P.", - "bioprojectAccession": "", - "biosampleAccession": "", - "geoLocAdmin1": "Astrakhan", - "geoLocCountry": "Russia", - "hash": "c11e1b7951a73b14f70403bed5cf2d10", - "hostNameScientific": "Ixodoidea", - "hostTaxonId": "297308", - "insdcAccessionBase": "KX013463", - "insdcAccessionFull": "KX013463.1", - "insdcVersion": "1", - "isLabHost": "", - "ncbiReleaseDate": "2016-12-07T00:00:00Z", - "ncbiSourceDb": "GenBank", - "ncbiUpdateDate": "2016-12-07T00:00:00Z", - "ncbiVirusName": "Orthonairovirus haemorrhagiae", - "ncbiVirusTaxId": "3052518", - "sampleCollectionDate": "1989", - "segment": "M", - "specimenCollectorSampleId": "K229_194", - "insdcRawReadsAccession": "", - "submissionId": "KX013463.1" - }, - "KX013464.1": { - "authorAffiliations": "Chumakov Institute of Poliomyelitis and Viral Encephalitides", - "authors": "Lukashev, A. N.; Klimentov, A. S.; Smirnova, S. E.; Dzagurova, T. K.; Drexler, J. F.; Gmyl, A. P.", - "bioprojectAccession": "", - "biosampleAccession": "", - "geoLocAdmin1": "Astrakhan", - "geoLocCountry": "Russia", - "hash": "7cd7b3085ef50ad49973b92979a21ee8", - "hostNameScientific": "Ixodoidea", - "hostTaxonId": "297308", - "insdcAccessionBase": "KX013464", - "insdcAccessionFull": "KX013464.1", - "insdcVersion": "1", - "isLabHost": "", - "ncbiReleaseDate": "2016-12-07T00:00:00Z", - "ncbiSourceDb": "GenBank", - "ncbiUpdateDate": "2016-12-07T00:00:00Z", - "ncbiVirusName": "Orthonairovirus haemorrhagiae", - "ncbiVirusTaxId": "3052518", - "sampleCollectionDate": "1989", - "segment": "S", - "specimenCollectorSampleId": "K229_194", - "insdcRawReadsAccession": "", - "submissionId": "KX013464.1" - }, - "KX013483.1": { - "authorAffiliations": "Chumakov Institute of Poliomyelitis and Viral Encephalitides", - "authors": "Lukashev, A. N.; Klimentov, A. S.; Smirnova, S. E.; Dzagurova, T. K.; Drexler, J. F.; Gmyl, A. 
P.", - "bioprojectAccession": "", - "biosampleAccession": "", - "geoLocAdmin1": "", - "geoLocCountry": "Uganda", - "hash": "7b10a4e21daa8a2e693958761be17d53", - "hostNameScientific": "Homo sapiens", - "hostTaxonId": "9606", - "insdcAccessionBase": "KX013483", - "insdcAccessionFull": "KX013483.1", - "insdcVersion": "1", - "isLabHost": "", - "ncbiReleaseDate": "2016-12-07T00:00:00Z", - "ncbiSourceDb": "GenBank", - "ncbiUpdateDate": "2016-12-07T00:00:00Z", - "ncbiVirusName": "Orthonairovirus haemorrhagiae", - "ncbiVirusTaxId": "3052518", - "sampleCollectionDate": "1958", - "segment": "L", - "specimenCollectorSampleId": "Nakiwogo", - "insdcRawReadsAccession": "", - "submissionId": "KX013483.1" - }, - "KX013485.1": { - "authorAffiliations": "Chumakov Institute of Poliomyelitis and Viral Encephalitides", - "authors": "Lukashev, A. N.; Klimentov, A. S.; Smirnova, S. E.; Dzagurova, T. K.; Drexler, J. F.; Gmyl, A. P.", - "bioprojectAccession": "", - "biosampleAccession": "", - "geoLocAdmin1": "", - "geoLocCountry": "Uganda", - "hash": "70954bc35782b5592858ac3f1a6bbf89", - "hostNameScientific": "Homo sapiens", - "hostTaxonId": "9606", - "insdcAccessionBase": "KX013485", - "insdcAccessionFull": "KX013485.1", - "insdcVersion": "1", - "isLabHost": "", - "ncbiReleaseDate": "2016-12-07T00:00:00Z", - "ncbiSourceDb": "GenBank", - "ncbiUpdateDate": "2016-12-07T00:00:00Z", - "ncbiVirusName": "Orthonairovirus haemorrhagiae", - "ncbiVirusTaxId": "3052518", - "sampleCollectionDate": "1958", - "segment": "S", - "specimenCollectorSampleId": "Nakiwogo", - "insdcRawReadsAccession": "", - "submissionId": "KX013485.1" - }, - "KX096703.1": { - "authorAffiliations": "Public Health England, Research", - "authors": "Deryabin, ; Atshabar, B.; Sansyzbaev, Y.; Berezin, V.; Nurmakhanov, T.; Yeskhojayev, O.; Vilkova, A.; Shevtsov, A.; Hewson, R.; Atkinson, B.", - "bioprojectAccession": "", - "biosampleAccession": "", - "geoLocAdmin1": "Sairam district", - "geoLocCountry": "Kazakhstan", - "hash": 
"cd17a5f4dcc98e7a2afd1ab9f30274bc", - "hostNameScientific": "Hyalomma anatolicum", - "hostTaxonId": "176092", - "insdcAccessionBase": "KX096703", - "insdcAccessionFull": "KX096703.1", - "insdcVersion": "1", - "isLabHost": "", - "ncbiReleaseDate": "2016-04-30T00:00:00Z", - "ncbiSourceDb": "GenBank", - "ncbiUpdateDate": "2016-04-30T00:00:00Z", - "ncbiVirusName": "Orthonairovirus haemorrhagiae", - "ncbiVirusTaxId": "3052518", - "sampleCollectionDate": "2015", - "segment": "S", - "specimenCollectorSampleId": "tick pool #134", - "insdcRawReadsAccession": "", - "submissionId": "KX096703.1" - } -} \ No newline at end of file diff --git a/ingest/tests/expected_output_cchf/metadata_post_prepare.ndjson b/ingest/tests/expected_output_cchf/metadata_post_prepare.ndjson new file mode 100644 index 0000000000..a0763d1ceb --- /dev/null +++ b/ingest/tests/expected_output_cchf/metadata_post_prepare.ndjson @@ -0,0 +1,6 @@ + { "id": "KX013462.1", "metadata": { "authorAffiliations": "Chumakov Institute of Poliomyelitis and Viral Encephalitides", "authors": "Lukashev, A. N.; Klimentov, A. S.; Smirnova, S. E.; Dzagurova, T. K.; Drexler, J. F.; Gmyl, A. P.", "bioprojectAccession": "", "biosampleAccession": "", "geoLocAdmin1": "Astrakhan", "geoLocCountry": "Russia", "hash": "15c5b9d511b1c6c37a3ff43280f3f617", "hostNameScientific": "Ixodoidea", "hostTaxonId": "297308", "insdcAccessionBase": "KX013462", "insdcAccessionFull": "KX013462.1", "insdcVersion": "1", "isLabHost": "", "ncbiReleaseDate": "2016-12-07T00:00:00Z", "ncbiSourceDb": "GenBank", "ncbiUpdateDate": "2016-12-07T00:00:00Z", "ncbiVirusName": "Orthonairovirus haemorrhagiae", "ncbiVirusTaxId": "3052518", "sampleCollectionDate": "1989", "segment": "L", "specimenCollectorSampleId": "K229_194", "insdcRawReadsAccession": "", "submissionId": "KX013462.1" } } + { "id": "KX013463.1", "metadata": { "authorAffiliations": "Chumakov Institute of Poliomyelitis and Viral Encephalitides", "authors": "Lukashev, A. N.; Klimentov, A. S.; Smirnova, S. 
E.; Dzagurova, T. K.; Drexler, J. F.; Gmyl, A. P.", "bioprojectAccession": "", "biosampleAccession": "", "geoLocAdmin1": "Astrakhan", "geoLocCountry": "Russia", "hash": "c11e1b7951a73b14f70403bed5cf2d10", "hostNameScientific": "Ixodoidea", "hostTaxonId": "297308", "insdcAccessionBase": "KX013463", "insdcAccessionFull": "KX013463.1", "insdcVersion": "1", "isLabHost": "", "ncbiReleaseDate": "2016-12-07T00:00:00Z", "ncbiSourceDb": "GenBank", "ncbiUpdateDate": "2016-12-07T00:00:00Z", "ncbiVirusName": "Orthonairovirus haemorrhagiae", "ncbiVirusTaxId": "3052518", "sampleCollectionDate": "1989", "segment": "M", "specimenCollectorSampleId": "K229_194", "insdcRawReadsAccession": "", "submissionId": "KX013463.1" } } + { "id": "KX013464.1", "metadata": { "authorAffiliations": "Chumakov Institute of Poliomyelitis and Viral Encephalitides", "authors": "Lukashev, A. N.; Klimentov, A. S.; Smirnova, S. E.; Dzagurova, T. K.; Drexler, J. F.; Gmyl, A. P.", "bioprojectAccession": "", "biosampleAccession": "", "geoLocAdmin1": "Astrakhan", "geoLocCountry": "Russia", "hash": "7cd7b3085ef50ad49973b92979a21ee8", "hostNameScientific": "Ixodoidea", "hostTaxonId": "297308", "insdcAccessionBase": "KX013464", "insdcAccessionFull": "KX013464.1", "insdcVersion": "1", "isLabHost": "", "ncbiReleaseDate": "2016-12-07T00:00:00Z", "ncbiSourceDb": "GenBank", "ncbiUpdateDate": "2016-12-07T00:00:00Z", "ncbiVirusName": "Orthonairovirus haemorrhagiae", "ncbiVirusTaxId": "3052518", "sampleCollectionDate": "1989", "segment": "S", "specimenCollectorSampleId": "K229_194", "insdcRawReadsAccession": "", "submissionId": "KX013464.1" } } + { "id": "KX013483.1", "metadata": { "authorAffiliations": "Chumakov Institute of Poliomyelitis and Viral Encephalitides", "authors": "Lukashev, A. N.; Klimentov, A. S.; Smirnova, S. E.; Dzagurova, T. K.; Drexler, J. F.; Gmyl, A. 
P.", "bioprojectAccession": "", "biosampleAccession": "", "geoLocAdmin1": "", "geoLocCountry": "Uganda", "hash": "7b10a4e21daa8a2e693958761be17d53", "hostNameScientific": "Homo sapiens", "hostTaxonId": "9606", "insdcAccessionBase": "KX013483", "insdcAccessionFull": "KX013483.1", "insdcVersion": "1", "isLabHost": "", "ncbiReleaseDate": "2016-12-07T00:00:00Z", "ncbiSourceDb": "GenBank", "ncbiUpdateDate": "2016-12-07T00:00:00Z", "ncbiVirusName": "Orthonairovirus haemorrhagiae", "ncbiVirusTaxId": "3052518", "sampleCollectionDate": "1958", "segment": "L", "specimenCollectorSampleId": "Nakiwogo", "insdcRawReadsAccession": "", "submissionId": "KX013483.1" } } + { "id": "KX013485.1", "metadata": { "authorAffiliations": "Chumakov Institute of Poliomyelitis and Viral Encephalitides", "authors": "Lukashev, A. N.; Klimentov, A. S.; Smirnova, S. E.; Dzagurova, T. K.; Drexler, J. F.; Gmyl, A. P.", "bioprojectAccession": "", "biosampleAccession": "", "geoLocAdmin1": "", "geoLocCountry": "Uganda", "hash": "70954bc35782b5592858ac3f1a6bbf89", "hostNameScientific": "Homo sapiens", "hostTaxonId": "9606", "insdcAccessionBase": "KX013485", "insdcAccessionFull": "KX013485.1", "insdcVersion": "1", "isLabHost": "", "ncbiReleaseDate": "2016-12-07T00:00:00Z", "ncbiSourceDb": "GenBank", "ncbiUpdateDate": "2016-12-07T00:00:00Z", "ncbiVirusName": "Orthonairovirus haemorrhagiae", "ncbiVirusTaxId": "3052518", "sampleCollectionDate": "1958", "segment": "S", "specimenCollectorSampleId": "Nakiwogo", "insdcRawReadsAccession": "", "submissionId": "KX013485.1" } } + { "id": "KX096703.1", "metadata": { "authorAffiliations": "Public Health England, Research", "authors": "Deryabin, ; Atshabar, B.; Sansyzbaev, Y.; Berezin, V.; Nurmakhanov, T.; Yeskhojayev, O.; Vilkova, A.; Shevtsov, A.; Hewson, R.; Atkinson, B.", "bioprojectAccession": "", "biosampleAccession": "", "geoLocAdmin1": "Sairam district", "geoLocCountry": "Kazakhstan", "hash": "cd17a5f4dcc98e7a2afd1ab9f30274bc", "hostNameScientific": "Hyalomma 
anatolicum", "hostTaxonId": "176092", "insdcAccessionBase": "KX096703", "insdcAccessionFull": "KX096703.1", "insdcVersion": "1", "isLabHost": "", "ncbiReleaseDate": "2016-04-30T00:00:00Z", "ncbiSourceDb": "GenBank", "ncbiUpdateDate": "2016-04-30T00:00:00Z", "ncbiVirusName": "Orthonairovirus haemorrhagiae", "ncbiVirusTaxId": "3052518", "sampleCollectionDate": "2015", "segment": "S", "specimenCollectorSampleId": "tick pool #134", "insdcRawReadsAccession": "", "submissionId": "KX096703.1" } } \ No newline at end of file diff --git a/ingest/tests/expected_output_cchf/revise_metadata.tsv b/ingest/tests/expected_output_cchf/revise_metadata.tsv new file mode 100644 index 0000000000..77fb9269a5 --- /dev/null +++ b/ingest/tests/expected_output_cchf/revise_metadata.tsv @@ -0,0 +1,2 @@ +authorAffiliations authors bioprojectAccession biosampleAccession geoLocAdmin1 geoLocCountry hostNameScientific hostTaxonId isLabHost ncbiReleaseDate ncbiSourceDb ncbiVirusName ncbiVirusTaxId sampleCollectionDate specimenCollectorSampleId ncbiUpdateDate_L ncbiUpdateDate_M ncbiUpdateDate_S insdcVersion_L insdcVersion_M insdcVersion_S insdcRawReadsAccession_L insdcRawReadsAccession_M insdcRawReadsAccession_S hash_L hash_M hash_S insdcAccessionBase_L insdcAccessionBase_M insdcAccessionBase_S insdcAccessionFull_L insdcAccessionFull_M insdcAccessionFull_S submissionId hash accession +Public Health England, Research Deryabin, ; Atshabar, B.; Sansyzbaev, Y.; Berezin, V.; Nurmakhanov, T.; Yeskhojayev, O.; Vilkova, A.; Shevtsov, A.; Hewson, R.; Atkinson, B. 
Sairam district Kazakhstan Hyalomma anatolicum 176092 2016-04-30T00:00:00Z GenBank Orthonairovirus haemorrhagiae 3052518 2015 tick pool #134 2016-04-30T00:00:00Z 1 cd17a5f4dcc98e7a2afd1ab9f30274bc KX096703 KX096703.1 KX096703.1.S 646cca576b1d24d397f5ecf0905fd5c1 LOC_0000VXA diff --git a/ingest/tests/expected_output_cchf/submit_metadata.tsv b/ingest/tests/expected_output_cchf/submit_metadata.tsv new file mode 100644 index 0000000000..140113d450 --- /dev/null +++ b/ingest/tests/expected_output_cchf/submit_metadata.tsv @@ -0,0 +1,2 @@ +authorAffiliations authors bioprojectAccession biosampleAccession geoLocAdmin1 geoLocCountry hostNameScientific hostTaxonId isLabHost ncbiReleaseDate ncbiSourceDb ncbiVirusName ncbiVirusTaxId sampleCollectionDate specimenCollectorSampleId ncbiUpdateDate_L ncbiUpdateDate_M ncbiUpdateDate_S insdcVersion_L insdcVersion_M insdcVersion_S insdcRawReadsAccession_L insdcRawReadsAccession_M insdcRawReadsAccession_S hash_L hash_M hash_S insdcAccessionBase_L insdcAccessionBase_M insdcAccessionBase_S insdcAccessionFull_L insdcAccessionFull_M insdcAccessionFull_S submissionId hash +Chumakov Institute of Poliomyelitis and Viral Encephalitides Lukashev, A. N.; Klimentov, A. S.; Smirnova, S. E.; Dzagurova, T. K.; Drexler, J. F.; Gmyl, A. P. 
Uganda Homo sapiens 9606 2016-12-07T00:00:00Z GenBank Orthonairovirus haemorrhagiae 3052518 1958 Nakiwogo 2016-12-07T00:00:00Z 2016-12-07T00:00:00Z 1 1 7b10a4e21daa8a2e693958761be17d53 70954bc35782b5592858ac3f1a6bbf89 KX013483 KX013485 KX013483.1 KX013485.1 KX013483.1.L/KX013485.1.S bb3a6d8df47cb2891e7b60030a40c335 diff --git a/ingest/tests/test_ingest.py b/ingest/tests/test_ingest.py index 543f66d4aa..fe99b82e67 100644 --- a/ingest/tests/test_ingest.py +++ b/ingest/tests/test_ingest.py @@ -4,6 +4,8 @@ import subprocess from pathlib import Path +import orjsonl +import pandas as pd import pytest # Define the paths to your test data and expected output @@ -35,6 +37,25 @@ def compare_json_files(file1, file2): return json1 == json2 +def compare_ndjson_files(file1, file2): + def create_dict_from_ndjson(file): + records = {} + for record in orjsonl.stream(file): + records[record["id"]] = record["metadata"] + return records + dict1 = create_dict_from_ndjson(file1) + dict2 = create_dict_from_ndjson(file2) + + return dict1 == dict2 + +def compare_tsv_files(file1, file2): + df1 = pd.read_csv(file1, sep="\t") + df2 = pd.read_csv(file2, sep="\t") + + # Compare the contents + return df1.sort_index(axis=1).equals(df2.sort_index(axis=1)) + + def run_snakemake(rule, touch=False): """ Function to run Snakemake with the test data. @@ -67,6 +88,7 @@ def test_snakemake(): run_snakemake("group_segments") run_snakemake("get_previous_submissions", touch=True) # Do not call_loculus run_snakemake("compare_hashes") + run_snakemake("prepare_files") # Check that the output files match the expected files for expected_file in EXPECTED_OUTPUT_DIR.glob("*.json"): output_file = OUTPUT_DIR / expected_file.name assert output_file.exists(), f"{output_file} does not exist." assert compare_json_files( expected_file, output_file, ), f"{output_file} does not match {expected_file}." + for expected_file in EXPECTED_OUTPUT_DIR.glob("*.tsv"): + output_file = OUTPUT_DIR / expected_file.name + assert output_file.exists(), f"{output_file} does not exist." 
+ assert compare_tsv_files( + expected_file, + output_file, + ), f"{output_file} does not match {expected_file}." + + for expected_file in EXPECTED_OUTPUT_DIR.glob("*.ndjson"): + output_file = OUTPUT_DIR / expected_file.name + assert output_file.exists(), f"{output_file} does not exist." + assert compare_ndjson_files( + expected_file, + output_file, + ), f"{output_file} does not match {expected_file}." + if __name__ == "__main__": pytest.main(["-v"])