diff --git a/src/nexgen/tools/VDS_tools.py b/src/nexgen/tools/VDS_tools.py
index d99515b5..bef9b90f 100644
--- a/src/nexgen/tools/VDS_tools.py
+++ b/src/nexgen/tools/VDS_tools.py
@@ -113,7 +113,10 @@ def split_datasets(
             source_shape=(min(MAX_FRAMES_PER_DATASET, full_frames), *data_shape[1:]),
             start_index=min(MAX_FRAMES_PER_DATASET, max(int(start_idx), 0)),
         )
-        result.append(dset)
+        # If start_index == MAX_FRAMES_PER_DATASET, the VDS starts at or past the end of this
+        # source dataset, so it is not used and must not be passed on as a source for the VDS.
+        if dset.start_index != MAX_FRAMES_PER_DATASET:
+            result.append(dset)
         start_idx -= MAX_FRAMES_PER_DATASET
         full_frames -= MAX_FRAMES_PER_DATASET
 
@@ -227,3 +230,33 @@ def vds_file_writer(
         vds.create_virtual_dataset("data", layout, fillvalue=-1)
     nxdata["data"] = h5py.ExternalLink(vds_filename.name, "data")
     vds_logger.info(f"{vds_filename} written and link added to NeXus file.")
+
+
+def clean_unused_links(
+    nxsfile: h5py.File,
+    vds_shape: Union[Tuple, List],
+    start_index: int = 0,
+):
+    """
+    Remove links to external data not used in VDS
+
+    A link is unused if its frames all lie outside [start_index, start_index + vds_shape[0]).
+
+    Args:
+        nxsfile (h5py.File): Handle to NeXus file being written.
+        vds_shape (Union[Tuple, List]): Shape of the full dataset, usually defined as (num_frames, *image_size).
+        start_index(int): The start point for the source data. Defaults to 0.
+    """
+    vds_logger.info("Cleaning links unused in VDS ...")
+    nxdata = nxsfile["/entry/data"]  # Where the VDS will go
+    dataset_names = find_datasets_in_file(nxdata)
+    dataset_lengths = [nxdata[name].shape[0] for name in dataset_names]  # read before deleting
+    vds_end = start_index + vds_shape[0]
+    first_frame = 0  # offset of the current dataset within the concatenated source data
+    for name, length in zip(dataset_names, dataset_lengths):
+        # Inclusive bounds: a dataset ending exactly at start_index, or starting exactly at
+        # the VDS end, holds no VDS frames. A single test also avoids deleting a link twice.
+        if first_frame + length <= start_index or first_frame >= vds_end:
+            del nxdata[name]
+        first_frame += length
+    vds_logger.info("Links unused in VDS removed from NeXus file.")