Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions client/src/api/schema/schema.ts
Original file line number Diff line number Diff line change
Expand Up @@ -14258,6 +14258,11 @@ export interface components {
* @description Parameter name. Used when referencing parameter in workflows.
*/
name?: string | null;
/**
* precreate_directory
* @default false
*/
precreate_directory: boolean | null;
/**
* @description discriminator enum property added by openapi-typescript
* @enum {string}
Expand Down
9 changes: 9 additions & 0 deletions client/src/components/Tool/.claude/settings.local.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
{
"permissions": {
"allow": [
"Read(//Users/mvandenb/src/galaxy/lib/galaxy/tool_util_models/**)"
],
"deny": [],
"ask": []
}
}
2 changes: 1 addition & 1 deletion client/src/components/Tool/ToolSourceSchema.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion lib/galaxy/dependencies/pinned-requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,7 @@ proto-plus==1.26.1
protobuf==6.32.1
prov==1.5.1
psutil==7.1.0
pulsar-galaxy-lib==0.15.10
pulsar-galaxy-lib==0.15.11
pyasn1==0.6.1
pyasn1-modules==0.4.2
pycparser==2.23 ; (implementation_name != 'PyPy' and platform_python_implementation != 'PyPy') or (implementation_name == 'pypy' and platform_python_implementation == 'PyPy')
Expand Down
8 changes: 7 additions & 1 deletion lib/galaxy/job_execution/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -288,12 +288,18 @@ def compute_outputs(self) -> None:
if da_false_path and not os.path.exists(da_false_path):
with open(da_false_path, "ab"):
pass
real_path = da.dataset.get_file_name(sync_cache=False)
false_extra_files_path = os.path.join(
os.path.dirname(da_false_path or real_path), da.dataset.dataset.extra_files_path_name
)

mutable = da.dataset.dataset.external_filename is None
dataset_path = DatasetPath(
da.dataset.dataset.id,
da.dataset.get_file_name(sync_cache=False),
real_path,
false_path=da_false_path,
mutable=mutable,
false_extra_files_path=false_extra_files_path,
)
job_outputs.append(JobOutput(da.name, da.dataset, dataset_path))

Expand Down
2 changes: 1 addition & 1 deletion lib/galaxy/jobs/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3015,7 +3015,7 @@ def setup_external_metadata(
def get_output_destination(self, output_path):
"""
Destination for outputs marked as from_work_dir. These must be copied with
the same basenme as the path for the ultimate output destination. This is
the same basename as the path for the ultimate output destination. This is
required in the task case so they can be merged.
"""
return os.path.join(self.working_directory, os.path.basename(output_path))
Expand Down
6 changes: 5 additions & 1 deletion lib/galaxy/jobs/command_factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -288,13 +288,17 @@ def __handle_metadata(

def __copy_if_exists_command(work_dir_output):
    """Return a shell snippet that copies a work-dir output to its destination.

    ``work_dir_output`` is a ``(source_file, destination)`` pair of paths.
    The generated command copies only when both source and destination
    already exist: users can purge outputs before the job completes, and in
    that case we must not copy the output to a purged path.  Static,
    non-work_dir_output files are handled in job_finish code.
    """
    source_file, destination = work_dir_output
    # Destinations ending in "_files" are (precreated) extra-files
    # directories; everything else is a regular dataset file.
    is_directory = destination.endswith("_files")
    test_flag = "-d" if is_directory else "-f"
    recursive_flag = " -r" if is_directory else ""
    # The destination directory is precreated (empty) so the -d test can
    # succeed; remove it just before copying so "cp -r" recreates it rather
    # than nesting the source inside it.  Path is quoted so spaces are safe.
    # NOTE(review): rmdir fails on a non-empty directory (e.g. leftovers
    # from a resubmitted job), in which case cp nests — confirm intended.
    delete_destination_dir = f' rmdir "{destination}"; ' if is_directory else ""
    if "?" in source_file or "*" in source_file:
        # Close/reopen the surrounding double quotes around glob characters
        # so the shell expands them while the rest of the path stays quoted.
        source_file = source_file.replace("*", '"*"').replace("?", '"?"')
    return f'\nif [ {test_flag} "{source_file}" -a {test_flag} "{destination}" ] ; then{delete_destination_dir} cp{recursive_flag} "{source_file}" "{destination}" ; fi'
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does this ensure the directories are created for resubmitted jobs? My bash-fu is lacking but I'm guessing yes - I just want to check you've thought through partial directories from previous jobs.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I have definitely not thought about that, but I can add a test and see where we are. I presume we should delete and create those directories on resubmission ?



class CommandsBuilder:
Expand Down
12 changes: 10 additions & 2 deletions lib/galaxy/jobs/runners/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -373,11 +373,13 @@ def get_work_dir_outputs(
# Set up dict of dataset id --> output path; output path can be real or
# false depending on outputs_to_working_directory
output_paths = {}
output_extra_paths = {}
for dataset_path in job_wrapper.job_io.get_output_fnames():
path = dataset_path.real_path
if asbool(job_wrapper.get_destination_configuration("outputs_to_working_directory", False)):
path = dataset_path.false_path
output_paths[dataset_path.dataset_id] = path
output_extra_paths[dataset_path.dataset_id] = dataset_path.false_extra_files_path

output_pairs = []
# Walk job's output associations to find and use from_work_dir attributes.
Expand All @@ -397,9 +399,15 @@ def get_work_dir_outputs(
# Copy from working dir to HDA.
# TODO: move instead of copy to save time?
source_file = os.path.join(tool_working_directory, hda_tool_output.from_work_dir)
destination = job_wrapper.get_output_destination(output_paths[dataset.dataset_id])
if hda_tool_output.precreate_directory:
# precreate directory, allows using `-d` check to avoid copying data to purged outputs
dataset.dataset.create_extra_files_path()
output_path = output_extra_paths[dataset.dataset_id]
os.makedirs(output_path, exist_ok=True)
else:
output_path = output_paths[dataset.dataset_id]
if in_directory(source_file, tool_working_directory):
output_pairs.append((source_file, destination))
output_pairs.append((source_file, job_wrapper.get_output_destination(output_path)))
else:
# Security violation.
log.exception(
Expand Down
6 changes: 6 additions & 0 deletions lib/galaxy/tool_util/parser/output_objects.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ class ToolOutputBase(Dictifiable):
name: str
label: Optional[str]
hidden: bool
precreate_directory: bool

def __init__(
self,
Expand All @@ -70,6 +71,7 @@ def __init__(
self.hidden = hidden
self.collection = False
self.from_expression = from_expression
self.precreate_directory = False

def to_dict(self, view="collection", value_mapper=None, app=None):
return super().to_dict(view=view, value_mapper=value_mapper)
Expand Down Expand Up @@ -131,6 +133,7 @@ def __init__(
self.change_format: List[ChangeFormatModel] = []
self.implicit = implicit
self.from_work_dir: Optional[str] = None
self.precreate_directory: bool = False
self.dataset_collector_descriptions: List[DatasetCollectionDescription] = []
self.default_identifier_source: Optional[str] = None
self.count: Optional[int] = None
Expand Down Expand Up @@ -178,6 +181,7 @@ def to_model(self) -> ToolOutputDataModel:
metadata_source=self.metadata_source,
discover_datasets=[d.to_model() for d in self.dataset_collector_descriptions],
from_work_dir=self.from_work_dir,
precreate_directory=self.precreate_directory,
)

@staticmethod
Expand All @@ -193,6 +197,7 @@ def from_dict(name: str, output_dict: Dict[str, Any], app: Optional[ToolOutputAc
output.count = output_dict.get("count", 1)
output.filters = []
output.from_work_dir = output_dict.get("from_work_dir")
output.precreate_directory = output_dict.get("precreate_directory") or False
output.hidden = output_dict.get("hidden") or False
# TODO: implement tool output action group fixes
if app is not None:
Expand Down Expand Up @@ -223,6 +228,7 @@ def __init__(self, name, output_type, from_expression, label=None, filters=None,
self.change_format = []
self.implicit = False
self.from_work_dir = None
self.precreate_directory = False

self.dataset_collector_descriptions = []

Expand Down
1 change: 1 addition & 0 deletions lib/galaxy/tool_util/parser/xml.py
Original file line number Diff line number Diff line change
Expand Up @@ -589,6 +589,7 @@ def _parse_output(
output.count = int(data_elem.get("count", 1))
output.filters = data_elem.findall("filter")
output.from_work_dir = data_elem.get("from_work_dir", None)
output.precreate_directory = data_elem.get("precreate_directory") or False
profile_version = Version(self.parse_profile())
if output.from_work_dir and profile_version < Version("21.09"):
# We started quoting from_work_dir outputs in 21.09.
Expand Down
13 changes: 9 additions & 4 deletions lib/galaxy/tool_util/xsd/galaxy.xsd
Original file line number Diff line number Diff line change
Expand Up @@ -6041,9 +6041,14 @@ One prominent use case is interval data with a non-standard column order that ca
</xs:attribute>
<xs:attribute name="from_work_dir" type="xs:string">
<xs:annotation>
<xs:documentation xml:lang="en">Relative path to a file produced by the
tool in its working directory. Output's contents are set to this file's
contents. The behaviour when this file does not exist in the working directory is undefined; the resulting dataset could be empty or the tool execution could fail.</xs:documentation>
<xs:documentation xml:lang="en">Relative path to a file or directory produced by the
tool in its working directory. Output's contents are set to this path's
contents. The behaviour when this path does not exist in the working directory is undefined; the resulting dataset could be empty or the tool execution could fail. To collect directory contents set ``precreate_directory`` to true.</xs:documentation>
</xs:annotation>
</xs:attribute>
<xs:attribute name="precreate_directory" type="xs:boolean" default="false">
<xs:annotation>
<xs:documentation xml:lang="en">Boolean indicating whether to precreate output directory. (Default is ``false``.)</xs:documentation>
</xs:annotation>
</xs:attribute>
<xs:attribute name="hidden" type="xs:boolean" default="false">
Expand Down Expand Up @@ -6346,7 +6351,7 @@ derive collection's type (e.g. ``collection_type``) from.</xs:documentation>
``<data>`` and ``<collection>`` tags can contain one or more ``<filter>`` tags. Each ``<filter>`` tag contains a Python code
block to be executed to test whether to include this output in the outputs the
tool ultimately creates. If the code of each of these filters, when executed, returns ``True``,
the output dataset is retained, i.e. the output is excluded if at least one evaluates to ``False``.
the output dataset is retained, i.e. the output is excluded if at least one evaluates to ``False``.
In these code blocks the tool parameters appear
as Python variables and are thus referred to without the $ used for the Cheetah
template (used in the ``<command>`` tag). Variables that are part of
Expand Down
1 change: 1 addition & 0 deletions lib/galaxy/tool_util_models/tool_outputs.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,7 @@ class GenericToolOutputDataset(
description="Relative path to a file produced by the tool in its working directory. Output’s contents are set to this file’s contents.",
),
] = None
precreate_directory: Optional[bool] = False


class ToolOutputDataset(GenericToolOutputDataset[bool, str]): ...
Expand Down
5 changes: 5 additions & 0 deletions lib/tool_shed/webapp/frontend/src/schema/schema.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3180,6 +3180,11 @@ export interface components {
* @description Parameter name. Used when referencing parameter in workflows.
*/
name: string
/**
* precreate_directory
* @default false
*/
precreate_directory: boolean | null
/**
* @description discriminator enum property added by openapi-typescript
* @enum {string}
Expand Down
6 changes: 3 additions & 3 deletions test/functional/tools/create_directory_index.xml
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
<tool id="create_directory_index" name="Create directory index" version="1.0" profile="24.2">
<command><![CDATA[
mkdir '$index.extra_files_path' &&
cp '$reference' '$index.extra_files_path/1.fasta'
mkdir 'out_dir' &&
cp '$reference' 'out_dir/1.fasta'
]]></command>
<inputs>
<param name="reference" type="data" label="Select a genome to index" help="Build an index for this FASTA sequence." format="fasta"/>
</inputs>
<outputs>
<data name="index" format="bwa_mem2_index"/>
<data name="index" format="bwa_mem2_index" precreate_directory="true" from_work_dir="out_dir"/>
</outputs>
<tests>
<test>
Expand Down
1 change: 1 addition & 0 deletions test/integration/test_extended_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@
"environment_variables",
"all_output_types",
"discover_sort_by",
"create_directory_index",
]


Expand Down
2 changes: 1 addition & 1 deletion test/integration/test_job_outputs_to_working_directory.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,5 +17,5 @@ def handle_galaxy_config_kwds(cls, config):
instance = integration_util.integration_module_instance(JobOutputsToWorkingDirectoryIntegrationInstance)

test_tools = integration_util.integration_tool_runner(
["output_format", "output_empty_work_dir", "collection_creates_pair_from_work_dir"]
["output_format", "output_empty_work_dir", "collection_creates_pair_from_work_dir", "create_directory_index"]
)
1 change: 1 addition & 0 deletions test/integration/test_pulsar_embedded.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,5 +45,6 @@ def handle_galaxy_config_kwds(cls, config):
"detect_errors",
"tool_directory_copy",
"metadata_columns",
"create_directory_index",
]
)
Loading