pyiron · niklassiemer · Aug 19, 2025 · Aug 19, 2025
diff --git a/pyiron_rdm/OpenbisAixTended.py b/pyiron_rdm/OpenbisAixTended.py
@@ -767,15 +767,15 @@ def download(
         filename = self.file_list[0].split("/")[-1]
         if destination is None:
             destination = "data/" + self.openbis.download_prefix
-        if not os.path.exists(destination):
-            os.mkdir(destination)
 
         if create_default_folders:
             filename_dest = os.path.join(
                 destination, self.permId, "original", filename
             )
         else:
             filename_dest = os.path.join(destination, filename)
+
+        os.makedirs(os.path.dirname(filename_dest), exist_ok=True)
         try:
             max_retries = 5
             retries = 0

diff --git a/pyiron_rdm/ob_OT_bam.py b/pyiron_rdm/ob_OT_bam.py
@@ -107,7 +107,7 @@ def get_inv_parent(parent_name, cdict, props_dict, options):
     elif parent_name == "interatomic_potential":
         ob_type, where_clause, requested_attrs = intpot_par(cdict)
     elif parent_name == "pseudopotential":
-        ob_type, parents = pseudopot_par(options)
+        ob_type, permids = pseudopot_par(options)
     elif parent_name == "wf_reference":
         ob_type, ob_code = wfref_par(cdict)
 

diff --git a/pyiron_rdm/ob_OT_sfb1394.py b/pyiron_rdm/ob_OT_sfb1394.py
@@ -191,7 +191,7 @@ def pseudopotential_suggester(o, structure, **kwargs):
     return
 
 
-def slow_pseudopotential_matcher(
+def slow_pseudopotential_suggester(
     o, job
 ):  # could also take job['POTCAR'] instead? In case already loaded somewhere?
     potcar = job["POTCAR"]
@@ -229,56 +229,26 @@ def get_subsystems(chemsys: str) -> list:
     return ["-".join(combination) for combination in all_combinations]
 
 
-def crystalline_material_suggester(o, structure, tol: float = 0.02, space_group_number: int|None =None, match_subcomposition: bool =False, openbis_kwargs: dict|None = None):
-    """Suggests a list of crystalline materials from the openBIS inventory for a structure of interest.
-
-        Args: 
-            o (pybis.Openbis): The openBIS session object used to query crystalline materials.
-            structure (Atoms | str): The structure for which to find materials in the openBIS instance.
-            tol (float): Tolerance factor for matching chemical composition.
-                Materials are accepted if the absolute difference in atomic percentage for each element between 
-                the candidate and reference structure is less than `100 * tol`. For example, a `tol` of 0.01 (or 1%) 
-                means atomic percentages must be within +/- 1% of the reference.
-            space_group (int, optional): The space group number to filter materials by. Defaults to None.
-            match_subcomposition (bool, optional): Whether to search for materials for each subsystem based on their
-                composition and tolerance. Defaults to False.
-            openbis_kwargs (dict, optional): Expert feature. A dictionary of openBIS-specific codes and their values
-                to apply additional filtering. Defaults to None.
-        Returns:
-            pybis.things.Things: An openBIS query result object. The data can be accessed as a pandas DataFrame via
-                `.df` attribute.
-        """
-
-    openbis_kwargs = openbis_kwargs if openbis_kwargs is not None else {}
-
-    if isinstance(structure, str):
-        try:
-            from ase import Atoms
-            structure = Atoms(structure)
-        except ImportError:
-            raise ImportError('For the parsing of structure like strings, ase needs to be installed.')
-
-    chem_system = "-".join(sorted(set(structure.get_chemical_symbols())))
+def crystalline_material_suggester(o, structure, tol: float = 0.02, **kwargs):
+    # tolerance is a decimal number
+    chem_system = "-".join(sorted(structure.get_species_symbols()))
+    # space_group = 'SPACE_GROUP_' + str(structure.get_symmetry().spacegroup['Number'])
 
     # atomic composition of structure
-    species_dict = dict()
-    for i in structure.get_chemical_symbols():
-         species_dict[i] = species_dict.get(i, 0) + 1
+    species_dict = dict(structure.get_number_species_atoms())
     atomic_pct_dict = get_atomic_percent_dict(species_dict)
 
-
     # matching candidates from openBIS
     candidates = []
     for chemical_system in get_subsystems(chem_system):
-        where_dict = {"CHEMICAL_SYSTEM": chemical_system,}
-        prop_list = list(openbis_kwargs.keys()) + ["CHEMICAL_SYSTEM"]
-        if space_group_number is not None:
-            where_dict['SPACE_GROUP_SHORT'] =  'SPACE_GROUP_' + str(space_group_number)
-            prop_list += ['SPACE_GROUP_SHORT']
         candidates += o.get_objects(
             type="CRYSTALLINE_MATERIAL",
-            where=where_dict,
-            props=prop_list
+            where={
+                "CHEMICAL_SYSTEM": chemical_system,
+                # 'SPACE_GROUP_SHORT': space_group,
+            },
+            props=list(kwargs.keys()) + ["CHEMICAL_SYSTEM"],
+            # props=list(kwargs.keys()) + ['CHEMICAL_SYSTEM', 'SPACE_GROUP_SHORT']
         )
 
     # define properties to display
@@ -299,12 +269,7 @@ def crystalline_material_suggester(o, structure, tol: float = 0.02, space_group_
         from ast import literal_eval
 
         candidate_atomic_pct = literal_eval(atomic_pct)
-        subsystem_pct_dict = atomic_pct_dict.copy()
-
-        if match_subcomposition:
-            subsystem_pct_dict = get_atomic_percent_dict({k: atomic_pct_dict[k] for k in candidate_atomic_pct})
-        if is_within_tolerance(subsystem_pct_dict, candidate_atomic_pct, tol):
-            filtered.append(candidate.permId)            
+        if is_within_tolerance(atomic_pct_dict, candidate_atomic_pct, tol):
+            filtered.append(candidate.permId)
 
-    ob_objects= o.get_objects(permId=filtered, props=props)
-    return ob_objects
+    return o.get_objects(permId=filtered, props=props)
diff --git a/pyiron_rdm/ob_cfg_bam.py b/pyiron_rdm/ob_cfg_bam.py
@@ -1,4 +1,7 @@
-def format_json_string(json_string):
+def format_json_string(cdict_json):
+    import json
+
+    json_string = json.dumps(cdict_json, indent=2)
     json_string = json_string.replace('\n', '<br>')
     result = []
     for index, char in enumerate(json_string):
@@ -16,15 +19,11 @@ def map_cdict_to_ob(o, cdict, concept_dict):
     # cdict = flat concept_dict
 
     if 'structure_name' in cdict.keys():
-        json_file = cdict['path'] + cdict['structure_name'] + '_concept_dict.json'
         props = {}
     else:
-        json_file = cdict['path'] + '_concept_dict.json'
         props = {'bam_username': o.get_session_info().userName} # TODO can we avoid o as input?
 
-    with open(json_file, 'r') as file:
-        json_string = file.read()
-    json_string = format_json_string(json_string)
+    json_string = format_json_string(concept_dict)
 
     props |= {
         'conceptual_dictionary': json_string,

diff --git a/pyiron_rdm/ob_cfg_sfb1394.py b/pyiron_rdm/ob_cfg_sfb1394.py
@@ -1,4 +1,7 @@
-def format_json_string(json_string):
+def format_json_string(cdict_json):
+    import json
+
+    json_string = json.dumps(cdict_json, indent=2)
     json_string = json_string.replace("\n", "<br>")
     result = []
     for index, char in enumerate(json_string):
@@ -16,15 +19,11 @@ def map_cdict_to_ob(o, cdict, concept_dict):
     # cdict = flat concept_dict
 
     if "structure_name" in cdict.keys():
-        json_file = cdict["path"] + cdict["structure_name"] + "_concept_dict.json"
         props = {}
     else:
-        json_file = cdict["path"] + "_concept_dict.json"
         props = {"user_name": o.get_session_info().userName}
 
-    with open(json_file, "r") as file:
-        json_string = file.read()
-    json_string = format_json_string(json_string)
+    json_string = format_json_string(concept_dict)
 
     props |= {
         "pyiron_conceptual_dictionary": json_string,

diff --git a/pyiron_rdm/sfb_to_bam.py b/pyiron_rdm/sfb_to_bam.py
@@ -0,0 +1,222 @@
+import os
+import json
+from pathlib import Path
+import shutil
+
+from ob.classic import openbis_login
+from ob.ob_upload import openbis_validate
+from ob.ob_upload import openbis_upload_validated
+
+def rename_files_to_basename(path_in_cdict, dest_dir, extensions=('.h5', '.json', '.yml')):
+    """Rename the first file of each extension in ``dest_dir`` after ``path_in_cdict``.
+
+    The final component of ``path_in_cdict`` is the base name. A '.json' file becomes
+    '<base>_concept_dict.json', a '.yml' file '<base>_environment.yml', and any other
+    extension '<base><ext>'. Only one file per extension is expected: further matches are
+    left untouched, and an extension without any match is skipped.
+
+    Returns ``dest_dir`` joined with the base name, as a string.
+    """
+    folder = Path(dest_dir)
+    stem = Path(path_in_cdict).name
+    special_names = {'.json': f'{stem}_concept_dict.json', '.yml': f'{stem}_environment.yml'}
+    for suffix in extensions:
+        found = list(folder.glob(f'*{suffix}'))
+        if not found:
+            continue
+        shutil.move(str(found[0]), str(folder / special_names.get(suffix, f'{stem}{suffix}')))
+    return str(folder / stem)
+
+def get_structures(o, job_to_copy):
+    """Return (child, parent) structure objects for a job: the first 'SAMPLE' child of the
+    job and the first 'SAMPLE' parent of that child (the parent is the one uploaded to BAM).
+    Raises IndexError, naming the owning object, if no such child or parent id exists."""
+    def first_sample(ids, role, owner):
+        sample_id = next((i for i in ids if 'SAMPLE' in i), None)
+        if sample_id is None:
+            raise IndexError(f"No {role} with 'SAMPLE' in its identifier found for {owner}")
+        return o.get_object(sample_id)
+    structure_child = first_sample(job_to_copy.children, 'child', job_to_copy.permId)
+    return structure_child, first_sample(structure_child.parents, 'parent', structure_child.permId)
+
+def get_datasets(o, obj_to_copy, object_type='job', dest='temp'):
+    '''
+    Download the datasets attached to an openBIS object into ``dest``.
+
+    object_type: 'job' or 'structure' -> different dataset types
+        ('PYIRON_HDF5' or 'CRYS-STRUCT_DATA', each followed by 'PYIRON_CONCEPT_DICT_DATA')
+    dest: directory passed as ``destination`` to every dataset download
+
+    returns cdict_path = ``dest`` joined with the last '/'-segment of the first file listed
+    by the concept-dict dataset (the last such dataset wins), or '' if there is none
+
+    raises ValueError if object_type is neither 'job' nor 'structure'
+    '''
+    allowed_object_types = ['job', 'structure']
+    if object_type not in allowed_object_types:
+        raise ValueError(f'Arg object_type must be one of {allowed_object_types}, got: {object_type}.')
+
+    cdict_type = 'PYIRON_CONCEPT_DICT_DATA'
+    data_type = 'PYIRON_HDF5' if object_type == 'job' else 'CRYS-STRUCT_DATA'
+
+    cdict_path = ''
+    for ds_type in (data_type, cdict_type):
+        for listed in obj_to_copy.get_datasets(type=ds_type):
+            # Look the dataset up again by permId; download and file_list are used on that object.
+            dataset = o.get_dataset(listed.permId)
+            dataset.download(
+                destination=dest,
+                wait_until_finished=True,
+                create_default_folders=False,
+            )
+            if ds_type == cdict_type:
+                cdict_path = os.path.join(dest, dataset.file_list[0].split("/")[-1])
+    return cdict_path
+
+def get_pseudopotentials(o_bam, job_to_copy):
+    """Return permIds of BAM 'PSEUDOPOTENTIAL' objects whose code is the last '/'-segment of a
+    'POTENTIAL' parent identifier of the job; returns [] without querying if there is none."""
+    codes = [p_id.split('/')[-1] for p_id in job_to_copy.parents if 'POTENTIAL' in p_id]
+    # No codes, no query: an empty filter might not restrict the search at all (TODO confirm).
+    return [p.permId for p in o_bam.get_objects(type='PSEUDOPOTENTIAL', code=codes)] if codes else []
+
+def get_materials(o_sfb, o_bam, struct_to_copy):
+    """Return permIds of BAM 'MATERIAL_V1' objects whose '$name' equals that of each 'MATERIAL'
+    parent of the SFB structure (first hit per name); [] without querying if there is no parent."""
+    material_ids_sfb = [m_id for m_id in struct_to_copy.parents if 'MATERIAL' in m_id]
+    if not material_ids_sfb:
+        return []  # an empty id list might not restrict the SFB query at all (TODO confirm)
+    names = [m.p.get('$name') for m in o_sfb.get_objects(material_ids_sfb)]
+    return [o_bam.get_objects(type='MATERIAL_V1', where={'$name': n})[0].permId for n in names]
+
+def convert_and_upload(o, space, project, collection, cdict_path,
+                       dest='temp', parent_ids=None, options=None):
+    """Validate the concept dict stored at ``cdict_path`` and upload it as a new object.
+
+    The JSON is loaded and passed to ``openbis_validate``; only its first result is used. Files
+    in ``dest`` are renamed via ``rename_files_to_basename``; the dict's 'path' becomes '<dest>/'
+    if it has a 'structure_name', else the renamed base path. ``options`` defaults to a new
+    empty dict per call. Returns the result of ``openbis_upload_validated``.
+    """
+    with open(cdict_path, 'r') as f:
+        loaded_cdict = json.load(f)
+    validated = openbis_validate(o, space, project, collection, loaded_cdict,
+                                 options={} if options is None else options)
+    cdict, props_dict, object_type, ds_types, ob_parents, object_name = validated[0]
+    is_structure = 'structure_name' in cdict
+    source_path = cdict['structure_name'] if is_structure else cdict['path']
+    renamed = rename_files_to_basename(path_in_cdict=source_path, dest_dir=dest)
+    cdict['path'] = f'{dest}/' if is_structure else renamed
+    return openbis_upload_validated(o, space, project, collection, object_name, object_type,
+                                    ob_parents, props_dict, ds_types, cdict, parent_ids=parent_ids)
+
+def cleanup(dest):
+    """Recursively delete the directory ``dest``; do nothing if it is not an existing directory."""
+    download_dir = Path(dest)
+    if download_dir.is_dir():  # is_dir() is already False for a missing path
+        shutil.rmtree(download_dir)
+
+def sfb_to_bam(
+    username_bam, space_bam, project_bam, collection_bam,
+    username_sfb, s3_config_path, job_ids=None, collection_sfb=None,
+    include_structure=True, options=None
+):
+    """
+    Copies job objects and their datasets from the SFB openBIS instance to the BAM instance.
+
+    Parameters:
+    -----------
+    username_bam (str): The BAM instance username for authentication.
+    space_bam (str): The BAM space to upload the job(s) to.
+    project_bam (str): The BAM project to upload the job(s) to.
+    collection_bam (str): The BAM collection to upload the job(s) to.
+
+    username_sfb (str): The SFB instance username for authentication.
+    s3_config_path (str): Path to the configuration file containing S3 credentials
+                            for accessing the SFB instance.
+
+    job_ids (str or list, optional): The SFB job id(s) (string or list of strings) to copy
+                                      from the SFB instance. Default is None.
+
+    collection_sfb (str, optional): The SFB collection (full string) all of whose jobs will be
+                                    copied. If specified, it overrides 'job_ids'. Default is None.
+
+    include_structure (bool, optional): Whether to also copy the structure object and its datasets.
+                                        Defaults to True. Set to False for jobs like TableJob.
+
+    options (dict, optional): Additional upload options, same function as ob.upload_classic_pyiron.
+                                Specify 'materials' / 'pseudopotentials' (an id or a list of ids)
+                                to avoid the automatic matching. Defaults to None (no options).
+
+    Raises:
+    -------
+    ValueError: If neither 'job_ids' nor 'collection_sfb' is specified.
+
+    Notes:
+    ------
+    - If both 'job_ids' and 'collection_sfb' are specified, the 'collection_sfb' parameter takes
+    precedence, and all jobs in the specified collection will be copied.
+    - Files are staged in './temp'; that directory is deleted after every job, even on failure.
+    - Ids matched automatically for one job are not reused for the following jobs.
+    - Both openBIS sessions are logged out on return and on error.
+    """
+    if not (job_ids or collection_sfb):
+        raise ValueError('One of the following arguments must be specified: job_ids, collection_sfb')
+    options = {} if options is None else options
+    # job_ids to be either a string or list of strings of job permIds or identifiers
+    job_ids = [job_ids] if isinstance(job_ids, str) else job_ids
+    target = (space_bam, project_bam, collection_bam)
+
+    print('SFB openBIS login:')
+    o_sfb = openbis_login(url="https://openbis.imm.rwth-aachen.de/openbis/webapp/eln-lims/",
+                          username=username_sfb, instance="sfb1394", s3_config_path=s3_config_path)
+    try:
+        print('BAM openBIS login:')
+        o_bam = openbis_login(url="https://test3.datastore.bam.de/", username=username_bam, instance="bam")
+        try:
+            if collection_sfb:
+                jobs = o_sfb.get_objects(type='PYIRON_JOB*', collection=collection_sfb)
+                job_ids = [job.permId for job in jobs]
+            for job_id in job_ids:
+                _copy_job(o_sfb, o_bam, job_id, target, include_structure, options)
+        finally:
+            o_bam.logout()
+    finally:
+        o_sfb.logout()
+    print('Upload successful 🙂')
+
+
+def _copy_job(o_sfb, o_bam, job_id, target, include_structure, options, dest='temp'):
+    """Copy one SFB job (plus its two structures if include_structure) to the BAM (space, project,
+    collection) given as target; dest is deleted afterwards, also when a step raises."""
+    job_dest, child_dest, par_dest = f'{dest}/job', f'{dest}/struct_final', f'{dest}/struct'
+    try:
+        job = o_sfb.get_object(job_id)
+        job_cdict = get_datasets(o_sfb, job, object_type='job', dest=job_dest)
+
+        material_ids = options.get('materials', [])
+        material_ids = [material_ids] if isinstance(material_ids, str) else material_ids
+        if include_structure:
+            child, parent = get_structures(o_sfb, job)
+            child_cdict = get_datasets(o_sfb, child, object_type='structure', dest=child_dest)
+            par_cdict = get_datasets(o_sfb, parent, object_type='structure', dest=par_dest)
+            if not material_ids:
+                material_ids = get_materials(o_sfb, o_bam, parent)
+
+        pseudopot_ids = options.get('pseudopotentials', [])
+        pseudopot_ids = [pseudopot_ids] if isinstance(pseudopot_ids, str) else pseudopot_ids
+        if not pseudopot_ids:
+            pseudopot_ids = get_pseudopotentials(o_bam, job)
+        # New dict per job: storing the matched ids in `options` would reuse them for later jobs.
+        job_options = {**options, 'materials': material_ids, 'pseudopotentials': pseudopot_ids}
+
+        par_id = None
+        if include_structure:
+            par_id = convert_and_upload(o_bam, *target, par_cdict, dest=par_dest, options=job_options)
+        new_job_id = convert_and_upload(o_bam, *target, job_cdict, dest=job_dest,
+                                        parent_ids=par_id, options=job_options)
+        if include_structure:
+            convert_and_upload(o_bam, *target, child_cdict, dest=child_dest,
+                               parent_ids=new_job_id, options=job_options)
+    finally:
+        cleanup(dest)