diff --git a/pyiron_rdm/OpenbisAixTended.py b/pyiron_rdm/OpenbisAixTended.py
index 51c2d6d..1788736 100644
--- a/pyiron_rdm/OpenbisAixTended.py
+++ b/pyiron_rdm/OpenbisAixTended.py
@@ -767,8 +767,6 @@ def download(
filename = self.file_list[0].split("/")[-1]
if destination is None:
destination = "data/" + self.openbis.download_prefix
- if not os.path.exists(destination):
- os.mkdir(destination)
if create_default_folders:
filename_dest = os.path.join(
@@ -776,6 +774,8 @@ def download(
)
else:
filename_dest = os.path.join(destination, filename)
+
+ os.makedirs(os.path.dirname(filename_dest), exist_ok=True)
try:
max_retries = 5
retries = 0
diff --git a/pyiron_rdm/ob_OT_bam.py b/pyiron_rdm/ob_OT_bam.py
index f297a12..0be6fa0 100644
--- a/pyiron_rdm/ob_OT_bam.py
+++ b/pyiron_rdm/ob_OT_bam.py
@@ -107,7 +107,7 @@ def get_inv_parent(parent_name, cdict, props_dict, options):
elif parent_name == "interatomic_potential":
ob_type, where_clause, requested_attrs = intpot_par(cdict)
elif parent_name == "pseudopotential":
- ob_type, parents = pseudopot_par(options)
+ ob_type, permids = pseudopot_par(options)
elif parent_name == "wf_reference":
ob_type, ob_code = wfref_par(cdict)
diff --git a/pyiron_rdm/ob_OT_sfb1394.py b/pyiron_rdm/ob_OT_sfb1394.py
index 60eb88d..db687cf 100644
--- a/pyiron_rdm/ob_OT_sfb1394.py
+++ b/pyiron_rdm/ob_OT_sfb1394.py
@@ -191,7 +191,7 @@ def pseudopotential_suggester(o, structure, **kwargs):
return
-def slow_pseudopotential_matcher(
+def slow_pseudopotential_suggester(
o, job
): # could also take job['POTCAR'] instead? In case already loaded somewhere?
potcar = job["POTCAR"]
@@ -229,56 +229,26 @@ def get_subsystems(chemsys: str) -> list:
return ["-".join(combination) for combination in all_combinations]
-def crystalline_material_suggester(o, structure, tol: float = 0.02, space_group_number: int|None =None, match_subcomposition: bool =False, openbis_kwargs: dict|None = None):
- """Suggests a list of crystalline materials from the openBIS inventory for a structure of interest.
-
- Args:
- o (pybis.Openbis): The openBIS session object used to query crystalline materials.
- structure (Atoms | str): The structure for which to find materials in the openBIS instance.
- tol (float): Tolerance factor for matching chemical composition.
- Materials are accepted if the absolute difference in atomic percentage for each element between
- the candidate and reference structure is less than `100 * tol`. For example, a `tol` of 0.01 (or 1%)
- means atomic percentages must be within +/- 1% of the reference.
- space_group (int, optional): The space group number to filter materials by. Defaults to None.
- match_subcomposition (bool, optional): Whether to search for materials for each subsystem based on their
- composition and tolerance. Defaults to False.
- openbis_kwargs (dict, optional): Expert feature. A dictionary of openBIS-specific codes and their values
- to apply additional filtering. Defaults to None.
- Returns:
- pybis.things.Things: An openBIS query result object. The data can be accessed as a pandas DataFrame via
- `.df` attribute.
- """
-
- openbis_kwargs = openbis_kwargs if openbis_kwargs is not None else {}
-
- if isinstance(structure, str):
- try:
- from ase import Atoms
- structure = Atoms(structure)
- except ImportError:
- raise ImportError('For the parsing of structure like strings, ase needs to be installed.')
-
- chem_system = "-".join(sorted(set(structure.get_chemical_symbols())))
+def crystalline_material_suggester(o, structure, tol: float = 0.02, **kwargs):
+ # tolerance is a decimal number
+ chem_system = "-".join(sorted(structure.get_species_symbols()))
+ # space_group = 'SPACE_GROUP_' + str(structure.get_symmetry().spacegroup['Number'])
# atomic composition of structure
- species_dict = dict()
- for i in structure.get_chemical_symbols():
- species_dict[i] = species_dict.get(i, 0) + 1
+ species_dict = dict(structure.get_number_species_atoms())
atomic_pct_dict = get_atomic_percent_dict(species_dict)
-
# matching candidates from openBIS
candidates = []
for chemical_system in get_subsystems(chem_system):
- where_dict = {"CHEMICAL_SYSTEM": chemical_system,}
- prop_list = list(openbis_kwargs.keys()) + ["CHEMICAL_SYSTEM"]
- if space_group_number is not None:
- where_dict['SPACE_GROUP_SHORT'] = 'SPACE_GROUP_' + str(space_group_number)
- prop_list += ['SPACE_GROUP_SHORT']
candidates += o.get_objects(
type="CRYSTALLINE_MATERIAL",
- where=where_dict,
- props=prop_list
+ where={
+ "CHEMICAL_SYSTEM": chemical_system,
+ # 'SPACE_GROUP_SHORT': space_group,
+ },
+ props=list(kwargs.keys()) + ["CHEMICAL_SYSTEM"],
+ # props=list(kwargs.keys()) + ['CHEMICAL_SYSTEM', 'SPACE_GROUP_SHORT']
)
# define properties to display
@@ -299,12 +269,7 @@ def crystalline_material_suggester(o, structure, tol: float = 0.02, space_group_
from ast import literal_eval
candidate_atomic_pct = literal_eval(atomic_pct)
- subsystem_pct_dict = atomic_pct_dict.copy()
-
- if match_subcomposition:
- subsystem_pct_dict = get_atomic_percent_dict({k: atomic_pct_dict[k] for k in candidate_atomic_pct})
- if is_within_tolerance(subsystem_pct_dict, candidate_atomic_pct, tol):
- filtered.append(candidate.permId)
+ if is_within_tolerance(atomic_pct_dict, candidate_atomic_pct, tol):
+ filtered.append(candidate.permId)
- ob_objects= o.get_objects(permId=filtered, props=props)
- return ob_objects
+ return o.get_objects(permId=filtered, props=props)
diff --git a/pyiron_rdm/ob_cfg_bam.py b/pyiron_rdm/ob_cfg_bam.py
index c5fab21..a687338 100644
--- a/pyiron_rdm/ob_cfg_bam.py
+++ b/pyiron_rdm/ob_cfg_bam.py
@@ -1,4 +1,7 @@
-def format_json_string(json_string):
+def format_json_string(cdict_json):
+ import json
+
+ json_string = json.dumps(cdict_json, indent=2)
     json_string = json_string.replace('\n', '\\n')
result = []
for index, char in enumerate(json_string):
@@ -16,15 +19,11 @@ def map_cdict_to_ob(o, cdict, concept_dict):
# cdict = flat concept_dict
if 'structure_name' in cdict.keys():
- json_file = cdict['path'] + cdict['structure_name'] + '_concept_dict.json'
props = {}
else:
- json_file = cdict['path'] + '_concept_dict.json'
props = {'bam_username': o.get_session_info().userName} # TODO can we avoid o as input?
- with open(json_file, 'r') as file:
- json_string = file.read()
- json_string = format_json_string(json_string)
+ json_string = format_json_string(concept_dict)
props |= {
'conceptual_dictionary': json_string,
diff --git a/pyiron_rdm/ob_cfg_sfb1394.py b/pyiron_rdm/ob_cfg_sfb1394.py
index 28778df..b132209 100644
--- a/pyiron_rdm/ob_cfg_sfb1394.py
+++ b/pyiron_rdm/ob_cfg_sfb1394.py
@@ -1,4 +1,7 @@
-def format_json_string(json_string):
+def format_json_string(cdict_json):
+ import json
+
+ json_string = json.dumps(cdict_json, indent=2)
     json_string = json_string.replace("\n", "\\n")
result = []
for index, char in enumerate(json_string):
@@ -16,15 +19,11 @@ def map_cdict_to_ob(o, cdict, concept_dict):
# cdict = flat concept_dict
if "structure_name" in cdict.keys():
- json_file = cdict["path"] + cdict["structure_name"] + "_concept_dict.json"
props = {}
else:
- json_file = cdict["path"] + "_concept_dict.json"
props = {"user_name": o.get_session_info().userName}
- with open(json_file, "r") as file:
- json_string = file.read()
- json_string = format_json_string(json_string)
+ json_string = format_json_string(concept_dict)
props |= {
"pyiron_conceptual_dictionary": json_string,
diff --git a/pyiron_rdm/sfb_to_bam.py b/pyiron_rdm/sfb_to_bam.py
new file mode 100644
index 0000000..f776092
--- /dev/null
+++ b/pyiron_rdm/sfb_to_bam.py
@@ -0,0 +1,222 @@
+import os
+import json
+from pathlib import Path
+import shutil
+
+from ob.classic import openbis_login
+from ob.ob_upload import openbis_validate
+from ob.ob_upload import openbis_upload_validated
+
+# Assumes just one file with the extension !
+def rename_files_to_basename(path_in_cdict, dest_dir, extensions=('.h5', '.json', '.yml')):
+ dest_dir = Path(dest_dir)
+ base_name = Path(path_in_cdict).name
+
+ rename_map = {
+ '.json': f'{base_name}_concept_dict.json',
+ '.yml': f'{base_name}_environment.yml'
+ }
+
+ for ext in extensions:
+ matching_files = list(dest_dir.glob(f'*{ext}'))
+ if matching_files:
+ source = matching_files[0]
+ target_name = rename_map.get(ext, f'{base_name}{ext}')
+ target = dest_dir / target_name
+ shutil.move(str(source), str(target))
+ new_path_in_cdict = str(dest_dir / base_name)
+ return new_path_in_cdict
+
+def get_structures(o, job_to_copy):
+ # Parent structure = structure to upload to BAM
+ structure_child_id = [
+ ch_id for ch_id in job_to_copy.children if 'SAMPLE' in ch_id
+ ][0]
+ structure_child = o.get_object(structure_child_id)
+ structure_parent_id = [
+ p_id for p_id in o.get_object(structure_child_id).parents if 'SAMPLE' in p_id
+ ][0]
+ structure_parent = o.get_object(structure_parent_id)
+ return structure_child, structure_parent
+
+def get_datasets(o, obj_to_copy, object_type='job', dest='temp'):
+ '''
+ object_type: 'job' or 'structure' -> different dataset types
+
+ returns cdict_path = relative path to the concept_dict.json file
+ '''
+ cdict_path = ''
+
+ allowed_object_types = ['job', 'structure']
+ if object_type not in allowed_object_types:
+ raise ValueError(f'Arg object_type must be one of {allowed_object_types}, got: {object_type}.')
+
+ elif object_type == 'job':
+ ds_types = ['PYIRON_HDF5', 'PYIRON_CONCEPT_DICT_DATA']
+ elif object_type == 'structure':
+ ds_types = ['CRYS-STRUCT_DATA', 'PYIRON_CONCEPT_DICT_DATA']
+
+ for ds_type in ds_types:
+ datasets = obj_to_copy.get_datasets(type=ds_type)
+ for ds in datasets:
+ ds = o.get_dataset(ds.permId)
+ ds.download(
+ destination=dest,
+ wait_until_finished=True,
+ create_default_folders=False
+ )
+ if ds_type == 'PYIRON_CONCEPT_DICT_DATA':
+ cdict_path = os.path.join(
+ dest,
+ ds.file_list[0].split("/")[-1]
+ )
+
+ return cdict_path
+
+def get_pseudopotentials(o_bam, job_to_copy):
+ pseudopot_ids_sfb = [p_id for p_id in job_to_copy.parents if 'POTENTIAL' in p_id]
+ pseudopot_names = [p_id.split('/')[-1] for p_id in pseudopot_ids_sfb]
+ pseudopots_bam = o_bam.get_objects(type='PSEUDOPOTENTIAL', code=pseudopot_names)
+ pseudopot_ids_bam = [p.permId for p in pseudopots_bam]
+ return pseudopot_ids_bam
+
+def get_materials(o_sfb, o_bam, struct_to_copy):
+ material_ids_sfb = [m_id for m_id in struct_to_copy.parents if 'MATERIAL' in m_id]
+ materials_sfb = o_sfb.get_objects(material_ids_sfb)
+ mats_sfb = [m.p.get('$name') for m in materials_sfb]
+ material_ids_bam = [
+ o_bam.get_objects(type='MATERIAL_V1', where={'$name': mat})[0].permId for mat in mats_sfb
+ ]
+ return material_ids_bam
+
+def convert_and_upload(o, space, project, collection, cdict_path,
+ dest='temp', parent_ids=None, options={}):
+ with open(cdict_path, 'r') as f:
+ loaded_cdict = json.load(f)
+
+ # Returns a list of the following tuples for each cdict submitted
+ # cdict, props_dict, object_type, ds_types, ob_parents, object_name
+ validated_to_upload = openbis_validate(o, space, project, collection, loaded_cdict, options=options)
+ cdict, props_dict, object_type, ds_types, ob_parents, object_name = validated_to_upload[0]
+
+ # Rename datasets and change cdict path (locally) to match file location
+ path_in_cdict = cdict["structure_name"] if 'structure_name' in cdict.keys() else cdict['path']
+ cdict['path'] = rename_files_to_basename(path_in_cdict=path_in_cdict, dest_dir=dest)
+ cdict['path'] = f'{dest}/' if 'structure_name' in cdict.keys() else cdict['path']
+ object_id = openbis_upload_validated(
+ o, space, project, collection, object_name,
+ object_type, ob_parents, props_dict, ds_types,
+ cdict, parent_ids = parent_ids
+ )
+ return object_id
+
+# delete downloaded files
+def cleanup(dest):
+ dest_path = Path(dest)
+ if dest_path.exists() and dest_path.is_dir():
+ shutil.rmtree(dest_path)
+
+def sfb_to_bam(
+ username_bam, space_bam, project_bam, collection_bam,
+ username_sfb, s3_config_path, job_ids=None, collection_sfb=None,
+ include_structure=True, options={}
+):
+ """
+ Copies job objects and its datasets from the SFB openBIS instance to the BAM instance.
+
+ Parameters:
+ -----------
+ username_bam (str): The BAM instance username for authentication.
+ space_bam (str): The BAM space to upload the job(s) to.
+ project_bam (str): The BAM project to upload the job(s) to.
+ collection_bam (str): The BAM collection to upload the job(s) to.
+
+ username_sfb (str): The SFB instance username for authentication.
+ s3_config_path (str): Path to the configuration file containing S3 credentials
+ for accessing the SFB instance.
+
+ job_ids (str or list, optional): The SFB job id(s) (string or list of strings) to copy
+ from the SFB instance. Default is None.
+
+ collection_sfb (str, optional): The SFB collection (full string) whose all jobs will be copied.
+ If specified, it overwrites 'job_ids'. Default is None.
+
+ include_structure (bool, optional): Whether to also copy the structure object and its datasets.
+ Defaults to True. Set to False for jobs like TableJob.
+
+ options (dict, optional): Additional upload options, same function as ob.upload_classic_pyiron.
+ Specify 'materials' to avoid the automatic matching.
+ Defaults to an empty dictionary.
+
+ Notes:
+ ------
+ - If both 'job_ids' and 'collection_sfb' are specified, the 'collection_sfb' parameter takes
+ precedence, and all jobs in the specified collection will be copied.
+ """
+
+ if not (job_ids or collection_sfb):
+ raise ValueError('One of the following arguments must be specified: job_ids, collection_sfb')
+
+ # job_ids to be either a string or list of strings of job permIds or identifiers
+ job_ids = [job_ids] if isinstance(job_ids, str) else job_ids
+
+ print('SFB openBIS login:')
+ o_sfb = openbis_login(url="https://openbis.imm.rwth-aachen.de/openbis/webapp/eln-lims/",
+ username=username_sfb, instance="sfb1394", s3_config_path=s3_config_path)
+ print('BAM openBIS login:')
+ o_bam = openbis_login(url="https://test3.datastore.bam.de/", username=username_bam, instance="bam")
+
+ if collection_sfb:
+ job_ids = [job.permId for job in o_sfb.get_objects(type='PYIRON_JOB*', collection=collection_sfb)]
+
+ dest = 'temp'
+ job_dest = f'{dest}/job'
+ child_struct_dest = f'{dest}/struct_final'
+ par_struct_dest = f'{dest}/struct'
+
+ for job_id in job_ids:
+ job_to_copy = o_sfb.get_object(job_id)
+ job_cdict_path = get_datasets(o_sfb, job_to_copy, object_type='job', dest=job_dest)
+
+ material_ids = options.get('materials', [])
+ material_ids = [material_ids] if isinstance(material_ids, str) else material_ids
+ if include_structure:
+ child_struct_to_copy, par_struct_to_copy = get_structures(o_sfb, job_to_copy)
+ child_struct_cdict_path = get_datasets(o_sfb, child_struct_to_copy, object_type='structure', dest=child_struct_dest)
+ par_struct_cdict_path = get_datasets(o_sfb, par_struct_to_copy, object_type='structure', dest=par_struct_dest)
+
+ if not material_ids:
+ material_ids = get_materials(o_sfb, o_bam, par_struct_to_copy)
+
+ pseudopot_ids = options.get('pseudopotentials', [])
+ pseudopot_ids = [pseudopot_ids] if isinstance(pseudopot_ids, str) else pseudopot_ids
+ if not pseudopot_ids:
+ pseudopot_ids = get_pseudopotentials(o_bam, job_to_copy)
+ options = {
+ 'materials': material_ids,
+ 'pseudopotentials' : pseudopot_ids
+ }
+
+ par_struct_identifier = None
+ if include_structure:
+ par_struct_identifier = convert_and_upload(
+ o_bam, space_bam, project_bam, collection_bam, par_struct_cdict_path,
+ dest = par_struct_dest, parent_ids = None, options = options
+ )
+ job_identifier = convert_and_upload(
+ o_bam, space_bam, project_bam, collection_bam, job_cdict_path,
+ dest = job_dest, parent_ids = par_struct_identifier, options = options
+ )
+ if include_structure:
+ child_struct_identifier = convert_and_upload(
+ o_bam, space_bam, project_bam, collection_bam, child_struct_cdict_path,
+ dest = child_struct_dest, parent_ids = job_identifier, options = options
+ )
+
+
+ cleanup(dest)
+
+ o_sfb.logout()
+ o_bam.logout()
+
+ print('Upload successful 🙂')
\ No newline at end of file