diff --git a/pyiron_rdm/OpenbisAixTended.py b/pyiron_rdm/OpenbisAixTended.py index 51c2d6d..1788736 100644 --- a/pyiron_rdm/OpenbisAixTended.py +++ b/pyiron_rdm/OpenbisAixTended.py @@ -767,8 +767,6 @@ def download( filename = self.file_list[0].split("/")[-1] if destination is None: destination = "data/" + self.openbis.download_prefix - if not os.path.exists(destination): - os.mkdir(destination) if create_default_folders: filename_dest = os.path.join( @@ -776,6 +774,8 @@ def download( ) else: filename_dest = os.path.join(destination, filename) + + os.makedirs(os.path.dirname(filename_dest), exist_ok=True) try: max_retries = 5 retries = 0 diff --git a/pyiron_rdm/ob_OT_bam.py b/pyiron_rdm/ob_OT_bam.py index f297a12..0be6fa0 100644 --- a/pyiron_rdm/ob_OT_bam.py +++ b/pyiron_rdm/ob_OT_bam.py @@ -107,7 +107,7 @@ def get_inv_parent(parent_name, cdict, props_dict, options): elif parent_name == "interatomic_potential": ob_type, where_clause, requested_attrs = intpot_par(cdict) elif parent_name == "pseudopotential": - ob_type, parents = pseudopot_par(options) + ob_type, permids = pseudopot_par(options) elif parent_name == "wf_reference": ob_type, ob_code = wfref_par(cdict) diff --git a/pyiron_rdm/ob_OT_sfb1394.py b/pyiron_rdm/ob_OT_sfb1394.py index 60eb88d..db687cf 100644 --- a/pyiron_rdm/ob_OT_sfb1394.py +++ b/pyiron_rdm/ob_OT_sfb1394.py @@ -191,7 +191,7 @@ def pseudopotential_suggester(o, structure, **kwargs): return -def slow_pseudopotential_matcher( +def slow_pseudopotential_suggester( o, job ): # could also take job['POTCAR'] instead? In case already loaded somewhere? potcar = job["POTCAR"] @@ -229,56 +229,26 @@ def get_subsystems(chemsys: str) -> list: return ["-".join(combination) for combination in all_combinations] -def crystalline_material_suggester(o, structure, tol: float = 0.02, space_group_number: int|None =None, match_subcomposition: bool =False, openbis_kwargs: dict|None = None): - """Suggests a list of crystalline materials from the openBIS inventory for a structure of interest. - - Args: - o (pybis.Openbis): The openBIS session object used to query crystalline materials. - structure (Atoms | str): The structure for which to find materials in the openBIS instance. - tol (float): Tolerance factor for matching chemical composition. - Materials are accepted if the absolute difference in atomic percentage for each element between - the candidate and reference structure is less than `100 * tol`. For example, a `tol` of 0.01 (or 1%) - means atomic percentages must be within +/- 1% of the reference. - space_group (int, optional): The space group number to filter materials by. Defaults to None. - match_subcomposition (bool, optional): Whether to search for materials for each subsystem based on their - composition and tolerance. Defaults to False. - openbis_kwargs (dict, optional): Expert feature. A dictionary of openBIS-specific codes and their values - to apply additional filtering. Defaults to None. - Returns: - pybis.things.Things: An openBIS query result object. The data can be accessed as a pandas DataFrame via - `.df` attribute. - """ - - openbis_kwargs = openbis_kwargs if openbis_kwargs is not None else {} - - if isinstance(structure, str): - try: - from ase import Atoms - structure = Atoms(structure) - except ImportError: - raise ImportError('For the parsing of structure like strings, ase needs to be installed.') - - chem_system = "-".join(sorted(set(structure.get_chemical_symbols()))) +def crystalline_material_suggester(o, structure, tol: float = 0.02, **kwargs): + # tolerance is a decimal number + chem_system = "-".join(sorted(structure.get_species_symbols())) + # space_group = 'SPACE_GROUP_' + str(structure.get_symmetry().spacegroup['Number']) # atomic composition of structure - species_dict = dict() - for i in structure.get_chemical_symbols(): - species_dict[i] = species_dict.get(i, 0) + 1 + species_dict = dict(structure.get_number_species_atoms()) atomic_pct_dict = get_atomic_percent_dict(species_dict) - # matching candidates from openBIS candidates = [] for chemical_system in get_subsystems(chem_system): - where_dict = {"CHEMICAL_SYSTEM": chemical_system,} - prop_list = list(openbis_kwargs.keys()) + ["CHEMICAL_SYSTEM"] - if space_group_number is not None: - where_dict['SPACE_GROUP_SHORT'] = 'SPACE_GROUP_' + str(space_group_number) - prop_list += ['SPACE_GROUP_SHORT'] candidates += o.get_objects( type="CRYSTALLINE_MATERIAL", - where=where_dict, - props=prop_list + where={ + "CHEMICAL_SYSTEM": chemical_system, + # 'SPACE_GROUP_SHORT': space_group, + }, + props=list(kwargs.keys()) + ["CHEMICAL_SYSTEM"], + # props=list(kwargs.keys()) + ['CHEMICAL_SYSTEM', 'SPACE_GROUP_SHORT'] ) # define properties to display @@ -299,12 +269,7 @@ def crystalline_material_suggester(o, structure, tol: float = 0.02, space_group_ from ast import literal_eval candidate_atomic_pct = literal_eval(atomic_pct) - subsystem_pct_dict = atomic_pct_dict.copy() - - if match_subcomposition: - subsystem_pct_dict = get_atomic_percent_dict({k: atomic_pct_dict[k] for k in candidate_atomic_pct}) - if is_within_tolerance(subsystem_pct_dict, candidate_atomic_pct, tol): - filtered.append(candidate.permId) + if is_within_tolerance(atomic_pct_dict, candidate_atomic_pct, tol): + filtered.append(candidate.permId) - ob_objects= o.get_objects(permId=filtered, props=props) - return ob_objects + return o.get_objects(permId=filtered, props=props) diff --git a/pyiron_rdm/ob_cfg_bam.py b/pyiron_rdm/ob_cfg_bam.py index c5fab21..a687338 100644 --- a/pyiron_rdm/ob_cfg_bam.py +++ b/pyiron_rdm/ob_cfg_bam.py @@ -1,4 +1,7 @@ -def format_json_string(json_string): +def format_json_string(cdict_json): + import json + + json_string = json.dumps(cdict_json, indent=2) json_string = json_string.replace('\n', '
') result = [] for index, char in enumerate(json_string): @@ -16,15 +19,11 @@ def map_cdict_to_ob(o, cdict, concept_dict): # cdict = flat concept_dict if 'structure_name' in cdict.keys(): - json_file = cdict['path'] + cdict['structure_name'] + '_concept_dict.json' props = {} else: - json_file = cdict['path'] + '_concept_dict.json' props = {'bam_username': o.get_session_info().userName} # TODO can we avoid o as input? - with open(json_file, 'r') as file: - json_string = file.read() - json_string = format_json_string(json_string) + json_string = format_json_string(concept_dict) props |= { 'conceptual_dictionary': json_string, diff --git a/pyiron_rdm/ob_cfg_sfb1394.py b/pyiron_rdm/ob_cfg_sfb1394.py index 28778df..b132209 100644 --- a/pyiron_rdm/ob_cfg_sfb1394.py +++ b/pyiron_rdm/ob_cfg_sfb1394.py @@ -1,4 +1,7 @@ -def format_json_string(json_string): +def format_json_string(cdict_json): + import json + + json_string = json.dumps(cdict_json, indent=2) json_string = json_string.replace("\n", "
") result = [] for index, char in enumerate(json_string): @@ -16,15 +19,11 @@ def map_cdict_to_ob(o, cdict, concept_dict): # cdict = flat concept_dict if "structure_name" in cdict.keys(): - json_file = cdict["path"] + cdict["structure_name"] + "_concept_dict.json" props = {} else: - json_file = cdict["path"] + "_concept_dict.json" props = {"user_name": o.get_session_info().userName} - with open(json_file, "r") as file: - json_string = file.read() - json_string = format_json_string(json_string) + json_string = format_json_string(concept_dict) props |= { "pyiron_conceptual_dictionary": json_string, diff --git a/pyiron_rdm/sfb_to_bam.py b/pyiron_rdm/sfb_to_bam.py new file mode 100644 index 0000000..f776092 --- /dev/null +++ b/pyiron_rdm/sfb_to_bam.py @@ -0,0 +1,222 @@ +import os +import json +from pathlib import Path +import shutil + +from ob.classic import openbis_login +from ob.ob_upload import openbis_validate +from ob.ob_upload import openbis_upload_validated + +# Assumes just one file with the extension ! +def rename_files_to_basename(path_in_cdict, dest_dir, extensions=('.h5', '.json', '.yml')): + dest_dir = Path(dest_dir) + base_name = Path(path_in_cdict).name + + rename_map = { + '.json': f'{base_name}_concept_dict.json', + '.yml': f'{base_name}_environment.yml' + } + + for ext in extensions: + matching_files = list(dest_dir.glob(f'*{ext}')) + if matching_files: + source = matching_files[0] + target_name = rename_map.get(ext, f'{base_name}{ext}') + target = dest_dir / target_name + shutil.move(str(source), str(target)) + new_path_in_cdict = str(dest_dir / base_name) + return new_path_in_cdict + +def get_structures(o, job_to_copy): + # Parent structure = structure to upload to BAM + structure_child_id = [ + ch_id for ch_id in job_to_copy.children if 'SAMPLE' in ch_id + ][0] + structure_child = o.get_object(structure_child_id) + structure_parent_id = [ + p_id for p_id in o.get_object(structure_child_id).parents if 'SAMPLE' in p_id + ][0] + structure_parent = o.get_object(structure_parent_id) + return structure_child, structure_parent + +def get_datasets(o, obj_to_copy, object_type='job', dest='temp'): + ''' + object_type: 'job' or 'structure' -> different dataset types + + returns cdict_path = relative path to the concept_dict.json file + ''' + cdict_path = '' + + allowed_object_types = ['job', 'structure'] + if object_type not in allowed_object_types: + raise ValueError(f'Arg object_type must be one of {allowed_object_types}, got: {object_type}.') + + elif object_type == 'job': + ds_types = ['PYIRON_HDF5', 'PYIRON_CONCEPT_DICT_DATA'] + elif object_type == 'structure': + ds_types = ['CRYS-STRUCT_DATA', 'PYIRON_CONCEPT_DICT_DATA'] + + for ds_type in ds_types: + datasets = obj_to_copy.get_datasets(type=ds_type) + for ds in datasets: + ds = o.get_dataset(ds.permId) + ds.download( + destination=dest, + wait_until_finished=True, + create_default_folders=False + ) + if ds_type == 'PYIRON_CONCEPT_DICT_DATA': + cdict_path = os.path.join( + dest, + ds.file_list[0].split("/")[-1] + ) + + return cdict_path + +def get_pseudopotentials(o_bam, job_to_copy): + pseudopot_ids_sfb = [p_id for p_id in job_to_copy.parents if 'POTENTIAL' in p_id] + pseudopot_names = [p_id.split('/')[-1] for p_id in pseudopot_ids_sfb] + pseudopots_bam = o_bam.get_objects(type='PSEUDOPOTENTIAL', code=pseudopot_names) + pseudopot_ids_bam = [p.permId for p in pseudopots_bam] + return pseudopot_ids_bam + +def get_materials(o_sfb, o_bam, struct_to_copy): + material_ids_sfb = [m_id for m_id in struct_to_copy.parents if 'MATERIAL' in m_id] + materials_sfb = o_sfb.get_objects(material_ids_sfb) + mats_sfb = [m.p.get('$name') for m in materials_sfb] + material_ids_bam = [ + o_bam.get_objects(type='MATERIAL_V1', where={'$name': mat})[0].permId for mat in mats_sfb + ] + return material_ids_bam + +def convert_and_upload(o, space, project, collection, cdict_path, + dest='temp', parent_ids=None, options={}): + with open(cdict_path, 'r') as f: + loaded_cdict = json.load(f) + + # Returns a list of the following tuples for each cdict submitted + # cdict, props_dict, object_type, ds_types, ob_parents, object_name + validated_to_upload = openbis_validate(o, space, project, collection, loaded_cdict, options=options) + cdict, props_dict, object_type, ds_types, ob_parents, object_name = validated_to_upload[0] + + # Rename datasets and change cdict path (locally) to match file location + path_in_cdict = cdict["structure_name"] if 'structure_name' in cdict.keys() else cdict['path'] + cdict['path'] = rename_files_to_basename(path_in_cdict=path_in_cdict, dest_dir=dest) + cdict['path'] = f'{dest}/' if 'structure_name' in cdict.keys() else cdict['path'] + object_id = openbis_upload_validated( + o, space, project, collection, object_name, + object_type, ob_parents, props_dict, ds_types, + cdict, parent_ids = parent_ids + ) + return object_id + +# delete downloaded files +def cleanup(dest): + dest_path = Path(dest) + if dest_path.exists() and dest_path.is_dir(): + shutil.rmtree(dest_path) + +def sfb_to_bam( + username_bam, space_bam, project_bam, collection_bam, + username_sfb, s3_config_path, job_ids=None, collection_sfb=None, + include_structure=True, options={} +): + """ + Copies job objects and its datasets from the SFB openBIS instance to the BAM instance. + + Parameters: + ----------- + username_bam (str): The BAM instance username for authentication. + space_bam (str): The BAM space to upload the job(s) to. + project_bam (str): The BAM project to upload the job(s) to. + collection_bam (str): The BAM collection to upload the job(s) to. + + username_sfb (str): The SFB instance username for authentication. + s3_config_path (str): Path to the configuration file containing S3 credentials + for accessing the SFB instance. + + job_ids (str or list, optional): The SFB job id(s) (string or list of strings) to copy + from the SFB instance. Default is None. + + collection_sfb (str, optional): The SFB collection (full string) whose all jobs will be copied. + If specified, it overwrites 'job_ids'. Default is None. + + include_structure (bool, optional): Whether to also copy the structure object and its datasets. + Defaults to True. Set to False for jobs like TableJob. + + options (dict, optional): Additional upload options, same function as ob.upload_classic_pyiron. + Specify 'materials' to avoid the automatic matching. + Defaults to an empty dictionary. + + Notes: + ------ + - If both 'job_ids' and 'collection_sfb' are specified, the 'collection_sfb' parameter takes + precedence, and all jobs in the specified collection will be copied. + """ + + if not (job_ids or collection_sfb): + raise ValueError('One of the following arguments must be specified: job_ids, collection_sfb') + + # job_ids to be either a string or list of strings of job permIds or identifiers + job_ids = [job_ids] if isinstance(job_ids, str) else job_ids + + print('SFB openBIS login:') + o_sfb = openbis_login(url="https://openbis.imm.rwth-aachen.de/openbis/webapp/eln-lims/", + username=username_sfb, instance="sfb1394", s3_config_path=s3_config_path) + print('BAM openBIS login:') + o_bam = openbis_login(url="https://test3.datastore.bam.de/", username=username_bam, instance="bam") + + if collection_sfb: + job_ids = [job.permId for job in o_sfb.get_objects(type='PYIRON_JOB*', collection=collection_sfb)] + + dest = 'temp' + job_dest = f'{dest}/job' + child_struct_dest = f'{dest}/struct_final' + par_struct_dest = f'{dest}/struct' + + for job_id in job_ids: + job_to_copy = o_sfb.get_object(job_id) + job_cdict_path = get_datasets(o_sfb, job_to_copy, object_type='job', dest=job_dest) + + material_ids = options.get('materials', []) + material_ids = [material_ids] if isinstance(material_ids, str) else material_ids + if include_structure: + child_struct_to_copy, par_struct_to_copy = get_structures(o_sfb, job_to_copy) + child_struct_cdict_path = get_datasets(o_sfb, child_struct_to_copy, object_type='structure', dest=child_struct_dest) + par_struct_cdict_path = get_datasets(o_sfb, par_struct_to_copy, object_type='structure', dest=par_struct_dest) + + if not material_ids: + material_ids = get_materials(o_sfb, o_bam, par_struct_to_copy) + + pseudopot_ids = options.get('pseudopotentials', []) + pseudopot_ids = [pseudopot_ids] if isinstance(pseudopot_ids, str) else pseudopot_ids + if not pseudopot_ids: + pseudopot_ids = get_pseudopotentials(o_bam, job_to_copy) + options = { + 'materials': material_ids, + 'pseudopotentials' : pseudopot_ids + } + + par_struct_identifier = None + if include_structure: + par_struct_identifier = convert_and_upload( + o_bam, space_bam, project_bam, collection_bam, par_struct_cdict_path, + dest = par_struct_dest, parent_ids = None, options = options + ) + job_identifier = convert_and_upload( + o_bam, space_bam, project_bam, collection_bam, job_cdict_path, + dest = job_dest, parent_ids = par_struct_identifier, options = options + ) + if include_structure: + child_struct_identifier = convert_and_upload( + o_bam, space_bam, project_bam, collection_bam, child_struct_cdict_path, + dest = child_struct_dest, parent_ids = job_identifier, options = options + ) + + + cleanup(dest) + + o_sfb.logout() + o_bam.logout() + + print('Upload successful 🙂') \ No newline at end of file