Source code for mischbares.utils.orchestrator_utils

""" A collection of functions for working with hdf5 files and generally with orchestrator data."""
import numpy as np
import h5py


def increment_name(name: str) -> str:
    """Bump the trailing integer of an underscore-separated name by one.

    The integer is whatever follows the last ``'_'`` (or the whole string when
    there is no underscore). If that last piece contains a ``'.'``, only the
    part before the first dot is treated as the number, so file names such as
    ``"run_3.hdf5"`` become ``"run_4.hdf5"``.

    Args:
        name (str): name of file

    Returns:
        str: Incremented name of file

    Raises:
        ValueError: if the trailing piece is not an integer literal.
    """
    # Everything up to and including the last underscore is left untouched.
    stem, underscore, tail = name.rpartition('_')
    # Split off an optional extension so that filenames can be incremented too.
    counter, dot, extension = tail.partition('.')
    bumped = str(int(counter) + 1)
    return stem + underscore + bumped + dot + extension
def _shared_prefix_length(strings):
    """Return the number of leading characters that all ``strings`` share."""
    for position, chars in enumerate(zip(*strings)):
        if len(set(chars)) > 1:
            return position
    # zip stopped at the shortest string without finding a mismatch.
    return min(len(text) for text in strings)


def highest_name(names: list):
    """Take in a list of strings which differ only by an integer, and return
    the one for which that integer is highest.

    The integer field is located by removing the text common to the start and
    the end of every name. Digits at the edge of that common text are handed
    back to the number, so names whose numbers share leading or trailing
    digits (``"a_1"`` / ``"a_11"``, ``"a_9"`` / ``"a_19"``) are compared on
    their full value.

    Args:
        names (list): list of strings

    Returns:
        str: highest name (the first one in ``names`` when there is a tie)

    Raises:
        ValueError: if ``names`` is empty or the differing part of a name is
            not an integer.
    """
    if not names:
        raise ValueError("names must contain at least one string")
    # A single name, or several identical ones, has nothing to compare.
    if len(set(names)) == 1:
        return names[0]

    digits = '0123456789'

    # Common leading text; trailing digits belong to the number, not the prefix.
    prefix = names[0][:_shared_prefix_length(names)].rstrip(digits)
    tails = [name[len(prefix):] for name in names]

    # Common trailing text is found on the remainders only, so that prefix and
    # suffix can never overlap. Leading digits again belong to the number.
    suffix_length = _shared_prefix_length([tail[::-1] for tail in tails])
    suffix = tails[0][len(tails[0]) - suffix_length:].lstrip(digits)

    numbers = []
    for name, tail in zip(names, tails):
        field = tail[:len(tail) - len(suffix)]
        try:
            numbers.append(int(field))
        except ValueError as err:
            raise ValueError(
                f"cannot find an integer in {name!r} (got {field!r})"
            ) from err
    return names[numbers.index(max(numbers))]
def dict_address(address, dict_keys):
    """Look up a value in nested dictionaries using a '/'-separated key path.

    Args:
        address (str): keys separated by '/', outermost key first
        dict_keys: dictionary to search

    Returns:
        the value stored under the final key of ``address``

    Raises:
        KeyError: if any key along the path is missing.
    """
    node = dict_keys
    # Descend one level per path component.
    for step in address.split('/'):
        node = node[step]
    return node
def dict_address_set(address, dict_keys, val):
    """Assign ``val`` inside nested dictionaries using a '/'-separated key path.

    Every key except the last must already exist; the last key is created or
    overwritten in place.

    Args:
        address (str): keys separated by '/', outermost key first
        dict_keys (dict): dictionary to modify
        val: value to set

    Raises:
        KeyError: if an intermediate key along the path is missing.
    """
    *parents, leaf = address.split('/')
    container = dict_keys
    # Walk down to the dictionary that holds the final key.
    for step in parents:
        container = container[step]
    container[leaf] = val
def save_dict_to_hdf5(dict_keys, filename, path='/', mode='w'):
    """Write a (possibly nested) dictionary into an hdf5 file.

    The file is opened with h5py, the dictionary is written by
    ``recursively_save_dict_contents_to_group`` and the file is closed again
    when the write finishes or fails.

    Args:
        dict_keys (dict): dictionary to save
        filename (str): name of file
        path (str, optional): location inside the hdf5 file under which the
            dictionary is stored. Defaults to '/'.
        mode (str, optional): mode to open the file. Defaults to 'w'.
    """
    with h5py.File(filename, mode) as handle:
        recursively_save_dict_contents_to_group(
            h5file=handle,
            path=path,
            dict_keys=dict_keys,
        )
def recursively_save_dict_contents_to_group(h5file, path, dict_keys):
    """Save a dictionary to an hdf5 file, one dataset or group per key.

    Keys are converted with ``str`` and appended to ``path`` to form the
    location of each entry. Values are handled as follows:

    * ``str``, ``int``, ``float``, ``np.int64``, ``np.float32``,
      ``np.float64``: stored directly.
    * ``list``: converted to a numpy array and stored like an array.
    * ``np.ndarray``: stored directly; if h5py rejects the dtype, the array is
      stored as fixed-width byte strings wide enough for the longest element.
      The stored data is read back and compared with what was written.
    * ``dict``: stored recursively under ``path + key + '/'``.
    * ``None``: stored as an empty group.

    Args:
        h5file (h5py.File): open hdf5 file to save to
        path (str): location prefix inside the file; keys are appended to it
            verbatim, so it should end with '/'
        dict_keys (dict): dictionary to save

    Raises:
        ValueError: if an argument has the wrong type, a value has an
            unsupported type, or the data read back differs from the input.
    """
    # argument type checking
    if not isinstance(dict_keys, dict):
        raise ValueError("must provide a dictionary")
    if not isinstance(path, str):
        raise ValueError("path must be a string")
    if not isinstance(h5file, h5py.File):
        raise ValueError("must be an open h5py file")

    # save items to the hdf5 file
    for key, item in dict_keys.items():
        target = path + str(key)
        if isinstance(item, list):
            item = np.array(item)

        # save strings and plain numeric scalars
        if isinstance(item, (np.int64, np.float64, str, float, np.float32, int)):
            h5file[target] = item
        # save numpy arrays
        elif isinstance(item, np.ndarray):
            try:
                h5file[target] = item
            except (TypeError, ValueError):
                # h5py has no native mapping for this dtype (e.g. unicode or
                # object arrays). Fall back to byte strings; the unsized 'S'
                # dtype lets numpy pick a width that fits every element
                # instead of truncating at a fixed length.
                item = item.astype('S')
                h5file[target] = item
            # NaN != NaN, so float data needs NaN-aware comparison; other
            # dtypes use the plain comparison.
            nan_aware = item.dtype.kind in 'fc'
            if not np.array_equal(h5file[target][()], item, equal_nan=nan_aware):
                raise ValueError(
                    'The data representation in the HDF5 file does not match the original dict.')
        # save dictionaries
        elif isinstance(item, dict):
            recursively_save_dict_contents_to_group(h5file, target + '/', item)
        elif item is None:
            h5file.create_group(target)
        # other types cannot be saved and will result in an error
        else:
            raise ValueError('Cannot save %s type.' % type(item))
def hdf5_group_to_dict(h5file, path):
    """Take a group from somewhere within an hdf5 file, convert it to a dict,
    and return it.

    Sub-groups become nested dictionaries; datasets are read completely into
    memory with ``dataset[()]``.

    Args:
        h5file (h5py.File): open hdf5 file to read from
        path (str): path to group; a missing trailing '/' is added

    Raises:
        ValueError: if a member of the group is neither a group nor a dataset

    Returns:
        dict: dictionary of group
    """
    # Child paths are built by concatenation, so the prefix must end in '/'.
    if not path.endswith('/'):
        path += '/'

    data = {}
    for key in h5file[path].keys():
        child_path = path + key + '/'
        # Resolve the child once and reuse it for the type checks and the read.
        node = h5file[child_path]
        if isinstance(node, h5py.Group):
            data[key] = hdf5_group_to_dict(h5file, child_path)
        elif isinstance(node, h5py.Dataset):
            data[key] = node[()]
        else:
            raise ValueError(f'somehow {node} is neither an hdf5 group nor dataset')
    return data
def paths_in_hdf5(h5path, paths):
    """Check if the input path or list of paths are valid for the input hdf5
    file.

    Args:
        h5path (str): path to hdf5 file
        paths (str or list): path or list of paths to check

    Returns:
        bool: True if every path is in the hdf5 file, False as soon as one is
        not.
    """
    if isinstance(paths, str):
        paths = [paths]
    with h5py.File(h5path, 'r') as h5file:
        for path in paths:
            try:
                h5file[path]
            except (KeyError, TypeError, ValueError):
                # Only lookup failures mean "not present"; interrupts and
                # unrelated errors are allowed to propagate.
                return False
    return True