""" A collection of functions for working with hdf5 files and generally with orchestrator data."""
import numpy as np
import h5py
[docs]def increment_name(name:str):
    """ Increment the number at the end of a string.
    Args:
        name (str): name of file
    Returns:
        str: Incremented name of file
    """
    segments = name.split('_')
    if '.' in segments[-1]:
        #so that we can increment filenames too
        subsegment = segments[-1].split('.')
        subsegment[0] = str(int(subsegment[0])+1)
        segments[-1] = '.'.join(subsegment)
    else:
        segments[-1] = str(int(segments[-1])+1)
    return '_'.join(segments) 
[docs]def highest_name(names:list):
    """ Tke in a list of strings which differ only by an integer,
    and return the one for which that integer is highest
    Args:
        names (list): list of strings
    Returns:
        str: highest name
    """
    if len(names) == 1:
        return names[0]
    slen = min([len(i) for i in names])
    leftindex = None
    rightindex = None
    for i in range(slen):
        for name in names:
            if name[i] != names[0][i]:
                leftindex = i
                break
        if leftindex is not None:
            break
    for i in range(-1,-slen-1,-1):
        for name in names:
            if name[i] != names[0][i]:
                rightindex = i
                break
        if rightindex is not None:
            break
    numbers = [int(s[leftindex:rightindex+1] if rightindex != -1 else s[leftindex:]) for s in names]
    return names[numbers.index(max(numbers))] 
[docs]def dict_address(address,dict_keys):
    """ For a string of dict keys seperated by '/', get the value of d under that series
    Args:
        address (str): address of value
        dict_keys: dictionary to search
    Returns:
        dict: value at address
    """
    address = address.split('/')
    if len(address) == 1:
        return dict_keys[address[0]]
    return dict_address('/'.join(address[1:]),dict_keys[address[0]]) 
[docs]def dict_address_set(address,dict_keys,val):
    """ For a string of dict keys seperated by '/', set the value of dictionary d at that address to val
    Args:
        address (str): address of value
        dict_keys (dict): dictionary to search
        val (int): value to set
    """
    address = address.split('/')
    if len(address) == 1:
        dict_keys[address[0]] = val
    else:
        dict_address_set('/'.join(address[1:]),dict_keys[address[0]],val) 
[docs]def save_dict_to_hdf5(dict_keys, filename, path='/', mode='w'):
    """ Save a dictionary to an hdf5 file.
    Args:
        dict (dict): dictionary to save
        filename (str): name of file
        path (str, optional): path where the hdf5 is saved. Defaults to '/'.
        mode (str, optional): mode to open the file. Defaults to 'w'.
    """
    with h5py.File(filename, mode) as h5file:
        recursively_save_dict_contents_to_group(h5file, path, dict_keys) 
[docs]def recursively_save_dict_contents_to_group( h5file, path, dict_keys):
    """ Save a dictionary to an hdf5 file.
    Args:
        h5file (h5py._hl.files.File): hdf5 file to save to
        path (str): path to save to
        dict_keys (dict): dictionary to save
    """
    # pylint: disable=protected-access
    # argument type checking
    if not isinstance(dict_keys, dict):
        raise ValueError("must provide a dictionary")
    if not isinstance(path, str):
        raise ValueError("path must be a string")
    if not isinstance(h5file, h5py._hl.files.File):
        raise ValueError("must be an open h5py file")
    # save items to the hdf5 file
    for key, item in dict_keys.items():
        key = str(key)
        if isinstance(item, list):
            item = np.array(item)
        if not isinstance(key, str):
            raise ValueError("dict keys must be strings to save to hdf5")
        # save strings, numpy.int64, and numpy.float64 types
        if isinstance(item, (np.int64, np.float64, str, float, np.float32,int)): #np.float,
            h5file[path + key] = item
        # save numpy arrays
        elif isinstance(item, np.ndarray):
            try:
                h5file[path + key] = item
            except:
                item = np.array(item).astype('|S9')
                h5file[path + key] = item
            if not np.array_equal(h5file[path + key][()], item):
                raise ValueError('The data representation in the HDF5 file does not match the original dict.')
        elif isinstance(item, list):
            h5file[path + key] = np.array(item)
            if not h5file[path + key] == np.array(item):
                raise ValueError('The data representation in the HDF5 file does not match the original dict.')
        # save dictionaries
        elif isinstance(item, dict):
            recursively_save_dict_contents_to_group(h5file, path + key + '/', item)
        elif item is None:
            h5file.create_group(path + key)
        # other types cannot be saved and will result in an error
        else:
            raise ValueError('Cannot save %s type.' % type(item)) 
[docs]def hdf5_group_to_dict(h5file, path):
    """ Take a group from somewhere within an hdf5 file, convert it to a dict, and return it.
    Args:
        h5file (str): hdf5 file to read from
        path (str): path to group
    Raises:
        ValueError: if the group is not a dictionary
    Returns:
        dict: dictionary of group
    """
    # pylint: disable=protected-access
    data = {}
    for key in h5file[path].keys():
        if isinstance(h5file[path+key+'/'],h5py._hl.group.Group):
            data.update({key:hdf5_group_to_dict(h5file,path+key+'/')})
        elif isinstance(h5file[path+key+'/'],h5py._hl.dataset.Dataset):
            data.update({key:h5file[path+key+'/'][()]})
        else:
            raise ValueError(f'somehow {h5file[key]} is neither an hdf5 group nor dataset')
    return data 
[docs]def paths_in_hdf5(h5path, paths):
    """ Check if the input path or list of paths are valid for the input hdf5 file.
    Args:
        h5path (str): path to hdf5 file
        paths (str or list): path or list of paths to check
    Returns:
        bool : True if path is in hdf5, False if not.
    """
    if isinstance(paths,str):
        paths = [paths]
    with h5py.File(h5path, 'r') as h5file:
        for path in paths:
            try:
                h5file[path]
            except:
                return False
    return True