# Source code for h5pywrappers.obj

# -*- coding: utf-8 -*-
# Copyright 2024 Matthew Fitzpatrick.
#
# This program is free software: you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free Software
# Foundation, version 3.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along with
# this program. If not, see <https://www.gnu.org/licenses/gpl-3.0.html>.
r"""For identifying and loading HDF5 objects.

"""



#####################################
## Load libraries/packages/modules ##
#####################################

# For performing deep copies.
import copy

# For checking whether a file exists at a given path, making directories, and
# for removing files.
import pathlib

# For removing directories.
import shutil



# For loading HDF5 files.
import h5py

# For validating and converting objects.
import czekitout.check
import czekitout.convert

# For defining classes that support enforced validation, updatability,
# pre-serialization, and de-serialization.
import fancytypes



##################################
## Define classes and functions ##
##################################

# Names of the public objects exported by this module.
__all__ = ["ID",
           "load"]



def _check_and_convert_filename(params):
    r"""Validate and convert the ``"filename"`` entry of ``params``.

    Parameters
    ----------
    params : `dict`
        A mapping that must contain the key ``"filename"``.

    Returns
    -------
    filename : `str`
        The object returned by
        :func:`czekitout.convert.to_str_from_str_like` when applied to
        ``params["filename"]``. Any exception raised by that function
        propagates to the caller.

    """
    name = "filename"
    return czekitout.convert.to_str_from_str_like(obj=params[name],
                                                  obj_name=name)



def _pre_serialize_filename(filename):
    r"""Return the serializable representation of ``filename``.

    No conversion is performed: the object given is returned as is.

    """
    return filename



def _de_pre_serialize_filename(serializable_rep):
    r"""Return the filename encoded by ``serializable_rep``.

    No conversion is performed: the object given is returned as is.

    """
    return serializable_rep



def _check_and_convert_path_in_file(params):
    r"""Validate and convert the ``"path_in_file"`` entry of ``params``.

    Parameters
    ----------
    params : `dict`
        A mapping that must contain the key ``"path_in_file"``.

    Returns
    -------
    path_in_file : `str`
        The object returned by
        :func:`czekitout.convert.to_str_from_str_like` when applied to
        ``params["path_in_file"]``.

    Raises
    ------
    ValueError
        If the converted object is empty.

    """
    name = "path_in_file"
    path_in_file = czekitout.convert.to_str_from_str_like(obj=params[name],
                                                          obj_name=name)

    if not path_in_file:
        # Error messages are module-level globals defined at the bottom of the
        # file, keyed by the name of the function that raises them.
        err_msg_key = "_check_and_convert_path_in_file" + "_err_msg_1"
        raise ValueError(globals()[err_msg_key])

    return path_in_file



def _pre_serialize_path_in_file(path_in_file):
    r"""Return the serializable representation of ``path_in_file``.

    No conversion is performed: the object given is returned as is.

    """
    return path_in_file



def _de_pre_serialize_path_in_file(serializable_rep):
    r"""Return the HDF5 path encoded by ``serializable_rep``.

    No conversion is performed: the object given is returned as is.

    """
    return serializable_rep



# Default value of the ``skip_validation_and_conversion`` parameter of the
# constructor of the class ``ID``.
_default_skip_validation_and_conversion = False



class ID(fancytypes.PreSerializableAndUpdatable):
    r"""A parameter set specifying an HDF5 object in an HDF5 file or an HDF5
    file to be.

    Parameters
    ----------
    filename : `str`
        The relative or absolute filename of the HDF5 file that contains the
        HDF5 object of interest.
    path_in_file : `str`
        The HDF5 path to the HDF5 object of interest contained in the HDF5 file
        specified by ``filename``.
    skip_validation_and_conversion : `bool`, optional
        Let ``validation_and_conversion_funcs`` and ``core_attrs`` denote the
        attributes :attr:`~fancytypes.Checkable.validation_and_conversion_funcs`
        and :attr:`~fancytypes.Checkable.core_attrs` respectively, both of which
        being `dict` objects.

        Let ``params_to_be_mapped_to_core_attrs`` denote the `dict`
        representation of the constructor parameters excluding the parameter
        ``skip_validation_and_conversion``, where each `dict` key ``key`` is a
        different constructor parameter name, excluding the name
        ``"skip_validation_and_conversion"``, and
        ``params_to_be_mapped_to_core_attrs[key]`` would yield the value of the
        constructor parameter with the name given by ``key``.

        If ``skip_validation_and_conversion`` is set to ``False``, then for each
        key ``key`` in ``params_to_be_mapped_to_core_attrs``,
        ``core_attrs[key]`` is set to ``validation_and_conversion_funcs[key]
        (params_to_be_mapped_to_core_attrs)``.

        Otherwise, if ``skip_validation_and_conversion`` is set to ``True``,
        then ``core_attrs`` is set to
        ``params_to_be_mapped_to_core_attrs.copy()``. This option is desired
        primarily when the user wants to avoid potentially expensive deep copies
        and/or conversions of the `dict` values of
        ``params_to_be_mapped_to_core_attrs``, as it is guaranteed that no
        copies or conversions are made in this case.

    """
    # The helper tables below are built by looking up, in this module's global
    # namespace, the functions associated with each constructor parameter
    # name. The two temporaries are deleted afterwards so that they do not
    # become class attributes.
    _param_names = ("filename", "path_in_file")
    _lookup_kwargs = {"namespace_as_dict": globals(),
                      "ctor_param_names": _param_names}

    _validation_and_conversion_funcs_ = \
        fancytypes.return_validation_and_conversion_funcs(**_lookup_kwargs)
    _pre_serialization_funcs_ = \
        fancytypes.return_pre_serialization_funcs(**_lookup_kwargs)
    _de_pre_serialization_funcs_ = \
        fancytypes.return_de_pre_serialization_funcs(**_lookup_kwargs)

    del _param_names, _lookup_kwargs



    def __init__(self,
                 filename,
                 path_in_file,
                 skip_validation_and_conversion=\
                 _default_skip_validation_and_conversion):
        # Forward every constructor parameter by keyword to the parent class,
        # together with ``skip_cls_tests=True``.
        fancytypes.PreSerializableAndUpdatable.__init__(
            self,
            filename=filename,
            path_in_file=path_in_file,
            skip_validation_and_conversion=skip_validation_and_conversion,
            skip_cls_tests=True
        )



    @classmethod
    def get_validation_and_conversion_funcs(cls):
        # Hand back a shallow copy so that callers cannot alter the table
        # stored on the class.
        return cls._validation_and_conversion_funcs_.copy()



    @classmethod
    def get_pre_serialization_funcs(cls):
        # Hand back a shallow copy so that callers cannot alter the table
        # stored on the class.
        return cls._pre_serialization_funcs_.copy()



    @classmethod
    def get_de_pre_serialization_funcs(cls):
        # Hand back a shallow copy so that callers cannot alter the table
        # stored on the class.
        return cls._de_pre_serialization_funcs_.copy()
def _check_and_convert_obj_id(params):
    r"""Validate the ``"obj_id"`` entry of ``params`` and return a deep copy.

    Parameters
    ----------
    params : `dict`
        A mapping that must contain the key ``"obj_id"``. It may also contain
        the key ``"name_of_obj_alias_of_obj_id"``, in which case the
        corresponding value is passed as the object name to the type check
        instead of ``"obj_id"``.

    Returns
    -------
    obj_id : :class:`h5pywrappers.obj.ID`
        A deep copy of ``params["obj_id"]``, after it has been passed to
        :func:`czekitout.check.if_instance_of_any_accepted_types` with
        :class:`h5pywrappers.obj.ID` as the only accepted type.

    """
    name = "obj_id"
    reported_name = params.get("name_of_obj_alias_of_" + name, name)

    candidate = copy.deepcopy(params[name])

    czekitout.check.if_instance_of_any_accepted_types(
        obj=candidate,
        obj_name=reported_name,
        accepted_types=(ID,)
    )

    return candidate



def _pre_serialize_obj_id(obj_id):
    r"""Return the result of calling ``obj_id.pre_serialize()``.

    """
    return obj_id.pre_serialize()



def _de_pre_serialize_obj_id(serializable_rep):
    r"""Rebuild an :class:`h5pywrappers.obj.ID` from ``serializable_rep``.

    The reconstruction is delegated to ``ID.de_pre_serialize``, called with
    ``skip_validation_and_conversion=True``.

    """
    return ID.de_pre_serialize(serializable_rep=serializable_rep,
                               skip_validation_and_conversion=True)



def _check_and_convert_read_only(params):
    r"""Validate and convert the ``"read_only"`` entry of ``params``.

    Returns the object returned by :func:`czekitout.convert.to_bool` when
    applied to ``params["read_only"]``.

    """
    name = "read_only"
    return czekitout.convert.to_bool(obj=params[name], obj_name=name)



# Default value of the ``read_only`` parameter of the function ``load``.
_default_read_only = True
def load(obj_id, read_only=_default_read_only):
    r"""Load an HDF5 object from an HDF5 file.

    Note that users can access the HDF5 file object to which the HDF5 object of
    interest belongs via ``obj.file``, where ``obj`` is the HDF5 object of
    interest. To close the HDF5 file, users can run the command
    ``obj.file.close()``, however by doing so, any other HDF5 objects belonging
    to that file will become unusable.

    Parameters
    ----------
    obj_id : :class:`h5pywrappers.obj.ID`
        The parameter set specifying the HDF5 object of interest.
    read_only : `bool`, optional
        If ``read_only`` is set to ``True``, then the HDF5 object of interest
        cannot be modified after loading it. Otherwise, if ``read_only`` is set
        to ``False``, then the HDF5 object of interest can be modified after
        loading it.

    Returns
    -------
    obj : :class:`h5py.Group` | :class:`h5py.Dataset`
        The HDF5 object of interest.

    Raises
    ------
    FileNotFoundError
        If no file exists at the file path specified by ``obj_id``.
    PermissionError
        If the file cannot be accessed because of insufficient permissions.
    OSError
        If the file cannot be opened as an HDF5 file.
    ValueError
        If the file contains no HDF5 object at the HDF5 path specified by
        ``obj_id``.

    """
    # The parameter mapping is built explicitly rather than taken from
    # ``locals()``: the mapping is written to below, and the contents of the
    # dictionary returned by ``locals()`` should not be modified. Prior to
    # Python 3.13 that dictionary is also shared with the frame, so a debugger
    # or Python-level tracer can add the other local variables to it while it
    # is being iterated over.
    params = {"obj_id": obj_id, "read_only": read_only}

    # Each parameter ``x`` is validated and converted by the module-level
    # function named ``_check_and_convert_x``, which receives the whole
    # mapping.
    global_symbol_table = globals()
    for param_name in tuple(params):
        func_name = "_check_and_convert_" + param_name
        func_alias = global_symbol_table[func_name]
        params[param_name] = func_alias(params)

    kwargs = params
    obj = _load(**kwargs)

    return obj
def _load(obj_id, read_only):
    r"""Open the HDF5 file specified by ``obj_id`` and return the HDF5 object
    stored at the HDF5 path specified by ``obj_id``.

    The file is opened in mode ``"r"`` if ``read_only`` is truthy, and in mode
    ``"a"`` otherwise. The file is deliberately left open on success, since the
    object returned belongs to it.

    Raises whatever :func:`_pre_load` raises.

    """
    read_only = _pre_load(obj_id, read_only)

    obj_id_core_attrs = obj_id.get_core_attrs(deep_copy=False)
    filename = obj_id_core_attrs["filename"]
    path_in_file = obj_id_core_attrs["path_in_file"]

    file_mode = "r" if read_only else "a"
    file_obj = h5py.File(filename, file_mode)

    try:
        obj = file_obj[path_in_file]
    except Exception:
        # Do not leak an open file handle if the lookup fails, e.g. because
        # the file was modified after the checks in ``_pre_load``.
        file_obj.close()
        raise

    return obj



def _pre_load(obj_id, read_only):
    r"""Check that the HDF5 object specified by ``obj_id`` can be loaded.

    Returns ``read_only`` unchanged.

    Raises
    ------
    FileNotFoundError
        If no regular file exists at the file path.
    PermissionError
        If opening the file raises a `PermissionError`.
    OSError
        If opening the file raises any other `OSError`. The message depends on
        whether the text of the original error contains
        ``"file signature not found"``.
    ValueError
        If the HDF5 path is not contained in the file.

    """
    obj_id_core_attrs = obj_id.get_core_attrs(deep_copy=False)
    filename = obj_id_core_attrs["filename"]
    path_in_file = obj_id_core_attrs["path_in_file"]

    file_mode = "r" if read_only else "a"

    current_func_name = "_pre_load"

    try:
        # The explicit existence check prevents mode ``"a"`` from being used
        # on a file path at which no file exists.
        if not pathlib.Path(filename).is_file():
            raise FileNotFoundError
        with h5py.File(filename, file_mode):
            pass
    except FileNotFoundError:
        err_msg = globals()[current_func_name+"_err_msg_1"].format(filename)
        raise FileNotFoundError(err_msg)
    except PermissionError:
        err_msg = globals()[current_func_name+"_err_msg_2"].format(filename)
        raise PermissionError(err_msg)
    except OSError as err:
        if "file signature not found" in str(err):
            err_msg = globals()[current_func_name+"_err_msg_3"].format(filename)
        else:
            err_msg = globals()[current_func_name+"_err_msg_4"].format(filename)
        raise OSError(err_msg)

    with h5py.File(filename, file_mode) as file_obj:
        if path_in_file not in file_obj:
            unformatted_err_msg = globals()[current_func_name+"_err_msg_5"]
            err_msg = unformatted_err_msg.format(path_in_file, filename)
            raise ValueError(err_msg)

    return read_only



def _pre_save(obj_id):
    r"""Check that an HDF5 object can be saved at the location specified by
    ``obj_id``.

    Any directory or file created in order to perform the checks is removed
    again before returning, including when a check fails.

    Raises
    ------
    PermissionError
        If the file path cannot be accessed, or the file cannot be written to,
        because of insufficient permissions.
    OSError
        If a file already exists at the file path and cannot be opened as an
        HDF5 file.
    ValueError
        If there is an HDF5 dataset at an intermediate path of the HDF5 path.

    """
    obj_id_core_attrs = obj_id.get_core_attrs(deep_copy=False)
    filename = obj_id_core_attrs["filename"]

    first_new_dir_made = _mk_parent_dir(filename)

    current_func_name = "_pre_save"

    try:
        try:
            file_does_not_exist = (not pathlib.Path(filename).is_file())
        except PermissionError:
            unformatted_err_msg = globals()[current_func_name+"_err_msg_1"]
            err_msg = unformatted_err_msg.format(filename)
            raise PermissionError(err_msg)

        if file_does_not_exist:
            # Create a throwaway file to test whether the location is
            # writable, then delete it.
            try:
                with h5py.File(filename, "w"):
                    pass
            except PermissionError:
                unformatted_err_msg = globals()[current_func_name+"_err_msg_1"]
                err_msg = unformatted_err_msg.format(filename)
                raise PermissionError(err_msg)

            pathlib.Path(filename).unlink()
        else:
            try:
                with h5py.File(filename, "a"):
                    pass
            except PermissionError:
                unformatted_err_msg = globals()[current_func_name+"_err_msg_2"]
                err_msg = unformatted_err_msg.format(filename)
                raise PermissionError(err_msg)
            except OSError as err:
                key = (current_func_name + "_err_msg_3"
                       if ("file signature not found" in str(err))
                       else current_func_name + "_err_msg_4")
                err_msg = globals()[key].format(filename)
                raise OSError(err_msg)

        _check_for_intermediary_datasets_along_path_in_file(obj_id)
    finally:
        # Remove the directories made by ``_mk_parent_dir`` whether or not the
        # checks succeeded, so that a failed check leaves nothing behind.
        if first_new_dir_made is not None:
            shutil.rmtree(first_new_dir_made)

    return None



def _mk_parent_dir(filename):
    r"""Make the parent directory of ``filename``, along with any missing
    ancestors.

    Returns
    -------
    first_new_dir_made : `pathlib.Path` | `None`
        The topmost directory that did not exist prior to the call, or `None`
        if the parent directory already existed.

    Raises
    ------
    PermissionError
        If a directory cannot be inspected or made because of insufficient
        permissions.

    """
    current_func_name = "_mk_parent_dir"

    try:
        parent_dir_path = pathlib.Path(filename).resolve().parent

        # Start from the anchor (drive and root) rather than the root alone:
        # ``parts[0]`` of an absolute path is its anchor, and on Windows the
        # root does not include the drive, so walking from it would inspect a
        # path on the current drive instead of the path's own drive.
        temp_dir_path = pathlib.Path(parent_dir_path.anchor)

        parent_dir_did_not_already_exist = False

        for path_part in parent_dir_path.parts[1:]:
            temp_dir_path = temp_dir_path / path_part
            if not temp_dir_path.is_dir():
                parent_dir_did_not_already_exist = True
                break

        parent_dir_path.mkdir(parents=True, exist_ok=True)
    except PermissionError:
        err_msg = globals()[current_func_name+"_err_msg_1"].format(filename)
        raise PermissionError(err_msg)

    first_new_dir_made = (temp_dir_path
                          if parent_dir_did_not_already_exist
                          else None)

    return first_new_dir_made



def _check_for_intermediary_datasets_along_path_in_file(obj_id):
    r"""Check that no HDF5 dataset lies at an intermediate path of the HDF5
    path specified by ``obj_id``.

    Nothing is checked if no file exists at the file path, or if the HDF5 path
    is already contained in the file.

    Raises
    ------
    ValueError
        If an intermediate path of the HDF5 path is an HDF5 dataset.

    """
    obj_id_core_attrs = obj_id.get_core_attrs(deep_copy=False)
    filename = obj_id_core_attrs["filename"]
    path_in_file = obj_id_core_attrs["path_in_file"]

    current_func_name = "_check_for_intermediary_datasets_along_path_in_file"

    file_is_not_new = pathlib.Path(filename).is_file()
    if not file_is_not_new:
        return None

    with h5py.File(filename, "a") as file_obj:
        if path_in_file in file_obj:
            return None

        # HDF5 paths always use forward slashes, so they are split with
        # ``PurePosixPath``; the OS-specific ``Path`` would produce backslash
        # separated paths on Windows.
        path_in_file = pathlib.PurePosixPath(path_in_file)

        # Walk the ancestors from the outermost one to the innermost one.
        for ancestor in reversed(path_in_file.parents):
            path = str(ancestor)

            if path not in file_obj:
                # Nothing can exist below a path that does not exist.
                break

            if isinstance(file_obj[path], h5py.Dataset):
                key = current_func_name + "_err_msg_1"
                unformatted_err_msg = globals()[key]
                err_msg = unformatted_err_msg.format(path_in_file,
                                                     path,
                                                     filename)
                raise ValueError(err_msg)

    return None



###########################
## Define error messages ##
###########################

_check_and_convert_path_in_file_err_msg_1 = \
    ("The object ``path_in_file`` must be a non-empty string.")

_pre_load_err_msg_1 = \
    ("No file exists at the file path ``'{}'``.")
_pre_load_err_msg_2 = \
    ("Cannot access the file path ``'{}'`` because of insufficient "
     "permissions.")
_pre_load_err_msg_3 = \
    ("No HDF5 file exists at the file path ``'{}'``.")
_pre_load_err_msg_4 = \
    ("Unable to synchronously open the HDF5 file at the file path ``'{}'``: "
     "see traceback for details.")
_pre_load_err_msg_5 = \
    ("No HDF5 object was found at the HDF5 path ``'{}'`` of the HDF5 file "
     "at the file path ``'{}'``.")

_pre_save_err_msg_1 = \
    _pre_load_err_msg_2
_pre_save_err_msg_2 = \
    ("Cannot write to the file at the file path ``'{}'`` because of "
     "insufficient permissions.")
_pre_save_err_msg_3 = \
    _pre_load_err_msg_3
_pre_save_err_msg_4 = \
    _pre_load_err_msg_4

_mk_parent_dir_err_msg_1 = \
    _pre_load_err_msg_2

_check_for_intermediary_datasets_along_path_in_file_err_msg_1 = \
    ("The object ``path_in_file``, which stores the string ``'{}'``, does not "
     "specify a valid HDF5 path: there is an HDF5 dataset at the intermediate "
     "path ``'{}'`` of the HDF5 file ``'{}'``.")