Module `medpicpy.io`

medpicpy's lower level functions for reading individual images.

Expand source code

"""medpicpy's lower level functions for reading individual images.

"""
import os
import ntpath
# import pydicom
import cv2
import SimpleITK as sitk
import numpy as np

from . import config
import logging

logging.getLogger(__name__)
mmap_counter = 0

def load_image(path, use_memory_mapping=False):
    """Load in any image or image series from a path

    Args:
        path (str): path to image or directory for a series
        use_memory_mapping (bool, optional): [description]. Hold large datasets on disk instead of in memory.

    Returns:
        np.Array: image in numpy format
    """


    image_name = ntpath.basename(path)
    image_as_array = None
    if os.path.isdir(path): # if its a directory its a dicom series or something similar
        series = load_series(path, use_memory_mapping=use_memory_mapping)
        return series
    
    extension = image_name.split(".")[-1]   # i want it to be the last thing
    if extension == "dcm":    # for loading an individual (2d) dicom
        try:
            image = sitk.ReadImage(path)
            image_as_array = sitk.GetArrayFromImage(image)
            image_as_array = image_as_array[0]  # only the first one since this is a 2d dico
            if config.rescale:
                if config.rescale_options["method"] == "per_image":
                    max_value = np.max(image_as_array)
                    min_value = np.min(image_as_array)
                    image_as_array = rescale_image(image_as_array, max_value, min_value)
                elif config.rescale_options["method"] == "from_dtype":
                    if int(image.GetMetaData("0028|0103")) == 0:    # unsigned int
                        max_value = 2 ** int(image.GetMetaData("0028|0101")) - 1 # http://dicom.nema.org/medical/dicom/current/output/chtml/part03/sect_C.7.6.3.html
                        min_value = 0
                        image_as_array = rescale_image(image_as_array, max_value, min_value)
                    elif int(image.GetMetaData("0028|0103")) == 1:  # signed int
                        bits_stored = int(image.GetMetaData("0028|0101"))
                        min_value = (2 ** (bits_stored - 1))
                        max_value = (2 ** (bits_stored - 1)) - 1
                        image_as_array = rescale_image(image_as_array, max_value, min_value)
                else:
                    print("Pixel Representation not 0 or 1!")   # TODO: throw error
                    exit(0)

        except RuntimeError:
            if config.suppress_errors:
                print(f"Error reading Dicom! {path} must be non-image.")
            else:
                raise

    elif extension == "npy" or extension == "npz":
        image_as_array = np.load(path)
        image_as_array = rescale_opencv_image(image_as_array)
    elif extension == "gz" or extension == "nii":   # nifti files
        # https://brainder.org/2012/09/23/the-nifti-file-format/
        image = sitk.ReadImage(path)    
        image_as_array = sitk.GetArrayFromImage(image)
        if config.rescale and config.rescale_options["method"] == "from_dtype":
            datatype = int(image.GetMetaData("datatype"))
            if datatype == 2:
                image_as_array = image_as_array.astype(np.uint8)
            elif datatype == 4:
                image_as_array = image_as_array.astype(np.int16)
            elif datatype == 16:
                image_as_array = image_as_array.astype(np.int32)
            elif datatype == 512:
                image_as_array = image_as_array.astype(np.uint16)
            else:
                print(f"MedPicPy currently doesn't support datatype {datatype} of nii.gz, make a pull request.")
                exit(0)
        if config.rescale and config.rescale_options["method"] == "per_image":
            image_as_array = rescale_opencv_image(image_as_array)
    else:
        image_as_array = cv2.imread(path, cv2.IMREAD_UNCHANGED)
        if image_as_array is None:  # opencv couldn't read it, maybe sitk can
            try:
                image = sitk.ReadImage(path)    
                image_as_array = sitk.GetArrayFromImage(image)  # SITK also can't read it
            except RuntimeError:
                logging.debug(f"Suppressing sitk not being able to read file: {path}")  
                if config.suppress_errors:  # if we've got here then SITK and OpenCV can't read the image so return None. 
                    return None
                else:
                    raise
        image_as_array = rescale_opencv_image(image_as_array)

    if use_memory_mapping and image_as_array is not None:
        mmap_name = get_counter_and_update()
        mmap = np.memmap(mmap_name, dtype=np.float32, mode="w+", shape=image_as_array.shape)
        mmap[:] = image_as_array[:]
        return mmap
    else:
        return image_as_array

def np_dtype_max_and_min(dtype):
    """Takes a numpy dytpe and 
    returns the maximum and minimum possible values 
    for that dtype. 

    Args:
        dtype (numpy dtype): dtype to find max and min values

    Returns:
        (number, number): max and min value for given dtype, can be float or int
    """
    if np.issubdtype(dtype, np.integer):
        #int things
        int_info = np.iinfo(dtype)
        max_val = int_info.max
        min_val = int_info.min
        return max_val, min_val
    elif np.issubdtype(dtype, np.floating):
        #float things
        float_info = np.finfo(dtype)
        max_val = float_info.max
        min_val = float_info.min
        return max_val, min_val
    else:
        #TODO: raise exception
        pass
    
def rescale_opencv_image(image_as_numpy):
    """Rescale an image that has been loaded 
    using opencv. This loads the images directly into 
    numpy format. 

    Args:
        image_as_numpy (np array): numpy array containing image data

    Returns:
        np array: rescaled image
    """
    if config.rescale_options["method"] == "per_image":
        max_value = np.max(image_as_numpy)
        min_value = np.min(image_as_numpy)

        if max_value == min_value:
            return np.zeros_like(image_as_numpy)    # avoid dividing by zero
        else:
            return rescale_image(image_as_numpy, max_value, min_value)
    elif config.rescale_options["method"] == "from_dtype":
        shape = image_as_numpy.shape
        if len(shape) == 3:
            # We know its 3D but not 3 Channel because 3 Channel isn't allowed. 
            return rescale_opencv_image_3d(image_as_numpy)

        depth = image_as_numpy.dtype
        max_value, min_value = np_dtype_max_and_min(depth)
        return rescale_image(image_as_numpy, max_value, min_value)

def rescale_opencv_image_3d(image_as_numpy):
    """Rescale an image with more than 
    2-dimensions.

    Args:
        image_as_numpy (np array): numpy array containing image data

    Returns:
        np array: rescaled 3d image
    """
    rescaled_arr = np.zeros(image_as_numpy.shape, dtype=np.float32)
    for i in range(len(image_as_numpy)):        
        rescaled_arr[i] = rescale_opencv_image(image_as_numpy[i])
    return rescaled_arr

def rescale_image(image,
    upper_bound,
    lower_bound):
    """General feature scaling function. Takes an 
    image to rescale and the upper and lower possible 
    value for that image data. 

    Args:
        image (np array): array containing image data
        upper_bound (int): max possible pixel value in image
        lower_bound (int): min possible pixel value in image

    Returns:
        np.array: rescaled image
    """
    top = (image - lower_bound) * (config.rescale_options["max"] - config.rescale_options["min"])
    final = (top / (upper_bound - lower_bound)) + config.rescale_options["min"]
    return final + config.rescale_options["min"]


def load_series(path, use_memory_mapping=False): # for more than 2d dicoms. 
    """Load an image series from a directory(e.g. dicom)

    Args:
        path (str): Path to directory of series
        use_memory_mapping (bool, optional): [description]. Hold large datasets on disk instead of in memory.

    Returns:
        np.Array: Image in numpy array format
    """
    series_reader = sitk.ImageSeriesReader()
    file_names = series_reader.GetGDCMSeriesFileNames(path)
    series_reader.SetFileNames(file_names)
    image = series_reader.Execute()
    array = sitk.GetArrayFromImage(image)

    if use_memory_mapping:
        mmap_name = get_counter_and_update()
        mmap = np.memmap(mmap_name, dtype=np.float32, mode="w+", shape=array.shape)
        mmap[:] = array[:]
        return mmap
    else:
        return array

def allocate_array(shape, use_memory_mapping=False):
    """Allocates space for a numpy array, and abstracts over memory mapping.

    Args:
        shape (tuple): shape of numpy array
        use_memory_mapping (bool, optional): Store array on disk instead of in memory. Defaults to False.

    Returns:
        np.array: numpy array with given shape. 
    """
    if use_memory_mapping:
        mmap_name = get_counter_and_update()
        mmap = np.memmap(mmap_name, dtype=np.float32, mode="w+", shape=shape)
        return mmap
    else:
        return np.zeros(shape)

def get_counter_and_update():
    global mmap_counter
    val = mmap_counter
    path = config.cache_location + "/medpicpy_cache/" + str(val) + ".dat"
    mmap_counter += 1
    return path

Functions

def allocate_array(shape, use_memory_mapping=False)

Allocates space for a numpy array, and abstracts over memory mapping.

Args

shape : tuple: shape of numpy array
use_memory_mapping : bool, optional: Store array on disk instead of in memory. Defaults to False.

Returns

np.array: numpy array with given shape.

Expand source code

def allocate_array(shape, use_memory_mapping=False):
    """Allocates space for a numpy array, and abstracts over memory mapping.

    Args:
        shape (tuple): shape of numpy array
        use_memory_mapping (bool, optional): Store array on disk instead of in memory. Defaults to False.

    Returns:
        np.array: numpy array with given shape. 
    """
    if use_memory_mapping:
        mmap_name = get_counter_and_update()
        mmap = np.memmap(mmap_name, dtype=np.float32, mode="w+", shape=shape)
        return mmap
    else:
        return np.zeros(shape)

def get_counter_and_update()

Expand source code

def get_counter_and_update():
    global mmap_counter
    val = mmap_counter
    path = config.cache_location + "/medpicpy_cache/" + str(val) + ".dat"
    mmap_counter += 1
    return path

def load_image(path, use_memory_mapping=False)

Load in any image or image series from a path

Args

path : str: path to image or directory for a series
use_memory_mapping : bool, optional: [description]. Hold large datasets on disk instead of in memory.

Returns

np.Array: image in numpy format

Expand source code

def load_image(path, use_memory_mapping=False):
    """Load in any image or image series from a path

    Args:
        path (str): path to image or directory for a series
        use_memory_mapping (bool, optional): [description]. Hold large datasets on disk instead of in memory.

    Returns:
        np.Array: image in numpy format
    """


    image_name = ntpath.basename(path)
    image_as_array = None
    if os.path.isdir(path): # if its a directory its a dicom series or something similar
        series = load_series(path, use_memory_mapping=use_memory_mapping)
        return series
    
    extension = image_name.split(".")[-1]   # i want it to be the last thing
    if extension == "dcm":    # for loading an individual (2d) dicom
        try:
            image = sitk.ReadImage(path)
            image_as_array = sitk.GetArrayFromImage(image)
            image_as_array = image_as_array[0]  # only the first one since this is a 2d dico
            if config.rescale:
                if config.rescale_options["method"] == "per_image":
                    max_value = np.max(image_as_array)
                    min_value = np.min(image_as_array)
                    image_as_array = rescale_image(image_as_array, max_value, min_value)
                elif config.rescale_options["method"] == "from_dtype":
                    if int(image.GetMetaData("0028|0103")) == 0:    # unsigned int
                        max_value = 2 ** int(image.GetMetaData("0028|0101")) - 1 # http://dicom.nema.org/medical/dicom/current/output/chtml/part03/sect_C.7.6.3.html
                        min_value = 0
                        image_as_array = rescale_image(image_as_array, max_value, min_value)
                    elif int(image.GetMetaData("0028|0103")) == 1:  # signed int
                        bits_stored = int(image.GetMetaData("0028|0101"))
                        min_value = (2 ** (bits_stored - 1))
                        max_value = (2 ** (bits_stored - 1)) - 1
                        image_as_array = rescale_image(image_as_array, max_value, min_value)
                else:
                    print("Pixel Representation not 0 or 1!")   # TODO: throw error
                    exit(0)

        except RuntimeError:
            if config.suppress_errors:
                print(f"Error reading Dicom! {path} must be non-image.")
            else:
                raise

    elif extension == "npy" or extension == "npz":
        image_as_array = np.load(path)
        image_as_array = rescale_opencv_image(image_as_array)
    elif extension == "gz" or extension == "nii":   # nifti files
        # https://brainder.org/2012/09/23/the-nifti-file-format/
        image = sitk.ReadImage(path)    
        image_as_array = sitk.GetArrayFromImage(image)
        if config.rescale and config.rescale_options["method"] == "from_dtype":
            datatype = int(image.GetMetaData("datatype"))
            if datatype == 2:
                image_as_array = image_as_array.astype(np.uint8)
            elif datatype == 4:
                image_as_array = image_as_array.astype(np.int16)
            elif datatype == 16:
                image_as_array = image_as_array.astype(np.int32)
            elif datatype == 512:
                image_as_array = image_as_array.astype(np.uint16)
            else:
                print(f"MedPicPy currently doesn't support datatype {datatype} of nii.gz, make a pull request.")
                exit(0)
        if config.rescale and config.rescale_options["method"] == "per_image":
            image_as_array = rescale_opencv_image(image_as_array)
    else:
        image_as_array = cv2.imread(path, cv2.IMREAD_UNCHANGED)
        if image_as_array is None:  # opencv couldn't read it, maybe sitk can
            try:
                image = sitk.ReadImage(path)    
                image_as_array = sitk.GetArrayFromImage(image)  # SITK also can't read it
            except RuntimeError:
                logging.debug(f"Suppressing sitk not being able to read file: {path}")  
                if config.suppress_errors:  # if we've got here then SITK and OpenCV can't read the image so return None. 
                    return None
                else:
                    raise
        image_as_array = rescale_opencv_image(image_as_array)

    if use_memory_mapping and image_as_array is not None:
        mmap_name = get_counter_and_update()
        mmap = np.memmap(mmap_name, dtype=np.float32, mode="w+", shape=image_as_array.shape)
        mmap[:] = image_as_array[:]
        return mmap
    else:
        return image_as_array

def load_series(path, use_memory_mapping=False)

Load an image series from a directory(e.g. dicom)

Args

path : str: Path to directory of series
use_memory_mapping : bool, optional: [description]. Hold large datasets on disk instead of in memory.

Returns

np.Array: Image in numpy array format

Expand source code

def load_series(path, use_memory_mapping=False): # for more than 2d dicoms. 
    """Load an image series from a directory(e.g. dicom)

    Args:
        path (str): Path to directory of series
        use_memory_mapping (bool, optional): [description]. Hold large datasets on disk instead of in memory.

    Returns:
        np.Array: Image in numpy array format
    """
    series_reader = sitk.ImageSeriesReader()
    file_names = series_reader.GetGDCMSeriesFileNames(path)
    series_reader.SetFileNames(file_names)
    image = series_reader.Execute()
    array = sitk.GetArrayFromImage(image)

    if use_memory_mapping:
        mmap_name = get_counter_and_update()
        mmap = np.memmap(mmap_name, dtype=np.float32, mode="w+", shape=array.shape)
        mmap[:] = array[:]
        return mmap
    else:
        return array

def np_dtype_max_and_min(dtype)

Takes a numpy dytpe and returns the maximum and minimum possible values for that dtype.

Args

dtype : numpy dtype: dtype to find max and min values

Returns

(number, number): max and min value for given dtype, can be float or int

Expand source code

def np_dtype_max_and_min(dtype):
    """Takes a numpy dytpe and 
    returns the maximum and minimum possible values 
    for that dtype. 

    Args:
        dtype (numpy dtype): dtype to find max and min values

    Returns:
        (number, number): max and min value for given dtype, can be float or int
    """
    if np.issubdtype(dtype, np.integer):
        #int things
        int_info = np.iinfo(dtype)
        max_val = int_info.max
        min_val = int_info.min
        return max_val, min_val
    elif np.issubdtype(dtype, np.floating):
        #float things
        float_info = np.finfo(dtype)
        max_val = float_info.max
        min_val = float_info.min
        return max_val, min_val
    else:
        #TODO: raise exception
        pass

def rescale_image(image, upper_bound, lower_bound)

General feature scaling function. Takes an image to rescale and the upper and lower possible value for that image data.

Args

image : np array: array containing image data
upper_bound : int: max possible pixel value in image
lower_bound : int: min possible pixel value in image

Returns

np.array: rescaled image

Expand source code

def rescale_image(image,
    upper_bound,
    lower_bound):
    """General feature scaling function. Takes an 
    image to rescale and the upper and lower possible 
    value for that image data. 

    Args:
        image (np array): array containing image data
        upper_bound (int): max possible pixel value in image
        lower_bound (int): min possible pixel value in image

    Returns:
        np.array: rescaled image
    """
    top = (image - lower_bound) * (config.rescale_options["max"] - config.rescale_options["min"])
    final = (top / (upper_bound - lower_bound)) + config.rescale_options["min"]
    return final + config.rescale_options["min"]

def rescale_opencv_image(image_as_numpy)

Rescale an image that has been loaded using opencv. This loads the images directly into numpy format.

Args

image_as_numpy : np array: numpy array containing image data

Returns

np array: rescaled image

Expand source code

def rescale_opencv_image(image_as_numpy):
    """Rescale an image that has been loaded 
    using opencv. This loads the images directly into 
    numpy format. 

    Args:
        image_as_numpy (np array): numpy array containing image data

    Returns:
        np array: rescaled image
    """
    if config.rescale_options["method"] == "per_image":
        max_value = np.max(image_as_numpy)
        min_value = np.min(image_as_numpy)

        if max_value == min_value:
            return np.zeros_like(image_as_numpy)    # avoid dividing by zero
        else:
            return rescale_image(image_as_numpy, max_value, min_value)
    elif config.rescale_options["method"] == "from_dtype":
        shape = image_as_numpy.shape
        if len(shape) == 3:
            # We know its 3D but not 3 Channel because 3 Channel isn't allowed. 
            return rescale_opencv_image_3d(image_as_numpy)

        depth = image_as_numpy.dtype
        max_value, min_value = np_dtype_max_and_min(depth)
        return rescale_image(image_as_numpy, max_value, min_value)

def rescale_opencv_image_3d(image_as_numpy)

Rescale an image with more than 2-dimensions.

Args

image_as_numpy : np array: numpy array containing image data

Returns

np array: rescaled 3d image

Expand source code

def rescale_opencv_image_3d(image_as_numpy):
    """Rescale an image with more than 
    2-dimensions.

    Args:
        image_as_numpy (np array): numpy array containing image data

    Returns:
        np array: rescaled 3d image
    """
    rescaled_arr = np.zeros(image_as_numpy.shape, dtype=np.float32)
    for i in range(len(image_as_numpy)):        
        rescaled_arr[i] = rescale_opencv_image(image_as_numpy[i])
    return rescaled_arr