# Source module: lcat.loading.scans

"""
Load data for a chest CT scan from both dicom and radiologist xml files.
"""
from __future__ import division, print_function
from collections import namedtuple
import math
import warnings

import numpy as np
import scipy.ndimage

import lcat.loading.annotations
import lcat.loading.images


# Scan datatype.  Fields:
#   patient_id - identifier returned by lcat.loading.images.load_folder
#   voxels     - 3D array of CT intensities
#   nodules    - list of nodule annotations (see lcat.loading.annotations)
#   unit_cell  - per-axis physical step size of one voxel
Scan = namedtuple('Scan', ['patient_id', 'voxels', 'nodules', 'unit_cell'])


def load_scan(scan_folder, cubify=False):
    """
    Load a chest CT scan from `scan_folder`.

    Reads the dicom files into a 3D voxel array, then parses any radiologist
    xml files in the same folder (cross-referencing the dicom files as
    needed) to build the nodule annotations.

    Parameters:
        scan_folder: path to the folder holding the dicom and xml files.
        cubify: when True, resample the result so the unit cell is cubic.

    Returns:
        A `Scan` namedtuple (patient_id, voxels, nodules, unit_cell).

    TODO: Combine nodules referring to the same entity.  There is currently
    no unique ID for each nodule, so multiple radiologist reads produce
    multiple almost-identical nodules in the scan metadata.
    """
    # Voxel data plus the per-slice identifiers needed to match annotations
    patient_id, voxels, unit_cell, sop_instance_uids = \
        lcat.loading.images.load_folder(scan_folder)

    # Radiologist segmentations, aligned to the voxel grid
    nodules = lcat.loading.annotations.load_radiologist_annotations(
        scan_folder, voxels.shape, sop_instance_uids)

    scan = Scan(patient_id, voxels, nodules, unit_cell)

    if cubify:
        return cubify_scan(scan)
    return scan
def cubify_scan(scan):
    """
    Return a copy of `scan` resampled so that its unit cell is cubic.

    The axis (or axes) whose unit-cell step is already the smallest keep
    their resolution; every other axis is interpolated up to match, and
    each nodule mask is rescaled and relocated accordingly.
    """
    factors = get_scaling_factors(scan)

    # Dividing each step by its factor makes every step equal to the minimum
    cubic_cell = [step / factor for step, factor in zip(scan.unit_cell, factors)]

    resampled_voxels = zoom_array(scan.voxels, factors)

    # Each nodule mask needs its own interpolation (its origin moves too)
    resampled_nodules = [rescale_nodule(nodule, factors) for nodule in scan.nodules]

    return Scan(scan.patient_id, resampled_voxels, resampled_nodules, cubic_cell)
def get_scaling_factors(scan):
    """
    Compute the per-axis zoom factors that make `scan`'s unit cell cubic.

    Each factor is the ratio of that axis's step to the smallest step, so
    every factor is >= 1 and the finest axis gets a factor of exactly 1.
    """
    finest_step = min(scan.unit_cell)
    return [step / finest_step for step in scan.unit_cell]
def zoom_array(array, factor, mode='nearest'):
    """
    Zoom `array` by `factor` via scipy's spline interpolation.

    `factor` may be a scalar (applied to every axis) or a sequence with one
    entry per axis.  `mode` controls how values beyond the input boundary
    are extrapolated.
    """
    with warnings.catch_warnings():
        # scipy emits this for inputs we cannot change; suppress it so the
        # caller's log stays clean.
        warnings.filterwarnings("ignore",
                                message="From scipy 0.13.0, the output shape of zoom() "
                                        "is calculated with round() instead of int() - "
                                        "for these inputs the size of the returned array "
                                        "has changed.")
        return scipy.ndimage.zoom(array, factor, mode=mode)
def rescale_nodule(nodule, scaling_factors):
    """
    Rescale a nodule's mask by `scaling_factors`, relocating its origin.

    Trickier than the plain voxel interpolation because the mask's origin
    moves as well as its contents.  The steps:

      (1) scale the origin and the opposite corner as real numbers,
      (2) round them outward to whole voxels, giving the new mask shape,
      (3) map the rounded extents back into the original index space,
      (4) sample the old mask at those per-axis coordinates.
    """
    # Extents of the mask in the original voxel grid
    old_starts = nodule.origin
    old_ends = [origin + size for origin, size in zip(old_starts, nodule.mask.shape)]

    # (1)/(2) Scaled extents as real numbers
    scaled_starts = [origin * factor for origin, factor in zip(old_starts, scaling_factors)]
    scaled_ends = [end * factor for end, factor in zip(old_ends, scaling_factors)]

    # (3) Round outward so the new mask fully covers the scaled nodule
    grid_starts = [int(math.floor(value)) for value in scaled_starts]
    grid_ends = [int(math.ceil(value)) for value in scaled_ends]
    grid_shape = [end - start for start, end in zip(grid_starts, grid_ends)]

    # (4) Rounded extents expressed back in the original index space
    back_starts = [start / factor for start, factor in zip(grid_starts, scaling_factors)]
    back_ends = [end / factor for end, factor in zip(grid_ends, scaling_factors)]

    # Sampling coordinates are relative to the old mask's own origin
    axis_coordinates = [np.linspace(lo - origin, hi - origin, count)
                        for origin, lo, hi, count
                        in zip(old_starts, back_starts, back_ends, grid_shape)]

    resampled = interpolate_array_by_axis(nodule.mask, axis_coordinates,
                                          output=float, mode='nearest')

    # Threshold the spline output back to a boolean mask
    binary_mask = resampled > 0.5

    return lcat.loading.annotations.Nodule(nodule.nodule_id, nodule.characteristics,
                                           grid_starts, binary_mask)
def interpolate_array(array, new_shape, output=None, mode='nearest'):
    """
    Resample `array` to `new_shape` using spline interpolation.

    Parameters:
        array: input ndarray.
        new_shape: target shape, one entry per axis of `array`.
        output: optional dtype/array forwarded to map_coordinates.
        mode: boundary handling forwarded to map_coordinates.

    Returns:
        The resampled array with shape `new_shape`.
    """
    # Map each output axis onto the input so the first and last samples land
    # exactly on the first and last input cells.  The previous endpoint of
    # `old_dim` pointed one cell past the valid index range [0, old_dim - 1],
    # so even an identity resize shifted every sample (the overshoot was
    # silently clamped by mode='nearest').
    axis_coordinates = [np.linspace(0, old_dim - 1, new_dim)
                        for old_dim, new_dim in zip(array.shape, new_shape)]

    return interpolate_array_by_axis(array, axis_coordinates, output=output, mode=mode)
def interpolate_array_by_axis(array, axis_coordinates, output=None, mode='nearest'):
    """
    Sample `array` at the cartesian product of the given per-axis coordinates.

    Parameters:
        array: input ndarray.
        axis_coordinates: one 1D coordinate array per axis of `array`.
        output: optional dtype/array forwarded to map_coordinates.
        mode: boundary handling forwarded to map_coordinates; this only
            governs out-of-range input coordinates — the interpolation
            itself is map_coordinates' default spline.

    Returns:
        Array of shape (len(c) for c in axis_coordinates) holding the
        interpolated samples.
    """
    # indexing='ij' keeps matrix (axis-order) indexing.  meshgrid's default
    # 'xy' indexing swaps the first two axes of the coordinate grids, which
    # made map_coordinates return a result transposed in its first two axes
    # for any input of rank >= 2.
    coordinates = np.meshgrid(*axis_coordinates, indexing='ij')

    return scipy.ndimage.map_coordinates(array, coordinates, output=output, mode=mode)