"""
Load data for a chest CT scan from both dicom and radiologist xml files.
"""
from __future__ import division, print_function
from collections import namedtuple
import math
import warnings
import numpy as np
import scipy.ndimage
import lcat.loading.annotations
import lcat.loading.images
# Scan datatype
Scan = namedtuple('Scan', ['patient_id', 'voxels', 'nodules', 'unit_cell'])
[docs]def load_scan(scan_folder, cubify=False):
"""
Loads the CT scan as a 3d voxel array, then loads the segmentation in the given dicom_folder by
reading any xml files located in the folder and referencing dicom files as necessary. Returns a
3D mask with the same dimensions as the CT scan.
TODO: Combine nodules referring to the same entity. Unfortunately there is currently no unique
ID for each nodule, meaning that multiple radiologist reads result in multiple almost-identical
nodules in the scan metadata.
"""
# Load the CT scan
patient_id, voxels, unit_cell, sop_instance_uids = lcat.loading.images.load_folder(scan_folder)
# Load all segmentations
nodules = lcat.loading.annotations.load_radiologist_annotations(scan_folder, voxels.shape,
sop_instance_uids)
# Convert to scan datatype
scan = Scan(patient_id, voxels, nodules, unit_cell)
# Cubify if necessary
if cubify:
scan = cubify_scan(scan)
return scan
[docs]def cubify_scan(scan):
"""
Given a scan, interpolate the data to make the unit cell cubic. The dimension(s) with the
smallest magnitude(s) in the unit cell will remain the same.
"""
# Calculate scaling factors
scaling_factors = get_scaling_factors(scan)
# Calculate new unit cell size
new_unit_cell = [step / factor for step, factor in zip(scan.unit_cell, scaling_factors)]
# Rescale the voxels
new_voxels = zoom_array(scan.voxels, scaling_factors)
# Create nodules placeholder
new_nodules = []
# For each nodule
for nodule in scan.nodules:
# Perform nodule mask interpolation
new_nodules.append(rescale_nodule(nodule, scaling_factors))
# Return new Scan
return Scan(scan.patient_id, new_voxels, new_nodules, new_unit_cell)
[docs]def get_scaling_factors(scan):
"""
Returns the scaling factors for the given scan.
"""
# Calculate the smallest step in unit_cell
minimum_step = min(scan.unit_cell)
# Calculate scaling factors
scaling_factors = [step / minimum_step for step in scan.unit_cell]
return scaling_factors
[docs]def zoom_array(array, factor, mode='nearest'):
"""
Rescale the given `array` along each axis by `factor`. If `factor` is a sequence, each axis will
be zoomed by its corresponding factor in `factor`.
"""
with warnings.catch_warnings():
# Filter warnings we can't fix
warnings.filterwarnings("ignore", message="From scipy 0.13.0, the output shape of zoom() "
"is calculated with round() instead of int() - "
"for these inputs the size of the returned array "
"has changed.")
# Return zoomed image
return scipy.ndimage.zoom(array, factor, mode=mode)
[docs]def rescale_nodule(nodule, scaling_factors):
"""
This interpolation process is trickier than the voxel interpolation process, because we're not
only rescaling the mask, we're also moving the origin of the mask. In order to properly
perform this interpolation, we perform the following steps:
(1) Find the scaled origin as real numbers (not rounded)
(2) Find the scaled anti-origin (opposite corner) as real numbers (not rounded)
(3) Count the number of scaled cells along each dimension for mask
(4) Convert rounded extents to original space
(5) Perform rescaling by axis coordinates
"""
# Load old extents
old_starts = nodule.origin
old_ends = [start + dim for start, dim in zip(old_starts, nodule.mask.shape)]
# Calculate new extents
new_starts = [start * factor for start, factor in zip(old_starts, scaling_factors)]
new_ends = [end * factor for end, factor in zip(old_ends, scaling_factors)]
# Calculate discrete extents
discrete_starts = [int(math.floor(start)) for start in new_starts]
discrete_ends = [int(math.ceil(end)) for end in new_ends]
new_shape = [end - start for start, end in zip(discrete_starts, discrete_ends)]
# Convert to original space
unscaled_starts = [start / factor for start, factor in zip(discrete_starts, scaling_factors)]
unscaled_ends = [end / factor for end, factor in zip(discrete_ends, scaling_factors)]
# Generate interpolating points in original space
axis_coordinates = [np.linspace(unscaled_start - start, unscaled_end - start, dim)
for start, unscaled_start, unscaled_end, dim
in zip(old_starts, unscaled_starts, unscaled_ends, new_shape)]
# Perform interpolation
new_mask = interpolate_array_by_axis(nodule.mask, axis_coordinates, output=float,
mode='nearest')
# Convert to binary array
new_mask = new_mask > 0.5
# Return rescaled nodule
return lcat.loading.annotations.Nodule(nodule.nodule_id, nodule.characteristics,
discrete_starts, new_mask)
[docs]def interpolate_array(array, new_shape, output=None, mode='nearest'):
"""
Given an array and a target shape, perform an orthogonal transformation to the new shape using
a spline interpolation.
"""
# Generate mapping by axis
axis_coordinates = [np.linspace(0, old_dim, new_dim)
for old_dim, new_dim in zip(array.shape, new_shape)]
return interpolate_array_by_axis(array, axis_coordinates, output=output, mode=mode)
[docs]def interpolate_array_by_axis(array, axis_coordinates, output=None, mode='nearest'):
# Expand axis coordinates
coordinates = np.meshgrid(*axis_coordinates)
# Perform interpolation
# Note that mode only applies to input boundaries, the interpolation itself is a spline
return scipy.ndimage.map_coordinates(array, coordinates, output=output, mode=mode)