Skip to content

Commit

Permalink
Merge pull request #3 from czyjtu/feature/data-utils
Browse files Browse the repository at this point in the history
Initial
  • Loading branch information
Goader authored Apr 8, 2023
2 parents 59ad812 4015687 commit 696d2c6
Show file tree
Hide file tree
Showing 404 changed files with 3,166 additions and 0 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -127,3 127,4 @@ dmypy.json

# Pyre type checker
.pyre/
.idea/
3 changes: 3 additions & 0 deletions coronaryx/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 1,3 @@
from coronaryx.data import CoronagraphyScan, VesselBranch
from coronaryx.plot import plot_scan, plot_branch
from coronaryx.utils import read_dataset
3 changes: 3 additions & 0 deletions coronaryx/algorithms/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 1,3 @@
from coronaryx.algorithms.traverse import traverse_branch_edges, traverse_branch_nodes
from coronaryx.algorithms.structure import split_into_branches
from coronaryx.algorithms.extraction import sweep
48 changes: 48 additions & 0 deletions coronaryx/algorithms/extraction.py
Original file line number Diff line number Diff line change
@@ -0,0 1,48 @@
import numpy as np
import numpy.typing as npt

import coronaryx.functions as cxf
from coronaryx.data import VesselBranch
from coronaryx.algorithms.traverse import traverse_branch_edges


# TODO do we really need width? maybe we should consider masks too? do we need to pass the extrapolation argument?
# TODO rename to sweep_along_branch?
def sweep(
branch: VesselBranch,
matrix: np.ndarray['n, m', float],
sampling_rate: int = 4,
width: int = 50,
return_roi_positives: bool = False
) -> np.ndarray['n, width', float] | tuple[np.ndarray['n, width', float], np.ndarray['n', float]]:

sweeps = []
positives = []
for v, w in traverse_branch_edges(branch):
v = np.array(v, dtype=np.float32)
w = np.array(w, dtype=np.float32)
# TODO vvv
# we could normalize this and omit `sampling_rate`
# maybe sampling_rate is good? maybe we should get another argument - sampling_strategy
# it will be either 'norm' or 'edge', norm means sampling_rate is times per unit vector,
# and edge is times per edge, which will likely result in inconsistent distances between samples
vessel_direction = w - v
orthonormal_vector = cxf.orthonormal_2d_vector(vessel_direction) # TODO make direction deterministic? is it now?

multipliers = np.arange(width) - width / 2 0.5
sweep_vector = multipliers[:, np.newaxis] * orthonormal_vector

for i in range(sampling_rate):
anchor = v (vessel_direction * i) / sampling_rate
sweep_points = anchor sweep_vector
sweep_cut = cxf.interp_matrix(matrix, sweep_points[:, 0], sweep_points[:, 1])

sweeps.append(sweep_cut)
positives.append(branch.scan.in_any_roi(anchor))

if return_roi_positives:
return np.array(sweeps), np.array(positives)
return np.array(sweeps)


# TODO define alias functions for unraveling, width calculation and so on? should we?
27 changes: 27 additions & 0 deletions coronaryx/algorithms/structure.py
Original file line number Diff line number Diff line change
@@ -0,0 1,27 @@
import networkx as nx

from coronaryx.data import CoronagraphyScan, VesselBranch


def split_into_branches(scan: CoronagraphyScan) -> list[VesselBranch]:
branches = []
branching_nodes = []

G: nx.Graph = scan.centerline.copy()
for node in G.nodes:
if len(G[node]) >= 3:
branching_nodes.append(node)

G.remove_nodes_from(branching_nodes)

for component in nx.connected_components(G):
subgraph = G.subgraph(component).copy()
branches.append(subgraph)

# adding the branching nodes to each branch, if they were connected
for branching_node in branching_nodes:
for bn_neighbor in scan.centerline[branching_node]:
if bn_neighbor in component:
subgraph.add_edge(branching_node, bn_neighbor)

return [VesselBranch(scan, branch) for branch in branches]
15 changes: 15 additions & 0 deletions coronaryx/algorithms/traverse.py
Original file line number Diff line number Diff line change
@@ -0,0 1,15 @@
from typing import Any

import networkx as nx

from coronaryx.data import VesselBranch


def traverse_branch_nodes(branch: VesselBranch) -> list[Any]:
start_node, end_node = branch.boundary_nodes
return list(nx.dfs_preorder_nodes(branch.branch, source=start_node))


def traverse_branch_edges(branch: VesselBranch) -> list[tuple[Any, Any]]:
nodes = traverse_branch_nodes(branch)
return list(zip(nodes[:-1], nodes[1:]))
46 changes: 46 additions & 0 deletions coronaryx/data.py
Original file line number Diff line number Diff line change
@@ -0,0 1,46 @@
from dataclasses import dataclass
from typing import Any

import numpy as np
import numpy.typing as npt
import networkx as nx


@dataclass
class ROI:
start_x: float
start_y: float
end_x: float
end_y: float

def __contains__(self, item: tuple[int, int] | np.ndarray['2', float]) -> bool:
y, x = item
return self.start_x <= x < self.end_x and self.start_y <= y < self.end_y


@dataclass
class CoronagraphyScan:
name: str
scan: np.ndarray['scan_height, scan_width', np.uint8]
vessel_mask: np.ndarray['scan_height, scan_width', bool]
centerline: nx.Graph
rois: list[ROI]

def in_any_roi(self, item: tuple[int, int] | np.ndarray['2', float]) -> bool:
return any([item in roi for roi in self.rois])


@dataclass
class VesselBranch:
scan: CoronagraphyScan
branch: nx.Graph

def __post_init__(self):
# TODO some kind of deterministic sorting? preferably from the vessel start to the end
self._boundary_nodes: tuple[Any, Any] = \
tuple([node for node in self.branch.nodes if len(self.branch[node]) == 1])
assert len(self._boundary_nodes) == 2

@property
def boundary_nodes(self) -> tuple[Any, Any]:
return self._boundary_nodes
58 changes: 58 additions & 0 deletions coronaryx/functions.py
Original file line number Diff line number Diff line change
@@ -0,0 1,58 @@
import numpy as np
import numpy.typing as npt
from scipy.interpolate import interp2d


def interp_matrix(
matrix: np.ndarray,
xs: float | np.ndarray['n', float],
ys: float | np.ndarray['n', float],
kind: str = 'linear'
) -> float | np.ndarray:
"""
Interpolates matrix values and allows using float indices.
:param matrix: 2d numpy ndarray
:param xs: float index or a vector of float indices (first dimension)
:param ys: float index or a vector of float indices (second dimension)
:param kind: interpolation method: {'linear', 'cubic', 'quintic'}
:return: interpolated float value or a vector of interpolated float values
"""

if matrix.ndim != 2:
raise ValueError('unable to process non-2d arrays')

xs = np.array(xs)
ys = np.array(ys)

if xs.shape != ys.shape:
raise ValueError('xs and ys must have the same shape')

x_idxs = np.arange(matrix.shape[0])
y_idxs = np.arange(matrix.shape[1])

# scipy considers x, y in reverse
interp = interp2d(y_idxs, x_idxs, matrix, kind=kind)

if xs.shape == tuple():
return interp(ys, xs)

interpolated = []
for x, y in zip(xs, ys):
interpolated.append(interp(y, x))

# FIXME vectorized way sorts the input, and I don't know how to map it to the original
# interpolated = np.diag(interp(ys, xs)).reshape(xs.shape)

return np.array(interpolated).reshape(xs.shape)


def orthonormal_2d_vector(v: np.ndarray['2', float]) -> np.ndarray['2', float]:
"""
Computes a normalized orthogonal (orthonormal) vector to the specified in the 2d space
:param v: input vector
:return: orthonormal vector to `v`
"""

ov = np.array([v[1], -v[0]], dtype=v.dtype)
return ov / np.linalg.norm(ov)
56 changes: 56 additions & 0 deletions coronaryx/plot.py
Original file line number Diff line number Diff line change
@@ -0,0 1,56 @@
from copy import deepcopy

import numpy as np
import matplotlib.pyplot as plt
import matplotlib.patches as patches

from coronaryx.data import CoronagraphyScan, VesselBranch


def plot_dataset(dataset: list[CoronagraphyScan], figsize: tuple[int, int] = (8, 8)) -> None:
n_items = len(dataset)
n_rows = 10
n_cols = n_items // 10 1
fig, axes = plt.subplots(n_rows, n_cols)

i = 0
for axes_row in axes:
for ax in axes_row:
if i < n_items:
ax.imshow(dataset[i].scan, cmap='gray', extent=None)
else:
ax.imshow(np.zeros_like(dataset[0].scan), cmap='gray', extent=None)
ax.axis("off")
i = 1

fig.subplots_adjust(left=0.05, right=0.95, bottom=0.05, top=0.95, wspace=0.05, hspace=0.05)
fig.set_size_inches(*figsize)
plt.show()


def plot_scan(scan: CoronagraphyScan):
fig, (ax1, ax2) = plt.subplots(ncols=2, figsize=(16, 8))

ax1.imshow(scan.scan, cmap='gray')
ax2.imshow(scan.vessel_mask, cmap='gray')

nodes = np.array(scan.centerline.nodes)
ax2.scatter(nodes[:, 1], nodes[:, 0], s=10, c='blue')

for (x1, y1), (x2, y2) in scan.centerline.edges:
ax2.plot([y1, y2], [x1, x2], c='pink')

for roi in scan.rois:
rect = patches.Rectangle(
(roi.start_x, roi.start_y), roi.end_x - roi.start_x, roi.end_y - roi.start_y,
linewidth=2, edgecolor='r', facecolor='none'
)
ax2.add_patch(rect)

plt.show()


def plot_branch(branch: VesselBranch):
scan_copy = deepcopy(branch.scan)
scan_copy.centerline = branch.branch
plot_scan(scan_copy)
48 changes: 48 additions & 0 deletions coronaryx/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 1,48 @@
from pathlib import Path
import pickle

import numpy as np
from PIL import Image

from coronaryx.data import CoronagraphyScan, ROI


def read_dataset(dataset_dir: str):
dataset_dir = Path(dataset_dir)

items = []
for item_dir in dataset_dir.iterdir():
if not item_dir.is_dir() and not item_dir.name.startswith('seg'):
continue

# scan
with open(item_dir / 'representative_frame.p', 'rb') as f:
scan = pickle.load(f)

# centerline
with open(item_dir / 'centreline.p', 'rb') as f:
centerline = pickle.load(f)

# vessel mask
mask_img = Image.open(item_dir / 'segmentation.png')
mask = np.array(mask_img)[:, :, 0] == 253

# roi
with open(item_dir / 'roi.p', 'rb') as f:
roi_list = pickle.load(f)

rois = []
for d in roi_list:
roi = ROI(d['start']['x'], d['start']['y'], d['end']['x'], d['end']['y'])
rois.append(roi)

# adding
item = CoronagraphyScan(
item_dir.name,
scan,
mask,
centerline,
rois
)
items.append(item)
return items
53 changes: 53 additions & 0 deletions data/data_desc.md
Original file line number Diff line number Diff line change
@@ -0,0 1,53 @@
# File desciption

Each directory contains two files:
1. centreline.p - Pickle file containing the graph representation of the vessel.
2. representative_frame.p - Pickle file containing representative frame.
3. roi.py - Pickle file containing lesion markings.
4. segmentation.png - PNG file containing binary vessel segmentation mask.

# Centreline file

In order to read the Pickle file, use the built-in pickle library in Python. You can use the following code:
```Python
import pickle

with open('centreline.p', 'rb') as file:
vessel_graph = pickle.load(file)
```
This will get you a NetworkX ([another Python library for graph manipulation](https://networkx.org/)) graph object. The graph's nodes are points on the vessel centreline. To obtain the centreline, you will have to connect the dots :). You can do this simply and less accurately - by drawing straight lines between the nodes - or by employing some kind of an interpolation method.

The graphs were generated automatically. They were also automatically cleaned of small branches and detached segments. They can contain some erroneous nodes and edges. If you encounter any issues, please report them back. This will help me develop a better version of the algorithm.

# Representative frame
Similarly to the centreline file, this one also has to be opened via the pickle library. It contains a numpy array with the frame on top of which the vessel segmentation was performed. Read the file the same way you read the centreline file.

# Stenosis marking
Again, a pickle file, containing a Python array with stenosis markings. Each array element is a region of interest designated by a start (upper left) and end point (lower right). It may contain additional attributes under the *form* key. These attributes are used for the calculation of [SyntaxScore](https://www.syntaxscore.org/). If the ROI array is empty, then there was no ROI marked. An example array:
```JSON
[ {
"start" : {
"x" : 158.89999389648438,
"y" : 122.59999084472656
},
"end" : {
"x" : 214.89999389648438,
"y" : 170.59999084472656
},
"form" : {
"bifurcation" : true,
"bifurcationType" : "B",
"bifurcationAngle" : "LT70"
},
"thumbnail" : "..."
} ]
```

# Segmentation file
As mentioned above, this is a PNG file that contains a binary mask. This is the segmentation of the vessel structure. You can use matplotlib to read it into an numpy array:
```Python
import maplotlib.pyplot as plt

segmentation = plt.imread('segmentation.png')
```
The segmentations all have dimensions of 512x512 pixels and were hand-drawn by medical specialists. **It is important to note that multiple MDs might have prepared the segmentation of a single vessel file!** There is a slight chance that some masks were used for testing and do not properly depict the vessel structure. However, I did my best to eliminate them (hence the missing numbers in the 'seg' folders). I hope the data works for you!
Binary file added data/seg0/centreline.p
Binary file not shown.
Binary file added data/seg0/representative_frame.p
Binary file not shown.
1 change: 1 addition & 0 deletions data/seg0/roi.p
Original file line number Diff line number Diff line change
@@ -0,0 1 @@
�]�.
Binary file added data/seg0/segmentation.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added data/seg1/centreline.p
Binary file not shown.
Binary file added data/seg1/representative_frame.p
Binary file not shown.
Binary file added data/seg1/roi.p
Binary file not shown.
Binary file added data/seg1/segmentation.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added data/seg100/centreline.p
Binary file not shown.
Binary file added data/seg100/representative_frame.p
Binary file not shown.
1 change: 1 addition & 0 deletions data/seg100/roi.p
Original file line number Diff line number Diff line change
@@ -0,0 1 @@
�]�.
Binary file added data/seg100/segmentation.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added data/seg101/centreline.p
Binary file not shown.
Binary file added data/seg101/representative_frame.p
Binary file not shown.
1 change: 1 addition & 0 deletions data/seg101/roi.p
Original file line number Diff line number Diff line change
@@ -0,0 1 @@
�]�.
Binary file added data/seg101/segmentation.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added data/seg102/centreline.p
Binary file not shown.
Binary file added data/seg102/representative_frame.p
Binary file not shown.
1 change: 1 addition & 0 deletions data/seg102/roi.p
Original file line number Diff line number Diff line change
@@ -0,0 1 @@
�]�.
Binary file added data/seg102/segmentation.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added data/seg103/centreline.p
Binary file not shown.
Binary file added data/seg103/representative_frame.p
Binary file not shown.
1 change: 1 addition & 0 deletions data/seg103/roi.p
Original file line number Diff line number Diff line change
@@ -0,0 1 @@
�]�.
Binary file added data/seg103/segmentation.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added data/seg104/centreline.p
Binary file not shown.
Binary file added data/seg104/representative_frame.p
Binary file not shown.
1 change: 1 addition & 0 deletions data/seg104/roi.p
Original file line number Diff line number Diff line change
@@ -0,0 1 @@
�]�.
Binary file added data/seg104/segmentation.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added data/seg105/centreline.p
Binary file not shown.
Binary file added data/seg105/representative_frame.p
Binary file not shown.
1 change: 1 addition & 0 deletions data/seg105/roi.p
Original file line number Diff line number Diff line change
@@ -0,0 1 @@
�]�.
Binary file added data/seg105/segmentation.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added data/seg106/centreline.p
Binary file not shown.
Binary file added data/seg106/representative_frame.p
Binary file not shown.
1 change: 1 addition & 0 deletions data/seg106/roi.p
Original file line number Diff line number Diff line change
@@ -0,0 1 @@
�]�.
Binary file added data/seg106/segmentation.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added data/seg107/centreline.p
Binary file not shown.
Binary file added data/seg107/representative_frame.p
Binary file not shown.
1 change: 1 addition & 0 deletions data/seg107/roi.p
Original file line number Diff line number Diff line change
@@ -0,0 1 @@
�]�.
Binary file added data/seg107/segmentation.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added data/seg108/centreline.p
Binary file not shown.
Binary file added data/seg108/representative_frame.p
Binary file not shown.
Loading

0 comments on commit 696d2c6

Please sign in to comment.