Source code for thesis.workflows.hcp.operations.extraction

"""ROI extraction from label maps.

NOTE: The main function ``extract_rois_task`` runs inside a Nipype Function node
(potentially in a separate process), so the thesis logger is not available at
runtime. ``sys.stderr.write()`` is used for progress messages instead.
"""


[docs] def extract_rois_task( roi_file: str, label_file: str, waypoint_labels: dict, output_dir: str, hemisphere: str = "both", ) -> tuple[str, str, str, str, str]: """Extract ROI masks from a label map and assemble workflow-ready outputs. Args: roi_file: Input label-map image path. label_file: Optional CSV or whitespace-delimited label mapping file. waypoint_labels: ROI extraction specification keyed by ROI name. May contain hemisphere-specific keys (``left_label_name`` etc.) that this task resolves at runtime via ``hemisphere``. output_dir: Output directory for generated masks and waypoint list files. hemisphere: ``"left"``, ``"right"``, or ``"both"`` (default). When ``waypoint_labels`` entries declare hemisphere-specific labels, this selects which side(s) to extract; entries without hemisphere-specific fields pass through unchanged. Returns: Tuple of ``(seed, waypoints_file, stop_mask, avoid_mask, target_mask)``. Raises: FileNotFoundError: If required FSL commands are unavailable or inputs are missing. RuntimeError: If an FSL ROI extraction command fails. """ import sys from pathlib import Path from typing import Dict, List from thesis.workflows.hcp.common import filter_waypoint_labels_by_hemisphere from thesis.workflows.hcp.operations._fsl import run_fsl_command waypoint_labels = filter_waypoint_labels_by_hemisphere(waypoint_labels, hemisphere) # Per-hemisphere subdir so that parallel left/right iterations of the same # extractor MapNode do not race on identically-named output files # (e.g. rois_synthseg/hemisphere_avoid.nii.gz). "both" keeps the legacy # flat layout. out_path = Path(output_dir) if hemisphere in ("left", "right"): out_path = out_path / hemisphere out_path.mkdir(parents=True, exist_ok=True) needs_csv = any( info.get("label_name") or info.get("label_names") for info in waypoint_labels.values() ) mapping: Dict[str, str] = {} if needs_csv and label_file: import csv with open(label_file, "r", encoding="utf-8") as handle: raw_text = handle.read() lines = [line.strip() for line in raw_text.splitlines() if line.strip()] if lines: first_line = lines[0] if "Name" in first_line and "LabelValue" in first_line: reader = csv.DictReader(raw_text.splitlines()) for row in reader: if "Name" in row and "LabelValue" in row: mapping[row["Name"]] = row["LabelValue"] else: for line in lines: parts = line.split(None, 1) if len(parts) != 2: continue parsed_value, parsed_name = parts if parsed_value.lstrip("-").isdigit(): mapping[parsed_name.strip()] = parsed_value.strip() def extract_multi_label(values: list[int], output_mask: str) -> None: """Create a binary mask covering all listed label integer values. Single nibabel pass: loads the label map once, builds the union of the requested label values via ``np.isin``, and writes a binarized (0/1) mask that preserves the source image's affine, header, and on-disk dtype. Replaces the previous 2N-1 ``fslmaths`` subprocess chain while producing an identical result. """ import nibabel as nib import numpy as np img = nib.load(str(roi_file)) data = np.asanyarray(img.dataobj) # type: ignore[attr-defined] mask = np.isin(data, values).astype(data.dtype) out_img = nib.Nifti1Image(mask, img.affine, img.header) # type: ignore[attr-defined] nib.save(out_img, output_mask) outputs: Dict[str, List[str]] = { "seed": [], "waypoint": [], "stop": [], "avoid": [], "target": [], } for name, info in waypoint_labels.items(): kind = info.get("region_kind") if kind not in outputs: continue mask_path = str(out_path / f"{name}.nii.gz") label_values = info.get("label_values") label_name = info.get("label_name") label_names: list[str] | None = info.get("label_names") if label_values: extract_multi_label([int(value) for value in label_values], mask_path) elif label_names: # Multiple label names (e.g. left + right merged into one mask) resolved_values: list[int] = [] for lname in label_names: lval: str | None = mapping.get(lname) if lval is None or lval == "": sys.stderr.write( f"Warning: label '{lname}' was not found in {label_file}; " f"skipping label for ROI '{name}'.\n" ) sys.stderr.flush() continue resolved_values.append(int(lval)) if not resolved_values: sys.stderr.write(f"Warning: no valid labels resolved for ROI '{name}'; skipping.\n") sys.stderr.flush() continue extract_multi_label(resolved_values, mask_path) elif label_name: label_value: str | None = mapping.get(label_name) if label_value is None or label_value == "": sys.stderr.write( f"Warning: label '{label_name}' was not found in {label_file}; " f"skipping ROI '{name}'.\n" ) sys.stderr.flush() continue run_fsl_command( [ "fslmaths", str(roi_file), "-thr", str(label_value), "-uthr", str(label_value), "-bin", mask_path, ] ) else: continue outputs[kind].append(mask_path) seed = "" if len(outputs["seed"]) > 1: seed = str(out_path / "seed_merged.nii.gz") first_seed, *other_seeds = outputs["seed"] run_fsl_command(["fslmaths", first_seed, "-bin", seed]) for seed_mask in other_seeds: run_fsl_command(["fslmaths", seed, "-add", seed_mask, "-bin", seed]) elif outputs["seed"]: seed = outputs["seed"][0] waypoints_file = "" if outputs["waypoint"]: waypoints_file = str(out_path / "waypoints.txt") with open(waypoints_file, "w", encoding="utf-8") as handle: for waypoint in outputs["waypoint"]: handle.write(waypoint + "\n") stop_mask = outputs["stop"][0] if outputs["stop"] else "" avoid_mask = outputs["avoid"][0] if outputs["avoid"] else "" target_mask = outputs["target"][0] if outputs["target"] else "" return seed, waypoints_file, stop_mask, avoid_mask, target_mask