# Source code for ome_arrow.core (page title retained from rendered documentation)

"""
Core of the ome_arrow package: the OMEArrow convenience class and its lazy-loading helpers.
"""

from __future__ import annotations

import pathlib
from dataclasses import dataclass
from typing import (
    TYPE_CHECKING,
    Any,
    Dict,
    Iterable,
    Literal,
    Optional,
    Sequence,
    Tuple,
)

import matplotlib
import numpy as np
import pyarrow as pa

from ome_arrow.export import (
    to_numpy,
    to_ome_parquet,
    to_ome_tiff,
    to_ome_vortex,
    to_ome_zarr,
)
from ome_arrow.ingest import (
    _is_jax_array,
    _is_torch_array,
    from_jax_array,
    from_numpy,
    from_ome_parquet,
    from_ome_vortex,
    from_ome_zarr,
    from_stack_pattern_path,
    from_tiff,
    from_torch_array,
    open_lazy_plane_source,
)
from ome_arrow.meta import OME_ARROW_STRUCT
from ome_arrow.tensor import LazyTensorView, TensorView
from ome_arrow.transform import slice_ome_arrow
from ome_arrow.utils import describe_ome_arrow
from ome_arrow.view import view_matplotlib, view_pyvista

# if not in runtime, import pyvista for type hints
if TYPE_CHECKING:
    import pyvista


@dataclass(frozen=True)
class _LazySourceSpec:
    """Deferred source description for lazy OMEArrow loading.

    Created by ``OMEArrow.__init__`` when ``lazy=True`` and consumed later by
    materialization helpers that call ``_load_from_string_source``.
    """

    # Source path/URL string; lazy mode only supports string inputs.
    data: str
    # OME-Arrow column name used for tabular (Parquet/Vortex) sources.
    column_name: str
    # Row index used for tabular (Parquet/Vortex) sources.
    row_index: int
    # Optional image_type metadata override applied on ingest.
    image_type: str | None

@dataclass(frozen=True)
class _LazySliceSpec:
    """Deferred spatial/index slice specification.

    Queued by ``OMEArrow.slice_lazy`` and replayed through
    ``slice_ome_arrow`` when the plan is materialized.
    """

    # Half-open crop bounds in pixels (0-based).
    x_min: int
    x_max: int
    y_min: int
    y_max: int
    # Explicit indices to keep per dimension; None keeps all indices.
    t_indices: tuple[int, ...] | None
    c_indices: tuple[int, ...] | None
    z_indices: tuple[int, ...] | None
    # When True, missing (t, c, z) planes are zero-filled by slice_ome_arrow.
    fill_missing: bool

class OMEArrow:
    """
    Small convenience toolkit for working with ome-arrow data.

    If `input` is a TIFF path, this loads it via `tiff_to_ome_arrow`.
    If `input` is a dict, it will be converted using `to_struct_scalar`.
    If `input` is already a `pa.StructScalar`, it is used as-is.

    In Jupyter, evaluating the instance will render the first plane
    using matplotlib (via `_repr_html_`). Call `view_matplotlib()`
    to select a specific (z, t, c) plane.

    Args:
        input: TIFF path, nested dict, or `pa.StructScalar`.
        struct: Expected Arrow StructType (e.g., OME_ARROW_STRUCT).
    """

    def __init__(
        self,
        data: str | dict | pa.StructScalar | "np.ndarray",
        tcz: Tuple[int, int, int] = (0, 0, 0),
        *,
        dim_order: str | None = None,
        column_name: str = "ome_arrow",
        row_index: int = 0,
        image_type: str | None = None,
        lazy: bool = False,
    ) -> None:
        """
        Construct an OMEArrow from:

        - a Bio-Formats-style stack pattern string (contains '<', '>', or '*')
        - a path/URL to an OME-TIFF (.tif/.tiff)
        - a path/URL to an OME-Zarr store (.zarr / .ome.zarr)
        - a path/URL to an OME-Parquet file (.parquet / .pq)
        - a path/URL to a Vortex file (.vortex)
        - a NumPy ndarray (2D-5D; interpreted with from_numpy defaults)
        - a torch.Tensor (2D-5D; inferred dim order by rank unless provided
          via `dim_order`)
        - a jax.Array (2D-5D; inferred dim order by rank unless provided
          via `dim_order`)
        - a dict already matching the OME-Arrow schema
        - a pa.StructScalar already typed to OME_ARROW_STRUCT
        - optionally override/set image_type metadata on ingest
        - optionally defer source-file ingestion with lazy=True

        Args:
            data: Input source or record payload.
            dim_order: Axis labels used only for array/tensor ingest (NumPy,
                torch, JAX). Invalid or unrecognized combinations raise an
                error instead of being silently ignored.
        """
        # `dim_order` applies only when the constructor input itself is a raw
        # NumPy/torch/JAX array object (not string/file-path sources).
        # Rejecting incompatible combinations avoids silently ignoring user intent.
        if dim_order is not None and not (
            isinstance(data, np.ndarray)
            or _is_torch_array(data)
            or _is_jax_array(data)
        ):
            raise ValueError(
                "dim_order is supported only for numpy.ndarray, torch.Tensor, "
                "or jax.Array inputs."
            )

        # set the tcz for viewing
        self.tcz = tcz

        # Materialized record / optional Arrow-backed array, plus lazy-plan state.
        self._data: pa.StructScalar | None = None
        self._struct_array: pa.StructArray | None = None
        self._lazy_source: _LazySourceSpec | None = None
        self._lazy_slices: list[_LazySliceSpec] = []

        if lazy:
            # Lazy mode defers all ingestion: record the source spec and return.
            if not isinstance(data, str):
                raise TypeError("lazy=True currently supports only string file inputs.")
            if any(c in data for c in "<>*"):
                raise TypeError(
                    "lazy=True does not support Bio-Formats pattern strings. "
                    "Use OMEArrow(..., lazy=False) for pattern ingestion via "
                    "from_stack_pattern_path."
                )
            self._lazy_source = _LazySourceSpec(
                data=data,
                column_name=column_name,
                row_index=row_index,
                image_type=image_type,
            )
            return

        # --- 1) Stack pattern (Bio-Formats-style) --------------------------------
        if isinstance(data, str) and any(c in data for c in "<>*"):
            self.data = from_stack_pattern_path(
                data,
                default_dim_for_unspecified="C",
                map_series_to="T",
                clamp_to_uint16=True,
                image_type=image_type,
            )
        # --- 2) String path/URL: OME-Zarr / OME-Parquet / OME-TIFF ---------------
        elif isinstance(data, str):
            self.data, self._struct_array = self._load_from_string_source(
                data,
                column_name=column_name,
                row_index=row_index,
                image_type=image_type,
            )
        # --- 3) NumPy ndarray ----------------------------------------------------
        elif isinstance(data, np.ndarray):
            # Uses from_numpy defaults: dim_order="TCZYX", clamp_to_uint16=True, etc.
            # If the array is YX/ZYX/CYX/etc.,
            # from_numpy will expand/reorder accordingly.
            self.data = from_numpy(
                data,
                dim_order="TCZYX" if dim_order is None else dim_order,
                image_type=image_type,
            )
        # --- 4) Torch tensor ------------------------------------------------------
        elif _is_torch_array(data):
            self.data = from_torch_array(
                data,
                dim_order=dim_order,
                image_type=image_type,
            )
        # --- 5) JAX array --------------------------------------------------------
        elif _is_jax_array(data):
            self.data = from_jax_array(
                data,
                dim_order=dim_order,
                image_type=image_type,
            )
        # --- 6) Already-typed Arrow scalar ---------------------------------------
        elif isinstance(data, pa.StructScalar):
            self.data = data
            if image_type is not None:
                self.data = self._wrap_with_image_type(self.data, image_type)
        # --- 7) Plain dict matching the schema -----------------------------------
        elif isinstance(data, dict):
            # Project onto the schema's field names; missing keys become None.
            record = {f.name: data.get(f.name) for f in OME_ARROW_STRUCT}
            self.data = pa.scalar(record, type=OME_ARROW_STRUCT)
            if image_type is not None:
                self.data = self._wrap_with_image_type(self.data, image_type)
        # --- otherwise ------------------------------------------------------------
        else:
            data_type = f"{type(data).__module__}.{type(data).__qualname__}"
            raise TypeError(
                "input data must be str, dict, pa.StructScalar, numpy.ndarray, "
                f"torch.Tensor, or jax.Array; got {data_type}"
            )
[docs] @classmethod def scan( cls, data: str, *, tcz: Tuple[int, int, int] = (0, 0, 0), column_name: str = "ome_arrow", row_index: int = 0, image_type: str | None = None, ) -> "OMEArrow": """Create a lazily-loaded OMEArrow, similar to Polars scan semantics. Args: data: Input source path/URL. tcz: Default `(t, c, z)` indices used for view helpers. column_name: OME-Arrow column name for tabular sources. row_index: Row index for tabular sources. image_type: Optional image type override. Returns: OMEArrow: Lazily planned OMEArrow instance. """ return cls( data=data, tcz=tcz, column_name=column_name, row_index=row_index, image_type=image_type, lazy=True, )
@property def is_lazy(self) -> bool: """Return whether this instance still has deferred work.""" return self._lazy_source is not None or bool(self._lazy_slices) @property def data(self) -> pa.StructScalar: """Return the materialized OME-Arrow StructScalar. Returns: pa.StructScalar: Materialized OME-Arrow record. Raises: RuntimeError: If the record could not be initialized. """ self._ensure_materialized() if self._data is None: raise RuntimeError("OMEArrow data is not initialized.") return self._data @data.setter def data(self, value: pa.StructScalar) -> None: self._data = value
[docs] def collect(self) -> "OMEArrow": """Materialize deferred source data and return ``self``. Returns: OMEArrow: The same instance after materialization. """ self._ensure_materialized() return self
    @staticmethod
    def _load_from_string_source(
        data: str,
        *,
        column_name: str,
        row_index: int,
        image_type: str | None,
    ) -> tuple[pa.StructScalar, pa.StructArray | None]:
        """Load a record from a path/URL string, dispatching on its suffix.

        Returns a ``(scalar, struct_array)`` pair; ``struct_array`` is only
        populated for tabular (Parquet/Vortex) sources loaded with
        ``return_array=True``.
        """
        s = data.strip()
        path = pathlib.Path(s)
        struct_array: pa.StructArray | None = None

        # OME-Zarr: suffix match, an interior '.zarr/' segment (sub-paths/URLs),
        # or an existing local directory with a '.zarr' suffix.
        if (
            s.lower().endswith(".zarr")
            or s.lower().endswith(".ome.zarr")
            or ".zarr/" in s.lower()
            or (path.exists() and path.is_dir() and path.suffix.lower() == ".zarr")
        ):
            scalar = from_ome_zarr(s)
            if image_type is not None:
                scalar = OMEArrow._wrap_with_image_type(scalar, image_type)
            return scalar, None

        # OME-Parquet: keep the Arrow-backed struct array alongside the scalar.
        if s.lower().endswith((".parquet", ".pq")) or path.suffix.lower() in {
            ".parquet",
            ".pq",
        }:
            parquet_result = from_ome_parquet(
                s,
                column_name=column_name,
                row_index=row_index,
                return_array=True,
            )
            scalar, struct_array = parquet_result
            if image_type is not None:
                scalar = OMEArrow._wrap_with_image_type(scalar, image_type)
            return scalar, struct_array

        # Vortex: same shape of result as Parquet.
        if s.lower().endswith(".vortex") or path.suffix.lower() == ".vortex":
            vortex_result = from_ome_vortex(
                s,
                column_name=column_name,
                row_index=row_index,
                return_array=True,
            )
            scalar, struct_array = vortex_result
            if image_type is not None:
                scalar = OMEArrow._wrap_with_image_type(scalar, image_type)
            return scalar, struct_array

        # OME-TIFF.
        if path.suffix.lower() in {".tif", ".tiff"} or s.lower().endswith(
            (".tif", ".tiff")
        ):
            scalar = from_tiff(s)
            if image_type is not None:
                scalar = OMEArrow._wrap_with_image_type(scalar, image_type)
            return scalar, None

        # A real directory that matched none of the above is most likely a
        # misnamed Zarr store; give a targeted error before the generic one.
        if path.exists() and path.is_dir():
            raise ValueError(
                f"Directory '{s}' exists but does not look like an OME-Zarr store "
                "(expected suffix '.zarr' or '.ome.zarr')."
            )

        raise ValueError(
            "String input must be one of:\n"
            " • Bio-Formats pattern string (contains '<', '>' or '*')\n"
            " • OME-Zarr path/URL ending with '.zarr' or '.ome.zarr'\n"
            " • OME-Parquet file ending with '.parquet' or '.pq'\n"
            " • Vortex file ending with '.vortex'\n"
            " • OME-TIFF path/URL ending with '.tif' or '.tiff'"
        )

    def _ensure_materialized(self) -> None:
        """Load the deferred source (if any) and apply queued lazy slices."""
        if self._lazy_source is None:
            return
        lazy_source = self._lazy_source
        # Intentionally do not clear `_lazy_source` / `_lazy_slices` before load.
        # If `_load_from_string_source(...)` raises, lazy state is preserved so
        # callers can inspect/retry without losing the deferred plan.
        scalar, struct_array = self._load_from_string_source(
            lazy_source.data,
            column_name=lazy_source.column_name,
            row_index=lazy_source.row_index,
            image_type=lazy_source.image_type,
        )
        if self._lazy_slices:
            # Replay queued slice specs in order on the freshly loaded record.
            data = scalar
            for spec in self._lazy_slices:
                data = slice_ome_arrow(
                    data=data,
                    x_min=spec.x_min,
                    x_max=spec.x_max,
                    y_min=spec.y_min,
                    y_max=spec.y_max,
                    t_indices=spec.t_indices,
                    c_indices=spec.c_indices,
                    z_indices=spec.z_indices,
                    fill_missing=spec.fill_missing,
                )
            # Applying lazy slices via `slice_ome_arrow` materializes through a
            # StructScalar path, so we intentionally drop `_struct_array` here.
            # Consequence: Arrow-backed zero-copy tensor paths
            # (for example `tensor_view(...).to_dlpack(mode="arrow")`) are not
            # available after lazy slicing.
            self.data = data
            self._struct_array = None
        else:
            self.data, self._struct_array = scalar, struct_array
        # Lazy state is cleared only after a successful materialization.
        self._lazy_source = None
        self._lazy_slices = []

    def _tensor_source(self) -> pa.StructScalar | pa.StructArray:
        """Return the preferred tensor backing: struct array if present, else scalar."""
        self._ensure_materialized()
        if self._struct_array is not None:
            return self._struct_array
        if self._data is None:
            raise RuntimeError("OMEArrow data is not initialized.")
        return self._data

    def _resolve_lazy_tensor_view(self, view_kwargs: dict[str, Any]) -> TensorView:
        """Resolve a lazy tensor view plan without mutating this OMEArrow state.

        Args:
            view_kwargs: TensorView constructor kwargs captured by LazyTensorView.

        Returns:
            TensorView: Concrete tensor view for the planned selection.
        """
        # Deferred slice plans rely on slice_ome_arrow over a materialized scalar;
        # keep the existing behavior for those plans.
        if self._lazy_slices:
            self._ensure_materialized()
            return TensorView(self._tensor_source(), **view_kwargs)
        if self._lazy_source is None:
            return TensorView(self._tensor_source(), **view_kwargs)

        lazy_source = self._lazy_source
        # Prefer a plane-level lazy reader when the source supports it, so
        # resolving the view does not require loading all pixel data.
        lazy_plane_source = open_lazy_plane_source(lazy_source.data)
        if lazy_plane_source is not None:
            pixels_meta, plane_loader = lazy_plane_source
            # Minimal record: only pixels_meta is populated; planes are
            # fetched on demand via plane_loader.
            lazy_record = {
                "id": None,
                "name": None,
                "image_type": lazy_source.image_type,
                "acquisition_datetime": None,
                "pixels_meta": pixels_meta,
                "channels": [],
                "planes": [],
                "masks": [],
                "chunk_grid": None,
                "chunks": [],
            }
            return TensorView(lazy_record, plane_loader=plane_loader, **view_kwargs)

        # Fall back to a full load; note this does not cache onto self.
        scalar, struct_array = self._load_from_string_source(
            lazy_source.data,
            column_name=lazy_source.column_name,
            row_index=lazy_source.row_index,
            image_type=lazy_source.image_type,
        )
        source = struct_array if struct_array is not None else scalar
        return TensorView(source, **view_kwargs)

    @staticmethod
    def _wrap_with_image_type(
        data: pa.StructScalar, image_type: str
    ) -> pa.StructScalar:
        """Return a copy of ``data`` with its ``image_type`` field overridden."""
        return pa.scalar(
            {
                **data.as_py(),
                "image_type": str(image_type),
            },
            type=OME_ARROW_STRUCT,
        )
[docs] def export( # noqa: PLR0911 self, how: str = "numpy", dtype: np.dtype = np.uint16, strict: bool = True, clamp: bool = False, *, # common writer args out: str | None = None, dim_order: str = "TCZYX", # OME-TIFF args compression: str | None = "zlib", compression_level: int = 6, tile: tuple[int, int] | None = None, # OME-Zarr args chunks: tuple[int, int, int, int, int] | None = None, # (T,C,Z,Y,X) zarr_compressor: str | None = "zstd", zarr_level: int = 7, # optional display metadata (both paths guard/ignore if unsafe) use_channel_colors: bool = False, # Parquet args parquet_column_name: str = "ome_arrow", parquet_compression: str | None = "zstd", parquet_metadata: dict[str, str] | None = None, vortex_column_name: str = "ome_arrow", vortex_metadata: dict[str, str] | None = None, ) -> np.array | dict | pa.StructScalar | str: """ Export the OME-Arrow content in a chosen representation. Args ---- how: "numpy" → TCZYX np.ndarray "dict" → plain Python dict "scalar" → pa.StructScalar (as-is) "ome-tiff" → write OME-TIFF via BioIO "ome-zarr" → write OME-Zarr (OME-NGFF) via BioIO "parquet" → write a single-row Parquet with one struct column "vortex" → write a single-row Vortex file with one struct column dtype: Target dtype for "numpy"/writers (default: np.uint16). strict: For "numpy": raise if a plane has wrong pixel length. clamp: For "numpy"/writers: clamp values into dtype range before cast. Keyword-only (writer specific) ------------------------------ out: Output path (required for 'ome-tiff', 'ome-zarr', and 'parquet'). dim_order: Axes string for BioIO writers; default "TCZYX". compression / compression_level / tile: OME-TIFF options (passed through to tifffile via BioIO). chunks / zarr_compressor / zarr_level : OME-Zarr options (chunk shape, compressor hint, level). If chunks is None, a TCZYX default is chosen (1,1,<=4,<=512,<=512). use_channel_colors: Try to embed per-channel display colors when safe; otherwise omitted. 
parquet_*: Options for Parquet export (column name, compression, file metadata). vortex_*: Options for Vortex export (column name, file metadata). Returns ------- Any - "numpy": np.ndarray (T, C, Z, Y, X) - "dict": dict - "scalar": pa.StructScalar - "ome-tiff": output path (str) - "ome-zarr": output path (str) - "parquet": output path (str) - "vortex": output path (str) Raises ------ ValueError: Unknown 'how' or missing required params. """ self._ensure_materialized() # existing modes if how == "numpy": return to_numpy(self.data, dtype=dtype, strict=strict, clamp=clamp) if how == "dict": return self.data.as_py() if how == "scalar": return self.data mode = how.lower().replace("_", "-") # OME-TIFF via BioIO if mode in {"ome-tiff", "ometiff", "tiff"}: if not out: raise ValueError("export(how='ome-tiff') requires 'out' path.") to_ome_tiff( self.data, out, dtype=dtype, clamp=clamp, dim_order=dim_order, compression=compression, compression_level=int(compression_level), tile=tile, use_channel_colors=use_channel_colors, ) return out # OME-Zarr via BioIO if mode in {"ome-zarr", "omezarr", "zarr"}: if not out: raise ValueError("export(how='ome-zarr') requires 'out' path.") to_ome_zarr( self.data, out, dtype=dtype, clamp=clamp, dim_order=dim_order, chunks=chunks, compressor=zarr_compressor, compressor_level=int(zarr_level), ) return out # Parquet (single row, single struct column) if mode in {"ome-parquet", "omeparquet", "parquet"}: if not out: raise ValueError("export(how='parquet') requires 'out' path.") to_ome_parquet( data=self.data, out_path=out, column_name=parquet_column_name, compression=parquet_compression, # default 'zstd' file_metadata=parquet_metadata, ) return out # Vortex (single row, single struct column) if mode in {"ome-vortex", "omevortex", "vortex"}: if not out: raise ValueError("export(how='vortex') requires 'out' path.") to_ome_vortex( data=self.data, out_path=out, column_name=vortex_column_name, file_metadata=vortex_metadata, ) return out raise 
ValueError(f"Unknown export method: {how}")
[docs] def info(self) -> Dict[str, Any]: """ Describe the OME-Arrow data structure. Returns: dict with keys: - shape: (T, C, Z, Y, X) - type: classification string - summary: human-readable text """ self._ensure_materialized() return describe_ome_arrow(self.data)
[docs] def view( self, how: str = "matplotlib", tcz: tuple[int, int, int] = (0, 0, 0), autoscale: bool = True, vmin: int | None = None, vmax: int | None = None, cmap: str = "gray", show: bool = True, c: int | None = None, downsample: int = 1, opacity: str | float = "sigmoid", clim: tuple[float, float] | None = None, show_axes: bool = True, scaling_values: tuple[float, float, float] | None = None, ) -> tuple[matplotlib.figure.Figure, Any, Any] | "pyvista.Plotter": """Render an OME-Arrow record using Matplotlib or PyVista. This convenience method supports two rendering backends: - ``how="matplotlib"`` renders a single ``(t, c, z)`` plane as a 2D image. - ``how="pyvista"`` creates an interactive 3D PyVista visualization. Args: how: Rendering backend. One of ``"matplotlib"`` or ``"pyvista"``. tcz: ``(t, c, z)`` indices used for plane display. autoscale: Infer Matplotlib display limits from image range when ``vmin``/``vmax`` are not provided. vmin: Lower display limit for Matplotlib intensity scaling. vmax: Upper display limit for Matplotlib intensity scaling. cmap: Matplotlib colormap name for single-channel display. show: Whether to display the plot immediately. c: Channel index override for PyVista. If ``None``, uses ``tcz[1]``. downsample: Integer downsampling factor for PyVista views. Higher values render faster for large volumes but reduce spatial resolution. opacity: Opacity for PyVista. Either a float in ``[0, 1]`` or ``"sigmoid"``. clim: Contrast limits ``(low, high)`` for PyVista rendering. show_axes: Whether to display axes in the PyVista scene. scaling_values: Physical scale multipliers ``(x, y, z)`` used by PyVista. If ``None``, uses OME metadata-derived scaling. Returns: tuple[matplotlib.figure.Figure, matplotlib.axes.Axes, matplotlib.image.AxesImage] | pyvista.Plotter: For ``how="matplotlib"``, returns the tuple emitted by :func:`ome_arrow.view.view_matplotlib` as ``(figure, axes, image)``. For ``how="pyvista"``, returns a :class:`pyvista.Plotter`. 
Raises: ValueError: If a requested plane is not found or the render mode is unsupported. TypeError: If parameter types are invalid. Notes: - The ``how="pyvista"`` mode normally outputs an interactive visualization, but attempts to embed a static PNG snapshot for non-interactive renderers (for example, static docs builds, nbconvert HTML/PDF exports, rendered/read-only notebook views such as GitHub notebook previews, and CI log viewers). - When ``show=False`` and ``how="pyvista"``, the returned :class:`pyvista.Plotter` can be shown later. """ if how not in {"matplotlib", "pyvista"}: raise ValueError( f"Unsupported view mode: {how!r}. Use 'matplotlib' or 'pyvista'." ) self._ensure_materialized() if how == "matplotlib": return view_matplotlib( self.data, tcz=tcz, autoscale=autoscale, vmin=vmin, vmax=vmax, cmap=cmap, show=show, ) if how == "pyvista": import base64 import io from IPython.display import HTML, display c_idx = int(tcz[1] if c is None else c) plotter = view_pyvista( data=self.data, c=c_idx, downsample=downsample, opacity=opacity, clim=clim, show_axes=show_axes, scaling_values=scaling_values, show=False, ) # 1) show the interactive widget for live work if show: plotter.show() # 2) capture a PNG and embed it in a collapsed details block try: img = plotter.screenshot(return_img=True) # ndarray if img is not None: buf = io.BytesIO() # use matplotlib-free writer: PyVista returns RGB(A) uint8 from PIL import ( Image as PILImage, ) # pillow is a light dep most envs have PILImage.fromarray(img).save(buf, format="PNG") b64 = base64.b64encode(buf.getvalue()).decode("ascii") display( HTML( f""" <details> <summary>Static snapshot (for non-interactive view)</summary> <img src="data:image/png;base64,{b64}" /> </details> """ ) ) except Exception as e: print(f"Warning: could not save PyVista snapshot: {e}") return plotter raise ValueError( f"Unsupported view mode: {how!r}. Use 'matplotlib' or 'pyvista'." )
    def tensor_view(
        self,
        *,
        scene: int | None = None,
        t: int | slice | Sequence[int] | None = None,
        z: int | slice | Sequence[int] | None = None,
        c: int | slice | Sequence[int] | None = None,
        roi: tuple[int, int, int, int] | None = None,
        roi3d: tuple[int, int, int, int, int, int] | None = None,
        roi_nd: tuple[int, ...] | None = None,
        roi_type: Literal["2d", "2d_timelapse", "3d", "4d"] | None = None,
        tile: tuple[int, int] | None = None,
        layout: str | None = None,
        dtype: np.dtype | None = None,
        chunk_policy: Literal["auto", "combine", "keep"] = "auto",
        channel_policy: Literal["error", "first"] = "error",
    ) -> TensorView | LazyTensorView:
        """Create a TensorView of the pixel data.

        Args:
            scene: Scene index (only 0 is supported for single-image records).
            t: Time index selection (int, slice, or sequence). Default: all.
            z: Z index selection (int, slice, or sequence). Default: all.
            c: Channel index selection (int, slice, or sequence). Default: all.
            roi: Spatial crop (x, y, w, h) in pixels.
            roi3d: Spatial + depth crop (x, y, z, w, h, d) in pixels/planes.
                This is a convenience alias for ``roi=(x, y, w, h)`` and
                ``z=slice(z, z + d)``.
            roi_nd: General ROI tuple with min/max bounds.
            roi_type: ROI interpretation mode for ``roi_nd``. Supported values:
                ``"2d"``, ``"2d_timelapse"``, ``"3d"``, and ``"4d"``.
            tile: Tile index (tile_y, tile_x) based on chunk grid.
            layout: Desired layout string using `TZCYX` letters where T=time,
                Z=depth, C=channel, Y=row axis, X=column axis. `TZCHW` aliases
                are also accepted for compatibility.
            dtype: Output dtype override.
            chunk_policy: Handling for ``pyarrow.ChunkedArray`` inputs.
            channel_policy: Behavior when dropping `C` from layout while
                multiple channels are selected. "error" raises (default).
                "first" keeps the first channel.

        Returns:
            TensorView | LazyTensorView: Tensor view over selected pixels.
                In lazy mode, this returns a deferred ``LazyTensorView`` that
                resolves on first execution call (for example ``to_numpy()``)
                without forcing ``self`` to materialize unless deferred
                ``slice_lazy`` operations are queued.

        Raises:
            ValueError: If an unsupported scene is requested.
        """
        if scene not in (None, 0):
            raise ValueError("Only scene=0 is supported for single-image records.")

        # Lazy sources get a deferred view; resolution happens via
        # `_resolve_lazy_tensor_view` on the first execution call.
        if self._lazy_source is not None:
            return LazyTensorView(
                loader=self._tensor_source,
                resolver=self._resolve_lazy_tensor_view,
                t=t,
                z=z,
                c=c,
                roi=roi,
                roi3d=roi3d,
                roi_nd=roi_nd,
                roi_type=roi_type,
                tile=tile,
                layout=layout,
                dtype=dtype,
                chunk_policy=chunk_policy,
                channel_policy=channel_policy,
            )

        # TensorView uses an internal canonical axis basis (TZCHW) for shape/stride
        # math, then applies the requested layout permutation for output.
        # Public layout examples prefer TZCYX (Y/X), with H/W accepted as aliases.
        return TensorView(
            self._struct_array if self._struct_array is not None else self.data,
            t=t,
            z=z,
            c=c,
            roi=roi,
            roi3d=roi3d,
            roi_nd=roi_nd,
            roi_type=roi_type,
            tile=tile,
            layout=layout,
            dtype=dtype,
            chunk_policy=chunk_policy,
            channel_policy=channel_policy,
        )
[docs] def slice( self, x_min: int, x_max: int, y_min: int, y_max: int, t_indices: Optional[Iterable[int]] = None, c_indices: Optional[Iterable[int]] = None, z_indices: Optional[Iterable[int]] = None, fill_missing: bool = True, ) -> OMEArrow: """ Create a cropped copy of an OME-Arrow record. Crops spatially to [y_min:y_max, x_min:x_max] (half-open) and, if provided, filters/reindexes T/C/Z to the given index sets. Parameters ---------- x_min, x_max, y_min, y_max : int Half-open crop bounds in pixels (0-based). t_indices, c_indices, z_indices : Iterable[int] | None Optional explicit indices to keep for T, C, Z. If None, keep all. Selected indices are reindexed to 0..len-1 in the output. fill_missing : bool If True, any missing (t,c,z) planes in the selection are zero-filled. Returns ------- OMEArrow object New OME-Arrow record with updated sizes and planes. """ self._ensure_materialized() return OMEArrow( data=slice_ome_arrow( data=self.data, x_min=x_min, x_max=x_max, y_min=y_min, y_max=y_max, t_indices=t_indices, c_indices=c_indices, z_indices=z_indices, fill_missing=fill_missing, ) )
    def slice_lazy(
        self,
        x_min: int,
        x_max: int,
        y_min: int,
        y_max: int,
        t_indices: Optional[Iterable[int]] = None,
        c_indices: Optional[Iterable[int]] = None,
        z_indices: Optional[Iterable[int]] = None,
        fill_missing: bool = True,
    ) -> OMEArrow:
        """Return a lazily planned slice, collected on first execution.

        For lazy sources created with ``OMEArrow.scan(...)``, this queues a
        deferred slice operation and returns a new lazy OMEArrow plan produced
        from ``OMEArrow.scan(...)``. For already materialized sources, this
        falls back to eager ``slice()``. This method does not mutate ``self``.

        Notes:
            ``slice_lazy`` always returns a new plan object. Internally, the
            returned plan gets a fresh ``_lazy_slices`` list
            (``[*self._lazy_slices, new_slice]``), so chained plans do not
            share mutable slice state with the original ``OMEArrow``.

            A common footgun is: ``oa.slice_lazy(...).collect()`` followed by
            ``oa.tensor_view(...)``. Those calls can load/materialize the same
            source twice because ``oa`` remains the original plan. For a
            single-load workflow, keep working from the value returned by
            ``slice_lazy`` / ``collect``.

        Args:
            x_min: Inclusive minimum X index for the crop.
            x_max: Exclusive maximum X index for the crop.
            y_min: Inclusive minimum Y index for the crop.
            y_max: Exclusive maximum Y index for the crop.
            t_indices: Optional time indices to retain.
            c_indices: Optional channel indices to retain.
            z_indices: Optional depth indices to retain.
            fill_missing: Whether to zero-fill missing `(t, c, z)` planes.

        Returns:
            OMEArrow: Lazy plan when source is lazy; eager slice result
                otherwise.
        """
        # Already materialized (or never lazy): just slice eagerly.
        if self._lazy_source is None:
            return self.slice(
                x_min=x_min,
                x_max=x_max,
                y_min=y_min,
                y_max=y_max,
                t_indices=t_indices,
                c_indices=c_indices,
                z_indices=z_indices,
                fill_missing=fill_missing,
            )

        lazy_source = self._lazy_source
        # Build a fresh plan from the same source spec so `self` is untouched.
        planned = OMEArrow.scan(
            lazy_source.data,
            tcz=self.tcz,
            column_name=lazy_source.column_name,
            row_index=lazy_source.row_index,
            image_type=lazy_source.image_type,
        )
        # Copy existing queued slices plus the new one; coordinates are
        # normalized to plain ints/tuples so the spec is hashable and frozen.
        planned._lazy_slices = [
            *self._lazy_slices,
            _LazySliceSpec(
                x_min=int(x_min),
                x_max=int(x_max),
                y_min=int(y_min),
                y_max=int(y_max),
                t_indices=(
                    None if t_indices is None else tuple(int(i) for i in t_indices)
                ),
                c_indices=(
                    None if c_indices is None else tuple(int(i) for i in c_indices)
                ),
                z_indices=(
                    None if z_indices is None else tuple(int(i) for i in z_indices)
                ),
                fill_missing=bool(fill_missing),
            ),
        ]
        return planned
    def _repr_html_(self) -> str:
        """
        Auto-render a plane as inline PNG in Jupyter.
        """
        try:
            self._ensure_materialized()
            # Render the default (self.tcz) plane; matplotlib displays it as a
            # side effect in Jupyter even with show=False.
            view_matplotlib(
                data=self.data,
                tcz=self.tcz,
                autoscale=True,
                vmin=None,
                vmax=None,
                cmap="gray",
                show=False,
            )
            # Return the textual summary (not the class repr) so a short
            # description appears beneath the rendered image.
            return self.info()["summary"]
        except Exception as e:
            # Fallback to a tiny text status if rendering fails.
            return f"<pre>OMEArrowKit: render failed: {e}</pre>"