Python API#
Top-level API#
Public package interface for iceberg_bioimage.
- class iceberg_bioimage.CatalogScanOptions(columns: Sequence[str] | None = None, where: str | None = None, snapshot_id: int | None = None, limit: int | None = None)[source]#
Options for scanning a catalog-backed metadata table.
- class iceberg_bioimage.ContractValidationResult(target: str, present_columns: list[str], required_columns: list[str], recommended_columns: list[str], missing_required_columns: list[str], missing_recommended_columns: list[str], warnings: list[str] = <factory>)[source]#
Serializable result for schema-level contract validation.
- property is_valid: bool#
Return whether all required columns are present.
- class iceberg_bioimage.CytominingWarehouseResult(warehouse_root: str, tables_written: list[str], row_counts: dict[str, int], manifest_path: str | None = None)[source]#
Serializable result for exporting Parquet-backed Cytomining warehouses.
- class iceberg_bioimage.DatasetSummary(source_uri: str, format_family: str, image_asset_count: int, chunked_asset_count: int, array_paths: list[str], dtypes: list[str], shapes: list[list[int]], axes: list[str], channel_counts: list[int], storage_variants: list[str], warnings: list[str] = <factory>)[source]#
User-facing summary of a scanned dataset.
- class iceberg_bioimage.ImageAsset(uri: str, shape: list[int], dtype: str, array_path: str | None = None, chunk_shape: list[int] | None = None, metadata: dict[str, ~typing.Any] = <factory>, image_id: str | None = None)[source]#
Canonical representation of one discovered image asset.
- class iceberg_bioimage.RegistrationResult(source_uri: str, image_assets_rows_published: int, chunk_rows_published: int)[source]#
Serializable result for a metadata registration workflow.
- class iceberg_bioimage.ScanResult(source_uri: str, format_family: str, image_assets: list[~iceberg_bioimage.models.scan_result.ImageAsset], warnings: list[str] = <factory>)[source]#
Canonical scan output shared across adapters and publishers.
- class iceberg_bioimage.WarehouseIngestResult(catalog: str, namespace: list[str], image_assets_table: str, chunk_index_table: str | None, datasets: list[~iceberg_bioimage.models.scan_result.RegistrationResult], warnings: list[str] = <factory>)[source]#
Serializable result for a multi-dataset warehouse ingestion workflow.
- property chunk_rows_published: int#
Return the total number of published chunk-index rows.
- property dataset_count: int#
Return the number of ingested datasets.
- property image_assets_rows_published: int#
Return the total number of published image-assets rows.
- class iceberg_bioimage.WarehouseManifest(warehouse_root: str, tables: list[~iceberg_bioimage.models.scan_result.WarehouseTableManifestEntry] = <factory>)[source]#
Serializable manifest describing tables stored in a warehouse root.
- class iceberg_bioimage.WarehouseTableManifestEntry(table_name: str, role: str, format: str = 'parquet', join_keys: list[str] = <factory>, source_type: str | None = None, source_ref: str | None = None, row_count: int | None = None, columns: list[str] = <factory>)[source]#
Serializable metadata for one table in a warehouse manifest.
- class iceberg_bioimage.WarehouseValidationResult(warehouse_root: str, errors: list[str] = <factory>, warnings: list[str] = <factory>)[source]#
Serializable result for validating a warehouse manifest and layout.
- property is_valid: bool#
Return whether the warehouse passed validation.
- iceberg_bioimage.catalog_table_to_arrow(catalog: str | SupportsScanCatalog, namespace: str | Sequence[str], table_name: str, *, scan_options: CatalogScanOptions | None = None) Table[source]#
Load a catalog table into Arrow via PyIceberg.
- iceberg_bioimage.create_duckdb_connection(database: str = ':memory:', *, read_only: bool = False) DuckDBPyConnection[source]#
Create a DuckDB connection.
DuckDB is optional for this project. This helper isolates the import so the core package remains engine-neutral unless the user explicitly opts in.
- iceberg_bioimage.create_ome_arrow(data: Any, **kwargs: Any) object[source]#
Create an `ome_arrow.OMEArrow` object when the optional extra is installed.
- iceberg_bioimage.export_catalog_to_cytomining_warehouse(catalog: str | SupportsScanCatalog, namespace: str | tuple[str, ...], warehouse_root: str | Path, *, profiles: str | Path | Table | list[dict[str, object]] | None = None, image_assets_table_name: str = 'image_assets', chunk_index_table_name: str | None = 'chunk_index', joined_table_name: str = 'joined_profiles', profile_dataset_id: str | None = None, mode: Literal['overwrite', 'append'] = 'overwrite') CytominingWarehouseResult[source]#
Materialize catalog-backed metadata into a Parquet Cytomining warehouse.
- iceberg_bioimage.export_profiles_to_cytomining_warehouse(profiles: str | Path | Table | list[dict[str, object]], warehouse_root: str | Path, *, table_name: str = 'profiles', role: str = 'profiles', profile_dataset_id: str | None = None, join_keys: list[str] | None = None, source_type: str = 'profiles', source_ref: str | None = None, alias_map: Mapping[str, tuple[str, ...] | list[str]] | None = None, mode: Literal['overwrite', 'append'] = 'append') CytominingWarehouseResult[source]#
Write a Cytomining profile table into a Parquet-backed warehouse root.
- iceberg_bioimage.export_scan_result_to_cytomining_warehouse(scan_result: ScanResult, warehouse_root: str | Path, *, profiles: str | Path | Table | list[dict[str, object]] | None = None, include_chunks: bool = True, image_assets_table_name: str = 'image_assets', chunk_index_table_name: str = 'chunk_index', joined_table_name: str = 'joined_profiles', profile_dataset_id: str | None = None, mode: Literal['overwrite', 'append'] = 'overwrite') CytominingWarehouseResult[source]#
Write scan-derived metadata into a Parquet-backed Cytomining warehouse.
- iceberg_bioimage.export_store_to_cytomining_warehouse(uri: str, warehouse_root: str | Path, *, profiles: str | Path | Table | list[dict[str, object]] | None = None, include_chunks: bool = True, image_assets_table_name: str = 'image_assets', chunk_index_table_name: str = 'chunk_index', joined_table_name: str = 'joined_profiles', profile_dataset_id: str | None = None, mode: Literal['overwrite', 'append'] = 'overwrite') CytominingWarehouseResult[source]#
Scan a store and export its metadata into a Cytomining warehouse.
- iceberg_bioimage.export_table_to_cytomining_warehouse(table: Table, warehouse_root: str | Path, *, table_name: str, role: str, join_keys: list[str] | None = None, source_type: str | None = None, source_ref: str | None = None, mode: Literal['overwrite', 'append'] = 'append') CytominingWarehouseResult[source]#
Write a generic table into a warehouse root and update the manifest.
- iceberg_bioimage.ingest_scan_results_to_warehouse(scan_results: Sequence[ScanResult], catalog: str | SupportsCatalog, namespace: str | Sequence[str], *, image_assets_table: str = 'image_assets', chunk_index_table: str | None = 'chunk_index') WarehouseIngestResult[source]#
Publish many scanned datasets into a Cytotable-compatible warehouse.
- iceberg_bioimage.ingest_stores_to_warehouse(uris: Sequence[str], catalog: str | SupportsCatalog, namespace: str | Sequence[str], *, image_assets_table: str = 'image_assets', chunk_index_table: str | None = 'chunk_index') WarehouseIngestResult[source]#
Scan and publish many datasets into a Cytotable-compatible warehouse.
- iceberg_bioimage.join_catalog_image_assets_with_profiles(catalog: str | SupportsScanCatalog, namespace: str | Sequence[str], profiles: str | Path | Table | list[dict[str, object]], *, image_assets_table: str = 'image_assets', chunk_index_table: str | None = None, join_keys: Sequence[str] = DEFAULT_JOIN_KEYS, image_assets_scan_options: CatalogScanOptions | None = None, chunk_index_scan_options: CatalogScanOptions | None = None, profile_dataset_id: str | None = None) Table[source]#
Join catalog-backed image metadata to a profile table.
- Parameters:
catalog – Catalog name or catalog-like object.
namespace – Namespace containing the metadata tables.
profiles – Profile rows or table to join against.
image_assets_table – Name of the canonical image-assets table.
chunk_index_table – Optional chunk-index table name.
join_keys – Join columns shared by image metadata and profiles.
image_assets_scan_options – Optional scan options for image-assets reads.
chunk_index_scan_options – Optional scan options for chunk-index reads.
profile_dataset_id – Dataset identifier to inject for profile inputs that do not carry their own dataset_id column. Defaults to None.
- iceberg_bioimage.join_image_assets_with_profiles(image_assets: MetadataSource, profiles: MetadataSource, *, join_keys: Sequence[str] = DEFAULT_JOIN_KEYS, chunk_index: MetadataSource | None = None, connection: DuckDBPyConnection | None = None, profile_dataset_id: str | None = None) pa.Table[source]#
Join image metadata to a profile table using the canonical join keys.
If `profile_dataset_id` is provided, that value is used to populate the profile-side `dataset_id` when the profile input lacks one. When it is None, the profile input is expected to carry `dataset_id` already.
- iceberg_bioimage.join_profiles_with_scan_result(scan_result: ScanResult, profiles: str | Path | Table | list[dict[str, object]], *, include_chunks: bool = False, profile_dataset_id: str | None = None) Table[source]#
Join canonical image assets from a scan result to profile rows.
This helper uses the optional DuckDB integration at runtime. Install the duckdb extra/group before calling it.
- iceberg_bioimage.join_profiles_with_store(uri: str, profiles: str | Path | Table | list[dict[str, object]], *, include_chunks: bool = False, profile_dataset_id: str | None = None) Table[source]#
Scan a store and join its canonical image assets to profile rows.
This helper uses the optional DuckDB integration at runtime. Install the duckdb extra/group before calling it.
- iceberg_bioimage.list_catalog_tables(catalog: str | SupportsScanCatalog, namespace: str | Sequence[str]) list[str][source]#
List canonical metadata tables available in a catalog namespace.
- iceberg_bioimage.load_catalog_table(catalog: str | SupportsScanCatalog, namespace: str | Sequence[str], table_name: str) SupportsIcebergTable[source]#
Load a canonical metadata table from a catalog.
- iceberg_bioimage.load_profile_column_aliases(path: str | Path) dict[str, tuple[str, ...]][source]#
Load microscopy profile column aliases from a TOML file.
- iceberg_bioimage.load_warehouse_manifest(warehouse_root: str | Path) WarehouseManifest[source]#
Load a warehouse manifest if present, otherwise return an empty manifest.
- iceberg_bioimage.publish_chunk_index(catalog: str | SupportsCatalog, namespace: str | Iterable[str], table_name: str, scan_result: ScanResult) int[source]#
Publish derived chunk metadata into the canonical chunk_index table.
- iceberg_bioimage.publish_image_assets(catalog: str | SupportsCatalog, namespace: str | Iterable[str], table_name: str, scan_result: ScanResult) int[source]#
Publish a scan result into the canonical image_assets Iceberg table.
- iceberg_bioimage.query_metadata_table(source: MetadataSource, *, columns: Sequence[str] | None = None, filters: Sequence[FilterClause] | None = None, connection: DuckDBPyConnection | None = None) pa.Table[source]#
Query a metadata table from a Parquet path, Arrow table, or row list.
- iceberg_bioimage.register_store(uri: str, catalog: str | SupportsCatalog, namespace: str | Sequence[str], *, image_assets_table: str = 'image_assets', chunk_index_table: str | None = 'chunk_index') RegistrationResult[source]#
Scan a store and publish canonical metadata tables.
- iceberg_bioimage.scan_ome_arrow(data: str, **kwargs: Any) object[source]#
Create a lazy `ome_arrow.OMEArrow` scan plan for tabular image sources.
- iceberg_bioimage.scan_store(uri: str) ScanResult[source]#
Scan a supported image store and return canonical metadata.
- iceberg_bioimage.summarize_scan_result(scan_result: ScanResult) DatasetSummary[source]#
Build a concise user-facing summary from a scan result.
- iceberg_bioimage.summarize_store(uri: str) DatasetSummary[source]#
Scan a store and return a concise dataset summary.
- iceberg_bioimage.validate_microscopy_profile_columns(columns: list[str] | tuple[str, ...], *, target: str = 'profile_table', alias_map: Mapping[str, tuple[str, ...] | list[str]] | None = None) ContractValidationResult[source]#
Validate a schema against the microscopy join contract.
- iceberg_bioimage.validate_microscopy_profile_table(path: str) ContractValidationResult[source]#
Validate a local profile table file against the microscopy join contract.
- iceberg_bioimage.validate_warehouse_manifest(path: str | Path) WarehouseValidationResult[source]#
Validate a manifest-backed warehouse root.
Scan API#
Public API entry points.
- iceberg_bioimage.api.ingest_scan_results_to_warehouse(scan_results: Sequence[ScanResult], catalog: str | SupportsCatalog, namespace: str | Sequence[str], *, image_assets_table: str = 'image_assets', chunk_index_table: str | None = 'chunk_index') WarehouseIngestResult[source]#
Publish many scanned datasets into a Cytotable-compatible warehouse.
- iceberg_bioimage.api.ingest_stores_to_warehouse(uris: Sequence[str], catalog: str | SupportsCatalog, namespace: str | Sequence[str], *, image_assets_table: str = 'image_assets', chunk_index_table: str | None = 'chunk_index') WarehouseIngestResult[source]#
Scan and publish many datasets into a Cytotable-compatible warehouse.
- iceberg_bioimage.api.join_profiles_with_scan_result(scan_result: ScanResult, profiles: str | Path | Table | list[dict[str, object]], *, include_chunks: bool = False, profile_dataset_id: str | None = None) Table[source]#
Join canonical image assets from a scan result to profile rows.
This helper uses the optional DuckDB integration at runtime. Install the duckdb extra/group before calling it.
- iceberg_bioimage.api.join_profiles_with_store(uri: str, profiles: str | Path | Table | list[dict[str, object]], *, include_chunks: bool = False, profile_dataset_id: str | None = None) Table[source]#
Scan a store and join its canonical image assets to profile rows.
This helper uses the optional DuckDB integration at runtime. Install the duckdb extra/group before calling it.
- iceberg_bioimage.api.register_store(uri: str, catalog: str | SupportsCatalog, namespace: str | Sequence[str], *, image_assets_table: str = 'image_assets', chunk_index_table: str | None = 'chunk_index') RegistrationResult[source]#
Scan a store and publish canonical metadata tables.
- iceberg_bioimage.api.scan_store(uri: str) ScanResult[source]#
Scan a supported image store and return canonical metadata.
- iceberg_bioimage.api.summarize_scan_result(scan_result: ScanResult) DatasetSummary[source]#
Build a concise user-facing summary from a scan result.
- iceberg_bioimage.api.summarize_store(uri: str) DatasetSummary[source]#
Scan a store and return a concise dataset summary.
Models#
Serializable canonical scan models.
- class iceberg_bioimage.models.scan_result.ContractValidationResult(target: str, present_columns: list[str], required_columns: list[str], recommended_columns: list[str], missing_required_columns: list[str], missing_recommended_columns: list[str], warnings: list[str] = <factory>)[source]#
Serializable result for schema-level contract validation.
- property is_valid: bool#
Return whether all required columns are present.
- class iceberg_bioimage.models.scan_result.CytominingWarehouseResult(warehouse_root: str, tables_written: list[str], row_counts: dict[str, int], manifest_path: str | None = None)[source]#
Serializable result for exporting Parquet-backed Cytomining warehouses.
- class iceberg_bioimage.models.scan_result.DatasetSummary(source_uri: str, format_family: str, image_asset_count: int, chunked_asset_count: int, array_paths: list[str], dtypes: list[str], shapes: list[list[int]], axes: list[str], channel_counts: list[int], storage_variants: list[str], warnings: list[str] = <factory>)[source]#
User-facing summary of a scanned dataset.
- class iceberg_bioimage.models.scan_result.ImageAsset(uri: str, shape: list[int], dtype: str, array_path: str | None = None, chunk_shape: list[int] | None = None, metadata: dict[str, ~typing.Any] = <factory>, image_id: str | None = None)[source]#
Canonical representation of one discovered image asset.
- class iceberg_bioimage.models.scan_result.RegistrationResult(source_uri: str, image_assets_rows_published: int, chunk_rows_published: int)[source]#
Serializable result for a metadata registration workflow.
- class iceberg_bioimage.models.scan_result.ScanResult(source_uri: str, format_family: str, image_assets: list[~iceberg_bioimage.models.scan_result.ImageAsset], warnings: list[str] = <factory>)[source]#
Canonical scan output shared across adapters and publishers.
- class iceberg_bioimage.models.scan_result.WarehouseIngestResult(catalog: str, namespace: list[str], image_assets_table: str, chunk_index_table: str | None, datasets: list[~iceberg_bioimage.models.scan_result.RegistrationResult], warnings: list[str] = <factory>)[source]#
Serializable result for a multi-dataset warehouse ingestion workflow.
- property chunk_rows_published: int#
Return the total number of published chunk-index rows.
- property dataset_count: int#
Return the number of ingested datasets.
- property image_assets_rows_published: int#
Return the total number of published image-assets rows.
- class iceberg_bioimage.models.scan_result.WarehouseManifest(warehouse_root: str, tables: list[~iceberg_bioimage.models.scan_result.WarehouseTableManifestEntry] = <factory>)[source]#
Serializable manifest describing tables stored in a warehouse root.
- class iceberg_bioimage.models.scan_result.WarehouseTableManifestEntry(table_name: str, role: str, format: str = 'parquet', join_keys: list[str] = <factory>, source_type: str | None = None, source_ref: str | None = None, row_count: int | None = None, columns: list[str] = <factory>)[source]#
Serializable metadata for one table in a warehouse manifest.
- class iceberg_bioimage.models.scan_result.WarehouseValidationResult(warehouse_root: str, errors: list[str] = <factory>, warnings: list[str] = <factory>)[source]#
Serializable result for validating a warehouse manifest and layout.
- property is_valid: bool#
Return whether the warehouse passed validation.
Publishing#
Image asset publishing helpers.
- class iceberg_bioimage.publishing.image_assets.SupportsAppend(*args, **kwargs)[source]#
Protocol for appendable Iceberg-like tables.
- class iceberg_bioimage.publishing.image_assets.SupportsCatalog(*args, **kwargs)[source]#
Protocol for catalog objects used by the publishing layer.
- create_table(identifier: tuple[str, ...], schema: object) SupportsAppend[source]#
Create and return a table.
- load_table(identifier: tuple[str, ...]) SupportsAppend[source]#
Load an existing table.
- class iceberg_bioimage.publishing.image_assets.SupportsLoadTable(*args, **kwargs)[source]#
Protocol for catalog objects that can load existing tables.
- iceberg_bioimage.publishing.image_assets.publish_image_assets(catalog: str | SupportsCatalog, namespace: str | Iterable[str], table_name: str, scan_result: ScanResult) int[source]#
Publish a scan result into the canonical image_assets Iceberg table.
- iceberg_bioimage.publishing.image_assets.scan_result_to_rows(scan_result: ScanResult) list[dict[str, object]][source]#
Convert a scan result into canonical image_assets rows.
Chunk index publishing helpers.
- iceberg_bioimage.publishing.chunk_index.publish_chunk_index(catalog: str | SupportsCatalog, namespace: str | Iterable[str], table_name: str, scan_result: ScanResult) int[source]#
Publish derived chunk metadata into the canonical chunk_index table.
- iceberg_bioimage.publishing.chunk_index.scan_result_to_chunk_rows(scan_result: ScanResult) list[dict[str, object]][source]#
Convert a scan result into canonical chunk_index rows.
Validation#
Validation helpers for canonical scan objects and join contracts.
- iceberg_bioimage.validation.contracts.load_profile_column_aliases(path: str | Path) dict[str, tuple[str, ...]][source]#
Load microscopy profile column aliases from a TOML file.
- iceberg_bioimage.validation.contracts.profile_column_aliases() Mapping[str, tuple[str, ...]][source]#
Return the supported microscopy profile column aliases.
- iceberg_bioimage.validation.contracts.raise_for_invalid_scan_result(scan_result: ScanResult) None[source]#
Raise a ValueError when a scan result is invalid.
- iceberg_bioimage.validation.contracts.resolve_microscopy_profile_columns(columns: list[str] | tuple[str, ...], *, alias_map: Mapping[str, tuple[str, ...] | list[str]] | None = None) dict[str, str | None][source]#
Resolve canonical microscopy columns from a schema with known aliases.
- iceberg_bioimage.validation.contracts.validate_microscopy_profile_columns(columns: list[str] | tuple[str, ...], *, target: str = 'profile_table', alias_map: Mapping[str, tuple[str, ...] | list[str]] | None = None) ContractValidationResult[source]#
Validate a schema against the microscopy join contract.
- iceberg_bioimage.validation.contracts.validate_microscopy_profile_table(path: str) ContractValidationResult[source]#
Validate a local profile table file against the microscopy join contract.
- iceberg_bioimage.validation.contracts.validate_scan_result(scan_result: ScanResult) list[str][source]#
Return validation errors for a scan result.
- iceberg_bioimage.validation.contracts.validate_warehouse_manifest(path: str | Path) WarehouseValidationResult[source]#
Validate a manifest-backed warehouse root.
Optional DuckDB Integration#
Optional DuckDB query helpers for canonical metadata tables.
- iceberg_bioimage.integrations.duckdb.create_duckdb_connection(database: str = ':memory:', *, read_only: bool = False) DuckDBPyConnection[source]#
Create a DuckDB connection.
DuckDB is optional for this project. This helper isolates the import so the core package remains engine-neutral unless the user explicitly opts in.
- iceberg_bioimage.integrations.duckdb.join_image_assets_with_profiles(image_assets: MetadataSource, profiles: MetadataSource, *, join_keys: Sequence[str] = DEFAULT_JOIN_KEYS, chunk_index: MetadataSource | None = None, connection: DuckDBPyConnection | None = None, profile_dataset_id: str | None = None) pa.Table[source]#
Join image metadata to a profile table using the canonical join keys.
If `profile_dataset_id` is provided, that value is used to populate the profile-side `dataset_id` when the profile input lacks one. When it is None, the profile input is expected to carry `dataset_id` already.
- iceberg_bioimage.integrations.duckdb.query_metadata_table(source: MetadataSource, *, columns: Sequence[str] | None = None, filters: Sequence[FilterClause] | None = None, connection: DuckDBPyConnection | None = None) pa.Table[source]#
Query a metadata table from a Parquet path, Arrow table, or row list.
Catalog-facing helpers for reading canonical Iceberg metadata tables.
- class iceberg_bioimage.integrations.catalog.CatalogScanOptions(columns: Sequence[str] | None = None, where: str | None = None, snapshot_id: int | None = None, limit: int | None = None)[source]#
Options for scanning a catalog-backed metadata table.
- class iceberg_bioimage.integrations.catalog.SupportsIcebergScan(*args, **kwargs)[source]#
Protocol for pyiceberg scan objects.
- class iceberg_bioimage.integrations.catalog.SupportsIcebergTable(*args, **kwargs)[source]#
Protocol for pyiceberg table objects.
- scan(row_filter: str = 'True', selected_fields: tuple[str, ...] = ('*',), case_sensitive: bool = True, snapshot_id: int | None = None, limit: int | None = None) SupportsIcebergScan[source]#
Return a scan object for the current table.
- class iceberg_bioimage.integrations.catalog.SupportsScanCatalog(*args, **kwargs)[source]#
Protocol for catalogs used by the read-only integration helpers.
- list_tables(namespace: tuple[str, ...]) list[tuple[str, ...]][source]#
List tables within a namespace.
- load_table(identifier: tuple[str, ...]) SupportsIcebergTable[source]#
Load an existing Iceberg table.
- iceberg_bioimage.integrations.catalog.catalog_table_to_arrow(catalog: str | SupportsScanCatalog, namespace: str | Sequence[str], table_name: str, *, scan_options: CatalogScanOptions | None = None) Table[source]#
Load a catalog table into Arrow via PyIceberg.
- iceberg_bioimage.integrations.catalog.join_catalog_image_assets_with_profiles(catalog: str | SupportsScanCatalog, namespace: str | Sequence[str], profiles: str | Path | Table | list[dict[str, object]], *, image_assets_table: str = 'image_assets', chunk_index_table: str | None = None, join_keys: Sequence[str] = DEFAULT_JOIN_KEYS, image_assets_scan_options: CatalogScanOptions | None = None, chunk_index_scan_options: CatalogScanOptions | None = None, profile_dataset_id: str | None = None) Table[source]#
Join catalog-backed image metadata to a profile table.
- Parameters:
catalog – Catalog name or catalog-like object.
namespace – Namespace containing the metadata tables.
profiles – Profile rows or table to join against.
image_assets_table – Name of the canonical image-assets table.
chunk_index_table – Optional chunk-index table name.
join_keys – Join columns shared by image metadata and profiles.
image_assets_scan_options – Optional scan options for image-assets reads.
chunk_index_scan_options – Optional scan options for chunk-index reads.
profile_dataset_id – Dataset identifier to inject for profile inputs that do not carry their own dataset_id column. Defaults to None.
- iceberg_bioimage.integrations.catalog.list_catalog_tables(catalog: str | SupportsScanCatalog, namespace: str | Sequence[str]) list[str][source]#
List canonical metadata tables available in a catalog namespace.
- iceberg_bioimage.integrations.catalog.load_catalog_table(catalog: str | SupportsScanCatalog, namespace: str | Sequence[str], table_name: str) SupportsIcebergTable[source]#
Load a canonical metadata table from a catalog.
Helpers for exporting Parquet-backed Cytomining warehouse layouts.
- iceberg_bioimage.integrations.cytomining.export_catalog_to_cytomining_warehouse(catalog: str | SupportsScanCatalog, namespace: str | tuple[str, ...], warehouse_root: str | Path, *, profiles: str | Path | Table | list[dict[str, object]] | None = None, image_assets_table_name: str = 'image_assets', chunk_index_table_name: str | None = 'chunk_index', joined_table_name: str = 'joined_profiles', profile_dataset_id: str | None = None, mode: Literal['overwrite', 'append'] = 'overwrite') CytominingWarehouseResult[source]#
Materialize catalog-backed metadata into a Parquet Cytomining warehouse.
- iceberg_bioimage.integrations.cytomining.export_profiles_to_cytomining_warehouse(profiles: str | Path | Table | list[dict[str, object]], warehouse_root: str | Path, *, table_name: str = 'profiles', role: str = 'profiles', profile_dataset_id: str | None = None, join_keys: list[str] | None = None, source_type: str = 'profiles', source_ref: str | None = None, alias_map: Mapping[str, tuple[str, ...] | list[str]] | None = None, mode: Literal['overwrite', 'append'] = 'append') CytominingWarehouseResult[source]#
Write a Cytomining profile table into a Parquet-backed warehouse root.
- iceberg_bioimage.integrations.cytomining.export_scan_result_to_cytomining_warehouse(scan_result: ScanResult, warehouse_root: str | Path, *, profiles: str | Path | Table | list[dict[str, object]] | None = None, include_chunks: bool = True, image_assets_table_name: str = 'image_assets', chunk_index_table_name: str = 'chunk_index', joined_table_name: str = 'joined_profiles', profile_dataset_id: str | None = None, mode: Literal['overwrite', 'append'] = 'overwrite') CytominingWarehouseResult[source]#
Write scan-derived metadata into a Parquet-backed Cytomining warehouse.
- iceberg_bioimage.integrations.cytomining.export_store_to_cytomining_warehouse(uri: str, warehouse_root: str | Path, *, profiles: str | Path | Table | list[dict[str, object]] | None = None, include_chunks: bool = True, image_assets_table_name: str = 'image_assets', chunk_index_table_name: str = 'chunk_index', joined_table_name: str = 'joined_profiles', profile_dataset_id: str | None = None, mode: Literal['overwrite', 'append'] = 'overwrite') CytominingWarehouseResult[source]#
Scan a store and export its metadata into a Cytomining warehouse.
- iceberg_bioimage.integrations.cytomining.export_table_to_cytomining_warehouse(table: Table, warehouse_root: str | Path, *, table_name: str, role: str, join_keys: list[str] | None = None, source_type: str | None = None, source_ref: str | None = None, mode: Literal['overwrite', 'append'] = 'append') CytominingWarehouseResult[source]#
Write a generic table into a warehouse root and update the manifest.
- iceberg_bioimage.integrations.cytomining.load_warehouse_manifest(warehouse_root: str | Path) WarehouseManifest[source]#
Load a warehouse manifest if present, otherwise return an empty manifest.
Optional OME-Arrow Integration#
Optional OME-Arrow integration helpers.