from __future__ import annotations
import argparse
from pathlib import Path
from OME_IRIS.clean import clean_local_data
from OME_IRIS.datasets import download
from OME_IRIS.fetch import fetch_datasets
from OME_IRIS.rocrate import export_rocrate_metadata
from OME_IRIS.scaffold import scaffold_dataset_manifest
from OME_IRIS.verify import verify_datasets
[docs]
def build_parser() -> argparse.ArgumentParser:
parser = argparse.ArgumentParser(
prog="ome-iris", description="Fetch and verify OME-IRIS datasets"
)
sub = parser.add_subparsers(dest="command", required=True)
download_cmd = sub.add_parser(
"download", help="Download a reproducible dataset subset"
)
download_cmd.add_argument("dataset")
download_cmd.add_argument("--output", required=True)
download_cmd.add_argument("--preset", choices=["tiny", "small", "benchmark"])
download_cmd.add_argument("--limit-images", type=int)
download_cmd.add_argument("--channel", dest="channels", action="append")
download_cmd.add_argument("--plate", action="append")
download_cmd.add_argument("--well", action="append")
download_cmd.add_argument("--site", action="append")
download_cmd.add_argument("--z-range", nargs=2, type=int, metavar=("START", "STOP"))
download_cmd.add_argument("--t-range", nargs=2, type=int, metavar=("START", "STOP"))
download_cmd.add_argument("--c-range", nargs=2, type=int, metavar=("START", "STOP"))
download_cmd.add_argument("--validate-only", action="store_true")
download_cmd.add_argument("--manifests-dir", default="src/OME_IRIS/data/datasets")
download_cmd.add_argument("--silent", action="store_true")
fetch_cmd = sub.add_parser("fetch", help="Fetch dataset files")
fetch_cmd.add_argument("--dataset", dest="dataset_id")
fetch_cmd.add_argument("--tier", choices=["tiny", "small", "realistic"])
fetch_cmd.add_argument("--manifests-dir", default="src/OME_IRIS/data/datasets")
fetch_cmd.add_argument("--data-dir", default="data")
fetch_mode = fetch_cmd.add_mutually_exclusive_group()
fetch_mode.add_argument("--verbose", action="store_true")
fetch_mode.add_argument("--silent", action="store_true")
verify_cmd = sub.add_parser("verify", help="Verify local datasets")
verify_cmd.add_argument("--manifests-dir", default="src/OME_IRIS/data/datasets")
verify_cmd.add_argument("--data-dir", default="data")
clean_cmd = sub.add_parser("clean", help="Remove local fetched data")
clean_cmd.add_argument("--data-dir", default="data")
scaffold_cmd = sub.add_parser(
"scaffold",
help="Generate starter dataset manifest and CSV row from a source path",
)
scaffold_cmd.add_argument("--source-path", required=True)
scaffold_cmd.add_argument("--dataset-id")
scaffold_cmd.add_argument("--name", dest="dataset_name")
scaffold_cmd.add_argument(
"--tier", choices=["tiny", "small", "realistic"], default="small"
)
scaffold_cmd.add_argument("--license", dest="license_name", default="TBD")
scaffold_cmd.add_argument("--source-repository", default="")
scaffold_cmd.add_argument("--source-url", default="")
scaffold_cmd.add_argument("--include-directory-entry", action="store_true")
scaffold_cmd.add_argument("--directory-path", default="images")
scaffold_cmd.add_argument("--archive-format", choices=["zip", "tar"], default="zip")
scaffold_cmd.add_argument("--manifests-dir", default="src/OME_IRIS/data/datasets")
scaffold_cmd.add_argument("--catalog-csv", default="src/OME_IRIS/data/datasets.csv")
scaffold_cmd.add_argument("--append-csv", action="store_true")
scaffold_cmd.add_argument("--force", action="store_true")
rocrate_cmd = sub.add_parser(
"export-rocrate",
help="Export RO-Crate metadata for a dataset into the fetched dataset directory",
)
rocrate_cmd.add_argument("--dataset", dest="dataset_id", required=True)
rocrate_cmd.add_argument("--manifests-dir", default="src/OME_IRIS/data/datasets")
rocrate_cmd.add_argument("--data-dir", default="data")
return parser
[docs]
def main() -> int:
parser = build_parser()
args = parser.parse_args()
if args.command == "download":
subset = {
"images": args.limit_images,
"channels": args.channels,
"plate": args.plate,
"well": args.well,
"site": args.site,
"z": tuple(args.z_range) if args.z_range else None,
"t": tuple(args.t_range) if args.t_range else None,
"c": tuple(args.c_range) if args.c_range else None,
}
result = download(
args.dataset,
output_dir=Path(args.output),
subset=subset,
preset=args.preset,
manifests_dir=Path(args.manifests_dir),
validate_only=args.validate_only,
silent=args.silent,
)
print(f"Downloaded: {result.downloaded}")
print(f"Skipped: {result.skipped}")
print(f"Validated: {result.validated}")
if result.manifest_path:
print(f"Manifest: {result.manifest_path}")
if result.failed:
print("Failed:")
for item in result.failed:
print(f"- {item}")
return 1
return 0
if args.command == "fetch":
result = fetch_datasets(
manifests_dir=Path(args.manifests_dir),
data_dir=Path(args.data_dir),
dataset_id=args.dataset_id,
tier=args.tier,
verbose=args.verbose,
silent=args.silent,
)
print(f"Downloaded: {result.downloaded}")
print(f"Skipped: {result.skipped}")
if result.downloaded_items:
print("Downloaded items:")
for item in result.downloaded_items:
print(f"- {item}")
if result.skipped_items:
print("Skipped items:")
for item in result.skipped_items:
print(f"- {item}")
if result.missing_urls:
print("Missing URLs:")
for item in result.missing_urls:
print(f"- {item}")
if result.failed:
print("Failed downloads:")
for item in result.failed:
print(f"- {item}")
return 0
if args.command == "clean":
clean_local_data(Path(args.data_dir))
print(f"Removed local data directory: {args.data_dir}")
return 0
if args.command == "scaffold":
result = scaffold_dataset_manifest(
source_path=args.source_path,
manifests_dir=Path(args.manifests_dir),
dataset_id=args.dataset_id,
dataset_name=args.dataset_name,
tier=args.tier,
license_name=args.license_name,
source_repository=args.source_repository,
source_url=args.source_url,
include_directory_entry=args.include_directory_entry,
directory_path=args.directory_path,
archive_format=args.archive_format,
append_csv=args.append_csv,
catalog_csv=Path(args.catalog_csv),
force=args.force,
)
print(f"Manifest created: {result.manifest_path}")
print("Suggested datasets.csv row:")
print(result.csv_row)
if args.append_csv:
print(f"Appended row to: {args.catalog_csv}")
return 0
if args.command == "export-rocrate":
out_path = export_rocrate_metadata(
manifests_dir=Path(args.manifests_dir),
dataset_id=args.dataset_id,
data_dir=Path(args.data_dir),
)
print(f"RO-Crate metadata written: {out_path}")
return 0
result = verify_datasets(
manifests_dir=Path(args.manifests_dir),
data_dir=Path(args.data_dir),
)
if result.ok:
print("Verification passed")
return 0
print("Verification failed")
for issue in result.issues:
print(f"- {issue}")
return 1
if __name__ == "__main__":
raise SystemExit(main())