{ "cells": [ { "cell_type": "markdown", "id": "7092d951-fcb8-4ee9-8537-0779e053c211", "metadata": {}, "source": [ "# CytoDataFrame at a Glance\n", "\n", "This notebook demonstrates various capabilities of\n", "[CytoDataFrame](https://github.com/WayScience/CytoDataFrame) using examples.\n", "\n", "CytoDataFrame is intended to provide you a Pandas-like\n", "DataFrame experience which is enhanced with single-cell\n", "visual information which can be viewed directly in a Jupyter notebook." ] }, { "cell_type": "code", "execution_count": 1, "id": "99e3f0b6-7737-4f4c-819f-6cf8cd9a71fe", "metadata": { "lines_to_next_cell": 0 }, "outputs": [], "source": [ "from cytodataframe.frame import CytoDataFrame\n", "\n", "# create paths for use with CytoDataFrames below\n", "jump_data_path = \"../../../tests/data/cytotable/JUMP_plate_BR00117006\"\n", "nf1_cellpainting_path = \"../../../tests/data/cytotable/NF1_cellpainting_data_shrunken/\"\n", "nuclear_speckles_path = \"../../../tests/data/cytotable/nuclear_speckles\"\n", "pediatric_cancer_atlas_path = (\n", " \"../../../tests/data/cytotable/pediatric_cancer_atlas_profiling\"\n", ")" ] }, { "cell_type": "code", "execution_count": 2, "id": "1e219072-c9de-4fef-8916-0218c09fac14", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "CPU times: user 666 ms, sys: 450 ms, total: 1.12 s\n", "Wall time: 333 ms\n" ] }, { "data": { "text/html": [ "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Metadata_ImageNumberCells_Number_Object_NumberImage_FileName_OrigAGPImage_FileName_OrigDNAImage_FileName_OrigRNA
011
112
213
" ], "text/plain": [ " Metadata_ImageNumber Cells_Number_Object_Number \\\n", "0 1 1 \n", "1 1 2 \n", "2 1 3 \n", "\n", " Image_FileName_OrigAGP Image_FileName_OrigDNA \\\n", "0 r01c01f01p01-ch2sk1fk1fl1.tiff r01c01f01p01-ch5sk1fk1fl1.tiff \n", "1 r01c01f01p01-ch2sk1fk1fl1.tiff r01c01f01p01-ch5sk1fk1fl1.tiff \n", "2 r01c01f01p01-ch2sk1fk1fl1.tiff r01c01f01p01-ch5sk1fk1fl1.tiff \n", "\n", " Image_FileName_OrigRNA \n", "0 r01c01f01p01-ch3sk1fk1fl1.tiff \n", "1 r01c01f01p01-ch3sk1fk1fl1.tiff \n", "2 r01c01f01p01-ch3sk1fk1fl1.tiff " ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "%%time\n", "# view JUMP plate BR00117006 with images\n", "CytoDataFrame(\n", " data=f\"{jump_data_path}/BR00117006_shrunken.parquet\",\n", " data_context_dir=f\"{jump_data_path}/images/orig\",\n", ")[\n", " [\n", " \"Metadata_ImageNumber\",\n", " \"Cells_Number_Object_Number\",\n", " \"Image_FileName_OrigAGP\",\n", " \"Image_FileName_OrigDNA\",\n", " \"Image_FileName_OrigRNA\",\n", " ]\n", "][:3]" ] }, { "cell_type": "code", "execution_count": 3, "id": "da4c0c27-eed4-4302-883e-3136946dd532", "metadata": { "lines_to_next_cell": 2 }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "CPU times: user 658 ms, sys: 417 ms, total: 1.08 s\n", "Wall time: 291 ms\n" ] }, { "data": { "text/html": [ "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Metadata_ImageNumberCells_Number_Object_NumberImage_FileName_OrigAGPImage_FileName_OrigDNAImage_FileName_OrigRNA
011
112
213
" ], "text/plain": [ " Metadata_ImageNumber Cells_Number_Object_Number \\\n", "0 1 1 \n", "1 1 2 \n", "2 1 3 \n", "\n", " Image_FileName_OrigAGP Image_FileName_OrigDNA \\\n", "0 r01c01f01p01-ch2sk1fk1fl1.tiff r01c01f01p01-ch5sk1fk1fl1.tiff \n", "1 r01c01f01p01-ch2sk1fk1fl1.tiff r01c01f01p01-ch5sk1fk1fl1.tiff \n", "2 r01c01f01p01-ch2sk1fk1fl1.tiff r01c01f01p01-ch5sk1fk1fl1.tiff \n", "\n", " Image_FileName_OrigRNA \n", "0 r01c01f01p01-ch3sk1fk1fl1.tiff \n", "1 r01c01f01p01-ch3sk1fk1fl1.tiff \n", "2 r01c01f01p01-ch3sk1fk1fl1.tiff " ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "%%time\n", "# view JUMP plate BR00117006 with images and overlaid outlines for segmentation\n", "CytoDataFrame(\n", " data=f\"{jump_data_path}/BR00117006_shrunken.parquet\",\n", " data_context_dir=f\"{jump_data_path}/images/orig\",\n", " data_outline_context_dir=f\"{jump_data_path}/images/outlines\",\n", ")[\n", " [\n", " \"Metadata_ImageNumber\",\n", " \"Cells_Number_Object_Number\",\n", " \"Image_FileName_OrigAGP\",\n", " \"Image_FileName_OrigDNA\",\n", " \"Image_FileName_OrigRNA\",\n", " ]\n", "][:3]" ] }, { "cell_type": "code", "execution_count": 4, "id": "365f68d0-3b66-4fb8-a381-8712dc43188c", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "CPU times: user 204 ms, sys: 128 ms, total: 333 ms\n", "Wall time: 103 ms\n" ] }, { "data": { "text/html": [ "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Metadata_ImageNumberMetadata_Cells_Number_Object_NumberImage_FileName_GFPImage_FileName_RFPImage_FileName_DAPI
353314
156411317
1275945
" ], "text/plain": [ " Metadata_ImageNumber Metadata_Cells_Number_Object_Number \\\n", "353 31 4 \n", "1564 113 17 \n", "1275 94 5 \n", "\n", " Image_FileName_GFP Image_FileName_RFP Image_FileName_DAPI \n", "353 B7_01_2_3_GFP_001.tif B7_01_3_3_RFP_001.tif B7_01_1_3_DAPI_001.tif \n", "1564 H12_01_2_1_GFP_001.tif H12_01_3_1_RFP_001.tif H12_01_1_1_DAPI_001.tif \n", "1275 F7_01_2_2_GFP_001.tif F7_01_3_2_RFP_001.tif F7_01_1_2_DAPI_001.tif " ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "%%time\n", "# view NF1 Cell Painting data with images\n", "CytoDataFrame(\n", " data=f\"{nf1_cellpainting_path}/Plate_2_with_image_data_shrunken.parquet\",\n", " data_context_dir=f\"{nf1_cellpainting_path}/Plate_2_images\",\n", ")[\n", " [\n", " \"Metadata_ImageNumber\",\n", " \"Metadata_Cells_Number_Object_Number\",\n", " \"Image_FileName_GFP\",\n", " \"Image_FileName_RFP\",\n", " \"Image_FileName_DAPI\",\n", " ]\n", "][:3]" ] }, { "cell_type": "code", "execution_count": 5, "id": "de89c992-ea92-4565-b03b-3b27ae46d28c", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "CPU times: user 214 ms, sys: 131 ms, total: 345 ms\n", "Wall time: 105 ms\n" ] }, { "data": { "text/html": [ "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Metadata_ImageNumberMetadata_Cells_Number_Object_NumberImage_FileName_GFPImage_FileName_RFPImage_FileName_DAPI
353314
156411317
1275945
" ], "text/plain": [ " Metadata_ImageNumber Metadata_Cells_Number_Object_Number \\\n", "353 31 4 \n", "1564 113 17 \n", "1275 94 5 \n", "\n", " Image_FileName_GFP Image_FileName_RFP Image_FileName_DAPI \n", "353 B7_01_2_3_GFP_001.tif B7_01_3_3_RFP_001.tif B7_01_1_3_DAPI_001.tif \n", "1564 H12_01_2_1_GFP_001.tif H12_01_3_1_RFP_001.tif H12_01_1_1_DAPI_001.tif \n", "1275 F7_01_2_2_GFP_001.tif F7_01_3_2_RFP_001.tif F7_01_1_2_DAPI_001.tif " ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "%%time\n", "# view NF1 Cell Painting data with images and overlaid outlines from masks\n", "CytoDataFrame(\n", " data=f\"{nf1_cellpainting_path}/Plate_2_with_image_data_shrunken.parquet\",\n", " data_context_dir=f\"{nf1_cellpainting_path}/Plate_2_images\",\n", " data_mask_context_dir=f\"{nf1_cellpainting_path}/Plate_2_masks\",\n", ")[\n", " [\n", " \"Metadata_ImageNumber\",\n", " \"Metadata_Cells_Number_Object_Number\",\n", " \"Image_FileName_GFP\",\n", " \"Image_FileName_RFP\",\n", " \"Image_FileName_DAPI\",\n", " ]\n", "][:3]" ] }, { "cell_type": "code", "execution_count": 6, "id": "5a5304db-7dac-4f45-aa55-dd3f50299c60", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "CPU times: user 87.6 ms, sys: 49 ms, total: 137 ms\n", "Wall time: 47.1 ms\n" ] }, { "data": { "text/html": [ "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Metadata_ImageNumberNuclei_Number_Object_NumberImage_FileName_A647Image_FileName_DAPIImage_FileName_GOLD
011slide1_A1_M10_CH1_Z09_illumcorrect.tiffslide1_A1_M10_CH2_Z09_illumcorrect.tiff
112slide1_A1_M10_CH1_Z09_illumcorrect.tiffslide1_A1_M10_CH2_Z09_illumcorrect.tiff
213slide1_A1_M10_CH1_Z09_illumcorrect.tiffslide1_A1_M10_CH2_Z09_illumcorrect.tiff
" ], "text/plain": [ " Metadata_ImageNumber Nuclei_Number_Object_Number \\\n", "0 1 1 \n", "1 1 2 \n", "2 1 3 \n", "\n", " Image_FileName_A647 \\\n", "0 slide1_A1_M10_CH1_Z09_illumcorrect.tiff \n", "1 slide1_A1_M10_CH1_Z09_illumcorrect.tiff \n", "2 slide1_A1_M10_CH1_Z09_illumcorrect.tiff \n", "\n", " Image_FileName_DAPI \\\n", "0 slide1_A1_M10_CH0_Z09_illumcorrect.tiff \n", "1 slide1_A1_M10_CH0_Z09_illumcorrect.tiff \n", "2 slide1_A1_M10_CH0_Z09_illumcorrect.tiff \n", "\n", " Image_FileName_GOLD \n", "0 slide1_A1_M10_CH2_Z09_illumcorrect.tiff \n", "1 slide1_A1_M10_CH2_Z09_illumcorrect.tiff \n", "2 slide1_A1_M10_CH2_Z09_illumcorrect.tiff " ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "%%time\n", "# view nuclear speckles data with images and overlaid outlines from masks\n", "CytoDataFrame(\n", " data=f\"{nuclear_speckles_path}/test_slide1_converted.parquet\",\n", " data_context_dir=f\"{nuclear_speckles_path}/images/plate1\",\n", " data_mask_context_dir=f\"{nuclear_speckles_path}/masks/plate1\",\n", ")[\n", " [\n", " \"Metadata_ImageNumber\",\n", " \"Nuclei_Number_Object_Number\",\n", " \"Image_FileName_A647\",\n", " \"Image_FileName_DAPI\",\n", " \"Image_FileName_GOLD\",\n", " ]\n", "][:3]" ] }, { "cell_type": "code", "execution_count": 7, "id": "cfd6358a-bf6c-4498-963a-bcf3f571993f", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "CPU times: user 253 ms, sys: 159 ms, total: 412 ms\n", "Wall time: 117 ms\n" ] }, { "data": { "text/html": [ "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Metadata_ImageNumberMetadata_Nuclei_Number_Object_NumberImage_FileName_OrigAGPImage_FileName_OrigDNA
033
134
236
" ], "text/plain": [ " Metadata_ImageNumber Metadata_Nuclei_Number_Object_Number \\\n", "0 3 3 \n", "1 3 4 \n", "2 3 6 \n", "\n", " Image_FileName_OrigAGP Image_FileName_OrigDNA \n", "0 r03c03f03p01-ch3sk1fk1fl1.tiff r03c03f03p01-ch5sk1fk1fl1.tiff \n", "1 r03c03f03p01-ch3sk1fk1fl1.tiff r03c03f03p01-ch5sk1fk1fl1.tiff \n", "2 r03c03f03p01-ch3sk1fk1fl1.tiff r03c03f03p01-ch5sk1fk1fl1.tiff " ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "%%time\n", "# view ALSF pediatric cancer atlas plate BR00143976 with images and\n", "# overlaid outlines for segmentation (files matched via segmentation_file_regex)\n", "CytoDataFrame(\n", " data=f\"{pediatric_cancer_atlas_path}/BR00143976_shrunken.parquet\",\n", " data_context_dir=f\"{pediatric_cancer_atlas_path}/images/orig\",\n", " data_outline_context_dir=f\"{pediatric_cancer_atlas_path}/images/outlines\",\n", " segmentation_file_regex={\n", " r\"CellsOutlines_BR(\\d+)_C(\\d{2})_\\d+\\.tiff\": r\".*ch3.*\\.tiff\",\n", " r\"NucleiOutlines_BR(\\d+)_C(\\d{2})_\\d+\\.tiff\": r\".*ch5.*\\.tiff\",\n", " },\n", ")[\n", " [\n", " \"Metadata_ImageNumber\",\n", " \"Metadata_Nuclei_Number_Object_Number\",\n", " \"Image_FileName_OrigAGP\",\n", " \"Image_FileName_OrigDNA\",\n", " ]\n", "][:3]" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.9" } }, "nbformat": 4, "nbformat_minor": 5 }