{
"cells": [
{
"cell_type": "markdown",
"id": "7092d951-fcb8-4ee9-8537-0779e053c211",
"metadata": {},
"source": [
"# CytoDataFrame at a Glance\n",
"\n",
"This notebook demonstrates various capabilities of\n",
"[CytoDataFrame](https://github.com/WayScience/CytoDataFrame) using examples.\n",
"\n",
"CytoDataFrame is intended to provide you a Pandas-like\n",
"DataFrame experience which is enhanced with single-cell\n",
"visual information which can be viewed directly in a Jupyter notebook."
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "99e3f0b6-7737-4f4c-819f-6cf8cd9a71fe",
"metadata": {
"lines_to_next_cell": 0
},
"outputs": [],
"source": [
"from cytodataframe.frame import CytoDataFrame\n",
"\n",
"# create paths for use with CytoDataFrames below\n",
"jump_data_path = \"../../../tests/data/cytotable/JUMP_plate_BR00117006\"\n",
"nf1_cellpainting_path = \"../../../tests/data/cytotable/NF1_cellpainting_data_shrunken/\"\n",
"nuclear_speckles_path = \"../../../tests/data/cytotable/nuclear_speckles\"\n",
"pediatric_cancer_atlas_path = (\n",
" \"../../../tests/data/cytotable/pediatric_cancer_atlas_profiling\"\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "1e219072-c9de-4fef-8916-0218c09fac14",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 666 ms, sys: 450 ms, total: 1.12 s\n",
"Wall time: 333 ms\n"
]
},
{
"data": {
"text/html": [
"
\n",
" \n",
" \n",
" | \n",
" Metadata_ImageNumber | \n",
" Cells_Number_Object_Number | \n",
" Image_FileName_OrigAGP | \n",
" Image_FileName_OrigDNA | \n",
" Image_FileName_OrigRNA | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 1 | \n",
" 1 | \n",
" | \n",
" | \n",
" | \n",
"
\n",
" \n",
" 1 | \n",
" 1 | \n",
" 2 | \n",
" | \n",
" | \n",
" | \n",
"
\n",
" \n",
" 2 | \n",
" 1 | \n",
" 3 | \n",
" | \n",
" | \n",
" | \n",
"
\n",
" \n",
"
"
],
"text/plain": [
" Metadata_ImageNumber Cells_Number_Object_Number \\\n",
"0 1 1 \n",
"1 1 2 \n",
"2 1 3 \n",
"\n",
" Image_FileName_OrigAGP Image_FileName_OrigDNA \\\n",
"0 r01c01f01p01-ch2sk1fk1fl1.tiff r01c01f01p01-ch5sk1fk1fl1.tiff \n",
"1 r01c01f01p01-ch2sk1fk1fl1.tiff r01c01f01p01-ch5sk1fk1fl1.tiff \n",
"2 r01c01f01p01-ch2sk1fk1fl1.tiff r01c01f01p01-ch5sk1fk1fl1.tiff \n",
"\n",
" Image_FileName_OrigRNA \n",
"0 r01c01f01p01-ch3sk1fk1fl1.tiff \n",
"1 r01c01f01p01-ch3sk1fk1fl1.tiff \n",
"2 r01c01f01p01-ch3sk1fk1fl1.tiff "
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"%%time\n",
"# view JUMP plate BR00117006 with images\n",
"CytoDataFrame(\n",
" data=f\"{jump_data_path}/BR00117006_shrunken.parquet\",\n",
" data_context_dir=f\"{jump_data_path}/images/orig\",\n",
")[\n",
" [\n",
" \"Metadata_ImageNumber\",\n",
" \"Cells_Number_Object_Number\",\n",
" \"Image_FileName_OrigAGP\",\n",
" \"Image_FileName_OrigDNA\",\n",
" \"Image_FileName_OrigRNA\",\n",
" ]\n",
"][:3]"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "da4c0c27-eed4-4302-883e-3136946dd532",
"metadata": {
"lines_to_next_cell": 2
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 658 ms, sys: 417 ms, total: 1.08 s\n",
"Wall time: 291 ms\n"
]
},
{
"data": {
"text/html": [
"\n",
" \n",
" \n",
" | \n",
" Metadata_ImageNumber | \n",
" Cells_Number_Object_Number | \n",
" Image_FileName_OrigAGP | \n",
" Image_FileName_OrigDNA | \n",
" Image_FileName_OrigRNA | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 1 | \n",
" 1 | \n",
" | \n",
" | \n",
" | \n",
"
\n",
" \n",
" 1 | \n",
" 1 | \n",
" 2 | \n",
" | \n",
" | \n",
" | \n",
"
\n",
" \n",
" 2 | \n",
" 1 | \n",
" 3 | \n",
" | \n",
" | \n",
" | \n",
"
\n",
" \n",
"
"
],
"text/plain": [
" Metadata_ImageNumber Cells_Number_Object_Number \\\n",
"0 1 1 \n",
"1 1 2 \n",
"2 1 3 \n",
"\n",
" Image_FileName_OrigAGP Image_FileName_OrigDNA \\\n",
"0 r01c01f01p01-ch2sk1fk1fl1.tiff r01c01f01p01-ch5sk1fk1fl1.tiff \n",
"1 r01c01f01p01-ch2sk1fk1fl1.tiff r01c01f01p01-ch5sk1fk1fl1.tiff \n",
"2 r01c01f01p01-ch2sk1fk1fl1.tiff r01c01f01p01-ch5sk1fk1fl1.tiff \n",
"\n",
" Image_FileName_OrigRNA \n",
"0 r01c01f01p01-ch3sk1fk1fl1.tiff \n",
"1 r01c01f01p01-ch3sk1fk1fl1.tiff \n",
"2 r01c01f01p01-ch3sk1fk1fl1.tiff "
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"%%time\n",
"# view JUMP plate BR00117006 with images and overlaid outlines for segmentation\n",
"CytoDataFrame(\n",
" data=f\"{jump_data_path}/BR00117006_shrunken.parquet\",\n",
" data_context_dir=f\"{jump_data_path}/images/orig\",\n",
" data_outline_context_dir=f\"{jump_data_path}/images/outlines\",\n",
")[\n",
" [\n",
" \"Metadata_ImageNumber\",\n",
" \"Cells_Number_Object_Number\",\n",
" \"Image_FileName_OrigAGP\",\n",
" \"Image_FileName_OrigDNA\",\n",
" \"Image_FileName_OrigRNA\",\n",
" ]\n",
"][:3]"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "365f68d0-3b66-4fb8-a381-8712dc43188c",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 204 ms, sys: 128 ms, total: 333 ms\n",
"Wall time: 103 ms\n"
]
},
{
"data": {
"text/html": [
"\n",
" \n",
" \n",
" | \n",
" Metadata_ImageNumber | \n",
" Metadata_Cells_Number_Object_Number | \n",
" Image_FileName_GFP | \n",
" Image_FileName_RFP | \n",
" Image_FileName_DAPI | \n",
"
\n",
" \n",
" \n",
" \n",
" 353 | \n",
" 31 | \n",
" 4 | \n",
" | \n",
" | \n",
" | \n",
"
\n",
" \n",
" 1564 | \n",
" 113 | \n",
" 17 | \n",
" | \n",
" | \n",
" | \n",
"
\n",
" \n",
" 1275 | \n",
" 94 | \n",
" 5 | \n",
" | \n",
" | \n",
" | \n",
"
\n",
" \n",
"
"
],
"text/plain": [
" Metadata_ImageNumber Metadata_Cells_Number_Object_Number \\\n",
"353 31 4 \n",
"1564 113 17 \n",
"1275 94 5 \n",
"\n",
" Image_FileName_GFP Image_FileName_RFP Image_FileName_DAPI \n",
"353 B7_01_2_3_GFP_001.tif B7_01_3_3_RFP_001.tif B7_01_1_3_DAPI_001.tif \n",
"1564 H12_01_2_1_GFP_001.tif H12_01_3_1_RFP_001.tif H12_01_1_1_DAPI_001.tif \n",
"1275 F7_01_2_2_GFP_001.tif F7_01_3_2_RFP_001.tif F7_01_1_2_DAPI_001.tif "
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"%%time\n",
"# view NF1 Cell Painting data with images\n",
"CytoDataFrame(\n",
" data=f\"{nf1_cellpainting_path}/Plate_2_with_image_data_shrunken.parquet\",\n",
" data_context_dir=f\"{nf1_cellpainting_path}/Plate_2_images\",\n",
")[\n",
" [\n",
" \"Metadata_ImageNumber\",\n",
" \"Metadata_Cells_Number_Object_Number\",\n",
" \"Image_FileName_GFP\",\n",
" \"Image_FileName_RFP\",\n",
" \"Image_FileName_DAPI\",\n",
" ]\n",
"][:3]"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "de89c992-ea92-4565-b03b-3b27ae46d28c",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 214 ms, sys: 131 ms, total: 345 ms\n",
"Wall time: 105 ms\n"
]
},
{
"data": {
"text/html": [
"\n",
" \n",
" \n",
" | \n",
" Metadata_ImageNumber | \n",
" Metadata_Cells_Number_Object_Number | \n",
" Image_FileName_GFP | \n",
" Image_FileName_RFP | \n",
" Image_FileName_DAPI | \n",
"
\n",
" \n",
" \n",
" \n",
" 353 | \n",
" 31 | \n",
" 4 | \n",
" | \n",
" | \n",
" | \n",
"
\n",
" \n",
" 1564 | \n",
" 113 | \n",
" 17 | \n",
" | \n",
" | \n",
" | \n",
"
\n",
" \n",
" 1275 | \n",
" 94 | \n",
" 5 | \n",
" | \n",
" | \n",
" | \n",
"
\n",
" \n",
"
"
],
"text/plain": [
" Metadata_ImageNumber Metadata_Cells_Number_Object_Number \\\n",
"353 31 4 \n",
"1564 113 17 \n",
"1275 94 5 \n",
"\n",
" Image_FileName_GFP Image_FileName_RFP Image_FileName_DAPI \n",
"353 B7_01_2_3_GFP_001.tif B7_01_3_3_RFP_001.tif B7_01_1_3_DAPI_001.tif \n",
"1564 H12_01_2_1_GFP_001.tif H12_01_3_1_RFP_001.tif H12_01_1_1_DAPI_001.tif \n",
"1275 F7_01_2_2_GFP_001.tif F7_01_3_2_RFP_001.tif F7_01_1_2_DAPI_001.tif "
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"%%time\n",
"# view NF1 Cell Painting data with images and overlaid outlines from masks\n",
"CytoDataFrame(\n",
" data=f\"{nf1_cellpainting_path}/Plate_2_with_image_data_shrunken.parquet\",\n",
" data_context_dir=f\"{nf1_cellpainting_path}/Plate_2_images\",\n",
" data_mask_context_dir=f\"{nf1_cellpainting_path}/Plate_2_masks\",\n",
")[\n",
" [\n",
" \"Metadata_ImageNumber\",\n",
" \"Metadata_Cells_Number_Object_Number\",\n",
" \"Image_FileName_GFP\",\n",
" \"Image_FileName_RFP\",\n",
" \"Image_FileName_DAPI\",\n",
" ]\n",
"][:3]"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "5a5304db-7dac-4f45-aa55-dd3f50299c60",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 87.6 ms, sys: 49 ms, total: 137 ms\n",
"Wall time: 47.1 ms\n"
]
},
{
"data": {
"text/html": [
"\n",
" \n",
" \n",
" | \n",
" Metadata_ImageNumber | \n",
" Nuclei_Number_Object_Number | \n",
" Image_FileName_A647 | \n",
" Image_FileName_DAPI | \n",
" Image_FileName_GOLD | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 1 | \n",
" 1 | \n",
" slide1_A1_M10_CH1_Z09_illumcorrect.tiff | \n",
" | \n",
" slide1_A1_M10_CH2_Z09_illumcorrect.tiff | \n",
"
\n",
" \n",
" 1 | \n",
" 1 | \n",
" 2 | \n",
" slide1_A1_M10_CH1_Z09_illumcorrect.tiff | \n",
" | \n",
" slide1_A1_M10_CH2_Z09_illumcorrect.tiff | \n",
"
\n",
" \n",
" 2 | \n",
" 1 | \n",
" 3 | \n",
" slide1_A1_M10_CH1_Z09_illumcorrect.tiff | \n",
" | \n",
" slide1_A1_M10_CH2_Z09_illumcorrect.tiff | \n",
"
\n",
" \n",
"
"
],
"text/plain": [
" Metadata_ImageNumber Nuclei_Number_Object_Number \\\n",
"0 1 1 \n",
"1 1 2 \n",
"2 1 3 \n",
"\n",
" Image_FileName_A647 \\\n",
"0 slide1_A1_M10_CH1_Z09_illumcorrect.tiff \n",
"1 slide1_A1_M10_CH1_Z09_illumcorrect.tiff \n",
"2 slide1_A1_M10_CH1_Z09_illumcorrect.tiff \n",
"\n",
" Image_FileName_DAPI \\\n",
"0 slide1_A1_M10_CH0_Z09_illumcorrect.tiff \n",
"1 slide1_A1_M10_CH0_Z09_illumcorrect.tiff \n",
"2 slide1_A1_M10_CH0_Z09_illumcorrect.tiff \n",
"\n",
" Image_FileName_GOLD \n",
"0 slide1_A1_M10_CH2_Z09_illumcorrect.tiff \n",
"1 slide1_A1_M10_CH2_Z09_illumcorrect.tiff \n",
"2 slide1_A1_M10_CH2_Z09_illumcorrect.tiff "
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"%%time\n",
"# view nuclear speckles data with images and overlaid outlines from masks\n",
"CytoDataFrame(\n",
" data=f\"{nuclear_speckles_path}/test_slide1_converted.parquet\",\n",
" data_context_dir=f\"{nuclear_speckles_path}/images/plate1\",\n",
" data_mask_context_dir=f\"{nuclear_speckles_path}/masks/plate1\",\n",
")[\n",
" [\n",
" \"Metadata_ImageNumber\",\n",
" \"Nuclei_Number_Object_Number\",\n",
" \"Image_FileName_A647\",\n",
" \"Image_FileName_DAPI\",\n",
" \"Image_FileName_GOLD\",\n",
" ]\n",
"][:3]"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "cfd6358a-bf6c-4498-963a-bcf3f571993f",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 253 ms, sys: 159 ms, total: 412 ms\n",
"Wall time: 117 ms\n"
]
},
{
"data": {
"text/html": [
"\n",
" \n",
" \n",
" | \n",
" Metadata_ImageNumber | \n",
" Metadata_Nuclei_Number_Object_Number | \n",
" Image_FileName_OrigAGP | \n",
" Image_FileName_OrigDNA | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 3 | \n",
" 3 | \n",
" | \n",
" | \n",
"
\n",
" \n",
" 1 | \n",
" 3 | \n",
" 4 | \n",
" | \n",
" | \n",
"
\n",
" \n",
" 2 | \n",
" 3 | \n",
" 6 | \n",
" | \n",
" | \n",
"
\n",
" \n",
"
"
],
"text/plain": [
" Metadata_ImageNumber Metadata_Nuclei_Number_Object_Number \\\n",
"0 3 3 \n",
"1 3 4 \n",
"2 3 6 \n",
"\n",
" Image_FileName_OrigAGP Image_FileName_OrigDNA \n",
"0 r03c03f03p01-ch3sk1fk1fl1.tiff r03c03f03p01-ch5sk1fk1fl1.tiff \n",
"1 r03c03f03p01-ch3sk1fk1fl1.tiff r03c03f03p01-ch5sk1fk1fl1.tiff \n",
"2 r03c03f03p01-ch3sk1fk1fl1.tiff r03c03f03p01-ch5sk1fk1fl1.tiff "
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"%%time\n",
"# view ALSF pediatric cancer atlas plate BR00143976 with images\n",
"CytoDataFrame(\n",
" data=f\"{pediatric_cancer_atlas_path}/BR00143976_shrunken.parquet\",\n",
" data_context_dir=f\"{pediatric_cancer_atlas_path}/images/orig\",\n",
" data_outline_context_dir=f\"{pediatric_cancer_atlas_path}/images/outlines\",\n",
" segmentation_file_regex={\n",
" r\"CellsOutlines_BR(\\d+)_C(\\d{2})_\\d+\\.tiff\": r\".*ch3.*\\.tiff\",\n",
" r\"NucleiOutlines_BR(\\d+)_C(\\d{2})_\\d+\\.tiff\": r\".*ch5.*\\.tiff\",\n",
" },\n",
")[\n",
" [\n",
" \"Metadata_ImageNumber\",\n",
" \"Metadata_Nuclei_Number_Object_Number\",\n",
" \"Image_FileName_OrigAGP\",\n",
" \"Image_FileName_OrigDNA\",\n",
" ]\n",
"][:3]"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.9"
}
},
"nbformat": 4,
"nbformat_minor": 5
}