{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# `coSMicQC` in a nutshell\n", "\n", "This notebook demonstrates various capabilities of `coSMicQC` using examples." ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", 
" \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Metadata_ImageNumberImage_Metadata_Plate_xMetadata_number_of_singlecellsImage_Metadata_Site_xImage_Metadata_Well_xMetadata_Cells_Number_Object_NumberMetadata_Cytoplasm_Parent_CellsMetadata_Cytoplasm_Parent_NucleiMetadata_Nuclei_Number_Object_NumberCytoplasm_AreaShape_Area...Image_Threshold_SumOfEntropies_CellsImage_Threshold_SumOfEntropies_NucleiImage_Threshold_WeightedVariance_CellsImage_Threshold_WeightedVariance_NucleiImage_URL_DAPIImage_URL_GFPImage_URL_RFPImage_Width_DAPIImage_Width_GFPImage_Width_RFP
01Plate_2441A12112221024.0...-12.181288-11.6999930.9926240.657791file:/home/jenna/nf1_cellpainting_data/1.cellprofiler_ic/Corrected_Images/Corrected_Plate_2/A12_01_1_1_DAPI_001.tiffile:/home/jenna/nf1_cellpainting_data/1.cellprofiler_ic/Corrected_Images/Corrected_Plate_2/A12_01_2_1_GFP_001.tiffile:/home/jenna/nf1_cellpainting_data/1.cellprofiler_ic/Corrected_Images/Corrected_Plate_2/A12_01_3_1_RFP_001.tif122412241224
11Plate_2441A12447712754.0...-12.181288-11.6999930.9926240.657791file:/home/jenna/nf1_cellpainting_data/1.cellprofiler_ic/Corrected_Images/Corrected_Plate_2/A12_01_1_1_DAPI_001.tiffile:/home/jenna/nf1_cellpainting_data/1.cellprofiler_ic/Corrected_Images/Corrected_Plate_2/A12_01_2_1_GFP_001.tiffile:/home/jenna/nf1_cellpainting_data/1.cellprofiler_ic/Corrected_Images/Corrected_Plate_2/A12_01_3_1_RFP_001.tif122412241224
21Plate_2441A1277101023976.0...-12.181288-11.6999930.9926240.657791file:/home/jenna/nf1_cellpainting_data/1.cellprofiler_ic/Corrected_Images/Corrected_Plate_2/A12_01_1_1_DAPI_001.tiffile:/home/jenna/nf1_cellpainting_data/1.cellprofiler_ic/Corrected_Images/Corrected_Plate_2/A12_01_2_1_GFP_001.tiffile:/home/jenna/nf1_cellpainting_data/1.cellprofiler_ic/Corrected_Images/Corrected_Plate_2/A12_01_3_1_RFP_001.tif122412241224
31Plate_2441A1288121219374.0...-12.181288-11.6999930.9926240.657791file:/home/jenna/nf1_cellpainting_data/1.cellprofiler_ic/Corrected_Images/Corrected_Plate_2/A12_01_1_1_DAPI_001.tiffile:/home/jenna/nf1_cellpainting_data/1.cellprofiler_ic/Corrected_Images/Corrected_Plate_2/A12_01_2_1_GFP_001.tiffile:/home/jenna/nf1_cellpainting_data/1.cellprofiler_ic/Corrected_Images/Corrected_Plate_2/A12_01_3_1_RFP_001.tif122412241224
41Plate_2441A1299131327385.0...-12.181288-11.6999930.9926240.657791file:/home/jenna/nf1_cellpainting_data/1.cellprofiler_ic/Corrected_Images/Corrected_Plate_2/A12_01_1_1_DAPI_001.tiffile:/home/jenna/nf1_cellpainting_data/1.cellprofiler_ic/Corrected_Images/Corrected_Plate_2/A12_01_2_1_GFP_001.tiffile:/home/jenna/nf1_cellpainting_data/1.cellprofiler_ic/Corrected_Images/Corrected_Plate_2/A12_01_3_1_RFP_001.tif122412241224
..................................................................
1709128Plate_2594H71010141424942.0...-12.566582-11.6330431.6243100.545186file:/home/jenna/nf1_cellpainting_data/1.cellprofiler_ic/Corrected_Images/Corrected_Plate_2/H7_01_1_4_DAPI_001.tiffile:/home/jenna/nf1_cellpainting_data/1.cellprofiler_ic/Corrected_Images/Corrected_Plate_2/H7_01_2_4_GFP_001.tiffile:/home/jenna/nf1_cellpainting_data/1.cellprofiler_ic/Corrected_Images/Corrected_Plate_2/H7_01_3_4_RFP_001.tif122412241224
1710128Plate_2594H7111115156627.0...-12.566582-11.6330431.6243100.545186file:/home/jenna/nf1_cellpainting_data/1.cellprofiler_ic/Corrected_Images/Corrected_Plate_2/H7_01_1_4_DAPI_001.tiffile:/home/jenna/nf1_cellpainting_data/1.cellprofiler_ic/Corrected_Images/Corrected_Plate_2/H7_01_2_4_GFP_001.tiffile:/home/jenna/nf1_cellpainting_data/1.cellprofiler_ic/Corrected_Images/Corrected_Plate_2/H7_01_3_4_RFP_001.tif122412241224
1711128Plate_2594H71212161611216.0...-12.566582-11.6330431.6243100.545186file:/home/jenna/nf1_cellpainting_data/1.cellprofiler_ic/Corrected_Images/Corrected_Plate_2/H7_01_1_4_DAPI_001.tiffile:/home/jenna/nf1_cellpainting_data/1.cellprofiler_ic/Corrected_Images/Corrected_Plate_2/H7_01_2_4_GFP_001.tiffile:/home/jenna/nf1_cellpainting_data/1.cellprofiler_ic/Corrected_Images/Corrected_Plate_2/H7_01_3_4_RFP_001.tif122412241224
1712128Plate_2594H71313171715279.0...-12.566582-11.6330431.6243100.545186file:/home/jenna/nf1_cellpainting_data/1.cellprofiler_ic/Corrected_Images/Corrected_Plate_2/H7_01_1_4_DAPI_001.tiffile:/home/jenna/nf1_cellpainting_data/1.cellprofiler_ic/Corrected_Images/Corrected_Plate_2/H7_01_2_4_GFP_001.tiffile:/home/jenna/nf1_cellpainting_data/1.cellprofiler_ic/Corrected_Images/Corrected_Plate_2/H7_01_3_4_RFP_001.tif122412241224
1713128Plate_2594H7141420207106.0...-12.566582-11.6330431.6243100.545186file:/home/jenna/nf1_cellpainting_data/1.cellprofiler_ic/Corrected_Images/Corrected_Plate_2/H7_01_1_4_DAPI_001.tiffile:/home/jenna/nf1_cellpainting_data/1.cellprofiler_ic/Corrected_Images/Corrected_Plate_2/H7_01_2_4_GFP_001.tiffile:/home/jenna/nf1_cellpainting_data/1.cellprofiler_ic/Corrected_Images/Corrected_Plate_2/H7_01_3_4_RFP_001.tif122412241224
\n", "

1714 rows × 2076 columns

" ], "text/plain": [ " Metadata_ImageNumber Image_Metadata_Plate_x \\\n", "0 1 Plate_2 \n", "1 1 Plate_2 \n", "2 1 Plate_2 \n", "3 1 Plate_2 \n", "4 1 Plate_2 \n", "... ... ... \n", "1709 128 Plate_2 \n", "1710 128 Plate_2 \n", "1711 128 Plate_2 \n", "1712 128 Plate_2 \n", "1713 128 Plate_2 \n", "\n", " Metadata_number_of_singlecells Image_Metadata_Site_x \\\n", "0 44 1 \n", "1 44 1 \n", "2 44 1 \n", "3 44 1 \n", "4 44 1 \n", "... ... ... \n", "1709 59 4 \n", "1710 59 4 \n", "1711 59 4 \n", "1712 59 4 \n", "1713 59 4 \n", "\n", " Image_Metadata_Well_x Metadata_Cells_Number_Object_Number \\\n", "0 A12 1 \n", "1 A12 4 \n", "2 A12 7 \n", "3 A12 8 \n", "4 A12 9 \n", "... ... ... \n", "1709 H7 10 \n", "1710 H7 11 \n", "1711 H7 12 \n", "1712 H7 13 \n", "1713 H7 14 \n", "\n", " Metadata_Cytoplasm_Parent_Cells Metadata_Cytoplasm_Parent_Nuclei \\\n", "0 1 2 \n", "1 4 7 \n", "2 7 10 \n", "3 8 12 \n", "4 9 13 \n", "... ... ... \n", "1709 10 14 \n", "1710 11 15 \n", "1711 12 16 \n", "1712 13 17 \n", "1713 14 20 \n", "\n", " Metadata_Nuclei_Number_Object_Number Cytoplasm_AreaShape_Area ... \\\n", "0 2 21024.0 ... \n", "1 7 12754.0 ... \n", "2 10 23976.0 ... \n", "3 12 19374.0 ... \n", "4 13 27385.0 ... \n", "... ... ... ... \n", "1709 14 24942.0 ... \n", "1710 15 6627.0 ... \n", "1711 16 11216.0 ... \n", "1712 17 15279.0 ... \n", "1713 20 7106.0 ... \n", "\n", " Image_Threshold_SumOfEntropies_Cells \\\n", "0 -12.181288 \n", "1 -12.181288 \n", "2 -12.181288 \n", "3 -12.181288 \n", "4 -12.181288 \n", "... ... \n", "1709 -12.566582 \n", "1710 -12.566582 \n", "1711 -12.566582 \n", "1712 -12.566582 \n", "1713 -12.566582 \n", "\n", " Image_Threshold_SumOfEntropies_Nuclei \\\n", "0 -11.699993 \n", "1 -11.699993 \n", "2 -11.699993 \n", "3 -11.699993 \n", "4 -11.699993 \n", "... ... 
\n", "1709 -11.633043 \n", "1710 -11.633043 \n", "1711 -11.633043 \n", "1712 -11.633043 \n", "1713 -11.633043 \n", "\n", " Image_Threshold_WeightedVariance_Cells \\\n", "0 0.992624 \n", "1 0.992624 \n", "2 0.992624 \n", "3 0.992624 \n", "4 0.992624 \n", "... ... \n", "1709 1.624310 \n", "1710 1.624310 \n", "1711 1.624310 \n", "1712 1.624310 \n", "1713 1.624310 \n", "\n", " Image_Threshold_WeightedVariance_Nuclei \\\n", "0 0.657791 \n", "1 0.657791 \n", "2 0.657791 \n", "3 0.657791 \n", "4 0.657791 \n", "... ... \n", "1709 0.545186 \n", "1710 0.545186 \n", "1711 0.545186 \n", "1712 0.545186 \n", "1713 0.545186 \n", "\n", " Image_URL_DAPI \\\n", "0 file:/home/jenna/nf1_cellpainting_data/1.cellp... \n", "1 file:/home/jenna/nf1_cellpainting_data/1.cellp... \n", "2 file:/home/jenna/nf1_cellpainting_data/1.cellp... \n", "3 file:/home/jenna/nf1_cellpainting_data/1.cellp... \n", "4 file:/home/jenna/nf1_cellpainting_data/1.cellp... \n", "... ... \n", "1709 file:/home/jenna/nf1_cellpainting_data/1.cellp... \n", "1710 file:/home/jenna/nf1_cellpainting_data/1.cellp... \n", "1711 file:/home/jenna/nf1_cellpainting_data/1.cellp... \n", "1712 file:/home/jenna/nf1_cellpainting_data/1.cellp... \n", "1713 file:/home/jenna/nf1_cellpainting_data/1.cellp... \n", "\n", " Image_URL_GFP \\\n", "0 file:/home/jenna/nf1_cellpainting_data/1.cellp... \n", "1 file:/home/jenna/nf1_cellpainting_data/1.cellp... \n", "2 file:/home/jenna/nf1_cellpainting_data/1.cellp... \n", "3 file:/home/jenna/nf1_cellpainting_data/1.cellp... \n", "4 file:/home/jenna/nf1_cellpainting_data/1.cellp... \n", "... ... \n", "1709 file:/home/jenna/nf1_cellpainting_data/1.cellp... \n", "1710 file:/home/jenna/nf1_cellpainting_data/1.cellp... \n", "1711 file:/home/jenna/nf1_cellpainting_data/1.cellp... \n", "1712 file:/home/jenna/nf1_cellpainting_data/1.cellp... \n", "1713 file:/home/jenna/nf1_cellpainting_data/1.cellp... \n", "\n", " Image_URL_RFP Image_Width_DAPI \\\n", "0 file:/home/jenna/nf1_cellpainting_data/1.cellp... 
1224 \n", "1 file:/home/jenna/nf1_cellpainting_data/1.cellp... 1224 \n", "2 file:/home/jenna/nf1_cellpainting_data/1.cellp... 1224 \n", "3 file:/home/jenna/nf1_cellpainting_data/1.cellp... 1224 \n", "4 file:/home/jenna/nf1_cellpainting_data/1.cellp... 1224 \n", "... ... ... \n", "1709 file:/home/jenna/nf1_cellpainting_data/1.cellp... 1224 \n", "1710 file:/home/jenna/nf1_cellpainting_data/1.cellp... 1224 \n", "1711 file:/home/jenna/nf1_cellpainting_data/1.cellp... 1224 \n", "1712 file:/home/jenna/nf1_cellpainting_data/1.cellp... 1224 \n", "1713 file:/home/jenna/nf1_cellpainting_data/1.cellp... 1224 \n", "\n", " Image_Width_GFP Image_Width_RFP \n", "0 1224 1224 \n", "1 1224 1224 \n", "2 1224 1224 \n", "3 1224 1224 \n", "4 1224 1224 \n", "... ... ... \n", "1709 1224 1224 \n", "1710 1224 1224 \n", "1711 1224 1224 \n", "1712 1224 1224 \n", "1713 1224 1224 \n", "\n", "[1714 rows x 2076 columns]" ] }, "execution_count": 1, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import pathlib\n", "\n", "import pandas as pd\n", "from cytodataframe import CytoDataFrame\n", "\n", "import cosmicqc\n", "\n", "# set a path for the parquet-based dataset\n", "# (in this case, CellProfiler SQLite data processed by CytoTable)\n", "data_path = (\n", " \"../../../tests/data/cytotable/NF1_cellpainting_data/\"\n", " \"Plate_2_with_image_data.parquet\"\n", ")\n", "\n", "# set context directories for the images and masks associated with the dataset\n", "image_context_dir = pathlib.Path(data_path).parent / \"Plate_2_images\"\n", "mask_context_dir = pathlib.Path(data_path).parent / \"Plate_2_masks\"\n", "\n", "# create a CytoDataFrame (single-cell DataFrame) from the dataset\n", "scdf = CytoDataFrame(\n", " data=data_path,\n", " data_context_dir=image_context_dir,\n", " data_mask_context_dir=mask_context_dir,\n", ")\n", "\n", "# display the dataframe\n", "scdf" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0 False\n", "1085 False\n", "1083 
False\n", "1082 False\n", "1080 False\n", " ... \n", "572 True\n", "571 True\n", "567 True\n", "280 True\n", "856 True\n", "Name: cqc.custom.Z_Score.Nuclei_AreaShape_Area, Length: 1714, dtype: bool" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Identify which rows are outliers under a given threshold definition:\n", "# each entry maps a column name to the z-score which is treated as\n", "# the limit for that column.\n", "cosmicqc.analyze.identify_outliers(\n", " df=scdf,\n", " feature_thresholds={\"Nuclei_AreaShape_Area\": -1},\n", ").sort_values()" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Number of outliers: 328 (19.14%)\n", "Outliers Range:\n", "Nuclei_AreaShape_Area Min: 734.0\n", "Nuclei_AreaShape_Area Max: 1904.0\n" ] }, { "data": { "text/plain": [ " Nuclei_AreaShape_Area Metadata_ImageNumber Image_Metadata_Plate_x\n", "23 921.0 2 Plate_2\n", "28 845.0 2 Plate_2\n", "29 1024.0 2 Plate_2\n", "32 787.0 2 Plate_2\n", "37 1347.0 2 Plate_2\n", "... ... ... ...\n", "1682 1497.0 127 Plate_2\n", "1689 1794.0 127 Plate_2\n", "1692 1732.0 127 Plate_2\n", "1699 1149.0 127 Plate_2\n", "1707 1594.0 128 Plate_2\n", "\n", "[328 rows x 3 columns]" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Show the number of outliers given a column name and a specified threshold\n", "# via the `feature_thresholds` parameter and the `find_outliers` function.\n", "cosmicqc.analyze.find_outliers(\n", " df=scdf,\n", " metadata_columns=[\"Metadata_ImageNumber\", \"Image_Metadata_Plate_x\"],\n", " feature_thresholds={\"Nuclei_AreaShape_Area\": -1},\n", ")" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
cqc.small_and_low_formfactor_nuclei.Z_Score.Nuclei_AreaShape_Areacqc.small_and_low_formfactor_nuclei.Z_Score.Nuclei_AreaShape_FormFactorcqc.small_and_low_formfactor_nuclei.is_outliercqc.elongated_nuclei.Z_Score.Nuclei_AreaShape_Eccentricitycqc.elongated_nuclei.is_outliercqc.large_nuclei.Z_Score.Nuclei_AreaShape_Areacqc.large_nuclei.Z_Score.Nuclei_AreaShape_FormFactorcqc.large_nuclei.is_outlier
00.8488200.219903False0.498274False0.8488200.219903False
1-0.252521-1.280795False-0.659400False-0.252521-1.280795False
2-0.402491-0.325652False0.819165False-0.402491-0.325652False
30.329549-0.268920False0.961218False0.329549-0.268920False
41.1534460.028845False-0.372891False1.1534460.028845False
...........................
17090.598557-0.280063False0.923075False0.598557-0.280063False
1710-0.7164900.068293False0.650830False-0.7164900.068293False
17111.1871890.833264False-0.752359False1.1871890.833264False
1712-0.6996190.534479False-0.747030False-0.6996190.534479False
1713-0.9901850.356614False-1.309290False-0.9901850.356614False
\n", "

1714 rows × 8 columns

\n", "
" ], "text/plain": [ " cqc.small_and_low_formfactor_nuclei.Z_Score.Nuclei_AreaShape_Area \\\n", "0 0.848820 \n", "1 -0.252521 \n", "2 -0.402491 \n", "3 0.329549 \n", "4 1.153446 \n", "... ... \n", "1709 0.598557 \n", "1710 -0.716490 \n", "1711 1.187189 \n", "1712 -0.699619 \n", "1713 -0.990185 \n", "\n", " cqc.small_and_low_formfactor_nuclei.Z_Score.Nuclei_AreaShape_FormFactor \\\n", "0 0.219903 \n", "1 -1.280795 \n", "2 -0.325652 \n", "3 -0.268920 \n", "4 0.028845 \n", "... ... \n", "1709 -0.280063 \n", "1710 0.068293 \n", "1711 0.833264 \n", "1712 0.534479 \n", "1713 0.356614 \n", "\n", " cqc.small_and_low_formfactor_nuclei.is_outlier \\\n", "0 False \n", "1 False \n", "2 False \n", "3 False \n", "4 False \n", "... ... \n", "1709 False \n", "1710 False \n", "1711 False \n", "1712 False \n", "1713 False \n", "\n", " cqc.elongated_nuclei.Z_Score.Nuclei_AreaShape_Eccentricity \\\n", "0 0.498274 \n", "1 -0.659400 \n", "2 0.819165 \n", "3 0.961218 \n", "4 -0.372891 \n", "... ... \n", "1709 0.923075 \n", "1710 0.650830 \n", "1711 -0.752359 \n", "1712 -0.747030 \n", "1713 -1.309290 \n", "\n", " cqc.elongated_nuclei.is_outlier \\\n", "0 False \n", "1 False \n", "2 False \n", "3 False \n", "4 False \n", "... ... \n", "1709 False \n", "1710 False \n", "1711 False \n", "1712 False \n", "1713 False \n", "\n", " cqc.large_nuclei.Z_Score.Nuclei_AreaShape_Area \\\n", "0 0.848820 \n", "1 -0.252521 \n", "2 -0.402491 \n", "3 0.329549 \n", "4 1.153446 \n", "... ... \n", "1709 0.598557 \n", "1710 -0.716490 \n", "1711 1.187189 \n", "1712 -0.699619 \n", "1713 -0.990185 \n", "\n", " cqc.large_nuclei.Z_Score.Nuclei_AreaShape_FormFactor \\\n", "0 0.219903 \n", "1 -1.280795 \n", "2 -0.325652 \n", "3 -0.268920 \n", "4 0.028845 \n", "... ... \n", "1709 -0.280063 \n", "1710 0.068293 \n", "1711 0.833264 \n", "1712 0.534479 \n", "1713 0.356614 \n", "\n", " cqc.large_nuclei.is_outlier \n", "0 False \n", "1 False \n", "2 False \n", "3 False \n", "4 False \n", "... ... 
\n", "1709 False \n", "1710 False \n", "1711 False \n", "1712 False \n", "1713 False \n", "\n", "[1714 rows x 8 columns]" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# create a labeled dataset which includes z-scores and whether those scores\n", "# are interpreted as outliers or inliers. We use pre-defined threshold sets\n", "# loaded from defaults (cosmicqc can accept user-defined thresholds too!).\n", "labeled_scdf = cosmicqc.analyze.label_outliers(\n", " df=scdf,\n", " include_threshold_scores=True,\n", ")\n", "\n", "# show the dataframe rows with only the last 8 columns\n", "# (added from the label_outliers function)\n", "labeled_scdf.iloc[:, -8:]" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Metadata_ImageNumberMetadata_Cells_Number_Object_Numbercqc.large_nuclei.is_outlierImage_FileName_GFPImage_FileName_RFPImage_FileName_DAPI
699502True
155711310True
16771269True
457346True
882616True
.....................
5704513False
5694510False
568459False
567458False
171312814False
\n", "

1714 rows × 6 columns

" ], "text/plain": [ " Metadata_ImageNumber Metadata_Cells_Number_Object_Number \\\n", "699 50 2 \n", "1557 113 10 \n", "1677 126 9 \n", "457 34 6 \n", "882 61 6 \n", "... ... ... \n", "570 45 13 \n", "569 45 10 \n", "568 45 9 \n", "567 45 8 \n", "1713 128 14 \n", "\n", " cqc.large_nuclei.is_outlier Image_FileName_GFP \\\n", "699 True D12_01_2_2_GFP_001.tif \n", "1557 True H12_01_2_1_GFP_001.tif \n", "1677 True H7_01_2_2_GFP_001.tif \n", "457 True C12_01_2_2_GFP_001.tif \n", "882 True D7_01_2_1_GFP_001.tif \n", "... ... ... \n", "570 False C7_01_2_1_GFP_001.tif \n", "569 False C7_01_2_1_GFP_001.tif \n", "568 False C7_01_2_1_GFP_001.tif \n", "567 False C7_01_2_1_GFP_001.tif \n", "1713 False H7_01_2_4_GFP_001.tif \n", "\n", " Image_FileName_RFP Image_FileName_DAPI \n", "699 D12_01_3_2_RFP_001.tif D12_01_1_2_DAPI_001.tif \n", "1557 H12_01_3_1_RFP_001.tif H12_01_1_1_DAPI_001.tif \n", "1677 H7_01_3_2_RFP_001.tif H7_01_1_2_DAPI_001.tif \n", "457 C12_01_3_2_RFP_001.tif C12_01_1_2_DAPI_001.tif \n", "882 D7_01_3_1_RFP_001.tif D7_01_1_1_DAPI_001.tif \n", "... ... ... 
\n", "570 C7_01_3_1_RFP_001.tif C7_01_1_1_DAPI_001.tif \n", "569 C7_01_3_1_RFP_001.tif C7_01_1_1_DAPI_001.tif \n", "568 C7_01_3_1_RFP_001.tif C7_01_1_1_DAPI_001.tif \n", "567 C7_01_3_1_RFP_001.tif C7_01_1_1_DAPI_001.tif \n", "1713 H7_01_3_4_RFP_001.tif H7_01_1_4_DAPI_001.tif \n", "\n", "[1714 rows x 6 columns]" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# show cropped images through CytoDataFrame from the dataset to help analyze outliers\n", "labeled_scdf.sort_values(by=\"cqc.large_nuclei.is_outlier\", ascending=False)[\n", " [\n", " \"Metadata_ImageNumber\",\n", " \"Metadata_Cells_Number_Object_Number\",\n", " \"cqc.large_nuclei.is_outlier\",\n", " \"Image_FileName_GFP\",\n", " \"Image_FileName_RFP\",\n", " \"Image_FileName_DAPI\",\n", " ]\n", "]" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Metadata_ImageNumberImage_Metadata_Plate_xMetadata_number_of_singlecellsImage_Metadata_Site_xImage_Metadata_Well_xMetadata_Cells_Number_Object_NumberMetadata_Cytoplasm_Parent_CellsMetadata_Cytoplasm_Parent_NucleiMetadata_Nuclei_Number_Object_NumberCytoplasm_AreaShape_Area...Image_Threshold_SumOfEntropies_CellsImage_Threshold_SumOfEntropies_NucleiImage_Threshold_WeightedVariance_CellsImage_Threshold_WeightedVariance_NucleiImage_URL_DAPIImage_URL_GFPImage_URL_RFPImage_Width_DAPIImage_Width_GFPImage_Width_RFP
01Plate_2441A12112221024.0...-12.181288-11.6999930.9926240.657791file:/home/jenna/nf1_cellpainting_data/1.cellp...file:/home/jenna/nf1_cellpainting_data/1.cellp...file:/home/jenna/nf1_cellpainting_data/1.cellp...122412241224
11Plate_2441A12447712754.0...-12.181288-11.6999930.9926240.657791file:/home/jenna/nf1_cellpainting_data/1.cellp...file:/home/jenna/nf1_cellpainting_data/1.cellp...file:/home/jenna/nf1_cellpainting_data/1.cellp...122412241224
21Plate_2441A1277101023976.0...-12.181288-11.6999930.9926240.657791file:/home/jenna/nf1_cellpainting_data/1.cellp...file:/home/jenna/nf1_cellpainting_data/1.cellp...file:/home/jenna/nf1_cellpainting_data/1.cellp...122412241224
31Plate_2441A1288121219374.0...-12.181288-11.6999930.9926240.657791file:/home/jenna/nf1_cellpainting_data/1.cellp...file:/home/jenna/nf1_cellpainting_data/1.cellp...file:/home/jenna/nf1_cellpainting_data/1.cellp...122412241224
41Plate_2441A1299131327385.0...-12.181288-11.6999930.9926240.657791file:/home/jenna/nf1_cellpainting_data/1.cellp...file:/home/jenna/nf1_cellpainting_data/1.cellp...file:/home/jenna/nf1_cellpainting_data/1.cellp...122412241224
..................................................................
1709128Plate_2594H71010141424942.0...-12.566582-11.6330431.6243100.545186file:/home/jenna/nf1_cellpainting_data/1.cellp...file:/home/jenna/nf1_cellpainting_data/1.cellp...file:/home/jenna/nf1_cellpainting_data/1.cellp...122412241224
1710128Plate_2594H7111115156627.0...-12.566582-11.6330431.6243100.545186file:/home/jenna/nf1_cellpainting_data/1.cellp...file:/home/jenna/nf1_cellpainting_data/1.cellp...file:/home/jenna/nf1_cellpainting_data/1.cellp...122412241224
1711128Plate_2594H71212161611216.0...-12.566582-11.6330431.6243100.545186file:/home/jenna/nf1_cellpainting_data/1.cellp...file:/home/jenna/nf1_cellpainting_data/1.cellp...file:/home/jenna/nf1_cellpainting_data/1.cellp...122412241224
1712128Plate_2594H71313171715279.0...-12.566582-11.6330431.6243100.545186file:/home/jenna/nf1_cellpainting_data/1.cellp...file:/home/jenna/nf1_cellpainting_data/1.cellp...file:/home/jenna/nf1_cellpainting_data/1.cellp...122412241224
1713128Plate_2594H7141420207106.0...-12.566582-11.6330431.6243100.545186file:/home/jenna/nf1_cellpainting_data/1.cellp...file:/home/jenna/nf1_cellpainting_data/1.cellp...file:/home/jenna/nf1_cellpainting_data/1.cellp...122412241224
\n", "

1714 rows × 2076 columns

\n", "
" ], "text/plain": [ " Metadata_ImageNumber Image_Metadata_Plate_x \\\n", "0 1 Plate_2 \n", "1 1 Plate_2 \n", "2 1 Plate_2 \n", "3 1 Plate_2 \n", "4 1 Plate_2 \n", "... ... ... \n", "1709 128 Plate_2 \n", "1710 128 Plate_2 \n", "1711 128 Plate_2 \n", "1712 128 Plate_2 \n", "1713 128 Plate_2 \n", "\n", " Metadata_number_of_singlecells Image_Metadata_Site_x \\\n", "0 44 1 \n", "1 44 1 \n", "2 44 1 \n", "3 44 1 \n", "4 44 1 \n", "... ... ... \n", "1709 59 4 \n", "1710 59 4 \n", "1711 59 4 \n", "1712 59 4 \n", "1713 59 4 \n", "\n", " Image_Metadata_Well_x Metadata_Cells_Number_Object_Number \\\n", "0 A12 1 \n", "1 A12 4 \n", "2 A12 7 \n", "3 A12 8 \n", "4 A12 9 \n", "... ... ... \n", "1709 H7 10 \n", "1710 H7 11 \n", "1711 H7 12 \n", "1712 H7 13 \n", "1713 H7 14 \n", "\n", " Metadata_Cytoplasm_Parent_Cells Metadata_Cytoplasm_Parent_Nuclei \\\n", "0 1 2 \n", "1 4 7 \n", "2 7 10 \n", "3 8 12 \n", "4 9 13 \n", "... ... ... \n", "1709 10 14 \n", "1710 11 15 \n", "1711 12 16 \n", "1712 13 17 \n", "1713 14 20 \n", "\n", " Metadata_Nuclei_Number_Object_Number Cytoplasm_AreaShape_Area ... \\\n", "0 2 21024.0 ... \n", "1 7 12754.0 ... \n", "2 10 23976.0 ... \n", "3 12 19374.0 ... \n", "4 13 27385.0 ... \n", "... ... ... ... \n", "1709 14 24942.0 ... \n", "1710 15 6627.0 ... \n", "1711 16 11216.0 ... \n", "1712 17 15279.0 ... \n", "1713 20 7106.0 ... \n", "\n", " Image_Threshold_SumOfEntropies_Cells \\\n", "0 -12.181288 \n", "1 -12.181288 \n", "2 -12.181288 \n", "3 -12.181288 \n", "4 -12.181288 \n", "... ... \n", "1709 -12.566582 \n", "1710 -12.566582 \n", "1711 -12.566582 \n", "1712 -12.566582 \n", "1713 -12.566582 \n", "\n", " Image_Threshold_SumOfEntropies_Nuclei \\\n", "0 -11.699993 \n", "1 -11.699993 \n", "2 -11.699993 \n", "3 -11.699993 \n", "4 -11.699993 \n", "... ... 
\n", "1709 -11.633043 \n", "1710 -11.633043 \n", "1711 -11.633043 \n", "1712 -11.633043 \n", "1713 -11.633043 \n", "\n", " Image_Threshold_WeightedVariance_Cells \\\n", "0 0.992624 \n", "1 0.992624 \n", "2 0.992624 \n", "3 0.992624 \n", "4 0.992624 \n", "... ... \n", "1709 1.624310 \n", "1710 1.624310 \n", "1711 1.624310 \n", "1712 1.624310 \n", "1713 1.624310 \n", "\n", " Image_Threshold_WeightedVariance_Nuclei \\\n", "0 0.657791 \n", "1 0.657791 \n", "2 0.657791 \n", "3 0.657791 \n", "4 0.657791 \n", "... ... \n", "1709 0.545186 \n", "1710 0.545186 \n", "1711 0.545186 \n", "1712 0.545186 \n", "1713 0.545186 \n", "\n", " Image_URL_DAPI \\\n", "0 file:/home/jenna/nf1_cellpainting_data/1.cellp... \n", "1 file:/home/jenna/nf1_cellpainting_data/1.cellp... \n", "2 file:/home/jenna/nf1_cellpainting_data/1.cellp... \n", "3 file:/home/jenna/nf1_cellpainting_data/1.cellp... \n", "4 file:/home/jenna/nf1_cellpainting_data/1.cellp... \n", "... ... \n", "1709 file:/home/jenna/nf1_cellpainting_data/1.cellp... \n", "1710 file:/home/jenna/nf1_cellpainting_data/1.cellp... \n", "1711 file:/home/jenna/nf1_cellpainting_data/1.cellp... \n", "1712 file:/home/jenna/nf1_cellpainting_data/1.cellp... \n", "1713 file:/home/jenna/nf1_cellpainting_data/1.cellp... \n", "\n", " Image_URL_GFP \\\n", "0 file:/home/jenna/nf1_cellpainting_data/1.cellp... \n", "1 file:/home/jenna/nf1_cellpainting_data/1.cellp... \n", "2 file:/home/jenna/nf1_cellpainting_data/1.cellp... \n", "3 file:/home/jenna/nf1_cellpainting_data/1.cellp... \n", "4 file:/home/jenna/nf1_cellpainting_data/1.cellp... \n", "... ... \n", "1709 file:/home/jenna/nf1_cellpainting_data/1.cellp... \n", "1710 file:/home/jenna/nf1_cellpainting_data/1.cellp... \n", "1711 file:/home/jenna/nf1_cellpainting_data/1.cellp... \n", "1712 file:/home/jenna/nf1_cellpainting_data/1.cellp... \n", "1713 file:/home/jenna/nf1_cellpainting_data/1.cellp... \n", "\n", " Image_URL_RFP Image_Width_DAPI \\\n", "0 file:/home/jenna/nf1_cellpainting_data/1.cellp... 
1224 \n", "1 file:/home/jenna/nf1_cellpainting_data/1.cellp... 1224 \n", "2 file:/home/jenna/nf1_cellpainting_data/1.cellp... 1224 \n", "3 file:/home/jenna/nf1_cellpainting_data/1.cellp... 1224 \n", "4 file:/home/jenna/nf1_cellpainting_data/1.cellp... 1224 \n", "... ... ... \n", "1709 file:/home/jenna/nf1_cellpainting_data/1.cellp... 1224 \n", "1710 file:/home/jenna/nf1_cellpainting_data/1.cellp... 1224 \n", "1711 file:/home/jenna/nf1_cellpainting_data/1.cellp... 1224 \n", "1712 file:/home/jenna/nf1_cellpainting_data/1.cellp... 1224 \n", "1713 file:/home/jenna/nf1_cellpainting_data/1.cellp... 1224 \n", "\n", " Image_Width_GFP Image_Width_RFP \n", "0 1224 1224 \n", "1 1224 1224 \n", "2 1224 1224 \n", "3 1224 1224 \n", "4 1224 1224 \n", "... ... ... \n", "1709 1224 1224 \n", "1710 1224 1224 \n", "1711 1224 1224 \n", "1712 1224 1224 \n", "1713 1224 1224 \n", "\n", "[1714 rows x 2076 columns]" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# A CytoDataFrame can be converted to a plain pd.DataFrame\n", "# (when or if needed!)\n", "df = pd.DataFrame(scdf)\n", "print(type(df))\n", "df" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.9" } }, "nbformat": 4, "nbformat_minor": 4 }