PyPI - spacr - Versions diffs - 0.3.61__py3-none-any.whl → 0.3.62__py3-none-any.whl - Mend

spacr 0.3.61__py3-none-any.whl → 0.3.62__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8)

spacr/io.py CHANGED Viewed

@@ -1777,7 +1777,7 @@ def _read_and_join_tables(db_path, table_names=['cell', 'cytoplasm', 'nucleus',
         png_list_df['cell_id'] = png_list_df['cell_id'].str[1:].astype(int)
         png_list_df.rename(columns={'cell_id': 'object_label'}, inplace=True)
         if 'cell' in dataframes:
-            join_cols = ['object_label', 'plate', 'row_name', 'column_name']
+            join_cols = ['object_label', 'plate', 'row_name', 'column_name','field']
             dataframes['cell'] = pd.merge(dataframes['cell'], png_list_df, on=join_cols, how='left')
         else:
             print("Cell table not found in database tables.")
@@ -2276,7 +2276,7 @@ def _read_db(db_loc, tables):
     conn.close() # Close the connection
     return dfs
-def _read_and_merge_data(locs, tables, verbose=False, nuclei_limit=False, pathogen_limit=False):
+def _read_and_merge_data_v1(locs, tables, verbose=False, nuclei_limit=False, pathogen_limit=False):
     from .utils import _split_data
@@ -2443,7 +2443,135 @@ def _read_and_merge_data(locs, tables, verbose=False, nuclei_limit=False, pathog
     if 'pathogen' in tables:
         obj_df_ls.append(pathogens)
-    return merged_df, obj_df_ls
+    return merged_df, obj_df_ls
+def _read_and_merge_data(locs, tables, verbose=False, nuclei_limit=10, pathogen_limit=10):
+    from .io import _read_db
+    from .utils import _split_data
+    # Initialize an empty dictionary to store DataFrames by table name
+    data_dict = {table: [] for table in tables}
+    # Extract plate DataFrames
+    for loc in locs:
+        db_dfs = _read_db(loc, tables)
+        for table, df in zip(tables, db_dfs):
+            data_dict[table].append(df)
+    # Concatenate rows across locations for each table
+    for table, dfs in data_dict.items():
+        if dfs:
+            data_dict[table] = pd.concat(dfs, axis=0)
+        if verbose:
+            print(f"{table}: {len(data_dict[table])}")
+    # Initialize merged DataFrame with 'cells' if available
+    merged_df = pd.DataFrame()
+    # Process each table
+    if 'cell' in data_dict:
+        cells = data_dict['cell'].copy()
+        cells = cells.assign(object_label=lambda x: 'o' + x['object_label'].astype(int).astype(str))
+        cells = cells.assign(prcfo=lambda x: x['prcf'] + '_' + x['object_label'])
+        cells_g_df, metadata = _split_data(cells, 'prcfo', 'object_label')
+        merged_df = cells_g_df.copy()
+        if verbose:
+            print(f'cells: {len(cells)}, cells grouped: {len(cells_g_df)}')
+    if 'cytoplasm' in data_dict:
+        cytoplasms = data_dict['cytoplasm'].copy()
+        cytoplasms = cytoplasms.assign(object_label=lambda x: 'o' + x['object_label'].astype(int).astype(str))
+        cytoplasms = cytoplasms.assign(prcfo=lambda x: x['prcf'] + '_' + x['object_label'])
+        if not 'cell' in data_dict:
+            merged_df, metadata = _split_data(cytoplasms, 'prcfo', 'object_label')
+            if verbose:
+                print(f'nucleus: {len(cytoplasms)}, cytoplasms grouped: {len(merged_df)}')
+        else:
+            cytoplasms_g_df, _ = _split_data(cytoplasms, 'prcfo', 'object_label')
+            merged_df = merged_df.merge(cytoplasms_g_df, left_index=True, right_index=True)
+            if verbose:
+                print(f'cytoplasms: {len(cytoplasms)}, cytoplasms grouped: {len(cytoplasms_g_df)}')
+    if 'nucleus' in data_dict:
+        nucleus = data_dict['nucleus'].copy()
+        nucleus = nucleus.dropna(subset=['cell_id'])
+        nucleus = nucleus.assign(object_label=lambda x: 'o' + x['object_label'].astype(int).astype(str))
+        nucleus = nucleus.assign(cell_id=lambda x: 'o' + x['cell_id'].astype(int).astype(str))
+        nucleus = nucleus.assign(prcfo=lambda x: x['prcf'] + '_' + x['cell_id'])
+        nucleus['nucleus_prcfo_count'] = nucleus.groupby('prcfo')['prcfo'].transform('count')
+        if not nuclei_limit:
+            nucleus = nucleus[nucleus['nucleus_prcfo_count'] == 1]
+        if all(key not in data_dict for key in ['cell', 'cytoplasm']):
+            merged_df, metadata = _split_data(nucleus, 'prcfo', 'cell_id')
+            if verbose:
+                print(f'nucleus: {len(nucleus)}, nucleus grouped: {len(merged_df)}')
+        else:
+            nucleus_g_df, _ = _split_data(nucleus, 'prcfo', 'cell_id')
+            merged_df = merged_df.merge(nucleus_g_df, left_index=True, right_index=True)
+            if verbose:
+                print(f'nucleus: {len(nucleus)}, nucleus grouped: {len(nucleus_g_df)}')
+    if 'pathogen' in data_dict:
+        pathogens = data_dict['pathogen'].copy()
+        pathogens = pathogens.dropna(subset=['cell_id'])
+        pathogens = pathogens.assign(object_label=lambda x: 'o' + x['object_label'].astype(int).astype(str))
+        pathogens = pathogens.assign(cell_id=lambda x: 'o' + x['cell_id'].astype(int).astype(str))
+        pathogens = pathogens.assign(prcfo=lambda x: x['prcf'] + '_' + x['cell_id'])
+        pathogens['pathogen_prcfo_count'] = pathogens.groupby('prcfo')['prcfo'].transform('count')
+        if isinstance(pathogen_limit, bool) and not pathogen_limit:
+            pathogens = pathogens[pathogens['pathogen_prcfo_count'] <= 1]
+        elif isinstance(pathogen_limit, (float, int)):
+            pathogens = pathogens[pathogens['pathogen_prcfo_count'] <= int(pathogen_limit)]
+        if all(key not in data_dict for key in ['cell', 'cytoplasm', 'nucleus']):
+            merged_df, metadata = _split_data(pathogens, 'prcfo', 'cell_id')
+            if verbose:
+                print(f'pathogens: {len(pathogens)}, pathogens grouped: {len(merged_df)}')
+        else:
+            pathogens_g_df, _ = _split_data(pathogens, 'prcfo', 'cell_id')
+            merged_df = merged_df.merge(pathogens_g_df, left_index=True, right_index=True)
+            if verbose:
+                print(f'pathogens: {len(pathogens)}, pathogens grouped: {len(pathogens_g_df)}')
+    if 'png_list' in data_dict:
+        png_list = data_dict['png_list'].copy()
+        png_list_g_df_numeric, png_list_g_df_non_numeric = _split_data(png_list, 'prcfo', 'cell_id')
+        png_list_g_df_non_numeric.drop(columns=['plate','row_name','column_name','field','file_name','cell_id', 'prcf'], inplace=True)
+        if verbose:
+            print(f'png_list: {len(png_list)}, png_list grouped: {len(png_list_g_df_numeric)}')
+        merged_df = merged_df.merge(png_list_g_df_numeric, left_index=True, right_index=True)
+        merged_df = merged_df.merge(png_list_g_df_non_numeric, left_index=True, right_index=True)
+    # Add prc (plate row column) and prcfo (plate row column field object) columns
+    metadata = metadata.assign(prc=lambda x: x['plate'] + '_' + x['row_name'] + '_' + x['column_name'])
+    cells_well = metadata.groupby('prc')['object_label'].nunique().reset_index(name='cells_per_well')
+    metadata = metadata.merge(cells_well, on='prc')
+    metadata = metadata.assign(prcfo=lambda x: x['plate'] + '_' + x['row_name'] + '_' + x['column_name'] + '_' + x['field'] + '_' + x['object_label'])
+    metadata.set_index('prcfo', inplace=True)
+    # Merge metadata with final merged DataFrame
+    merged_df = metadata.merge(merged_df, left_index=True, right_index=True).dropna(axis=1)
+    merged_df.drop(columns=['label_list_morphology', 'label_list_intensity'], errors='ignore', inplace=True)
+    if verbose:
+        print(f'Generated dataframe with: {len(merged_df.columns)} columns and {len(merged_df)} rows')
+    # Prepare object DataFrames for output
+    obj_df_ls = [data_dict[table] for table in ['cell', 'cytoplasm', 'nucleus', 'pathogen'] if table in data_dict]
+    return merged_df, obj_df_ls
 def _read_mask(mask_path):
     mask = imageio2.imread(mask_path)

spacr/utils.py CHANGED Viewed

@@ -1371,7 +1371,7 @@ def annotate_conditions(df, cells=None, cell_loc=None, pathogens=None, pathogen_
     return df
-def _split_data(df, group_by, object_type):
+def _split_data_v1(df, group_by, object_type):
     """
     Splits the input dataframe into numeric and non-numeric parts, groups them by the specified column,
     and returns the grouped dataframes.
@@ -1385,16 +1385,72 @@ def _split_data(df, group_by, object_type):
     grouped_numeric (pandas.DataFrame): The grouped dataframe containing numeric columns.
     grouped_non_numeric (pandas.DataFrame): The grouped dataframe containing non-numeric columns.
     """
+    if 'prcf' not in df.columns:
+        try:
+            df['prcf'] = df['plate'].astype(str) + '_' + df['row_name'].astype(str) + '_' + df['column_name'].astype(str) + '_' + df['field'].astype(str)
+        except Exception as e:
+            print(e)
     df['prcfo'] = df['prcf'] + '_' + df[object_type]
     df = df.set_index(group_by, inplace=False)
     df_numeric = df.select_dtypes(include=np.number)
     df_non_numeric = df.select_dtypes(exclude=np.number)
+    []
     grouped_numeric = df_numeric.groupby(df_numeric.index).mean()
     grouped_non_numeric = df_non_numeric.groupby(df_non_numeric.index).first()
     return pd.DataFrame(grouped_numeric), pd.DataFrame(grouped_non_numeric)
+def _split_data(df, group_by, object_type):
+    """
+    Splits the input dataframe into numeric and non-numeric parts, groups them by the specified column,
+    and returns the grouped dataframes with conditional aggregation.
+    Parameters:
+    df (pandas.DataFrame): The input dataframe.
+    group_by (str): The column name to group the dataframes by.
+    object_type (str): The column name to concatenate with 'prcf' to create a new column 'prcfo'.
+    Returns:
+    grouped_numeric (pandas.DataFrame): The grouped dataframe containing numeric columns with conditional aggregation.
+    grouped_non_numeric (pandas.DataFrame): The grouped dataframe containing non-numeric columns.
+    """
+    # Ensure 'prcf' column exists by concatenating specific columns
+    if 'prcf' not in df.columns:
+        try:
+            df['prcf'] = df['plate'].astype(str) + '_' + df['row_name'].astype(str) + '_' + df['column_name'].astype(str) + '_' + df['field'].astype(str)
+        except Exception as e:
+            print(e)
+    # Create the 'prcfo' column
+    df['prcfo'] = df['prcf'] + '_' + df[object_type]
+    df = df.set_index(group_by, inplace=False)
+    # Split the DataFrame into numeric and non-numeric parts
+    df_numeric = df.select_dtypes(include=np.number)
+    df_non_numeric = df.select_dtypes(exclude=np.number)
+    # Define keywords for columns to be summed instead of averaged
+    sum_keywords = ['area', 'perimeter', 'convex_area', 'bbox_area', 'filled_area', 'major_axis_length', 'minor_axis_length', 'equivalent_diameter']
+    # Create a dictionary for custom aggregation
+    agg_dict = {}
+    for column in df_numeric.columns:
+        if any(keyword in column for keyword in sum_keywords):
+            agg_dict[column] = 'sum'
+        else:
+            agg_dict[column] = 'mean'
+    # Apply custom aggregation
+    grouped_numeric = df_numeric.groupby(df_numeric.index).agg(agg_dict)
+    grouped_non_numeric = df_non_numeric.groupby(df_non_numeric.index).first()
+    return pd.DataFrame(grouped_numeric), pd.DataFrame(grouped_non_numeric)
 def _calculate_recruitment(df, channel):
     """
@@ -5184,7 +5240,7 @@ def group_feature_class(df, feature_groups=['cell', 'cytoplasm', 'nucleus', 'pat
         else:
             return None
-    from spacr.plot import spacrGraph
+    from .plot import spacrGraph
     df[name] = df['feature'].apply(lambda x: find_feature_class(x, feature_groups))

{spacr-0.3.61.dist-info → spacr-0.3.62.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: spacr
-Version: 0.3.61
+Version: 0.3.62
 Summary: Spatial phenotype analysis of crisp screens (SpaCr)
 Home-page: https://github.com/EinarOlafsson/spacr
 Author: Einar Birnir Olafsson

{spacr-0.3.61.dist-info → spacr-0.3.62.dist-info}/RECORD RENAMED Viewed

@@ -15,7 +15,7 @@ spacr/gui.py,sha256=ARyn9Q_g8HoP-cXh1nzMLVFCKqthY4v2u9yORyaQqQE,8230
 spacr/gui_core.py,sha256=N7R7yvfK_dJhOReM_kW3Ci8Bokhi1OzsxeKqvSGdvV4,41460
 spacr/gui_elements.py,sha256=EKlvEg_4_je7jciEdR3NTgPrcTraowa2e2RUt-xqd6M,138254
 spacr/gui_utils.py,sha256=u9RoIOWpAXFEOnUlLpMQZrc1pWSg6omZsJMIhJdRv_g,41211
-spacr/io.py,sha256=p-ky3yjtoSSvdsktPXVy_dx8dHgMeWqUZOtOwwfrk2o,136108
+spacr/io.py,sha256=0cBVmhqMaPkdEXib5Vhp19FC_1qfaK_NgtoImuDuwGU,142664
 spacr/logger.py,sha256=lJhTqt-_wfAunCPl93xE65Wr9Y1oIHJWaZMjunHUeIw,1538
 spacr/measure.py,sha256=2lK-ZcTxLM-MpXV1oZnucRD9iz5aprwahRKw9IEqshg,55085
 spacr/mediar.py,sha256=FwLvbLQW5LQzPgvJZG8Lw7GniA2vbZx6Jv6vIKu7I5c,14743
@@ -28,7 +28,7 @@ spacr/sim.py,sha256=1xKhXimNU3ukzIw-3l9cF3Znc_brW8h20yv8fSTzvss,71173
 spacr/submodules.py,sha256=Xq4gjvooHN8S7cTk5PIAkd7XD2c7CMVqNpeo8GCvtHc,42489
 spacr/timelapse.py,sha256=KGfG4L4-QnFfgbF7L6C5wL_3gd_rqr05Foje6RsoTBg,39603
 spacr/toxo.py,sha256=z2nT5aAze3NUIlwnBQcnkARihDwoPfqOgQIVoUluyK0,25087
-spacr/utils.py,sha256=tqIKiSc30xEX0IlfSpoctFJQDVnGHDAX7l1VakRCBuY,220601
+spacr/utils.py,sha256=vvciLh1gH0nsrCWQw3taUcDjxP59wme3gqrejeNO05w,222943
 spacr/version.py,sha256=axH5tnGwtgSnJHb5IDhiu4Zjk5GhLyAEDRe-rnaoFOA,409
 spacr/resources/MEDIAR/.gitignore,sha256=Ff1q9Nme14JUd-4Q3jZ65aeQ5X4uttptssVDgBVHYo8,152
 spacr/resources/MEDIAR/LICENSE,sha256=yEj_TRDLUfDpHDNM0StALXIt6mLqSgaV2hcCwa6_TcY,1065
@@ -151,9 +151,9 @@ spacr/resources/icons/umap.png,sha256=dOLF3DeLYy9k0nkUybiZMe1wzHQwLJFRmgccppw-8b
 spacr/resources/images/plate1_E01_T0001F001L01A01Z01C02.tif,sha256=Tl0ZUfZ_AYAbu0up_nO0tPRtF1BxXhWQ3T3pURBCCRo,7958528
 spacr/resources/images/plate1_E01_T0001F001L01A02Z01C01.tif,sha256=m8N-V71rA1TT4dFlENNg8s0Q0YEXXs8slIn7yObmZJQ,7958528
 spacr/resources/images/plate1_E01_T0001F001L01A03Z01C03.tif,sha256=Pbhk7xn-KUP6RSIhJsxQcrHFImBm3GEpLkzx7WOc-5M,7958528
-spacr-0.3.61.dist-info/LICENSE,sha256=SR-2MeGc6SCM1UORJYyarSWY_A-JaOMFDj7ReSs9tRM,1083
-spacr-0.3.61.dist-info/METADATA,sha256=2jlzT9lkaXx01IWlYMYrpf24p48qDHvrRLZm-YUUl-0,6032
-spacr-0.3.61.dist-info/WHEEL,sha256=HiCZjzuy6Dw0hdX5R3LCFPDmFS4BWl8H-8W39XfmgX4,91
-spacr-0.3.61.dist-info/entry_points.txt,sha256=BMC0ql9aNNpv8lUZ8sgDLQMsqaVnX5L535gEhKUP5ho,296
-spacr-0.3.61.dist-info/top_level.txt,sha256=GJPU8FgwRXGzKeut6JopsSRY2R8T3i9lDgya42tLInY,6
-spacr-0.3.61.dist-info/RECORD,,
+spacr-0.3.62.dist-info/LICENSE,sha256=SR-2MeGc6SCM1UORJYyarSWY_A-JaOMFDj7ReSs9tRM,1083
+spacr-0.3.62.dist-info/METADATA,sha256=Ox14lWGxbXuMW36MriYHppKcZDqD_4HopfbcLAi8dLc,6032
+spacr-0.3.62.dist-info/WHEEL,sha256=HiCZjzuy6Dw0hdX5R3LCFPDmFS4BWl8H-8W39XfmgX4,91
+spacr-0.3.62.dist-info/entry_points.txt,sha256=BMC0ql9aNNpv8lUZ8sgDLQMsqaVnX5L535gEhKUP5ho,296
+spacr-0.3.62.dist-info/top_level.txt,sha256=GJPU8FgwRXGzKeut6JopsSRY2R8T3i9lDgya42tLInY,6
+spacr-0.3.62.dist-info/RECORD,,

{spacr-0.3.61.dist-info → spacr-0.3.62.dist-info}/LICENSE RENAMED Viewed

File without changes

{spacr-0.3.61.dist-info → spacr-0.3.62.dist-info}/WHEEL RENAMED Viewed

File without changes

{spacr-0.3.61.dist-info → spacr-0.3.62.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{spacr-0.3.61.dist-info → spacr-0.3.62.dist-info}/top_level.txt RENAMED Viewed

File without changes

spacr 0.3.61__py3-none-any.whl → 0.3.62__py3-none-any.whl

spacr 0.3.61__py3-none-any.whl → 0.3.62__py3-none-any.whl