spacr 0.3.60__py3-none-any.whl → 0.3.62__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
spacr/app_annotate.py CHANGED
@@ -4,14 +4,6 @@ from .gui import MainApp
4
4
  from .gui_elements import set_dark_style, spacrButton
5
5
 
6
6
  def convert_to_number(value):
7
-
8
- """
9
- Converts a string value to an integer if possible, otherwise converts to a float.
10
- Args:
11
- value (str): The string representation of the number.
12
- Returns:
13
- int or float: The converted number.
14
- """
15
7
  try:
16
8
  return int(value)
17
9
  except ValueError:
spacr/core.py CHANGED
@@ -465,10 +465,8 @@ def generate_image_umap(settings={}):
465
465
  display(settings_df)
466
466
 
467
467
  db_paths = get_db_paths(settings['src'])
468
-
469
468
  tables = settings['tables'] + ['png_list']
470
469
  all_df = pd.DataFrame()
471
- #image_paths = []
472
470
 
473
471
  for i,db_path in enumerate(db_paths):
474
472
  df = _read_and_join_tables(db_path, table_names=tables)
@@ -476,7 +474,7 @@ def generate_image_umap(settings={}):
476
474
  all_df = pd.concat([all_df, df], axis=0)
477
475
  #image_paths.extend(image_paths_tmp)
478
476
 
479
- all_df['cond'] = all_df['col'].apply(map_condition, neg=settings['neg'], pos=settings['pos'], mix=settings['mix'])
477
+ all_df['cond'] = all_df['column_name'].apply(map_condition, neg=settings['neg'], pos=settings['pos'], mix=settings['mix'])
480
478
 
481
479
  if settings['exclude_conditions']:
482
480
  if isinstance(settings['exclude_conditions'], str):
@@ -495,7 +493,10 @@ def generate_image_umap(settings={}):
495
493
 
496
494
  # Extract and reset the index for the column to compare
497
495
  col_to_compare = all_df[settings['col_to_compare']].reset_index(drop=True)
498
-
496
+
497
+ #if settings['only_top_features']:
498
+ # column_list = None
499
+
499
500
  # Preprocess the data to obtain numeric data
500
501
  numeric_data = preprocess_data(all_df, settings['filter_by'], settings['remove_highly_correlated'], settings['log_data'], settings['exclude'])
501
502
 
@@ -571,7 +572,11 @@ def generate_image_umap(settings={}):
571
572
  print(f'Saved {reduction_method} embedding to {embedding_path} and grid to {grid_path}')
572
573
 
573
574
  # Add cluster labels to the dataframe
574
- all_df['cluster'] = labels
575
+ if len(labels) > 0:
576
+ all_df['cluster'] = labels
577
+ else:
578
+ all_df['cluster'] = 1 # Assign a default cluster label
579
+ print("No clusters found. Consider reducing 'min_samples' or increasing 'eps' for DBSCAN.")
575
580
 
576
581
  # Save the results to a CSV file
577
582
  results_dir = os.path.join(settings['src'][0], 'results')
@@ -653,7 +658,7 @@ def reducer_hyperparameter_search(settings={}, reduction_params=None, dbscan_par
653
658
  df = _read_and_join_tables(db_path, table_names=tables)
654
659
  all_df = pd.concat([all_df, df], axis=0)
655
660
 
656
- all_df['cond'] = all_df['col'].apply(map_condition, neg=settings['neg'], pos=settings['pos'], mix=settings['mix'])
661
+ all_df['cond'] = all_df['column_name'].apply(map_condition, neg=settings['neg'], pos=settings['pos'], mix=settings['mix'])
657
662
 
658
663
  if settings['exclude_conditions']:
659
664
  if isinstance(settings['exclude_conditions'], str):
@@ -882,7 +887,7 @@ def generate_screen_graphs(settings):
882
887
  db_loc = [os.path.join(src, 'measurements', 'measurements.db')]
883
888
 
884
889
  # Read and merge data from the database
885
- df, _ = _read_and_merge_data(db_loc, settings['tables'], verbose=True, nuclei_limit=settings['nuclei_limit'], pathogen_limit=settings['pathogen_limit'], uninfected=settings['uninfected'])
890
+ df, _ = _read_and_merge_data(db_loc, settings['tables'], verbose=True, nuclei_limit=settings['nuclei_limit'], pathogen_limit=settings['pathogen_limit'])
886
891
 
887
892
  # Annotate the data
888
893
  df = annotate_conditions(df, cells=settings['cells'], cell_loc=None, pathogens=settings['controls'], pathogen_loc=settings['controls_loc'], treatments=None, treatment_loc=None)
spacr/gui_utils.py CHANGED
@@ -225,14 +225,30 @@ def annotate(settings):
225
225
  conn.close()
226
226
 
227
227
  root = tk.Tk()
228
- root.geometry(settings['geom'])
229
- app = AnnotateApp(root, db, src, image_type=settings['image_type'], channels=settings['channels'], image_size=settings['img_size'], grid_rows=settings['rows'], grid_cols=settings['columns'], annotation_column=settings['annotation_column'], normalize=settings['normalize'], percentiles=settings['percentiles'], measurement=settings['measurement'], threshold=settings['threshold'], normalize_channels=settings['normalize_channels'])
230
- next_button = tk.Button(root, text="Next", command=app.next_page)
231
- next_button.grid(row=app.grid_rows, column=app.grid_cols - 1)
232
- back_button = tk.Button(root, text="Back", command=app.previous_page)
233
- back_button.grid(row=app.grid_rows, column=app.grid_cols - 2)
234
- exit_button = tk.Button(root, text="Exit", command=app.shutdown)
235
- exit_button.grid(row=app.grid_rows, column=app.grid_cols - 3)
228
+
229
+ root.geometry(f"{root.winfo_screenwidth()}x{root.winfo_screenheight()}")
230
+
231
+ db_path = os.path.join(settings['src'], 'measurements/measurements.db')
232
+
233
+ app = AnnotateApp(root,
234
+ db_path=db_path,
235
+ src=settings['src'],
236
+ image_type=settings['image_type'],
237
+ channels=settings['channels'],
238
+ image_size=settings['img_size'],
239
+ annotation_column=settings['annotation_column'],
240
+ normalize=settings['normalize'],
241
+ percentiles=settings['percentiles'],
242
+ measurement=settings['measurement'],
243
+ threshold=settings['threshold'],
244
+ normalize_channels=settings['normalize_channels'])
245
+
246
+ #next_button = tk.Button(root, text="Next", command=app.next_page)
247
+ #next_button.grid(row=app.grid_rows, column=app.grid_cols - 1)
248
+ #back_button = tk.Button(root, text="Back", command=app.previous_page)
249
+ #back_button.grid(row=app.grid_rows, column=app.grid_cols - 2)
250
+ #exit_button = tk.Button(root, text="Exit", command=app.shutdown)
251
+ #exit_button.grid(row=app.grid_rows, column=app.grid_cols - 3)
236
252
 
237
253
  app.load_images()
238
254
  root.mainloop()
spacr/io.py CHANGED
@@ -1777,7 +1777,7 @@ def _read_and_join_tables(db_path, table_names=['cell', 'cytoplasm', 'nucleus',
1777
1777
  png_list_df['cell_id'] = png_list_df['cell_id'].str[1:].astype(int)
1778
1778
  png_list_df.rename(columns={'cell_id': 'object_label'}, inplace=True)
1779
1779
  if 'cell' in dataframes:
1780
- join_cols = ['object_label', 'plate', 'row_name', 'column_name']
1780
+ join_cols = ['object_label', 'plate', 'row_name', 'column_name','field']
1781
1781
  dataframes['cell'] = pd.merge(dataframes['cell'], png_list_df, on=join_cols, how='left')
1782
1782
  else:
1783
1783
  print("Cell table not found in database tables.")
@@ -2089,150 +2089,6 @@ def _read_db(db_loc, tables):
2089
2089
  conn.close()
2090
2090
  return dfs
2091
2091
 
2092
- def _read_and_merge_data(locs, tables, verbose=False, nuclei_limit=False, pathogen_limit=False, uninfected=False):
2093
- """
2094
- Read and merge data from SQLite databases and perform data preprocessing.
2095
-
2096
- Parameters:
2097
- - locs (list): A list of file paths to the SQLite database files.
2098
- - tables (list): A list of table names to read from the databases.
2099
- - verbose (bool): Whether to print verbose output. Default is False.
2100
- - nuclei_limit (bool): Whether to include multinucleated cells. Default is False.
2101
- - pathogen_limit (bool): Whether to include cells with multiple infections. Default is False.
2102
- - uninfected (bool): Whether to include non-infected cells. Default is False.
2103
-
2104
- Returns:
2105
- - merged_df (pandas.DataFrame): The merged and preprocessed dataframe.
2106
- - obj_df_ls (list): A list of pandas DataFrames, each containing the data for a specific object type.
2107
- """
2108
-
2109
- from .utils import _split_data
2110
-
2111
- #Extract plate DataFrames
2112
- all_dfs = []
2113
- for loc in locs:
2114
- db_dfs = _read_db(loc, tables)
2115
- all_dfs.append(db_dfs)
2116
-
2117
- #Extract Tables from DataFrames and concatinate rows
2118
- for i, dfs in enumerate(all_dfs):
2119
- if 'cell' in tables:
2120
- cell = dfs[0]
2121
- print(f'plate: {i+1} cells:{len(cell)}')
2122
-
2123
- if 'nucleus' in tables:
2124
- nucleus = dfs[1]
2125
- print(f'plate: {i+1} nucleus:{len(nucleus)} ')
2126
-
2127
- if 'pathogen' in tables:
2128
- pathogen = dfs[2]
2129
-
2130
- print(f'plate: {i+1} pathogens:{len(pathogen)}')
2131
- if 'cytoplasm' in tables:
2132
- if not 'pathogen' in tables:
2133
- cytoplasm = dfs[2]
2134
- else:
2135
- cytoplasm = dfs[3]
2136
- print(f'plate: {i+1} cytoplasms: {len(cytoplasm)}')
2137
-
2138
- if i > 0:
2139
- if 'cell' in tables:
2140
- cells = pd.concat([cells, cell], axis = 0)
2141
- if 'nucleus' in tables:
2142
- nucleus = pd.concat([nucleus, nucleus], axis = 0)
2143
- if 'pathogen' in tables:
2144
- pathogens = pd.concat([pathogens, pathogen], axis = 0)
2145
- if 'cytoplasm' in tables:
2146
- cytoplasms = pd.concat([cytoplasms, cytoplasm], axis = 0)
2147
- else:
2148
- if 'cell' in tables:
2149
- cells = cell.copy()
2150
- if 'nucleus' in tables:
2151
- nucleus = nucleus.copy()
2152
- if 'pathogen' in tables:
2153
- pathogens = pathogen.copy()
2154
- if 'cytoplasm' in tables:
2155
- cytoplasms = cytoplasm.copy()
2156
-
2157
- #Add an o in front of all object and cell lables to convert them to strings
2158
- if 'cell' in tables:
2159
- cells = cells.assign(object_label=lambda x: 'o' + x['object_label'].astype(int).astype(str))
2160
- cells = cells.assign(prcfo = lambda x: x['prcf'] + '_' + x['object_label'])
2161
- cells_g_df, metadata = _split_data(cells, 'prcfo', 'object_label')
2162
- print(f'cells: {len(cells)}')
2163
- print(f'cells grouped: {len(cells_g_df)}')
2164
- if 'cytoplasm' in tables:
2165
- cytoplasms = cytoplasms.assign(object_label=lambda x: 'o' + x['object_label'].astype(int).astype(str))
2166
- cytoplasms = cytoplasms.assign(prcfo = lambda x: x['prcf'] + '_' + x['object_label'])
2167
- cytoplasms_g_df, _ = _split_data(cytoplasms, 'prcfo', 'object_label')
2168
- merged_df = cells_g_df.merge(cytoplasms_g_df, left_index=True, right_index=True)
2169
- print(f'cytoplasms: {len(cytoplasms)}')
2170
- print(f'cytoplasms grouped: {len(cytoplasms_g_df)}')
2171
- if 'nucleus' in tables:
2172
- nucleus = nucleus.dropna(subset=['cell_id'])
2173
- nucleus = nucleus.assign(object_label=lambda x: 'o' + x['object_label'].astype(int).astype(str))
2174
- nucleus = nucleus.assign(cell_id=lambda x: 'o' + x['cell_id'].astype(int).astype(str))
2175
- nucleus = nucleus.assign(prcfo = lambda x: x['prcf'] + '_' + x['cell_id'])
2176
- nucleus['nucleus_prcfo_count'] = nucleus.groupby('prcfo')['prcfo'].transform('count')
2177
- if nuclei_limit == False:
2178
- #nucleus = nucleus[~nucleus['prcfo'].duplicated()]
2179
- nucleus = nucleus[nucleus['nucleus_prcfo_count']==1]
2180
- nucleus_g_df, _ = _split_data(nucleus, 'prcfo', 'cell_id')
2181
- print(f'nucleus: {len(nucleus)}')
2182
- print(f'nucleus grouped: {len(nucleus_g_df)}')
2183
- if 'cytoplasm' in tables:
2184
- merged_df = merged_df.merge(nucleus_g_df, left_index=True, right_index=True)
2185
- else:
2186
- merged_df = cells_g_df.merge(nucleus_g_df, left_index=True, right_index=True)
2187
- if 'pathogen' in tables:
2188
- pathogens = pathogens.dropna(subset=['cell_id'])
2189
- pathogens = pathogens.assign(object_label=lambda x: 'o' + x['object_label'].astype(int).astype(str))
2190
- pathogens = pathogens.assign(cell_id=lambda x: 'o' + x['cell_id'].astype(int).astype(str))
2191
- pathogens = pathogens.assign(prcfo = lambda x: x['prcf'] + '_' + x['cell_id'])
2192
- pathogens['pathogen_prcfo_count'] = pathogens.groupby('prcfo')['prcfo'].transform('count')
2193
- if uninfected == False:
2194
- pathogens = pathogens[pathogens['pathogen_prcfo_count']>=1]
2195
- if pathogen_limit == False:
2196
- pathogens = pathogens[pathogens['pathogen_prcfo_count']<=1]
2197
- pathogens_g_df, _ = _split_data(pathogens, 'prcfo', 'cell_id')
2198
- print(f'pathogens: {len(pathogens)}')
2199
- print(f'pathogens grouped: {len(pathogens_g_df)}')
2200
- merged_df = merged_df.merge(pathogens_g_df, left_index=True, right_index=True)
2201
-
2202
- #Add prc column (plate row column)
2203
- metadata = metadata.assign(prc = lambda x: x['plate'] + '_' + x['row_name'] + '_' +x['column_name'])
2204
-
2205
- #Count cells per well
2206
- cells_well = pd.DataFrame(metadata.groupby('prc')['object_label'].nunique())
2207
-
2208
- cells_well.reset_index(inplace=True)
2209
- cells_well.rename(columns={'object_label': 'cells_per_well'}, inplace=True)
2210
- metadata = pd.merge(metadata, cells_well, on='prc', how='inner', suffixes=('', '_drop_col'))
2211
- object_label_cols = [col for col in metadata.columns if '_drop_col' in col]
2212
- metadata.drop(columns=object_label_cols, inplace=True)
2213
-
2214
- #Add prcfo column (plate row column field object)
2215
- metadata = metadata.assign(prcfo = lambda x: x['plate'] + '_' + x['row_name'] + '_' +x['column_name']+ '_' +x['field']+ '_' +x['object_label'])
2216
- metadata.set_index('prcfo', inplace=True)
2217
-
2218
- merged_df = metadata.merge(merged_df, left_index=True, right_index=True)
2219
-
2220
- merged_df = merged_df.dropna(axis=1)
2221
-
2222
- print(f'Generated dataframe with: {len(merged_df.columns)} columns and {len(merged_df)} rows')
2223
-
2224
- obj_df_ls = []
2225
- if 'cell' in tables:
2226
- obj_df_ls.append(cells)
2227
- if 'cytoplasm' in tables:
2228
- obj_df_ls.append(cytoplasms)
2229
- if 'nucleus' in tables:
2230
- obj_df_ls.append(nucleus)
2231
- if 'pathogen' in tables:
2232
- obj_df_ls.append(pathogens)
2233
-
2234
- return merged_df, obj_df_ls
2235
-
2236
2092
  def _results_to_csv(src, df, df_well):
2237
2093
  """
2238
2094
  Save the given dataframes as CSV files in the specified directory.
@@ -2420,7 +2276,7 @@ def _read_db(db_loc, tables):
2420
2276
  conn.close() # Close the connection
2421
2277
  return dfs
2422
2278
 
2423
- def _read_and_merge_data(locs, tables, verbose=False, nuclei_limit=False, pathogen_limit=False, uninfected=False):
2279
+ def _read_and_merge_data_v1(locs, tables, verbose=False, nuclei_limit=False, pathogen_limit=False):
2424
2280
 
2425
2281
  from .utils import _split_data
2426
2282
 
@@ -2532,11 +2388,6 @@ def _read_and_merge_data(locs, tables, verbose=False, nuclei_limit=False, pathog
2532
2388
  pathogens = pathogens.assign(prcfo = lambda x: x['prcf'] + '_' + x['cell_id'])
2533
2389
  pathogens['pathogen_prcfo_count'] = pathogens.groupby('prcfo')['prcfo'].transform('count')
2534
2390
 
2535
- print(f"before noninfected: {len(pathogens)}")
2536
- if uninfected == False:
2537
- pathogens = pathogens[pathogens['pathogen_prcfo_count']>=1]
2538
- print(f"after noninfected: {len(pathogens)}")
2539
-
2540
2391
  if isinstance(pathogen_limit, bool):
2541
2392
  if pathogen_limit == False:
2542
2393
  pathogens = pathogens[pathogens['pathogen_prcfo_count']<=1]
@@ -2592,7 +2443,135 @@ def _read_and_merge_data(locs, tables, verbose=False, nuclei_limit=False, pathog
2592
2443
  if 'pathogen' in tables:
2593
2444
  obj_df_ls.append(pathogens)
2594
2445
 
2595
- return merged_df, obj_df_ls
2446
+ return merged_df, obj_df_ls
2447
+
2448
+ def _read_and_merge_data(locs, tables, verbose=False, nuclei_limit=10, pathogen_limit=10):
2449
+ from .io import _read_db
2450
+ from .utils import _split_data
2451
+
2452
+ # Initialize an empty dictionary to store DataFrames by table name
2453
+ data_dict = {table: [] for table in tables}
2454
+
2455
+ # Extract plate DataFrames
2456
+ for loc in locs:
2457
+ db_dfs = _read_db(loc, tables)
2458
+ for table, df in zip(tables, db_dfs):
2459
+ data_dict[table].append(df)
2460
+
2461
+ # Concatenate rows across locations for each table
2462
+ for table, dfs in data_dict.items():
2463
+ if dfs:
2464
+ data_dict[table] = pd.concat(dfs, axis=0)
2465
+ if verbose:
2466
+ print(f"{table}: {len(data_dict[table])}")
2467
+
2468
+ # Initialize merged DataFrame with 'cells' if available
2469
+ merged_df = pd.DataFrame()
2470
+
2471
+ # Process each table
2472
+ if 'cell' in data_dict:
2473
+ cells = data_dict['cell'].copy()
2474
+ cells = cells.assign(object_label=lambda x: 'o' + x['object_label'].astype(int).astype(str))
2475
+ cells = cells.assign(prcfo=lambda x: x['prcf'] + '_' + x['object_label'])
2476
+ cells_g_df, metadata = _split_data(cells, 'prcfo', 'object_label')
2477
+ merged_df = cells_g_df.copy()
2478
+ if verbose:
2479
+ print(f'cells: {len(cells)}, cells grouped: {len(cells_g_df)}')
2480
+
2481
+ if 'cytoplasm' in data_dict:
2482
+ cytoplasms = data_dict['cytoplasm'].copy()
2483
+ cytoplasms = cytoplasms.assign(object_label=lambda x: 'o' + x['object_label'].astype(int).astype(str))
2484
+ cytoplasms = cytoplasms.assign(prcfo=lambda x: x['prcf'] + '_' + x['object_label'])
2485
+
2486
+ if not 'cell' in data_dict:
2487
+ merged_df, metadata = _split_data(cytoplasms, 'prcfo', 'object_label')
2488
+
2489
+ if verbose:
2490
+ print(f'nucleus: {len(cytoplasms)}, cytoplasms grouped: {len(merged_df)}')
2491
+
2492
+ else:
2493
+ cytoplasms_g_df, _ = _split_data(cytoplasms, 'prcfo', 'object_label')
2494
+ merged_df = merged_df.merge(cytoplasms_g_df, left_index=True, right_index=True)
2495
+
2496
+ if verbose:
2497
+ print(f'cytoplasms: {len(cytoplasms)}, cytoplasms grouped: {len(cytoplasms_g_df)}')
2498
+
2499
+ if 'nucleus' in data_dict:
2500
+ nucleus = data_dict['nucleus'].copy()
2501
+ nucleus = nucleus.dropna(subset=['cell_id'])
2502
+ nucleus = nucleus.assign(object_label=lambda x: 'o' + x['object_label'].astype(int).astype(str))
2503
+ nucleus = nucleus.assign(cell_id=lambda x: 'o' + x['cell_id'].astype(int).astype(str))
2504
+ nucleus = nucleus.assign(prcfo=lambda x: x['prcf'] + '_' + x['cell_id'])
2505
+ nucleus['nucleus_prcfo_count'] = nucleus.groupby('prcfo')['prcfo'].transform('count')
2506
+ if not nuclei_limit:
2507
+ nucleus = nucleus[nucleus['nucleus_prcfo_count'] == 1]
2508
+
2509
+ if all(key not in data_dict for key in ['cell', 'cytoplasm']):
2510
+ merged_df, metadata = _split_data(nucleus, 'prcfo', 'cell_id')
2511
+
2512
+ if verbose:
2513
+ print(f'nucleus: {len(nucleus)}, nucleus grouped: {len(merged_df)}')
2514
+
2515
+ else:
2516
+ nucleus_g_df, _ = _split_data(nucleus, 'prcfo', 'cell_id')
2517
+ merged_df = merged_df.merge(nucleus_g_df, left_index=True, right_index=True)
2518
+
2519
+ if verbose:
2520
+ print(f'nucleus: {len(nucleus)}, nucleus grouped: {len(nucleus_g_df)}')
2521
+
2522
+ if 'pathogen' in data_dict:
2523
+ pathogens = data_dict['pathogen'].copy()
2524
+ pathogens = pathogens.dropna(subset=['cell_id'])
2525
+ pathogens = pathogens.assign(object_label=lambda x: 'o' + x['object_label'].astype(int).astype(str))
2526
+ pathogens = pathogens.assign(cell_id=lambda x: 'o' + x['cell_id'].astype(int).astype(str))
2527
+ pathogens = pathogens.assign(prcfo=lambda x: x['prcf'] + '_' + x['cell_id'])
2528
+ pathogens['pathogen_prcfo_count'] = pathogens.groupby('prcfo')['prcfo'].transform('count')
2529
+
2530
+ if isinstance(pathogen_limit, bool) and not pathogen_limit:
2531
+ pathogens = pathogens[pathogens['pathogen_prcfo_count'] <= 1]
2532
+ elif isinstance(pathogen_limit, (float, int)):
2533
+ pathogens = pathogens[pathogens['pathogen_prcfo_count'] <= int(pathogen_limit)]
2534
+
2535
+ if all(key not in data_dict for key in ['cell', 'cytoplasm', 'nucleus']):
2536
+ merged_df, metadata = _split_data(pathogens, 'prcfo', 'cell_id')
2537
+
2538
+ if verbose:
2539
+ print(f'pathogens: {len(pathogens)}, pathogens grouped: {len(merged_df)}')
2540
+
2541
+ else:
2542
+ pathogens_g_df, _ = _split_data(pathogens, 'prcfo', 'cell_id')
2543
+ merged_df = merged_df.merge(pathogens_g_df, left_index=True, right_index=True)
2544
+
2545
+ if verbose:
2546
+ print(f'pathogens: {len(pathogens)}, pathogens grouped: {len(pathogens_g_df)}')
2547
+
2548
+ if 'png_list' in data_dict:
2549
+ png_list = data_dict['png_list'].copy()
2550
+ png_list_g_df_numeric, png_list_g_df_non_numeric = _split_data(png_list, 'prcfo', 'cell_id')
2551
+ png_list_g_df_non_numeric.drop(columns=['plate','row_name','column_name','field','file_name','cell_id', 'prcf'], inplace=True)
2552
+ if verbose:
2553
+ print(f'png_list: {len(png_list)}, png_list grouped: {len(png_list_g_df_numeric)}')
2554
+ merged_df = merged_df.merge(png_list_g_df_numeric, left_index=True, right_index=True)
2555
+ merged_df = merged_df.merge(png_list_g_df_non_numeric, left_index=True, right_index=True)
2556
+
2557
+ # Add prc (plate row column) and prcfo (plate row column field object) columns
2558
+ metadata = metadata.assign(prc=lambda x: x['plate'] + '_' + x['row_name'] + '_' + x['column_name'])
2559
+ cells_well = metadata.groupby('prc')['object_label'].nunique().reset_index(name='cells_per_well')
2560
+ metadata = metadata.merge(cells_well, on='prc')
2561
+ metadata = metadata.assign(prcfo=lambda x: x['plate'] + '_' + x['row_name'] + '_' + x['column_name'] + '_' + x['field'] + '_' + x['object_label'])
2562
+ metadata.set_index('prcfo', inplace=True)
2563
+
2564
+ # Merge metadata with final merged DataFrame
2565
+ merged_df = metadata.merge(merged_df, left_index=True, right_index=True).dropna(axis=1)
2566
+ merged_df.drop(columns=['label_list_morphology', 'label_list_intensity'], errors='ignore', inplace=True)
2567
+
2568
+ if verbose:
2569
+ print(f'Generated dataframe with: {len(merged_df.columns)} columns and {len(merged_df)} rows')
2570
+
2571
+ # Prepare object DataFrames for output
2572
+ obj_df_ls = [data_dict[table] for table in ['cell', 'cytoplasm', 'nucleus', 'pathogen'] if table in data_dict]
2573
+
2574
+ return merged_df, obj_df_ls
2596
2575
 
2597
2576
  def _read_mask(mask_path):
2598
2577
  mask = imageio2.imread(mask_path)
@@ -2929,8 +2908,8 @@ def generate_training_dataset(settings):
2929
2908
  tables=tables,
2930
2909
  verbose=False,
2931
2910
  nuclei_limit=settings['nuclei_limit'],
2932
- pathogen_limit=settings['pathogen_limit'],
2933
- uninfected=settings['uninfected'])
2911
+ pathogen_limit=settings['pathogen_limit'])
2912
+
2934
2913
  [png_list_df] = _read_db(db_loc=db_path, tables=['png_list'])
2935
2914
  filtered_png_list_df = png_list_df[png_list_df['prcfo'].isin(df.index)]
2936
2915
  return filtered_png_list_df
@@ -2952,8 +2931,7 @@ def generate_training_dataset(settings):
2952
2931
  tables=tables,
2953
2932
  verbose=False,
2954
2933
  nuclei_limit=settings['nuclei_limit'],
2955
- pathogen_limit=settings['pathogen_limit'],
2956
- uninfected=settings['uninfected'])
2934
+ pathogen_limit=settings['pathogen_limit'])
2957
2935
 
2958
2936
  print('length df 1', len(df))
2959
2937
  df = annotate_conditions(df, cells=['HeLa'], pathogens=['pathogen'], treatments=settings['classes'],
@@ -3034,7 +3012,6 @@ def generate_training_dataset(settings):
3034
3012
 
3035
3013
  if 'pathogen' not in settings['tables']:
3036
3014
  settings['pathogen_limit'] = 0
3037
- settings['uninfected'] = True
3038
3015
 
3039
3016
  # Set default settings and save
3040
3017
  settings = set_generate_training_dataset_defaults(settings)
spacr/ml.py CHANGED
@@ -1172,15 +1172,14 @@ def generate_ml_scores(settings):
1172
1172
  db_loc = [src+'/measurements/measurements.db']
1173
1173
  tables = ['cell', 'nucleus', 'pathogen','cytoplasm']
1174
1174
 
1175
- nuclei_limit, pathogen_limit, uninfected = settings['nuclei_limit'], settings['pathogen_limit'], settings['uninfected']
1175
+ nuclei_limit, pathogen_limit = settings['nuclei_limit'], settings['pathogen_limit']
1176
1176
 
1177
1177
  df, _ = _read_and_merge_data(db_loc,
1178
1178
  tables,
1179
1179
  settings['verbose'],
1180
1180
  nuclei_limit,
1181
- pathogen_limit,
1182
- uninfected)
1183
-
1181
+ pathogen_limit)
1182
+
1184
1183
  if settings['annotation_column'] is not None:
1185
1184
 
1186
1185
  settings['location_column'] = settings['annotation_column']