spacr 0.3.61__py3-none-any.whl → 0.3.62__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
spacr/io.py CHANGED
@@ -1777,7 +1777,7 @@ def _read_and_join_tables(db_path, table_names=['cell', 'cytoplasm', 'nucleus',
1777
1777
  png_list_df['cell_id'] = png_list_df['cell_id'].str[1:].astype(int)
1778
1778
  png_list_df.rename(columns={'cell_id': 'object_label'}, inplace=True)
1779
1779
  if 'cell' in dataframes:
1780
- join_cols = ['object_label', 'plate', 'row_name', 'column_name']
1780
+ join_cols = ['object_label', 'plate', 'row_name', 'column_name','field']
1781
1781
  dataframes['cell'] = pd.merge(dataframes['cell'], png_list_df, on=join_cols, how='left')
1782
1782
  else:
1783
1783
  print("Cell table not found in database tables.")
@@ -2276,7 +2276,7 @@ def _read_db(db_loc, tables):
2276
2276
  conn.close() # Close the connection
2277
2277
  return dfs
2278
2278
 
2279
- def _read_and_merge_data(locs, tables, verbose=False, nuclei_limit=False, pathogen_limit=False):
2279
+ def _read_and_merge_data_v1(locs, tables, verbose=False, nuclei_limit=False, pathogen_limit=False):
2280
2280
 
2281
2281
  from .utils import _split_data
2282
2282
 
@@ -2443,7 +2443,135 @@ def _read_and_merge_data(locs, tables, verbose=False, nuclei_limit=False, pathog
2443
2443
  if 'pathogen' in tables:
2444
2444
  obj_df_ls.append(pathogens)
2445
2445
 
2446
- return merged_df, obj_df_ls
2446
+ return merged_df, obj_df_ls
2447
+
2448
def _read_and_merge_data(locs, tables, verbose=False, nuclei_limit=10, pathogen_limit=10):
    """
    Read measurement tables from several databases and merge them into one
    DataFrame with one row per 'prcfo' (plate_row_column_field_object) key.

    Each requested table is read from every location with `_read_db`, the
    rows are concatenated per table, and every object table is aggregated per
    'prcfo' with `_split_data`. The aggregated tables are inner-joined on
    their index, so only objects present in every requested table survive.

    Parameters:
    locs (iterable): Database locations, each passed to `_read_db`.
    tables (list of str): Table names to read. The names handled here are
        'cell', 'cytoplasm', 'nucleus', 'pathogen' and 'png_list'.
    verbose (bool): Print row counts while processing.
    nuclei_limit (bool | int | None): When falsy, only cells with exactly one
        nucleus are kept. Truthy values apply no filter.
    pathogen_limit (bool | int | float): False keeps cells with at most one
        pathogen, True applies no filter, and a number keeps cells with at
        most that many pathogens.

    Returns:
    merged_df (pandas.DataFrame): Metadata joined with the aggregated
        measurements, indexed by 'prcfo', with columns containing NaN removed.
    obj_df_ls (list of pandas.DataFrame): The concatenated (unaggregated)
        object tables, in the order cell, cytoplasm, nucleus, pathogen.

    Raises:
    ValueError: If no rows were read for any of the object tables, so no
        metadata can be derived.
    """
    from .utils import _split_data

    def _as_label(series):
        # Numeric ids become 'o<int>' strings, e.g. 3.0 -> 'o3'.
        return 'o' + series.astype(int).astype(str)

    # Collect the frames read from every location, per table.
    # `_read_db` results are paired with `tables` positionally.
    frames_by_table = {table: [] for table in tables}
    for loc in locs:
        for table, df in zip(tables, _read_db(loc, tables)):
            frames_by_table[table].append(df)

    # Concatenate rows across locations. Tables for which nothing was read
    # are left out so the membership tests below reflect available data.
    data_dict = {}
    for table, dfs in frames_by_table.items():
        if dfs:
            data_dict[table] = pd.concat(dfs, axis=0)
            if verbose:
                print(f"{table}: {len(data_dict[table])}")

    merged_df = pd.DataFrame()
    metadata = None  # non-numeric columns of the first object table processed

    if 'cell' in data_dict:
        cells = data_dict['cell'].copy()
        cells = cells.assign(object_label=lambda x: _as_label(x['object_label']))
        cells = cells.assign(prcfo=lambda x: x['prcf'] + '_' + x['object_label'])
        cells_g_df, metadata = _split_data(cells, 'prcfo', 'object_label')
        merged_df = cells_g_df.copy()
        if verbose:
            print(f'cells: {len(cells)}, cells grouped: {len(cells_g_df)}')

    if 'cytoplasm' in data_dict:
        cytoplasms = data_dict['cytoplasm'].copy()
        cytoplasms = cytoplasms.assign(object_label=lambda x: _as_label(x['object_label']))
        cytoplasms = cytoplasms.assign(prcfo=lambda x: x['prcf'] + '_' + x['object_label'])

        if 'cell' not in data_dict:
            merged_df, metadata = _split_data(cytoplasms, 'prcfo', 'object_label')
            if verbose:
                print(f'cytoplasms: {len(cytoplasms)}, cytoplasms grouped: {len(merged_df)}')
        else:
            cytoplasms_g_df, _ = _split_data(cytoplasms, 'prcfo', 'object_label')
            merged_df = merged_df.merge(cytoplasms_g_df, left_index=True, right_index=True)
            if verbose:
                print(f'cytoplasms: {len(cytoplasms)}, cytoplasms grouped: {len(cytoplasms_g_df)}')

    if 'nucleus' in data_dict:
        # Nuclei are keyed by the cell they belong to; unassigned ones are dropped.
        nucleus = data_dict['nucleus'].copy()
        nucleus = nucleus.dropna(subset=['cell_id'])
        nucleus = nucleus.assign(object_label=lambda x: _as_label(x['object_label']))
        nucleus = nucleus.assign(cell_id=lambda x: _as_label(x['cell_id']))
        nucleus = nucleus.assign(prcfo=lambda x: x['prcf'] + '_' + x['cell_id'])
        nucleus['nucleus_prcfo_count'] = nucleus.groupby('prcfo')['prcfo'].transform('count')

        # NOTE(review): unlike pathogen_limit, a numeric nuclei_limit is not
        # applied as an upper bound here; only a falsy value filters. Confirm
        # whether that asymmetry is intended.
        if not nuclei_limit:
            nucleus = nucleus[nucleus['nucleus_prcfo_count'] == 1]

        if all(key not in data_dict for key in ['cell', 'cytoplasm']):
            merged_df, metadata = _split_data(nucleus, 'prcfo', 'cell_id')
            if verbose:
                print(f'nucleus: {len(nucleus)}, nucleus grouped: {len(merged_df)}')
        else:
            nucleus_g_df, _ = _split_data(nucleus, 'prcfo', 'cell_id')
            merged_df = merged_df.merge(nucleus_g_df, left_index=True, right_index=True)
            if verbose:
                print(f'nucleus: {len(nucleus)}, nucleus grouped: {len(nucleus_g_df)}')

    if 'pathogen' in data_dict:
        # Pathogens are keyed by their host cell; unassigned ones are dropped.
        pathogens = data_dict['pathogen'].copy()
        pathogens = pathogens.dropna(subset=['cell_id'])
        pathogens = pathogens.assign(object_label=lambda x: _as_label(x['object_label']))
        pathogens = pathogens.assign(cell_id=lambda x: _as_label(x['cell_id']))
        pathogens = pathogens.assign(prcfo=lambda x: x['prcf'] + '_' + x['cell_id'])
        pathogens['pathogen_prcfo_count'] = pathogens.groupby('prcfo')['prcfo'].transform('count')

        # bool must be tested before int/float: bool is a subclass of int, so
        # True would otherwise be read as a numeric limit of 1.
        if isinstance(pathogen_limit, bool):
            if not pathogen_limit:
                pathogens = pathogens[pathogens['pathogen_prcfo_count'] <= 1]
        elif isinstance(pathogen_limit, (float, int)):
            pathogens = pathogens[pathogens['pathogen_prcfo_count'] <= int(pathogen_limit)]

        if all(key not in data_dict for key in ['cell', 'cytoplasm', 'nucleus']):
            merged_df, metadata = _split_data(pathogens, 'prcfo', 'cell_id')
            if verbose:
                print(f'pathogens: {len(pathogens)}, pathogens grouped: {len(merged_df)}')
        else:
            pathogens_g_df, _ = _split_data(pathogens, 'prcfo', 'cell_id')
            merged_df = merged_df.merge(pathogens_g_df, left_index=True, right_index=True)
            if verbose:
                print(f'pathogens: {len(pathogens)}, pathogens grouped: {len(pathogens_g_df)}')

    if metadata is None:
        # Without an object table there is nothing to derive plate/well
        # metadata from; fail with a clear message instead of a NameError.
        raise ValueError(
            "No data found for any of the tables 'cell', 'cytoplasm', 'nucleus' or "
            f"'pathogen' (requested tables: {list(tables)})."
        )

    if 'png_list' in data_dict:
        png_list = data_dict['png_list'].copy()
        png_list_g_df_numeric, png_list_g_df_non_numeric = _split_data(png_list, 'prcfo', 'cell_id')
        # These columns are already provided by `metadata`.
        png_list_g_df_non_numeric.drop(columns=['plate','row_name','column_name','field','file_name','cell_id', 'prcf'], inplace=True)
        if verbose:
            print(f'png_list: {len(png_list)}, png_list grouped: {len(png_list_g_df_numeric)}')
        merged_df = merged_df.merge(png_list_g_df_numeric, left_index=True, right_index=True)
        merged_df = merged_df.merge(png_list_g_df_non_numeric, left_index=True, right_index=True)

    # Add prc (plate row column) and prcfo (plate row column field object) columns
    metadata = metadata.assign(prc=lambda x: x['plate'] + '_' + x['row_name'] + '_' + x['column_name'])
    cells_well = metadata.groupby('prc')['object_label'].nunique().reset_index(name='cells_per_well')
    metadata = metadata.merge(cells_well, on='prc')
    metadata = metadata.assign(prcfo=lambda x: x['plate'] + '_' + x['row_name'] + '_' + x['column_name'] + '_' + x['field'] + '_' + x['object_label'])
    metadata.set_index('prcfo', inplace=True)

    # Merge metadata with the measurements and drop every column that contains NaN.
    merged_df = metadata.merge(merged_df, left_index=True, right_index=True).dropna(axis=1)
    merged_df.drop(columns=['label_list_morphology', 'label_list_intensity'], errors='ignore', inplace=True)

    if verbose:
        print(f'Generated dataframe with: {len(merged_df.columns)} columns and {len(merged_df)} rows')

    # NOTE(review): these are the concatenated tables as read, not the
    # relabelled/filtered frames built above - confirm callers expect that.
    obj_df_ls = [data_dict[table] for table in ['cell', 'cytoplasm', 'nucleus', 'pathogen'] if table in data_dict]

    return merged_df, obj_df_ls
2447
2575
 
2448
2576
  def _read_mask(mask_path):
2449
2577
  mask = imageio2.imread(mask_path)
spacr/utils.py CHANGED
@@ -1371,7 +1371,7 @@ def annotate_conditions(df, cells=None, cell_loc=None, pathogens=None, pathogen_
1371
1371
 
1372
1372
  return df
1373
1373
 
1374
- def _split_data(df, group_by, object_type):
1374
+ def _split_data_v1(df, group_by, object_type):
1375
1375
  """
1376
1376
  Splits the input dataframe into numeric and non-numeric parts, groups them by the specified column,
1377
1377
  and returns the grouped dataframes.
@@ -1385,16 +1385,72 @@ def _split_data(df, group_by, object_type):
1385
1385
  grouped_numeric (pandas.DataFrame): The grouped dataframe containing numeric columns.
1386
1386
  grouped_non_numeric (pandas.DataFrame): The grouped dataframe containing non-numeric columns.
1387
1387
  """
1388
+
1389
+ if 'prcf' not in df.columns:
1390
+ try:
1391
+ df['prcf'] = df['plate'].astype(str) + '_' + df['row_name'].astype(str) + '_' + df['column_name'].astype(str) + '_' + df['field'].astype(str)
1392
+ except Exception as e:
1393
+ print(e)
1394
+
1388
1395
  df['prcfo'] = df['prcf'] + '_' + df[object_type]
1389
1396
  df = df.set_index(group_by, inplace=False)
1390
1397
 
1391
1398
  df_numeric = df.select_dtypes(include=np.number)
1392
1399
  df_non_numeric = df.select_dtypes(exclude=np.number)
1400
+
1401
+ []
1393
1402
 
1394
1403
  grouped_numeric = df_numeric.groupby(df_numeric.index).mean()
1395
1404
  grouped_non_numeric = df_non_numeric.groupby(df_non_numeric.index).first()
1396
1405
 
1397
1406
  return pd.DataFrame(grouped_numeric), pd.DataFrame(grouped_non_numeric)
1407
+
1408
+ def _split_data(df, group_by, object_type):
1409
+ """
1410
+ Splits the input dataframe into numeric and non-numeric parts, groups them by the specified column,
1411
+ and returns the grouped dataframes with conditional aggregation.
1412
+
1413
+ Parameters:
1414
+ df (pandas.DataFrame): The input dataframe.
1415
+ group_by (str): The column name to group the dataframes by.
1416
+ object_type (str): The column name to concatenate with 'prcf' to create a new column 'prcfo'.
1417
+
1418
+ Returns:
1419
+ grouped_numeric (pandas.DataFrame): The grouped dataframe containing numeric columns with conditional aggregation.
1420
+ grouped_non_numeric (pandas.DataFrame): The grouped dataframe containing non-numeric columns.
1421
+ """
1422
+
1423
+ # Ensure 'prcf' column exists by concatenating specific columns
1424
+ if 'prcf' not in df.columns:
1425
+ try:
1426
+ df['prcf'] = df['plate'].astype(str) + '_' + df['row_name'].astype(str) + '_' + df['column_name'].astype(str) + '_' + df['field'].astype(str)
1427
+ except Exception as e:
1428
+ print(e)
1429
+
1430
+ # Create the 'prcfo' column
1431
+ df['prcfo'] = df['prcf'] + '_' + df[object_type]
1432
+ df = df.set_index(group_by, inplace=False)
1433
+
1434
+ # Split the DataFrame into numeric and non-numeric parts
1435
+ df_numeric = df.select_dtypes(include=np.number)
1436
+ df_non_numeric = df.select_dtypes(exclude=np.number)
1437
+
1438
+ # Define keywords for columns to be summed instead of averaged
1439
+ sum_keywords = ['area', 'perimeter', 'convex_area', 'bbox_area', 'filled_area', 'major_axis_length', 'minor_axis_length', 'equivalent_diameter']
1440
+
1441
+ # Create a dictionary for custom aggregation
1442
+ agg_dict = {}
1443
+ for column in df_numeric.columns:
1444
+ if any(keyword in column for keyword in sum_keywords):
1445
+ agg_dict[column] = 'sum'
1446
+ else:
1447
+ agg_dict[column] = 'mean'
1448
+
1449
+ # Apply custom aggregation
1450
+ grouped_numeric = df_numeric.groupby(df_numeric.index).agg(agg_dict)
1451
+ grouped_non_numeric = df_non_numeric.groupby(df_non_numeric.index).first()
1452
+
1453
+ return pd.DataFrame(grouped_numeric), pd.DataFrame(grouped_non_numeric)
1398
1454
 
1399
1455
  def _calculate_recruitment(df, channel):
1400
1456
  """
@@ -5184,7 +5240,7 @@ def group_feature_class(df, feature_groups=['cell', 'cytoplasm', 'nucleus', 'pat
5184
5240
  else:
5185
5241
  return None
5186
5242
 
5187
- from spacr.plot import spacrGraph
5243
+ from .plot import spacrGraph
5188
5244
 
5189
5245
  df[name] = df['feature'].apply(lambda x: find_feature_class(x, feature_groups))
5190
5246
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: spacr
3
- Version: 0.3.61
3
+ Version: 0.3.62
4
4
  Summary: Spatial phenotype analysis of crisp screens (SpaCr)
5
5
  Home-page: https://github.com/EinarOlafsson/spacr
6
6
  Author: Einar Birnir Olafsson
@@ -15,7 +15,7 @@ spacr/gui.py,sha256=ARyn9Q_g8HoP-cXh1nzMLVFCKqthY4v2u9yORyaQqQE,8230
15
15
  spacr/gui_core.py,sha256=N7R7yvfK_dJhOReM_kW3Ci8Bokhi1OzsxeKqvSGdvV4,41460
16
16
  spacr/gui_elements.py,sha256=EKlvEg_4_je7jciEdR3NTgPrcTraowa2e2RUt-xqd6M,138254
17
17
  spacr/gui_utils.py,sha256=u9RoIOWpAXFEOnUlLpMQZrc1pWSg6omZsJMIhJdRv_g,41211
18
- spacr/io.py,sha256=p-ky3yjtoSSvdsktPXVy_dx8dHgMeWqUZOtOwwfrk2o,136108
18
+ spacr/io.py,sha256=0cBVmhqMaPkdEXib5Vhp19FC_1qfaK_NgtoImuDuwGU,142664
19
19
  spacr/logger.py,sha256=lJhTqt-_wfAunCPl93xE65Wr9Y1oIHJWaZMjunHUeIw,1538
20
20
  spacr/measure.py,sha256=2lK-ZcTxLM-MpXV1oZnucRD9iz5aprwahRKw9IEqshg,55085
21
21
  spacr/mediar.py,sha256=FwLvbLQW5LQzPgvJZG8Lw7GniA2vbZx6Jv6vIKu7I5c,14743
@@ -28,7 +28,7 @@ spacr/sim.py,sha256=1xKhXimNU3ukzIw-3l9cF3Znc_brW8h20yv8fSTzvss,71173
28
28
  spacr/submodules.py,sha256=Xq4gjvooHN8S7cTk5PIAkd7XD2c7CMVqNpeo8GCvtHc,42489
29
29
  spacr/timelapse.py,sha256=KGfG4L4-QnFfgbF7L6C5wL_3gd_rqr05Foje6RsoTBg,39603
30
30
  spacr/toxo.py,sha256=z2nT5aAze3NUIlwnBQcnkARihDwoPfqOgQIVoUluyK0,25087
31
- spacr/utils.py,sha256=tqIKiSc30xEX0IlfSpoctFJQDVnGHDAX7l1VakRCBuY,220601
31
+ spacr/utils.py,sha256=vvciLh1gH0nsrCWQw3taUcDjxP59wme3gqrejeNO05w,222943
32
32
  spacr/version.py,sha256=axH5tnGwtgSnJHb5IDhiu4Zjk5GhLyAEDRe-rnaoFOA,409
33
33
  spacr/resources/MEDIAR/.gitignore,sha256=Ff1q9Nme14JUd-4Q3jZ65aeQ5X4uttptssVDgBVHYo8,152
34
34
  spacr/resources/MEDIAR/LICENSE,sha256=yEj_TRDLUfDpHDNM0StALXIt6mLqSgaV2hcCwa6_TcY,1065
@@ -151,9 +151,9 @@ spacr/resources/icons/umap.png,sha256=dOLF3DeLYy9k0nkUybiZMe1wzHQwLJFRmgccppw-8b
151
151
  spacr/resources/images/plate1_E01_T0001F001L01A01Z01C02.tif,sha256=Tl0ZUfZ_AYAbu0up_nO0tPRtF1BxXhWQ3T3pURBCCRo,7958528
152
152
  spacr/resources/images/plate1_E01_T0001F001L01A02Z01C01.tif,sha256=m8N-V71rA1TT4dFlENNg8s0Q0YEXXs8slIn7yObmZJQ,7958528
153
153
  spacr/resources/images/plate1_E01_T0001F001L01A03Z01C03.tif,sha256=Pbhk7xn-KUP6RSIhJsxQcrHFImBm3GEpLkzx7WOc-5M,7958528
154
- spacr-0.3.61.dist-info/LICENSE,sha256=SR-2MeGc6SCM1UORJYyarSWY_A-JaOMFDj7ReSs9tRM,1083
155
- spacr-0.3.61.dist-info/METADATA,sha256=2jlzT9lkaXx01IWlYMYrpf24p48qDHvrRLZm-YUUl-0,6032
156
- spacr-0.3.61.dist-info/WHEEL,sha256=HiCZjzuy6Dw0hdX5R3LCFPDmFS4BWl8H-8W39XfmgX4,91
157
- spacr-0.3.61.dist-info/entry_points.txt,sha256=BMC0ql9aNNpv8lUZ8sgDLQMsqaVnX5L535gEhKUP5ho,296
158
- spacr-0.3.61.dist-info/top_level.txt,sha256=GJPU8FgwRXGzKeut6JopsSRY2R8T3i9lDgya42tLInY,6
159
- spacr-0.3.61.dist-info/RECORD,,
154
+ spacr-0.3.62.dist-info/LICENSE,sha256=SR-2MeGc6SCM1UORJYyarSWY_A-JaOMFDj7ReSs9tRM,1083
155
+ spacr-0.3.62.dist-info/METADATA,sha256=Ox14lWGxbXuMW36MriYHppKcZDqD_4HopfbcLAi8dLc,6032
156
+ spacr-0.3.62.dist-info/WHEEL,sha256=HiCZjzuy6Dw0hdX5R3LCFPDmFS4BWl8H-8W39XfmgX4,91
157
+ spacr-0.3.62.dist-info/entry_points.txt,sha256=BMC0ql9aNNpv8lUZ8sgDLQMsqaVnX5L535gEhKUP5ho,296
158
+ spacr-0.3.62.dist-info/top_level.txt,sha256=GJPU8FgwRXGzKeut6JopsSRY2R8T3i9lDgya42tLInY,6
159
+ spacr-0.3.62.dist-info/RECORD,,
File without changes