spacr 0.3.61__py3-none-any.whl → 0.3.62__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- spacr/io.py +131 -3
- spacr/utils.py +58 -2
- {spacr-0.3.61.dist-info → spacr-0.3.62.dist-info}/METADATA +1 -1
- {spacr-0.3.61.dist-info → spacr-0.3.62.dist-info}/RECORD +8 -8
- {spacr-0.3.61.dist-info → spacr-0.3.62.dist-info}/LICENSE +0 -0
- {spacr-0.3.61.dist-info → spacr-0.3.62.dist-info}/WHEEL +0 -0
- {spacr-0.3.61.dist-info → spacr-0.3.62.dist-info}/entry_points.txt +0 -0
- {spacr-0.3.61.dist-info → spacr-0.3.62.dist-info}/top_level.txt +0 -0
spacr/io.py
CHANGED
@@ -1777,7 +1777,7 @@ def _read_and_join_tables(db_path, table_names=['cell', 'cytoplasm', 'nucleus',
|
|
1777
1777
|
png_list_df['cell_id'] = png_list_df['cell_id'].str[1:].astype(int)
|
1778
1778
|
png_list_df.rename(columns={'cell_id': 'object_label'}, inplace=True)
|
1779
1779
|
if 'cell' in dataframes:
|
1780
|
-
join_cols = ['object_label', 'plate', 'row_name', 'column_name']
|
1780
|
+
join_cols = ['object_label', 'plate', 'row_name', 'column_name','field']
|
1781
1781
|
dataframes['cell'] = pd.merge(dataframes['cell'], png_list_df, on=join_cols, how='left')
|
1782
1782
|
else:
|
1783
1783
|
print("Cell table not found in database tables.")
|
@@ -2276,7 +2276,7 @@ def _read_db(db_loc, tables):
|
|
2276
2276
|
conn.close() # Close the connection
|
2277
2277
|
return dfs
|
2278
2278
|
|
2279
|
-
def
|
2279
|
+
def _read_and_merge_data_v1(locs, tables, verbose=False, nuclei_limit=False, pathogen_limit=False):
|
2280
2280
|
|
2281
2281
|
from .utils import _split_data
|
2282
2282
|
|
@@ -2443,7 +2443,135 @@ def _read_and_merge_data(locs, tables, verbose=False, nuclei_limit=False, pathog
|
|
2443
2443
|
if 'pathogen' in tables:
|
2444
2444
|
obj_df_ls.append(pathogens)
|
2445
2445
|
|
2446
|
-
return merged_df, obj_df_ls
|
2446
|
+
return merged_df, obj_df_ls
|
2447
|
+
|
2448
|
+
def _limit_objects_per_cell(df, count_col, limit):
    """Keep only rows whose per-cell object count (``count_col``) respects ``limit``.

    Parameters:
        df (pandas.DataFrame): Object table carrying the ``count_col`` column.
        count_col (str): Column holding the number of objects sharing the row's 'prcfo'.
        limit (bool | int | float | None): ``False`` keeps rows with at most one object,
            ``True`` or ``None`` keeps every row, a number keeps rows with at most
            ``int(limit)`` objects.

    Returns:
        pandas.DataFrame: The filtered table (``df`` itself when nothing is filtered).
    """
    # bool must be tested before the numeric case: bool is a subclass of int, so
    # True would otherwise be read as the number 1 and behave exactly like False.
    if isinstance(limit, bool):
        return df if limit else df[df[count_col] <= 1]
    if isinstance(limit, (int, float)):
        return df[df[count_col] <= int(limit)]
    return df


def _merge_object_table(merged_df, metadata, obj_df, id_col, label, split, verbose):
    """Group ``obj_df`` by 'prcfo' with ``split`` and fold the numeric part into ``merged_df``.

    The first table handled (``metadata is None``) seeds both the merged frame and the
    metadata frame; later tables are merged on the index.
    """
    grouped, grouped_meta = split(obj_df, 'prcfo', id_col)
    if metadata is None:
        merged_df, metadata = grouped, grouped_meta
    else:
        merged_df = merged_df.merge(grouped, left_index=True, right_index=True)
    if verbose:
        print(f'{label}: {len(obj_df)}, {label} grouped: {len(grouped)}')
    return merged_df, metadata


def _read_and_merge_data(locs, tables, verbose=False, nuclei_limit=10, pathogen_limit=10):
    """Read object tables from several databases and merge them into one frame per cell.

    Each table is read from every location with ``_read_db`` and the rows are
    concatenated. The 'cell', 'cytoplasm', 'nucleus' and 'pathogen' tables are then
    grouped by 'prcfo' ('prcf' + '_' + cell identifier) with ``_split_data`` and merged
    on that index; an optional 'png_list' table is merged last.

    Parameters:
        locs (iterable): Database locations passed one by one to ``_read_db``.
        tables (list of str): Table names to read. Results of ``_read_db`` are paired
            with these names positionally.
        verbose (bool): Print row counts while processing.
        nuclei_limit (bool | int | float): ``False`` (or any falsy value) keeps only
            nuclei that are alone in their cell, ``True`` applies no limit, a number
            keeps nuclei whose cell holds at most that many nuclei.
        pathogen_limit (bool | int | float): Same semantics for pathogens, except that
            ``None`` applies no limit.

    Returns:
        tuple: ``(merged_df, obj_df_ls)`` where ``merged_df`` is indexed by 'prcfo' and
        ``obj_df_ls`` lists the concatenated tables, as read, in the order
        cell, cytoplasm, nucleus, pathogen (only those that were read).

    Raises:
        ValueError: If none of the cell/cytoplasm/nucleus/pathogen tables could be read,
            since no metadata frame can be built in that case.
    """
    from .utils import _split_data

    # Gather the per-database frames of every requested table.
    frames_by_table = {table: [] for table in tables}
    for loc in locs:
        for table, df in zip(tables, _read_db(loc, tables)):
            frames_by_table[table].append(df)

    # Concatenate rows across locations; tables without any frame are treated as
    # absent instead of being left behind as an empty list.
    data_dict = {}
    for table, dfs in frames_by_table.items():
        if not dfs:
            continue
        data_dict[table] = pd.concat(dfs, axis=0)
        if verbose:
            print(f"{table}: {len(data_dict[table])}")

    merged_df = pd.DataFrame()
    metadata = None  # set by the first object table that is processed

    if 'cell' in data_dict:
        cells = data_dict['cell'].copy()
        cells = cells.assign(object_label=lambda x: 'o' + x['object_label'].astype(int).astype(str))
        cells = cells.assign(prcfo=lambda x: x['prcf'] + '_' + x['object_label'])
        merged_df, metadata = _merge_object_table(
            merged_df, metadata, cells, 'object_label', 'cells', _split_data, verbose)

    if 'cytoplasm' in data_dict:
        cytoplasms = data_dict['cytoplasm'].copy()
        cytoplasms = cytoplasms.assign(object_label=lambda x: 'o' + x['object_label'].astype(int).astype(str))
        cytoplasms = cytoplasms.assign(prcfo=lambda x: x['prcf'] + '_' + x['object_label'])
        merged_df, metadata = _merge_object_table(
            merged_df, metadata, cytoplasms, 'object_label', 'cytoplasms', _split_data, verbose)

    if 'nucleus' in data_dict:
        nucleus = data_dict['nucleus'].copy()
        nucleus = nucleus.dropna(subset=['cell_id'])
        nucleus = nucleus.assign(object_label=lambda x: 'o' + x['object_label'].astype(int).astype(str))
        nucleus = nucleus.assign(cell_id=lambda x: 'o' + x['cell_id'].astype(int).astype(str))
        nucleus = nucleus.assign(prcfo=lambda x: x['prcf'] + '_' + x['cell_id'])
        nucleus['nucleus_prcfo_count'] = nucleus.groupby('prcfo')['prcfo'].transform('count')
        # `or False` keeps the historical rule that every falsy value (False, 0, None)
        # means "single-nucleus cells only".
        nucleus = _limit_objects_per_cell(nucleus, 'nucleus_prcfo_count', nuclei_limit or False)
        merged_df, metadata = _merge_object_table(
            merged_df, metadata, nucleus, 'cell_id', 'nucleus', _split_data, verbose)

    if 'pathogen' in data_dict:
        pathogens = data_dict['pathogen'].copy()
        pathogens = pathogens.dropna(subset=['cell_id'])
        pathogens = pathogens.assign(object_label=lambda x: 'o' + x['object_label'].astype(int).astype(str))
        pathogens = pathogens.assign(cell_id=lambda x: 'o' + x['cell_id'].astype(int).astype(str))
        pathogens = pathogens.assign(prcfo=lambda x: x['prcf'] + '_' + x['cell_id'])
        pathogens['pathogen_prcfo_count'] = pathogens.groupby('prcfo')['prcfo'].transform('count')
        pathogens = _limit_objects_per_cell(pathogens, 'pathogen_prcfo_count', pathogen_limit)
        merged_df, metadata = _merge_object_table(
            merged_df, metadata, pathogens, 'cell_id', 'pathogens', _split_data, verbose)

    if metadata is None:
        raise ValueError(
            "No 'cell', 'cytoplasm', 'nucleus' or 'pathogen' table could be read "
            f"(tables={list(tables)}); cannot build the merged dataframe."
        )

    if 'png_list' in data_dict:
        png_list = data_dict['png_list'].copy()
        png_list_g_df_numeric, png_list_g_df_non_numeric = _split_data(png_list, 'prcfo', 'cell_id')
        # These identifiers are already provided by the metadata frame.
        png_list_g_df_non_numeric = png_list_g_df_non_numeric.drop(
            columns=['plate', 'row_name', 'column_name', 'field', 'file_name', 'cell_id', 'prcf'])
        if verbose:
            print(f'png_list: {len(png_list)}, png_list grouped: {len(png_list_g_df_numeric)}')
        merged_df = merged_df.merge(png_list_g_df_numeric, left_index=True, right_index=True)
        merged_df = merged_df.merge(png_list_g_df_non_numeric, left_index=True, right_index=True)

    # Add prc (plate row column) and prcfo (plate row column field object) columns
    metadata = metadata.assign(prc=lambda x: x['plate'] + '_' + x['row_name'] + '_' + x['column_name'])
    cells_well = metadata.groupby('prc')['object_label'].nunique().reset_index(name='cells_per_well')
    metadata = metadata.merge(cells_well, on='prc')
    metadata = metadata.assign(prcfo=lambda x: x['plate'] + '_' + x['row_name'] + '_' + x['column_name'] + '_' + x['field'] + '_' + x['object_label'])
    metadata = metadata.set_index('prcfo')

    # Merge metadata with the measurements; columns containing any NaN are dropped.
    merged_df = metadata.merge(merged_df, left_index=True, right_index=True).dropna(axis=1)
    merged_df = merged_df.drop(columns=['label_list_morphology', 'label_list_intensity'], errors='ignore')

    if verbose:
        print(f'Generated dataframe with: {len(merged_df.columns)} columns and {len(merged_df)} rows')

    # NOTE(review): the list below holds the tables as read (before relabelling and
    # limit filtering); confirm callers do not expect the processed frames.
    obj_df_ls = [data_dict[table] for table in ['cell', 'cytoplasm', 'nucleus', 'pathogen'] if table in data_dict]

    return merged_df, obj_df_ls
|
2447
2575
|
|
2448
2576
|
def _read_mask(mask_path):
|
2449
2577
|
mask = imageio2.imread(mask_path)
|
spacr/utils.py
CHANGED
@@ -1371,7 +1371,7 @@ def annotate_conditions(df, cells=None, cell_loc=None, pathogens=None, pathogen_
|
|
1371
1371
|
|
1372
1372
|
return df
|
1373
1373
|
|
1374
|
-
def
|
1374
|
+
def _split_data_v1(df, group_by, object_type):
|
1375
1375
|
"""
|
1376
1376
|
Splits the input dataframe into numeric and non-numeric parts, groups them by the specified column,
|
1377
1377
|
and returns the grouped dataframes.
|
@@ -1385,16 +1385,72 @@ def _split_data(df, group_by, object_type):
|
|
1385
1385
|
grouped_numeric (pandas.DataFrame): The grouped dataframe containing numeric columns.
|
1386
1386
|
grouped_non_numeric (pandas.DataFrame): The grouped dataframe containing non-numeric columns.
|
1387
1387
|
"""
|
1388
|
+
|
1389
|
+
if 'prcf' not in df.columns:
|
1390
|
+
try:
|
1391
|
+
df['prcf'] = df['plate'].astype(str) + '_' + df['row_name'].astype(str) + '_' + df['column_name'].astype(str) + '_' + df['field'].astype(str)
|
1392
|
+
except Exception as e:
|
1393
|
+
print(e)
|
1394
|
+
|
1388
1395
|
df['prcfo'] = df['prcf'] + '_' + df[object_type]
|
1389
1396
|
df = df.set_index(group_by, inplace=False)
|
1390
1397
|
|
1391
1398
|
df_numeric = df.select_dtypes(include=np.number)
|
1392
1399
|
df_non_numeric = df.select_dtypes(exclude=np.number)
|
1400
|
+
|
1401
|
+
[]
|
1393
1402
|
|
1394
1403
|
grouped_numeric = df_numeric.groupby(df_numeric.index).mean()
|
1395
1404
|
grouped_non_numeric = df_non_numeric.groupby(df_non_numeric.index).first()
|
1396
1405
|
|
1397
1406
|
return pd.DataFrame(grouped_numeric), pd.DataFrame(grouped_non_numeric)
|
1407
|
+
|
1408
|
+
def _split_data(df, group_by, object_type):
|
1409
|
+
"""
|
1410
|
+
Splits the input dataframe into numeric and non-numeric parts, groups them by the specified column,
|
1411
|
+
and returns the grouped dataframes with conditional aggregation.
|
1412
|
+
|
1413
|
+
Parameters:
|
1414
|
+
df (pandas.DataFrame): The input dataframe.
|
1415
|
+
group_by (str): The column name to group the dataframes by.
|
1416
|
+
object_type (str): The column name to concatenate with 'prcf' to create a new column 'prcfo'.
|
1417
|
+
|
1418
|
+
Returns:
|
1419
|
+
grouped_numeric (pandas.DataFrame): The grouped dataframe containing numeric columns with conditional aggregation.
|
1420
|
+
grouped_non_numeric (pandas.DataFrame): The grouped dataframe containing non-numeric columns.
|
1421
|
+
"""
|
1422
|
+
|
1423
|
+
# Ensure 'prcf' column exists by concatenating specific columns
|
1424
|
+
if 'prcf' not in df.columns:
|
1425
|
+
try:
|
1426
|
+
df['prcf'] = df['plate'].astype(str) + '_' + df['row_name'].astype(str) + '_' + df['column_name'].astype(str) + '_' + df['field'].astype(str)
|
1427
|
+
except Exception as e:
|
1428
|
+
print(e)
|
1429
|
+
|
1430
|
+
# Create the 'prcfo' column
|
1431
|
+
df['prcfo'] = df['prcf'] + '_' + df[object_type]
|
1432
|
+
df = df.set_index(group_by, inplace=False)
|
1433
|
+
|
1434
|
+
# Split the DataFrame into numeric and non-numeric parts
|
1435
|
+
df_numeric = df.select_dtypes(include=np.number)
|
1436
|
+
df_non_numeric = df.select_dtypes(exclude=np.number)
|
1437
|
+
|
1438
|
+
# Define keywords for columns to be summed instead of averaged
|
1439
|
+
sum_keywords = ['area', 'perimeter', 'convex_area', 'bbox_area', 'filled_area', 'major_axis_length', 'minor_axis_length', 'equivalent_diameter']
|
1440
|
+
|
1441
|
+
# Create a dictionary for custom aggregation
|
1442
|
+
agg_dict = {}
|
1443
|
+
for column in df_numeric.columns:
|
1444
|
+
if any(keyword in column for keyword in sum_keywords):
|
1445
|
+
agg_dict[column] = 'sum'
|
1446
|
+
else:
|
1447
|
+
agg_dict[column] = 'mean'
|
1448
|
+
|
1449
|
+
# Apply custom aggregation
|
1450
|
+
grouped_numeric = df_numeric.groupby(df_numeric.index).agg(agg_dict)
|
1451
|
+
grouped_non_numeric = df_non_numeric.groupby(df_non_numeric.index).first()
|
1452
|
+
|
1453
|
+
return pd.DataFrame(grouped_numeric), pd.DataFrame(grouped_non_numeric)
|
1398
1454
|
|
1399
1455
|
def _calculate_recruitment(df, channel):
|
1400
1456
|
"""
|
@@ -5184,7 +5240,7 @@ def group_feature_class(df, feature_groups=['cell', 'cytoplasm', 'nucleus', 'pat
|
|
5184
5240
|
else:
|
5185
5241
|
return None
|
5186
5242
|
|
5187
|
-
from
|
5243
|
+
from .plot import spacrGraph
|
5188
5244
|
|
5189
5245
|
df[name] = df['feature'].apply(lambda x: find_feature_class(x, feature_groups))
|
5190
5246
|
|
@@ -15,7 +15,7 @@ spacr/gui.py,sha256=ARyn9Q_g8HoP-cXh1nzMLVFCKqthY4v2u9yORyaQqQE,8230
|
|
15
15
|
spacr/gui_core.py,sha256=N7R7yvfK_dJhOReM_kW3Ci8Bokhi1OzsxeKqvSGdvV4,41460
|
16
16
|
spacr/gui_elements.py,sha256=EKlvEg_4_je7jciEdR3NTgPrcTraowa2e2RUt-xqd6M,138254
|
17
17
|
spacr/gui_utils.py,sha256=u9RoIOWpAXFEOnUlLpMQZrc1pWSg6omZsJMIhJdRv_g,41211
|
18
|
-
spacr/io.py,sha256=
|
18
|
+
spacr/io.py,sha256=0cBVmhqMaPkdEXib5Vhp19FC_1qfaK_NgtoImuDuwGU,142664
|
19
19
|
spacr/logger.py,sha256=lJhTqt-_wfAunCPl93xE65Wr9Y1oIHJWaZMjunHUeIw,1538
|
20
20
|
spacr/measure.py,sha256=2lK-ZcTxLM-MpXV1oZnucRD9iz5aprwahRKw9IEqshg,55085
|
21
21
|
spacr/mediar.py,sha256=FwLvbLQW5LQzPgvJZG8Lw7GniA2vbZx6Jv6vIKu7I5c,14743
|
@@ -28,7 +28,7 @@ spacr/sim.py,sha256=1xKhXimNU3ukzIw-3l9cF3Znc_brW8h20yv8fSTzvss,71173
|
|
28
28
|
spacr/submodules.py,sha256=Xq4gjvooHN8S7cTk5PIAkd7XD2c7CMVqNpeo8GCvtHc,42489
|
29
29
|
spacr/timelapse.py,sha256=KGfG4L4-QnFfgbF7L6C5wL_3gd_rqr05Foje6RsoTBg,39603
|
30
30
|
spacr/toxo.py,sha256=z2nT5aAze3NUIlwnBQcnkARihDwoPfqOgQIVoUluyK0,25087
|
31
|
-
spacr/utils.py,sha256=
|
31
|
+
spacr/utils.py,sha256=vvciLh1gH0nsrCWQw3taUcDjxP59wme3gqrejeNO05w,222943
|
32
32
|
spacr/version.py,sha256=axH5tnGwtgSnJHb5IDhiu4Zjk5GhLyAEDRe-rnaoFOA,409
|
33
33
|
spacr/resources/MEDIAR/.gitignore,sha256=Ff1q9Nme14JUd-4Q3jZ65aeQ5X4uttptssVDgBVHYo8,152
|
34
34
|
spacr/resources/MEDIAR/LICENSE,sha256=yEj_TRDLUfDpHDNM0StALXIt6mLqSgaV2hcCwa6_TcY,1065
|
@@ -151,9 +151,9 @@ spacr/resources/icons/umap.png,sha256=dOLF3DeLYy9k0nkUybiZMe1wzHQwLJFRmgccppw-8b
|
|
151
151
|
spacr/resources/images/plate1_E01_T0001F001L01A01Z01C02.tif,sha256=Tl0ZUfZ_AYAbu0up_nO0tPRtF1BxXhWQ3T3pURBCCRo,7958528
|
152
152
|
spacr/resources/images/plate1_E01_T0001F001L01A02Z01C01.tif,sha256=m8N-V71rA1TT4dFlENNg8s0Q0YEXXs8slIn7yObmZJQ,7958528
|
153
153
|
spacr/resources/images/plate1_E01_T0001F001L01A03Z01C03.tif,sha256=Pbhk7xn-KUP6RSIhJsxQcrHFImBm3GEpLkzx7WOc-5M,7958528
|
154
|
-
spacr-0.3.
|
155
|
-
spacr-0.3.
|
156
|
-
spacr-0.3.
|
157
|
-
spacr-0.3.
|
158
|
-
spacr-0.3.
|
159
|
-
spacr-0.3.
|
154
|
+
spacr-0.3.62.dist-info/LICENSE,sha256=SR-2MeGc6SCM1UORJYyarSWY_A-JaOMFDj7ReSs9tRM,1083
|
155
|
+
spacr-0.3.62.dist-info/METADATA,sha256=Ox14lWGxbXuMW36MriYHppKcZDqD_4HopfbcLAi8dLc,6032
|
156
|
+
spacr-0.3.62.dist-info/WHEEL,sha256=HiCZjzuy6Dw0hdX5R3LCFPDmFS4BWl8H-8W39XfmgX4,91
|
157
|
+
spacr-0.3.62.dist-info/entry_points.txt,sha256=BMC0ql9aNNpv8lUZ8sgDLQMsqaVnX5L535gEhKUP5ho,296
|
158
|
+
spacr-0.3.62.dist-info/top_level.txt,sha256=GJPU8FgwRXGzKeut6JopsSRY2R8T3i9lDgya42tLInY,6
|
159
|
+
spacr-0.3.62.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|