spacr 0.3.0__py3-none-any.whl → 0.3.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41)
  1. spacr/__init__.py +19 -3
  2. spacr/cellpose.py +311 -0
  3. spacr/core.py +142 -2495
  4. spacr/deep_spacr.py +151 -29
  5. spacr/gui.py +1 -0
  6. spacr/gui_core.py +74 -63
  7. spacr/gui_elements.py +110 -5
  8. spacr/gui_utils.py +346 -6
  9. spacr/io.py +631 -51
  10. spacr/logger.py +28 -9
  11. spacr/measure.py +107 -95
  12. spacr/mediar.py +0 -5
  13. spacr/ml.py +964 -0
  14. spacr/openai.py +37 -0
  15. spacr/plot.py +281 -16
  16. spacr/resources/data/lopit.csv +3833 -0
  17. spacr/resources/data/toxoplasma_metadata.csv +8843 -0
  18. spacr/resources/icons/convert.png +0 -0
  19. spacr/resources/{models/cp/toxo_plaque_cyto_e25000_X1120_Y1120.CP_model → icons/dna_matrix.mp4} +0 -0
  20. spacr/sequencing.py +241 -1311
  21. spacr/settings.py +129 -43
  22. spacr/sim.py +0 -2
  23. spacr/submodules.py +348 -0
  24. spacr/timelapse.py +0 -2
  25. spacr/toxo.py +233 -0
  26. spacr/utils.py +275 -173
  27. {spacr-0.3.0.dist-info → spacr-0.3.2.dist-info}/METADATA +7 -1
  28. {spacr-0.3.0.dist-info → spacr-0.3.2.dist-info}/RECORD +32 -33
  29. spacr/chris.py +0 -50
  30. spacr/graph_learning.py +0 -340
  31. spacr/resources/MEDIAR/.git +0 -1
  32. spacr/resources/MEDIAR_weights/.DS_Store +0 -0
  33. spacr/resources/icons/.DS_Store +0 -0
  34. spacr/resources/icons/spacr_logo_rotation.gif +0 -0
  35. spacr/resources/models/cp/toxo_plaque_cyto_e25000_X1120_Y1120.CP_model_settings.csv +0 -23
  36. spacr/resources/models/cp/toxo_pv_lumen.CP_model +0 -0
  37. spacr/sim_app.py +0 -0
  38. {spacr-0.3.0.dist-info → spacr-0.3.2.dist-info}/LICENSE +0 -0
  39. {spacr-0.3.0.dist-info → spacr-0.3.2.dist-info}/WHEEL +0 -0
  40. {spacr-0.3.0.dist-info → spacr-0.3.2.dist-info}/entry_points.txt +0 -0
  41. {spacr-0.3.0.dist-info → spacr-0.3.2.dist-info}/top_level.txt +0 -0
spacr/utils.py CHANGED
@@ -1,6 +1,7 @@
- import sys, os, re, sqlite3, torch, torchvision, random, string, shutil, cv2, tarfile, glob, psutil, platform, gzip, subprocess
+ import os, re, sqlite3, torch, torchvision, random, string, shutil, cv2, tarfile, glob, psutil, platform, gzip, subprocess, time, requests
 
  import numpy as np
+ import pandas as pd
  from cellpose import models as cp_models
  from cellpose import denoise
 
@@ -14,7 +15,6 @@ from skimage.segmentation import clear_border
 
  from collections import defaultdict, OrderedDict
  from PIL import Image
- import pandas as pd
  from statsmodels.stats.outliers_influence import variance_inflation_factor
  from statsmodels.stats.stattools import durbin_watson
  import statsmodels.formula.api as smf
@@ -24,7 +24,7 @@ from itertools import combinations
  from functools import reduce
  from IPython.display import display
 
- from multiprocessing import Pool, cpu_count
+ from multiprocessing import Pool, cpu_count, set_start_method, get_start_method
  from concurrent.futures import ThreadPoolExecutor
 
  import torch.nn as nn
@@ -33,63 +33,44 @@ from torch.utils.checkpoint import checkpoint
  from torch.utils.data import Subset
  from torch.autograd import grad
 
+ from torchvision import models
+ from torchvision.models.resnet import ResNet18_Weights, ResNet34_Weights, ResNet50_Weights, ResNet101_Weights, ResNet152_Weights
+ import torchvision.transforms as transforms
+ from torchvision.models import resnet50
+
  import seaborn as sns
  import matplotlib.pyplot as plt
  from matplotlib.offsetbox import OffsetImage, AnnotationBbox
 
+ from scipy import stats
  import scipy.ndimage as ndi
  from scipy.spatial import distance
- from scipy.stats import fisher_exact
+ from scipy.stats import fisher_exact, f_oneway, kruskal
  from scipy.ndimage.filters import gaussian_filter
  from scipy.spatial import ConvexHull
  from scipy.interpolate import splprep, splev
  from scipy.ndimage import binary_dilation
 
- from sklearn.preprocessing import StandardScaler
  from skimage.exposure import rescale_intensity
  from sklearn.metrics import auc, precision_recall_curve
  from sklearn.model_selection import train_test_split
  from sklearn.linear_model import Lasso, Ridge
- from sklearn.preprocessing import OneHotEncoder
- from sklearn.cluster import KMeans
- from sklearn.preprocessing import StandardScaler
- from sklearn.cluster import DBSCAN
- from sklearn.cluster import KMeans
+ from sklearn.preprocessing import OneHotEncoder, StandardScaler
+ from sklearn.cluster import KMeans, DBSCAN
  from sklearn.manifold import TSNE
- from sklearn.cluster import KMeans
  from sklearn.decomposition import PCA
-
- import umap.umap_ as umap
-
- from torchvision import models
- from torchvision.models.resnet import ResNet18_Weights, ResNet34_Weights, ResNet50_Weights, ResNet101_Weights, ResNet152_Weights
- import torchvision.transforms as transforms
-
  from sklearn.ensemble import RandomForestClassifier
- from sklearn.preprocessing import StandardScaler
- from scipy.stats import f_oneway, kruskal
- from sklearn.cluster import KMeans
- from scipy import stats
 
- from .logger import log_function_call
- from multiprocessing import set_start_method, get_start_method
+ from huggingface_hub import list_repo_files
 
- import torch
- import torchvision.transforms as transforms
- from torchvision.models import resnet50
- from PIL import Image
- import numpy as np
- import umap
- import pandas as pd
- from sklearn.ensemble import RandomForestClassifier
- from sklearn.preprocessing import StandardScaler
- from scipy.stats import f_oneway, kruskal
- from sklearn.cluster import KMeans
- from scipy import stats
+ import umap.umap_ as umap
+ #import umap
 
- def save_settings(settings, name='settings'):
+ def save_settings(settings, name='settings', show=False):
 
      settings_df = pd.DataFrame(list(settings.items()), columns=['Key', 'Value'])
+     if show:
+         display(settings_df)
      settings_csv = os.path.join(settings['src'],'settings',f'{name}.csv')
      os.makedirs(os.path.join(settings['src'],'settings'), exist_ok=True)
      settings_df.to_csv(settings_csv, index=False)
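
The new `show` flag gives a quick preview of the settings table before it is written to `<src>/settings/<name>.csv`. A minimal usage sketch (the settings dict and paths below are illustrative, not taken from the package):

    from spacr.utils import save_settings

    settings = {'src': '/data/experiment1', 'channels': [0, 1, 2]}
    # Displays the Key/Value DataFrame, then writes /data/experiment1/settings/preprocess.csv
    save_settings(settings, name='preprocess', show=True)
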
@@ -303,7 +284,10 @@ def _get_cellpose_batch_size():
      except Exception as e:
          return 8
 
- def _extract_filename_metadata(filenames, src, images_by_key, regular_expression, metadata_type='cellvoyager', pick_slice=False, skip_mode='01'):
+ def _extract_filename_metadata_v1(filenames, src, regular_expression, metadata_type='cellvoyager', pick_slice=False, skip_mode='01'):
+
+     images_by_key = defaultdict(list)
+
      for filename in filenames:
          match = regular_expression.match(filename)
          if match:
@@ -328,7 +312,6 @@ def _extract_filename_metadata(filenames, src, images_by_key, regular_expression
                  if metadata_type =='cq1':
                      orig_wellID = wellID
                      wellID = _convert_cq1_well_id(wellID)
-                     #clear_output(wait=True)
                      print(f'Converted Well ID: {orig_wellID} to {wellID}', end='\r', flush=True)
 
                  if pick_slice:
@@ -338,7 +321,7 @@ def _extract_filename_metadata(filenames, src, images_by_key, regular_expression
                          sliceid = '00'
 
                      if mode == skip_mode:
-                         continue
+                         continue
 
                  key = (plate, well, field, channel, mode)
                  with Image.open(os.path.join(src, filename)) as img:
@@ -351,6 +334,57 @@ def _extract_filename_metadata(filenames, src, images_by_key, regular_expression
 
      return images_by_key
 
+ def _extract_filename_metadata(filenames, src, regular_expression, metadata_type='cellvoyager', pick_slice=False, skip_mode='01'):
+
+     images_by_key = defaultdict(list)
+
+     for filename in filenames:
+         match = regular_expression.match(filename)
+         if match:
+             try:
+                 try:
+                     plate = match.group('plateID')
+                 except:
+                     plate = os.path.basename(src)
+
+                 well = match.group('wellID')
+                 field = match.group('fieldID')
+                 channel = match.group('chanID')
+                 mode = None
+
+                 if well[0].isdigit():
+                     well = str(_safe_int_convert(well))
+                 if field[0].isdigit():
+                     field = str(_safe_int_convert(field))
+                 if channel[0].isdigit():
+                     channel = str(_safe_int_convert(channel))
+
+                 if metadata_type =='cq1':
+                     orig_wellID = wellID
+                     wellID = _convert_cq1_well_id(wellID)
+                     print(f'Converted Well ID: {orig_wellID} to {wellID}', end='\r', flush=True)
+
+                 if pick_slice:
+                     try:
+                         mode = match.group('AID')
+                     except IndexError:
+                         sliceid = '00'
+
+                     if mode == skip_mode:
+                         continue
+
+                 key = (plate, well, field, channel, mode)
+                 file_path = os.path.join(src, filename)  # Store the full path
+                 images_by_key[key].append(file_path)
+
+             except IndexError:
+                 print(f"Could not extract information from filename {filename} using provided regex")
+         else:
+             print(f"Filename {filename} did not match provided regex")
+             continue
+
+     return images_by_key
+
  def mask_object_count(mask):
      """
      Counts the number of objects in a given mask.
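
The rewritten `_extract_filename_metadata` builds `images_by_key` internally and stores full file paths instead of opening each image. It expects a compiled regex with named groups `plateID`, `wellID`, `fieldID` and `chanID` (plus `AID` when `pick_slice=True`). A sketch against a hypothetical filename layout (the pattern below is illustrative, not the regex spacr ships):

    import re
    from spacr.utils import _extract_filename_metadata

    # Hypothetical layout: plate1_A01_f003_ch2.tif
    rx = re.compile(r'(?P<plateID>.+)_(?P<wellID>[A-Z]\d+)_f(?P<fieldID>\d+)_ch(?P<chanID>\d+)\.tif')
    images_by_key = _extract_filename_metadata(['plate1_A01_f003_ch2.tif'], '/data/experiment1', rx)
    # keys are (plate, well, field, channel, mode) tuples mapped to lists of full paths
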
@@ -441,7 +475,7 @@ def _generate_representative_images(db_path, cells=['HeLa'], cell_loc=None, path
      from .plot import _plot_images_on_grid
 
      df = _read_and_join_tables(db_path)
-     df = _annotate_conditions(df, cells, cell_loc, pathogens, pathogen_loc, treatments,treatment_loc)
+     df = annotate_conditions(df, cells, cell_loc, pathogens, pathogen_loc, treatments, treatment_loc)
 
      if update_db:
          _update_database_with_merged_info(db_path, df, table='png_list', columns=['pathogen', 'treatment', 'host_cells', 'condition', 'prcfo'])
@@ -487,34 +521,6 @@ def _map_values(row, values, locs):
          return value_dict.get(row[type_], None)
      return values[0] if values else None
 
- def _annotate_conditions(df, cells=['HeLa'], cell_loc=None, pathogens=['rh'], pathogen_loc=None, treatments=['cm'], treatment_loc=None):
-     """
-     Annotates conditions in the given DataFrame based on the provided parameters.
-
-     Args:
-         df (pandas.DataFrame): The DataFrame to annotate.
-         cells (list, optional): The list of host cell types. Defaults to ['HeLa'].
-         cell_loc (list, optional): The list of location identifiers for host cells. Defaults to None.
-         pathogens (list, optional): The list of pathogens. Defaults to ['rh'].
-         pathogen_loc (list, optional): The list of location identifiers for pathogens. Defaults to None.
-         treatments (list, optional): The list of treatments. Defaults to ['cm'].
-         treatment_loc (list, optional): The list of location identifiers for treatments. Defaults to None.
-
-     Returns:
-         pandas.DataFrame: The annotated DataFrame with the 'host_cells', 'pathogen', 'treatment', and 'condition' columns.
-     """
-
-
-     # Apply mappings or defaults
-     df['host_cells'] = [cells[0]] * len(df) if cell_loc is None else df.apply(_map_values, args=(cells, cell_loc), axis=1)
-     df['pathogen'] = [pathogens[0]] * len(df) if pathogen_loc is None else df.apply(_map_values, args=(pathogens, pathogen_loc), axis=1)
-     df['treatment'] = [treatments[0]] * len(df) if treatment_loc is None else df.apply(_map_values, args=(treatments, treatment_loc), axis=1)
-
-     # Construct condition column
-     df['condition'] = df.apply(lambda row: '_'.join(filter(None, [row.get('pathogen'), row.get('treatment')])), axis=1)
-     df['condition'] = df['condition'].apply(lambda x: x if x else 'none')
-     return df
-
  def is_list_of_lists(var):
      if isinstance(var, list) and all(isinstance(i, list) for i in var):
          return True
@@ -1083,67 +1089,74 @@ def _get_cellpose_channels(src, nucleus_channel, pathogen_channel, cell_channel)
      else:
          cellpose_channels['cell'] = [0,0]
      return cellpose_channels
-
- def annotate_conditions(df, cells=['HeLa'], cell_loc=None, pathogens=['rh'], pathogen_loc=None, treatments=['cm'], treatment_loc=None, types = ['col','col','col']):
+
+ def annotate_conditions(df, cells=None, cell_loc=None, pathogens=None, pathogen_loc=None, treatments=None, treatment_loc=None):
      """
-     Annotates conditions in a DataFrame based on specified criteria.
+     Annotates conditions in a DataFrame based on specified criteria and combines them into a 'condition' column.
+     NaN is used for missing values, and they are excluded from the 'condition' column.
 
      Args:
          df (pandas.DataFrame): The DataFrame to annotate.
-         cells (list, optional): List of host cell types. Defaults to ['HeLa'].
-         cell_loc (list, optional): List of corresponding values for each host cell type. Defaults to None.
-         pathogens (list, optional): List of pathogens. Defaults to ['rh'].
-         pathogen_loc (list, optional): List of corresponding values for each pathogen. Defaults to None.
-         treatments (list, optional): List of treatments. Defaults to ['cm'].
-         treatment_loc (list, optional): List of corresponding values for each treatment. Defaults to None.
-         types (list, optional): List of column types for host cells, pathogens, and treatments. Defaults to ['col','col','col'].
+         cells (list/str, optional): Host cell types. Defaults to None.
+         cell_loc (list of lists, optional): Values for each host cell type. Defaults to None.
+         pathogens (list/str, optional): Pathogens. Defaults to None.
+         pathogen_loc (list of lists, optional): Values for each pathogen. Defaults to None.
+         treatments (list/str, optional): Treatments. Defaults to None.
+         treatment_loc (list of lists, optional): Values for each treatment. Defaults to None.
 
      Returns:
-         pandas.DataFrame: The annotated DataFrame.
+         pandas.DataFrame: Annotated DataFrame with a combined 'condition' column.
      """
+
+     def _get_type(val):
+         """Determine if a value maps to 'row' or 'col'."""
+         if isinstance(val, str) and val.startswith('c'):
+             return 'col'
+         elif isinstance(val, str) and val.startswith('r'):
+             return 'row'
+         return None
 
-     # Function to apply to each row
-     def _map_values(row, dict_, type_='col'):
+     def _map_or_default(column_name, values, loc, df):
          """
-         Maps the values in a row to corresponding keys in a dictionary.
+         Consolidates the logic for mapping values or assigning defaults when loc is None.
 
          Args:
-             row (dict): The row containing the values to be mapped.
-             dict_ (dict): The dictionary containing the mapping values.
-             type_ (str, optional): The type of mapping to perform. Defaults to 'col'.
-
-         Returns:
-             str: The mapped value if found, otherwise None.
+             column_name (str): The column in the DataFrame to annotate.
+             values (list/str): The list of values or a single string to annotate.
+             loc (list of lists): Location mapping for the values, or None if not used.
+             df (pandas.DataFrame): The DataFrame to modify.
          """
-         for values, cols in dict_.items():
-             if row[type_] in cols:
-                 return values
-         return None
+         if isinstance(values, str) or (isinstance(values, list) and loc is None):
+             # Assign all rows the first value in the list or the single string
+             df[column_name] = values if isinstance(values, str) else values[0]
+         elif values is not None and loc is not None:
+             # Perform the location-based mapping
+             value_dict = {val: key for key, loc_list in zip(values, loc) for val in loc_list}
+             df[column_name] = np.nan
+             for val, key in value_dict.items():
+                 loc_type = _get_type(val)
+                 if loc_type:
+                     df.loc[df[loc_type] == val, column_name] = key
+
+     # Handle cells, pathogens, and treatments using the consolidated logic
+     _map_or_default('host_cells', cells, cell_loc, df)
+     _map_or_default('pathogen', pathogens, pathogen_loc, df)
+     _map_or_default('treatment', treatments, treatment_loc, df)
+
+     # Conditionally fill NaN for pathogen and treatment columns if applicable
+     if pathogens is not None:
+         df['pathogen'].fillna(np.nan, inplace=True)
+     if treatments is not None:
+         df['treatment'].fillna(np.nan, inplace=True)
+
+     # Create the 'condition' column by excluding any NaN values, safely checking if 'host_cells', 'pathogen', and 'treatment' exist
+     df['condition'] = df.apply(
+         lambda x: '_'.join([str(v) for v in [x.get('host_cells'), x.get('pathogen'), x.get('treatment')] if pd.notna(v)]),
+         axis=1
+     )
 
-     if cell_loc is None:
-         df['host_cells'] = cells[0]
-     else:
-         cells_dict = dict(zip(cells, cell_loc))
-         df['host_cells'] = df.apply(lambda row: _map_values(row, cells_dict, type_=types[0]), axis=1)
-     if pathogen_loc is None:
-         if pathogens != None:
-             df['pathogen'] = 'none'
-     else:
-         pathogens_dict = dict(zip(pathogens, pathogen_loc))
-         df['pathogen'] = df.apply(lambda row: _map_values(row, pathogens_dict, type_=types[1]), axis=1)
-     if treatment_loc is None:
-         df['treatment'] = 'cm'
-     else:
-         treatments_dict = dict(zip(treatments, treatment_loc))
-         df['treatment'] = df.apply(lambda row: _map_values(row, treatments_dict, type_=types[2]), axis=1)
-     if pathogens != None:
-         df['condition'] = df['pathogen']+'_'+df['treatment']
-     else:
-         df['condition'] = df['treatment']
      return df
-
 
-
  def _split_data(df, group_by, object_type):
      """
      Splits the input dataframe into numeric and non-numeric parts, groups them by the specified column,
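
Compared with the removed `_annotate_conditions`, the new `annotate_conditions` infers whether each location identifier refers to a plate row or column from its prefix ('r...' matches the `row` column, 'c...' matches `col`), so the old `types` argument is gone and unmapped wells become NaN. A minimal sketch, assuming a DataFrame that already carries `row` and `col` columns:

    import pandas as pd
    from spacr.utils import annotate_conditions

    df = pd.DataFrame({'row': ['r1', 'r1', 'r2'], 'col': ['c1', 'c2', 'c3']})
    df = annotate_conditions(df,
                             cells='HeLa',                   # single string: applied to every row
                             pathogens=['rh', 'dgra14'],
                             pathogen_loc=[['c1'], ['c2']],  # column-wise mapping via the 'c' prefix
                             treatments=['cm'])              # list with loc=None: first value everywhere
    # 'condition' joins the non-NaN parts, e.g. 'HeLa_rh_cm'; the unmapped c3 well gets 'HeLa_cm'
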
@@ -1949,9 +1962,10 @@ def add_images_to_tar(paths_chunk, tar_path, total_images):
                  tar.add(img_path, arcname=arcname)
                  with lock:
                      counter.value += 1
-                     if counter.value % 100 == 0: # Print every 100 updates
-                         progress = (counter.value / total_images) * 100
-                         print(f"Progress: {counter.value}/{total_images} ({progress:.2f}%)", end='\r', file=sys.stdout, flush=True)
+                     if counter.value % 10 == 0: # Print every 100 updates
+                         #progress = (counter.value / total_images) * 100
+                         #print(f"Progress: {counter.value}/{total_images} ({progress:.2f}%)", end='\r', file=sys.stdout, flush=True)
+                         print_progress(counter.value, total_images, n_jobs=1, time_ls=None, batch_size=None, operation_type="generating .tar dataset")
              except FileNotFoundError:
                  print(f"File not found: {img_path}")
@@ -2068,52 +2082,6 @@ def check_multicollinearity(x):
      vif_data["VIF"] = [variance_inflation_factor(x.values, i) for i in range(x.shape[1])]
      return vif_data
 
- def generate_dependent_variable(df, dv_loc, pc_min=0.95, nc_max=0.05, agg_type='mean'):
-
-     from .plot import _plot_histograms_and_stats, _plot_plates
-
-     def qstring_to_float(qstr):
-         number = int(qstr[1:])  # Remove the "q" and convert the rest to an integer
-         return number / 100.0
-
-     print("Unique values in plate:", df['plate'].unique())
-     dv_cell_loc = f'{dv_loc}/dv_cell.csv'
-     dv_well_loc = f'{dv_loc}/dv_well.csv'
-
-     df['pred'] = 1-df['pred'] #if you swiched pc and nc
-     df = df[(df['pred'] <= nc_max) | (df['pred'] >= pc_min)]
-
-     if 'prc' not in df.columns:
-         df['prc'] = df['plate'] + '_' + df['row'] + '_' + df['col']
-
-     if agg_type.startswith('q'):
-         val = qstring_to_float(agg_type)
-         agg_type = lambda x: x.quantile(val)
-
-     # Aggregating for mean prediction and total count
-     df_grouped = df.groupby('prc').agg(
-         pred=('pred', agg_type),
-         recruitment=('recruitment', agg_type),
-         count_prc=('prc', 'size'),
-         #count_above_95=('pred', lambda x: (x > 0.95).sum()),
-         mean_pathogen_area=('pathogen_area', 'mean')
-     )
-
-     df_cell = df[['prc', 'pred', 'pathogen_area', 'recruitment']]
-
-     df_cell.to_csv(dv_cell_loc, index=True, header=True, mode='w')
-     df_grouped.to_csv(dv_well_loc, index=True, header=True, mode='w') # Changed from loc to dv_loc
-     display(df)
-     _plot_histograms_and_stats(df)
-     df_grouped = df_grouped.sort_values(by='count_prc', ascending=True)
-     display(df_grouped)
-     print('pred')
-     _plot_plates(df=df_cell, variable='pred', grouping='mean', min_max='allq', cmap='viridis')
-     print('recruitment')
-     _plot_plates(df=df_cell, variable='recruitment', grouping='mean', min_max='allq', cmap='viridis')
-
-     return df_grouped
-
  def lasso_reg(merged_df, alpha_value=0.01, reg_type='lasso'):
      # Separate predictors and response
      X = merged_df[['gene', 'grna', 'plate', 'row', 'column']]
@@ -3592,13 +3560,48 @@ def plot_grid(cluster_images, colors, figuresize, black_background, verbose):
      plt.show()
      return grid_fig
 
- def correct_paths(df, base_path):
+ def generate_path_list_from_db(db_path, file_metadata):
+
+     all_paths = []
+
+     # Connect to the database and retrieve the image paths
+     print(f"Reading DataBase: {db_path}")
+     try:
+         with sqlite3.connect(db_path) as conn:
+             cursor = conn.cursor()
+             if file_metadata:
+                 if isinstance(file_metadata, str):
+                     cursor.execute("SELECT png_path FROM png_list WHERE png_path LIKE ?", (f"%{file_metadata}%",))
+             else:
+                 cursor.execute("SELECT png_path FROM png_list")
 
-     if 'png_path' not in df.columns:
-         print("No 'png_path' column found in the dataframe.")
-         return df, None
+             while True:
+                 rows = cursor.fetchmany(1000)
+                 if not rows:
+                     break
+                 all_paths.extend([row[0] for row in rows])
+
+     except sqlite3.Error as e:
+         print(f"Database error: {e}")
+         return
+     except Exception as e:
+         print(f"Error: {e}")
+         return
 
-     image_paths = df['png_path'].to_list()
+     return all_paths
+
+ def correct_paths(df, base_path):
+
+     if isinstance(df, pd.DataFrame):
+
+         if 'png_path' not in df.columns:
+             print("No 'png_path' column found in the dataframe.")
+             return df, None
+         else:
+             image_paths = df['png_path'].to_list()
+
+     elif isinstance(df, list):
+         image_paths = df
 
      adjusted_image_paths = []
      for path in image_paths:
@@ -3612,9 +3615,11 @@ def correct_paths(df, base_path):
          else:
              adjusted_image_paths.append(path)
 
-     df['png_path'] = adjusted_image_paths
-     image_paths = df['png_path'].to_list()
-     return df, image_paths
+     if isinstance(df, pd.DataFrame):
+         df['png_path'] = adjusted_image_paths
+         return df, adjusted_image_paths
+     else:
+         return adjusted_image_paths
 
  def delete_folder(folder_path):
      if os.path.exists(folder_path) and os.path.isdir(folder_path):
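
`generate_path_list_from_db` pairs naturally with the extended `correct_paths`, which now accepts either a DataFrame or a plain list of paths. A sketch, assuming a typical spacr project with a `measurements.db` SQLite database (the paths are illustrative):

    from spacr.utils import generate_path_list_from_db, correct_paths

    db = '/data/experiment1/measurements/measurements.db'
    paths = generate_path_list_from_db(db, file_metadata='plate1')      # LIKE-filtered png_path query
    local_paths = correct_paths(paths, base_path='/data/experiment1')   # list in, list out
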
@@ -4422,7 +4427,7 @@ def convert_and_relabel_masks(folder_path):
 
  def correct_masks(src):
 
-     from .utils import _load_and_concatenate_arrays
+     from .io import _load_and_concatenate_arrays
 
      cell_path = os.path.join(src,'norm_channel_stack', 'cell_mask_stack')
      convert_and_relabel_masks(cell_path)
@@ -4445,4 +4450,101 @@ def get_cuda_version():
      except (subprocess.CalledProcessError, FileNotFoundError):
          return None
 
+ def all_elements_match(list1, list2):
+     # Check if all elements in list1 are in list2
+     return all(element in list2 for element in list1)
+
+ def prepare_batch_for_segmentation(batch):
+     # Ensure the batch is of dtype float32
+     if batch.dtype != np.float32:
+         batch = batch.astype(np.float32)
+
+     # Normalize each image in the batch
+     for i in range(batch.shape[0]):
+         if batch[i].max() > 1:
+             batch[i] = batch[i] / batch[i].max()
+
+     return batch
+
+ def check_index(df, elements=5, split_char='_'):
+     problematic_indices = []
+     for idx in df.index:
+         parts = str(idx).split(split_char)
+         if len(parts) != elements:
+             problematic_indices.append(idx)
+     if problematic_indices:
+         print("Indices that cannot be separated into 5 parts:")
+         for idx in problematic_indices:
+             print(idx)
+         raise ValueError(f"Found {len(problematic_indices)} problematic indices that do not split into {elements} parts.")
+
+ # Define the mapping function
+ def map_condition(col_value, neg='c1', pos='c2', mix='c3'):
+     if col_value == neg:
+         return 'neg'
+     elif col_value == pos:
+         return 'pos'
+     elif col_value == mix:
+         return 'mix'
+     else:
+         return 'screen'
+
+ def download_models(repo_id="einarolafsson/models", local_dir=None, retries=5, delay=5):
+     """
+     Downloads all model files from Hugging Face and stores them in the specified local directory.
+
+     Args:
+         repo_id (str): The repository ID on Hugging Face (default is 'einarolafsson/models').
+         local_dir (str): The local directory where models will be saved. Defaults to '/home/carruthers/Desktop/test'.
+         retries (int): Number of retry attempts in case of failure.
+         delay (int): Delay in seconds between retries.
 
+     Returns:
+         str: The local path to the downloaded models.
+     """
+     # Create the local directory if it doesn't exist
+     if not os.path.exists(local_dir):
+         os.makedirs(local_dir)
+     elif len(os.listdir(local_dir)) > 0:
+         print(f"Models already downloaded to: {local_dir}")
+         return local_dir
+
+     attempt = 0
+     while attempt < retries:
+         try:
+             # List all files in the repo
+             files = list_repo_files(repo_id, repo_type="dataset")
+             print(f"Files in repository: {files}")  # Debugging print to check file list
+
+             # Download each file
+             for file_name in files:
+                 for download_attempt in range(retries):
+                     try:
+                         url = f"https://huggingface.co/datasets/{repo_id}/resolve/main/{file_name}?download=true"
+                         print(f"Downloading file from: {url}")  # Debugging
+
+                         response = requests.get(url, stream=True)
+                         print(f"HTTP response status: {response.status_code}")  # Debugging
+                         response.raise_for_status()
+
+                         # Save the file locally
+                         local_file_path = os.path.join(local_dir, os.path.basename(file_name))
+                         with open(local_file_path, 'wb') as file:
+                             for chunk in response.iter_content(chunk_size=8192):
+                                 file.write(chunk)
+                         print(f"Downloaded model file: {file_name} to {local_file_path}")
+                         break  # Exit the retry loop if successful
+                     except (requests.HTTPError, requests.Timeout) as e:
+                         print(f"Error downloading {file_name}: {e}. Retrying in {delay} seconds...")
+                         time.sleep(delay)
+                 else:
+                     raise Exception(f"Failed to download {file_name} after multiple attempts.")
+
+             return local_dir  # Return the directory where models are saved
+
+         except (requests.HTTPError, requests.Timeout) as e:
+             print(f"Error downloading files: {e}. Retrying in {delay} seconds...")
+             attempt += 1
+             time.sleep(delay)
+
+     raise Exception("Failed to download model files after multiple attempts.")
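
`download_models` is new in 0.3.2 and pulls spacr's pretrained models from the `einarolafsson/models` dataset repo on Hugging Face. Since the default `local_dir=None` is passed straight to `os.path.exists`, a target directory should be supplied explicitly; the function returns early if the directory is already populated. A minimal sketch (the destination path is illustrative):

    from spacr.utils import download_models

    model_dir = download_models(local_dir='/data/spacr_models')
    # Re-running returns the same directory without re-downloading
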
{spacr-0.3.0.dist-info → spacr-0.3.2.dist-info}/METADATA CHANGED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: spacr
- Version: 0.3.0
+ Version: 0.3.2
  Summary: Spatial phenotype analysis of crisp screens (SpaCr)
  Home-page: https://github.com/EinarOlafsson/spacr
  Author: Einar Birnir Olafsson
@@ -8,6 +8,7 @@ Author-email: olafsson@med.umich.com
  Classifier: Programming Language :: Python :: 3
  Classifier: License :: OSI Approved :: MIT License
  Classifier: Operating System :: OS Independent
+ Description-Content-Type: text/x-rst
  License-File: LICENSE
  Requires-Dist: torch<3.0,>=2.0
  Requires-Dist: torchvision<1.0,>=0.1
@@ -58,6 +59,10 @@ Requires-Dist: segmentation-models-pytorch>=0.3.3
  Requires-Dist: tifffile>=2023.4.12
  Requires-Dist: tqdm>=4.65.0
  Requires-Dist: wandb>=0.16.2
+ Requires-Dist: openai<2.0,>=1.50.2
+ Requires-Dist: nd2reader<4.0,>=3.3.0
+ Requires-Dist: czifile
+ Requires-Dist: adjustText<2.0,>=1.2.0
  Requires-Dist: huggingface-hub<0.25,>=0.24.0
  Provides-Extra: dev
  Requires-Dist: pytest<3.11,>=3.9; extra == "dev"
@@ -115,6 +120,7 @@ If using Windows, switch to Linux—it's free, open-source, and better.
  Before installing SpaCr on OSX ensure OpenMP is installed::
 
     brew install libomp
+    brew install hdf5
 
  SpaCr GUI requires Tkinter. On Linux, ensure Tkinter is installed. (Tkinter is included with the standard Python installation on macOS and Windows)::