spacr 0.3.70__py3-none-any.whl → 0.3.71__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
spacr/io.py CHANGED
@@ -2854,6 +2854,12 @@ def generate_loaders(src, mode='train', image_size=224, batch_size=32, classes=[
2854
2854
  else:
2855
2855
  print(f'mode:{mode} is not valid, use mode = train or test')
2856
2856
  return
2857
+
2858
+ class_1_path = os.path.join(data_dir, classes[0])
2859
+ class_2_path = os.path.join(data_dir, classes[1])
2860
+ if not os.path.exists(class_1_path) or not os.path.exists(class_2_path):
2861
+ print(f'One or more classes not found in {data_dir}')
2862
+ print (f'Possible class names are {os.listdir(data_dir)}')
2857
2863
 
2858
2864
  data = spacrDataset(data_dir, classes, transform=transform, shuffle=shuffle, pin_memory=pin_memory)
2859
2865
  num_workers = n_jobs if n_jobs is not None else 0
@@ -2922,7 +2928,8 @@ def generate_training_dataset(settings):
2922
2928
  # Function to get the smallest class size based on the dataset mode
2923
2929
  def get_smallest_class_size(df, settings, dataset_mode):
2924
2930
  if dataset_mode == 'metadata':
2925
- sizes = [len(df[df['metadata_based_class'] == c]) for c in settings['classes']]
2931
+ sizes = [len(df[df['condition'] == c]) for c in settings['class_metadata']]
2932
+ print(f'Class sizes: {sizes}')
2926
2933
  elif dataset_mode == 'annotation':
2927
2934
  sizes = [len(class_paths) for class_paths in df]
2928
2935
  size = min(sizes)
@@ -2977,15 +2984,29 @@ def generate_training_dataset(settings):
2977
2984
  def metadata_based_selection(db_path, settings):
2978
2985
  class_paths_ls = []
2979
2986
  df = filter_png_list(db_path, settings, tables=settings['tables'])
2987
+
2988
+ df = annotate_conditions(df,
2989
+ cells=None,
2990
+ cell_loc=None,
2991
+ pathogens=settings['metadata_item_1_name'],
2992
+ pathogen_loc=settings['metadata_item_1_value'],
2993
+ treatments=settings['metadata_item_2_name'],
2994
+ treatment_loc=settings['metadata_item_2_value'])
2980
2995
 
2981
- df['metadata_based_class'] = pd.NA
2982
- for i, class_ in enumerate(settings['classes']):
2983
- ls = settings['class_metadata'][i]
2984
- df.loc[df[settings['metadata_type_by']].isin(ls), 'metadata_based_class'] = class_
2996
+ #if settings['metadata_type_by'] == 'condition':
2997
+ df = df.dropna(subset=['condition'])
2998
+
2999
+ display(df)
3000
+
3001
+ #df['metadata_based_class'] = pd.NA
3002
+ #for i, class_ in enumerate(settings['classes']):
3003
+ # ls = settings['class_metadata'][i]
3004
+ # df.loc[df[settings['metadata_type_by']].isin(ls), 'metadata_based_class'] = class_
2985
3005
 
2986
3006
  size = get_smallest_class_size(df, settings, 'metadata')
2987
- for class_ in settings['classes']:
2988
- class_temp_df = df[df['metadata_based_class'] == class_]
3007
+
3008
+ for class_ in settings['class_metadata']:
3009
+ class_temp_df = df[df['condition'] == class_]
2989
3010
  print(f'Found {len(class_temp_df)} images for class {class_}')
2990
3011
  class_paths_temp = class_temp_df['png_path'].tolist()
2991
3012
 
@@ -3001,10 +3022,11 @@ def generate_training_dataset(settings):
3001
3022
  def annotation_based_selection(db_path, dst, settings):
3002
3023
  class_paths_ls = training_dataset_from_annotation(db_path, dst, settings['annotation_column'], annotated_classes=settings['annotated_classes'])
3003
3024
 
3004
- #size = get_smallest_class_size(class_paths_ls, settings, 'annotation')
3005
- #for i, class_paths in enumerate(class_paths_ls):
3006
- # if len(class_paths) > size:
3007
- # class_paths_ls[i] = random.sample(class_paths, size)
3025
+ return class_paths_ls
3026
+
3027
+ # Metadata-Annotation-based selection logic
3028
+ def metadata_annotation_based_selection(db_path, dst, settings):
3029
+ class_paths_ls = training_dataset_from_annotation_metadata(db_path, dst, settings['annotation_column'], annotated_classes=settings['annotated_classes'], metadata_type_by=settings['metadata_type_by'], class_metadata=settings['class_metadata'])
3008
3030
 
3009
3031
  return class_paths_ls
3010
3032
 
@@ -3053,6 +3075,14 @@ def generate_training_dataset(settings):
3053
3075
 
3054
3076
  elif settings['dataset_mode'] == 'measurement':
3055
3077
  class_paths_ls = measurement_based_selection(settings, db_path, tables=settings['tables'])
3078
+
3079
+ elif settings['dataset_mode'] == 'metadata_annotation':
3080
+ class_paths_ls = metadata_annotation_based_selection(db_path, dst, settings)
3081
+
3082
+ else:
3083
+ print(f"Invalid dataset mode: {settings['dataset_mode']}")
3084
+ print(f"Valid options are: 'annotation', 'metadata', 'measurement', 'metadata_annotation'")
3085
+ return
3056
3086
 
3057
3087
  if class_path_list is None:
3058
3088
  class_path_list = [[] for _ in range(len(class_paths_ls))]
@@ -3063,7 +3093,7 @@ def generate_training_dataset(settings):
3063
3093
 
3064
3094
  # Generate and return training and testing directories
3065
3095
  print('class_path_list',len(class_path_list))
3066
- train_class_dir, test_class_dir = generate_dataset_from_lists(dst, class_data=class_path_list, classes=settings['classes'], test_split=settings['test_split'])
3096
+ train_class_dir, test_class_dir = generate_dataset_from_lists(dst, class_data=class_path_list, classes=settings['class_metadata'], test_split=settings['test_split'])
3067
3097
 
3068
3098
  return train_class_dir, test_class_dir
3069
3099
 
@@ -3104,11 +3134,16 @@ def training_dataset_from_annotation(db_path, dst, annotation_column='test', ann
3104
3134
  alt_class_paths = [path for path, annotation in all_paths if annotation != target_class]
3105
3135
  print('Alternative paths available:', len(alt_class_paths))
3106
3136
 
3107
- # Randomly sample an equal number of images for the second class
3108
- sampled_alt_class_paths = random.sample(alt_class_paths, min(count_target_class, len(alt_class_paths)))
3109
- print(f'Sampled {len(sampled_alt_class_paths)} alternative images for balancing')
3137
+ # Sample the same number of images for both classes
3138
+ balanced_count = min(count_target_class, len(alt_class_paths))
3139
+ print(f'Sampling {balanced_count} images for each class')
3140
+
3141
+ # Resample target class to match the smaller size
3142
+ sampled_target_class_paths = random.sample(class_paths[0], balanced_count)
3143
+ sampled_alt_class_paths = random.sample(alt_class_paths, balanced_count)
3110
3144
 
3111
- # Append this list as the second class
3145
+ # Update class paths
3146
+ class_paths[0] = sampled_target_class_paths
3112
3147
  class_paths.append(sampled_alt_class_paths)
3113
3148
 
3114
3149
  print(f'Generated a list of lists from annotation of {len(class_paths)} classes')
@@ -3117,7 +3152,7 @@ def training_dataset_from_annotation(db_path, dst, annotation_column='test', ann
3117
3152
 
3118
3153
  return class_paths
3119
3154
 
3120
- def training_dataset_from_annotation_v2(db_path, dst, annotation_column='test', annotated_classes=(1, 2)):
3155
+ def training_dataset_from_annotation_metadata(db_path, dst, annotation_column='test', annotated_classes=(1, 2), metadata_type_by='column_name', class_metadata=['c1','c2']):
3121
3156
  all_paths = []
3122
3157
 
3123
3158
  # Connect to the database and retrieve the image paths and annotations
@@ -3125,7 +3160,7 @@ def training_dataset_from_annotation_v2(db_path, dst, annotation_column='test',
3125
3160
  with sqlite3.connect(db_path) as conn:
3126
3161
  cursor = conn.cursor()
3127
3162
  # Retrieve all paths and annotations from the database
3128
- query = f"SELECT png_path, {annotation_column} FROM png_list"
3163
+ query = f"SELECT png_path, {annotation_column}, row_name, column_name FROM png_list"
3129
3164
  cursor.execute(query)
3130
3165
 
3131
3166
  while True:
@@ -3137,6 +3172,20 @@ def training_dataset_from_annotation_v2(db_path, dst, annotation_column='test',
3137
3172
 
3138
3173
  print('Total paths retrieved:', len(all_paths))
3139
3174
 
3175
+ # Filter all_paths by metadata_type_by and class_metadata
3176
+ filtered_paths = []
3177
+ metadata_index = {'row_name': 2, 'column_name': 3}.get(metadata_type_by, None)
3178
+ if metadata_index is None:
3179
+ raise ValueError(f"Invalid metadata_type_by value: {metadata_type_by}. Must be 'row_name' or 'column_name'. {class_metadata} must be a list formatted as ['c1', 'c2'] or ['r1', 'r2']")
3180
+
3181
+ for row in all_paths:
3182
+ if row[metadata_index] in class_metadata:
3183
+ filtered_paths.append(row)
3184
+
3185
+ print('Total filtered paths:', len(filtered_paths))
3186
+ #all_paths = filtered_paths
3187
+ all_paths = [(row[0], row[1]) for row in filtered_paths]
3188
+
3140
3189
  # Filter paths based on annotated_classes
3141
3190
  class_paths = []
3142
3191
  for class_ in annotated_classes:
@@ -3154,11 +3203,16 @@ def training_dataset_from_annotation_v2(db_path, dst, annotation_column='test',
3154
3203
  alt_class_paths = [path for path, annotation in all_paths if annotation != target_class]
3155
3204
  print('Alternative paths available:', len(alt_class_paths))
3156
3205
 
3157
- # Randomly sample an equal number of images for the second class
3158
- sampled_alt_class_paths = random.sample(alt_class_paths, min(count_target_class, len(alt_class_paths)))
3159
- print(f'Sampled {len(sampled_alt_class_paths)} alternative images for balancing')
3206
+ # Sample the same number of images for both classes
3207
+ balanced_count = min(count_target_class, len(alt_class_paths))
3208
+ print(f'Sampling {balanced_count} images for each class')
3209
+
3210
+ # Resample target class to match the smaller size
3211
+ sampled_target_class_paths = random.sample(class_paths[0], balanced_count)
3212
+ sampled_alt_class_paths = random.sample(alt_class_paths, balanced_count)
3160
3213
 
3161
- # Append this list as the second class
3214
+ # Update class paths
3215
+ class_paths[0] = sampled_target_class_paths
3162
3216
  class_paths.append(sampled_alt_class_paths)
3163
3217
 
3164
3218
  print(f'Generated a list of lists from annotation of {len(class_paths)} classes')
spacr/settings.py CHANGED
@@ -338,7 +338,11 @@ def set_generate_training_dataset_defaults(settings):
338
338
  settings.setdefault('dataset_mode','metadata')
339
339
  settings.setdefault('annotation_column','test')
340
340
  settings.setdefault('annotated_classes',[1,2])
341
- settings.setdefault('classes',['nc','pc'])
341
+ settings.setdefault('class_metadata',['nc','pc'])
342
+ settings.setdefault('metadata_item_1_name',['nc','pc'])
343
+ settings.setdefault('metadata_item_1_value',[['c19','c2'],['c3','c4']])
344
+ settings.setdefault('metadata_item_2_name',['sample1','sample2'])
345
+ settings.setdefault('metadata_item_2_value',[['r1','r2'],['r3','r4']])
342
346
  settings.setdefault('size',224)
343
347
  settings.setdefault('test_split',0.1)
344
348
  settings.setdefault('class_metadata',[['c1'],['c2']])
spacr/utils.py CHANGED
@@ -1368,6 +1368,7 @@ def annotate_conditions(df, cells=None, cell_loc=None, pathogens=None, pathogen_
1368
1368
  lambda x: '_'.join([str(v) for v in [x.get('host_cells'), x.get('pathogen'), x.get('treatment')] if pd.notna(v)]),
1369
1369
  axis=1
1370
1370
  )
1371
+ df.loc[df['condition'] == '', 'condition'] = pd.NA
1371
1372
 
1372
1373
  return df
1373
1374
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: spacr
3
- Version: 0.3.70
3
+ Version: 0.3.71
4
4
  Summary: Spatial phenotype analysis of crisp screens (SpaCr)
5
5
  Home-page: https://github.com/EinarOlafsson/spacr
6
6
  Author: Einar Birnir Olafsson
@@ -15,7 +15,7 @@ spacr/gui.py,sha256=ARyn9Q_g8HoP-cXh1nzMLVFCKqthY4v2u9yORyaQqQE,8230
15
15
  spacr/gui_core.py,sha256=N7R7yvfK_dJhOReM_kW3Ci8Bokhi1OzsxeKqvSGdvV4,41460
16
16
  spacr/gui_elements.py,sha256=EKlvEg_4_je7jciEdR3NTgPrcTraowa2e2RUt-xqd6M,138254
17
17
  spacr/gui_utils.py,sha256=u9RoIOWpAXFEOnUlLpMQZrc1pWSg6omZsJMIhJdRv_g,41211
18
- spacr/io.py,sha256=ActzerMS0NC1-MIffGTFBdKcqL1T72d3VjfieTta3O4,143101
18
+ spacr/io.py,sha256=LF6lpphw7GSeuoHQijPykjKNF56wNTFEWFZuDQp3O6Q,145739
19
19
  spacr/logger.py,sha256=lJhTqt-_wfAunCPl93xE65Wr9Y1oIHJWaZMjunHUeIw,1538
20
20
  spacr/measure.py,sha256=2lK-ZcTxLM-MpXV1oZnucRD9iz5aprwahRKw9IEqshg,55085
21
21
  spacr/mediar.py,sha256=FwLvbLQW5LQzPgvJZG8Lw7GniA2vbZx6Jv6vIKu7I5c,14743
@@ -23,13 +23,13 @@ spacr/ml.py,sha256=h0IrXoNnyNzZLPYbtZPFI6c4Qeu1gH8R3iUz_O7-ar0,78114
23
23
  spacr/openai.py,sha256=5vBZ3Jl2llYcW3oaTEXgdyCB2aJujMUIO5K038z7w_A,1246
24
24
  spacr/plot.py,sha256=gXC7y3uT4sx8KRODeSFWQG_A1CylsuJ5B7HYe_un6so,165177
25
25
  spacr/sequencing.py,sha256=ClUfwPPK6rNUbUuiEkzcwakzVyDKKUMv9ricrxT8qQY,25227
26
- spacr/settings.py,sha256=wZcqdTWaRus27wn9P0EGyftcJn_i0IwlM9pyeCVqxr8,80173
26
+ spacr/settings.py,sha256=14PFxw3YK9tUqbaC6BqfbrWk3sN7gyTZAAI8KNy5KBA,80461
27
27
  spacr/sim.py,sha256=1xKhXimNU3ukzIw-3l9cF3Znc_brW8h20yv8fSTzvss,71173
28
28
  spacr/stats.py,sha256=mbhwsyIqt5upsSD346qGjdCw7CFBa0tIS7zHU9e0jNI,9536
29
29
  spacr/submodules.py,sha256=SK8YEs850LAx30YAiwap7ecLpp1_p-bci6H-Or0GLoA,55500
30
30
  spacr/timelapse.py,sha256=KGfG4L4-QnFfgbF7L6C5wL_3gd_rqr05Foje6RsoTBg,39603
31
31
  spacr/toxo.py,sha256=z2nT5aAze3NUIlwnBQcnkARihDwoPfqOgQIVoUluyK0,25087
32
- spacr/utils.py,sha256=zojZlZtGwwDVDY0fgRt5XViVuJLuxadRO1IYctWm_SQ,221885
32
+ spacr/utils.py,sha256=LX2Hu6QC-yG9ZVBiM2dkSN9yytCB0eTTRGfExiZzYzE,221940
33
33
  spacr/version.py,sha256=axH5tnGwtgSnJHb5IDhiu4Zjk5GhLyAEDRe-rnaoFOA,409
34
34
  spacr/resources/MEDIAR/.gitignore,sha256=Ff1q9Nme14JUd-4Q3jZ65aeQ5X4uttptssVDgBVHYo8,152
35
35
  spacr/resources/MEDIAR/LICENSE,sha256=yEj_TRDLUfDpHDNM0StALXIt6mLqSgaV2hcCwa6_TcY,1065
@@ -152,9 +152,9 @@ spacr/resources/icons/umap.png,sha256=dOLF3DeLYy9k0nkUybiZMe1wzHQwLJFRmgccppw-8b
152
152
  spacr/resources/images/plate1_E01_T0001F001L01A01Z01C02.tif,sha256=Tl0ZUfZ_AYAbu0up_nO0tPRtF1BxXhWQ3T3pURBCCRo,7958528
153
153
  spacr/resources/images/plate1_E01_T0001F001L01A02Z01C01.tif,sha256=m8N-V71rA1TT4dFlENNg8s0Q0YEXXs8slIn7yObmZJQ,7958528
154
154
  spacr/resources/images/plate1_E01_T0001F001L01A03Z01C03.tif,sha256=Pbhk7xn-KUP6RSIhJsxQcrHFImBm3GEpLkzx7WOc-5M,7958528
155
- spacr-0.3.70.dist-info/LICENSE,sha256=SR-2MeGc6SCM1UORJYyarSWY_A-JaOMFDj7ReSs9tRM,1083
156
- spacr-0.3.70.dist-info/METADATA,sha256=152VlHisIA_E2F9NYFd_pqDqVxqpGZ07qUDzb7BTnPc,6032
157
- spacr-0.3.70.dist-info/WHEEL,sha256=HiCZjzuy6Dw0hdX5R3LCFPDmFS4BWl8H-8W39XfmgX4,91
158
- spacr-0.3.70.dist-info/entry_points.txt,sha256=BMC0ql9aNNpv8lUZ8sgDLQMsqaVnX5L535gEhKUP5ho,296
159
- spacr-0.3.70.dist-info/top_level.txt,sha256=GJPU8FgwRXGzKeut6JopsSRY2R8T3i9lDgya42tLInY,6
160
- spacr-0.3.70.dist-info/RECORD,,
155
+ spacr-0.3.71.dist-info/LICENSE,sha256=SR-2MeGc6SCM1UORJYyarSWY_A-JaOMFDj7ReSs9tRM,1083
156
+ spacr-0.3.71.dist-info/METADATA,sha256=2qa3TRT-ux_8Gw57GLmzvPJWctnRhfy9H4crTMaqT6Y,6032
157
+ spacr-0.3.71.dist-info/WHEEL,sha256=HiCZjzuy6Dw0hdX5R3LCFPDmFS4BWl8H-8W39XfmgX4,91
158
+ spacr-0.3.71.dist-info/entry_points.txt,sha256=BMC0ql9aNNpv8lUZ8sgDLQMsqaVnX5L535gEhKUP5ho,296
159
+ spacr-0.3.71.dist-info/top_level.txt,sha256=GJPU8FgwRXGzKeut6JopsSRY2R8T3i9lDgya42tLInY,6
160
+ spacr-0.3.71.dist-info/RECORD,,
File without changes