spacr 0.3.70__py3-none-any.whl → 0.3.71__py3-none-any.whl
This diff compares the contents of two publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
- spacr/io.py +76 -22
- spacr/settings.py +5 -1
- spacr/utils.py +1 -0
- {spacr-0.3.70.dist-info → spacr-0.3.71.dist-info}/METADATA +1 -1
- {spacr-0.3.70.dist-info → spacr-0.3.71.dist-info}/RECORD +9 -9
- {spacr-0.3.70.dist-info → spacr-0.3.71.dist-info}/LICENSE +0 -0
- {spacr-0.3.70.dist-info → spacr-0.3.71.dist-info}/WHEEL +0 -0
- {spacr-0.3.70.dist-info → spacr-0.3.71.dist-info}/entry_points.txt +0 -0
- {spacr-0.3.70.dist-info → spacr-0.3.71.dist-info}/top_level.txt +0 -0
spacr/io.py
CHANGED
@@ -2854,6 +2854,12 @@ def generate_loaders(src, mode='train', image_size=224, batch_size=32, classes=[
|
|
2854
2854
|
else:
|
2855
2855
|
print(f'mode:{mode} is not valid, use mode = train or test')
|
2856
2856
|
return
|
2857
|
+
|
2858
|
+
class_1_path = os.path.join(data_dir, classes[0])
|
2859
|
+
class_2_path = os.path.join(data_dir, classes[1])
|
2860
|
+
if not os.path.exists(class_1_path) or not os.path.exists(class_2_path):
|
2861
|
+
print(f'One or more classes not found in {data_dir}')
|
2862
|
+
print (f'Possible class names are {os.listdir(data_dir)}')
|
2857
2863
|
|
2858
2864
|
data = spacrDataset(data_dir, classes, transform=transform, shuffle=shuffle, pin_memory=pin_memory)
|
2859
2865
|
num_workers = n_jobs if n_jobs is not None else 0
|
@@ -2922,7 +2928,8 @@ def generate_training_dataset(settings):
|
|
2922
2928
|
# Function to get the smallest class size based on the dataset mode
|
2923
2929
|
def get_smallest_class_size(df, settings, dataset_mode):
|
2924
2930
|
if dataset_mode == 'metadata':
|
2925
|
-
sizes = [len(df[df['
|
2931
|
+
sizes = [len(df[df['condition'] == c]) for c in settings['class_metadata']]
|
2932
|
+
print(f'Class sizes: {sizes}')
|
2926
2933
|
elif dataset_mode == 'annotation':
|
2927
2934
|
sizes = [len(class_paths) for class_paths in df]
|
2928
2935
|
size = min(sizes)
|
@@ -2977,15 +2984,29 @@ def generate_training_dataset(settings):
|
|
2977
2984
|
def metadata_based_selection(db_path, settings):
|
2978
2985
|
class_paths_ls = []
|
2979
2986
|
df = filter_png_list(db_path, settings, tables=settings['tables'])
|
2987
|
+
|
2988
|
+
df = annotate_conditions(df,
|
2989
|
+
cells=None,
|
2990
|
+
cell_loc=None,
|
2991
|
+
pathogens=settings['metadata_item_1_name'],
|
2992
|
+
pathogen_loc=settings['metadata_item_1_value'],
|
2993
|
+
treatments=settings['metadata_item_2_name'],
|
2994
|
+
treatment_loc=settings['metadata_item_2_value'])
|
2980
2995
|
|
2981
|
-
|
2982
|
-
|
2983
|
-
|
2984
|
-
|
2996
|
+
#if settings['metadata_type_by'] == 'condition':
|
2997
|
+
df = df.dropna(subset=['condition'])
|
2998
|
+
|
2999
|
+
display(df)
|
3000
|
+
|
3001
|
+
#df['metadata_based_class'] = pd.NA
|
3002
|
+
#for i, class_ in enumerate(settings['classes']):
|
3003
|
+
# ls = settings['class_metadata'][i]
|
3004
|
+
# df.loc[df[settings['metadata_type_by']].isin(ls), 'metadata_based_class'] = class_
|
2985
3005
|
|
2986
3006
|
size = get_smallest_class_size(df, settings, 'metadata')
|
2987
|
-
|
2988
|
-
|
3007
|
+
|
3008
|
+
for class_ in settings['class_metadata']:
|
3009
|
+
class_temp_df = df[df['condition'] == class_]
|
2989
3010
|
print(f'Found {len(class_temp_df)} images for class {class_}')
|
2990
3011
|
class_paths_temp = class_temp_df['png_path'].tolist()
|
2991
3012
|
|
@@ -3001,10 +3022,11 @@ def generate_training_dataset(settings):
|
|
3001
3022
|
def annotation_based_selection(db_path, dst, settings):
|
3002
3023
|
class_paths_ls = training_dataset_from_annotation(db_path, dst, settings['annotation_column'], annotated_classes=settings['annotated_classes'])
|
3003
3024
|
|
3004
|
-
|
3005
|
-
|
3006
|
-
|
3007
|
-
|
3025
|
+
return class_paths_ls
|
3026
|
+
|
3027
|
+
# Metadata-Annotation-based selection logic
|
3028
|
+
def metadata_annotation_based_selection(db_path, dst, settings):
|
3029
|
+
class_paths_ls = training_dataset_from_annotation_metadata(db_path, dst, settings['annotation_column'], annotated_classes=settings['annotated_classes'], metadata_type_by=settings['metadata_type_by'], class_metadata=settings['class_metadata'])
|
3008
3030
|
|
3009
3031
|
return class_paths_ls
|
3010
3032
|
|
@@ -3053,6 +3075,14 @@ def generate_training_dataset(settings):
|
|
3053
3075
|
|
3054
3076
|
elif settings['dataset_mode'] == 'measurement':
|
3055
3077
|
class_paths_ls = measurement_based_selection(settings, db_path, tables=settings['tables'])
|
3078
|
+
|
3079
|
+
elif settings['dataset_mode'] == 'metadata_annotation':
|
3080
|
+
class_paths_ls = metadata_annotation_based_selection(db_path, dst, settings)
|
3081
|
+
|
3082
|
+
else:
|
3083
|
+
print(f"Invalid dataset mode: {settings['dataset_mode']}")
|
3084
|
+
print(f"Valid options are: 'annotation', 'metadata', 'measurement', 'metadata_annotation'")
|
3085
|
+
return
|
3056
3086
|
|
3057
3087
|
if class_path_list is None:
|
3058
3088
|
class_path_list = [[] for _ in range(len(class_paths_ls))]
|
@@ -3063,7 +3093,7 @@ def generate_training_dataset(settings):
|
|
3063
3093
|
|
3064
3094
|
# Generate and return training and testing directories
|
3065
3095
|
print('class_path_list',len(class_path_list))
|
3066
|
-
train_class_dir, test_class_dir = generate_dataset_from_lists(dst, class_data=class_path_list, classes=settings['
|
3096
|
+
train_class_dir, test_class_dir = generate_dataset_from_lists(dst, class_data=class_path_list, classes=settings['class_metadata'], test_split=settings['test_split'])
|
3067
3097
|
|
3068
3098
|
return train_class_dir, test_class_dir
|
3069
3099
|
|
@@ -3104,11 +3134,16 @@ def training_dataset_from_annotation(db_path, dst, annotation_column='test', ann
|
|
3104
3134
|
alt_class_paths = [path for path, annotation in all_paths if annotation != target_class]
|
3105
3135
|
print('Alternative paths available:', len(alt_class_paths))
|
3106
3136
|
|
3107
|
-
#
|
3108
|
-
|
3109
|
-
print(f'
|
3137
|
+
# Sample the same number of images for both classes
|
3138
|
+
balanced_count = min(count_target_class, len(alt_class_paths))
|
3139
|
+
print(f'Sampling {balanced_count} images for each class')
|
3140
|
+
|
3141
|
+
# Resample target class to match the smaller size
|
3142
|
+
sampled_target_class_paths = random.sample(class_paths[0], balanced_count)
|
3143
|
+
sampled_alt_class_paths = random.sample(alt_class_paths, balanced_count)
|
3110
3144
|
|
3111
|
-
#
|
3145
|
+
# Update class paths
|
3146
|
+
class_paths[0] = sampled_target_class_paths
|
3112
3147
|
class_paths.append(sampled_alt_class_paths)
|
3113
3148
|
|
3114
3149
|
print(f'Generated a list of lists from annotation of {len(class_paths)} classes')
|
@@ -3117,7 +3152,7 @@ def training_dataset_from_annotation(db_path, dst, annotation_column='test', ann
|
|
3117
3152
|
|
3118
3153
|
return class_paths
|
3119
3154
|
|
3120
|
-
def
|
3155
|
+
def training_dataset_from_annotation_metadata(db_path, dst, annotation_column='test', annotated_classes=(1, 2), metadata_type_by='column_name', class_metadata=['c1','c2']):
|
3121
3156
|
all_paths = []
|
3122
3157
|
|
3123
3158
|
# Connect to the database and retrieve the image paths and annotations
|
@@ -3125,7 +3160,7 @@ def training_dataset_from_annotation_v2(db_path, dst, annotation_column='test',
|
|
3125
3160
|
with sqlite3.connect(db_path) as conn:
|
3126
3161
|
cursor = conn.cursor()
|
3127
3162
|
# Retrieve all paths and annotations from the database
|
3128
|
-
query = f"SELECT png_path, {annotation_column} FROM png_list"
|
3163
|
+
query = f"SELECT png_path, {annotation_column}, row_name, column_name FROM png_list"
|
3129
3164
|
cursor.execute(query)
|
3130
3165
|
|
3131
3166
|
while True:
|
@@ -3137,6 +3172,20 @@ def training_dataset_from_annotation_v2(db_path, dst, annotation_column='test',
|
|
3137
3172
|
|
3138
3173
|
print('Total paths retrieved:', len(all_paths))
|
3139
3174
|
|
3175
|
+
# Filter all_paths by metadata_type_by and class_metadata
|
3176
|
+
filtered_paths = []
|
3177
|
+
metadata_index = {'row_name': 2, 'column_name': 3}.get(metadata_type_by, None)
|
3178
|
+
if metadata_index is None:
|
3179
|
+
raise ValueError(f"Invalid metadata_type_by value: {metadata_type_by}. Must be 'row_name' or 'column_name'. {class_metadata} must be a list formatted as ['c1', 'c2'] or ['r1', 'r2']")
|
3180
|
+
|
3181
|
+
for row in all_paths:
|
3182
|
+
if row[metadata_index] in class_metadata:
|
3183
|
+
filtered_paths.append(row)
|
3184
|
+
|
3185
|
+
print('Total filtered paths:', len(filtered_paths))
|
3186
|
+
#all_paths = filtered_paths
|
3187
|
+
all_paths = [(row[0], row[1]) for row in filtered_paths]
|
3188
|
+
|
3140
3189
|
# Filter paths based on annotated_classes
|
3141
3190
|
class_paths = []
|
3142
3191
|
for class_ in annotated_classes:
|
@@ -3154,11 +3203,16 @@ def training_dataset_from_annotation_v2(db_path, dst, annotation_column='test',
|
|
3154
3203
|
alt_class_paths = [path for path, annotation in all_paths if annotation != target_class]
|
3155
3204
|
print('Alternative paths available:', len(alt_class_paths))
|
3156
3205
|
|
3157
|
-
#
|
3158
|
-
|
3159
|
-
print(f'
|
3206
|
+
# Sample the same number of images for both classes
|
3207
|
+
balanced_count = min(count_target_class, len(alt_class_paths))
|
3208
|
+
print(f'Sampling {balanced_count} images for each class')
|
3209
|
+
|
3210
|
+
# Resample target class to match the smaller size
|
3211
|
+
sampled_target_class_paths = random.sample(class_paths[0], balanced_count)
|
3212
|
+
sampled_alt_class_paths = random.sample(alt_class_paths, balanced_count)
|
3160
3213
|
|
3161
|
-
#
|
3214
|
+
# Update class paths
|
3215
|
+
class_paths[0] = sampled_target_class_paths
|
3162
3216
|
class_paths.append(sampled_alt_class_paths)
|
3163
3217
|
|
3164
3218
|
print(f'Generated a list of lists from annotation of {len(class_paths)} classes')
|
spacr/settings.py
CHANGED
@@ -338,7 +338,11 @@ def set_generate_training_dataset_defaults(settings):
|
|
338
338
|
settings.setdefault('dataset_mode','metadata')
|
339
339
|
settings.setdefault('annotation_column','test')
|
340
340
|
settings.setdefault('annotated_classes',[1,2])
|
341
|
-
settings.setdefault('
|
341
|
+
settings.setdefault('class_metadata',['nc','pc'])
|
342
|
+
settings.setdefault('metadata_item_1_name',['nc','pc'])
|
343
|
+
settings.setdefault('metadata_item_1_value',[['c19','c2'],['c3','c4']])
|
344
|
+
settings.setdefault('metadata_item_2_name',['sample1','sample2'])
|
345
|
+
settings.setdefault('metadata_item_2_value',[['r1','r2'],['r3','r4']])
|
342
346
|
settings.setdefault('size',224)
|
343
347
|
settings.setdefault('test_split',0.1)
|
344
348
|
settings.setdefault('class_metadata',[['c1'],['c2']])
|
spacr/utils.py
CHANGED
@@ -1368,6 +1368,7 @@ def annotate_conditions(df, cells=None, cell_loc=None, pathogens=None, pathogen_
|
|
1368
1368
|
lambda x: '_'.join([str(v) for v in [x.get('host_cells'), x.get('pathogen'), x.get('treatment')] if pd.notna(v)]),
|
1369
1369
|
axis=1
|
1370
1370
|
)
|
1371
|
+
df.loc[df['condition'] == '', 'condition'] = pd.NA
|
1371
1372
|
|
1372
1373
|
return df
|
1373
1374
|
|
{spacr-0.3.70.dist-info → spacr-0.3.71.dist-info}/RECORD
@@ -15,7 +15,7 @@ spacr/gui.py,sha256=ARyn9Q_g8HoP-cXh1nzMLVFCKqthY4v2u9yORyaQqQE,8230
|
|
15
15
|
spacr/gui_core.py,sha256=N7R7yvfK_dJhOReM_kW3Ci8Bokhi1OzsxeKqvSGdvV4,41460
|
16
16
|
spacr/gui_elements.py,sha256=EKlvEg_4_je7jciEdR3NTgPrcTraowa2e2RUt-xqd6M,138254
|
17
17
|
spacr/gui_utils.py,sha256=u9RoIOWpAXFEOnUlLpMQZrc1pWSg6omZsJMIhJdRv_g,41211
|
18
|
-
spacr/io.py,sha256=
|
18
|
+
spacr/io.py,sha256=LF6lpphw7GSeuoHQijPykjKNF56wNTFEWFZuDQp3O6Q,145739
|
19
19
|
spacr/logger.py,sha256=lJhTqt-_wfAunCPl93xE65Wr9Y1oIHJWaZMjunHUeIw,1538
|
20
20
|
spacr/measure.py,sha256=2lK-ZcTxLM-MpXV1oZnucRD9iz5aprwahRKw9IEqshg,55085
|
21
21
|
spacr/mediar.py,sha256=FwLvbLQW5LQzPgvJZG8Lw7GniA2vbZx6Jv6vIKu7I5c,14743
|
@@ -23,13 +23,13 @@ spacr/ml.py,sha256=h0IrXoNnyNzZLPYbtZPFI6c4Qeu1gH8R3iUz_O7-ar0,78114
|
|
23
23
|
spacr/openai.py,sha256=5vBZ3Jl2llYcW3oaTEXgdyCB2aJujMUIO5K038z7w_A,1246
|
24
24
|
spacr/plot.py,sha256=gXC7y3uT4sx8KRODeSFWQG_A1CylsuJ5B7HYe_un6so,165177
|
25
25
|
spacr/sequencing.py,sha256=ClUfwPPK6rNUbUuiEkzcwakzVyDKKUMv9ricrxT8qQY,25227
|
26
|
-
spacr/settings.py,sha256=
|
26
|
+
spacr/settings.py,sha256=14PFxw3YK9tUqbaC6BqfbrWk3sN7gyTZAAI8KNy5KBA,80461
|
27
27
|
spacr/sim.py,sha256=1xKhXimNU3ukzIw-3l9cF3Znc_brW8h20yv8fSTzvss,71173
|
28
28
|
spacr/stats.py,sha256=mbhwsyIqt5upsSD346qGjdCw7CFBa0tIS7zHU9e0jNI,9536
|
29
29
|
spacr/submodules.py,sha256=SK8YEs850LAx30YAiwap7ecLpp1_p-bci6H-Or0GLoA,55500
|
30
30
|
spacr/timelapse.py,sha256=KGfG4L4-QnFfgbF7L6C5wL_3gd_rqr05Foje6RsoTBg,39603
|
31
31
|
spacr/toxo.py,sha256=z2nT5aAze3NUIlwnBQcnkARihDwoPfqOgQIVoUluyK0,25087
|
32
|
-
spacr/utils.py,sha256=
|
32
|
+
spacr/utils.py,sha256=LX2Hu6QC-yG9ZVBiM2dkSN9yytCB0eTTRGfExiZzYzE,221940
|
33
33
|
spacr/version.py,sha256=axH5tnGwtgSnJHb5IDhiu4Zjk5GhLyAEDRe-rnaoFOA,409
|
34
34
|
spacr/resources/MEDIAR/.gitignore,sha256=Ff1q9Nme14JUd-4Q3jZ65aeQ5X4uttptssVDgBVHYo8,152
|
35
35
|
spacr/resources/MEDIAR/LICENSE,sha256=yEj_TRDLUfDpHDNM0StALXIt6mLqSgaV2hcCwa6_TcY,1065
|
@@ -152,9 +152,9 @@ spacr/resources/icons/umap.png,sha256=dOLF3DeLYy9k0nkUybiZMe1wzHQwLJFRmgccppw-8b
|
|
152
152
|
spacr/resources/images/plate1_E01_T0001F001L01A01Z01C02.tif,sha256=Tl0ZUfZ_AYAbu0up_nO0tPRtF1BxXhWQ3T3pURBCCRo,7958528
|
153
153
|
spacr/resources/images/plate1_E01_T0001F001L01A02Z01C01.tif,sha256=m8N-V71rA1TT4dFlENNg8s0Q0YEXXs8slIn7yObmZJQ,7958528
|
154
154
|
spacr/resources/images/plate1_E01_T0001F001L01A03Z01C03.tif,sha256=Pbhk7xn-KUP6RSIhJsxQcrHFImBm3GEpLkzx7WOc-5M,7958528
|
155
|
-
spacr-0.3.70.dist-info/LICENSE,sha256=SR-2MeGc6SCM1UORJYyarSWY_A-JaOMFDj7ReSs9tRM,1083
|
156
|
-
spacr-0.3.
|
157
|
-
spacr-0.3.70.dist-info/WHEEL,sha256=HiCZjzuy6Dw0hdX5R3LCFPDmFS4BWl8H-8W39XfmgX4,91
|
158
|
-
spacr-0.3.70.dist-info/entry_points.txt,sha256=BMC0ql9aNNpv8lUZ8sgDLQMsqaVnX5L535gEhKUP5ho,296
|
159
|
-
spacr-0.3.70.dist-info/top_level.txt,sha256=GJPU8FgwRXGzKeut6JopsSRY2R8T3i9lDgya42tLInY,6
|
160
|
-
spacr-0.3.70.dist-info/RECORD,,
|
155
|
+
spacr-0.3.71.dist-info/LICENSE,sha256=SR-2MeGc6SCM1UORJYyarSWY_A-JaOMFDj7ReSs9tRM,1083
|
156
|
+
spacr-0.3.71.dist-info/METADATA,sha256=2qa3TRT-ux_8Gw57GLmzvPJWctnRhfy9H4crTMaqT6Y,6032
|
157
|
+
spacr-0.3.71.dist-info/WHEEL,sha256=HiCZjzuy6Dw0hdX5R3LCFPDmFS4BWl8H-8W39XfmgX4,91
|
158
|
+
spacr-0.3.71.dist-info/entry_points.txt,sha256=BMC0ql9aNNpv8lUZ8sgDLQMsqaVnX5L535gEhKUP5ho,296
|
159
|
+
spacr-0.3.71.dist-info/top_level.txt,sha256=GJPU8FgwRXGzKeut6JopsSRY2R8T3i9lDgya42tLInY,6
|
160
|
+
spacr-0.3.71.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|