PyPI - spacr - Versions diffs - 0.3.70__py3-none-any.whl → 0.3.71__py3-none-any.whl - Mend

spacr 0.3.70__py3-none-any.whl → 0.3.71__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9)

spacr/io.py CHANGED Viewed

@@ -2854,6 +2854,12 @@ def generate_loaders(src, mode='train', image_size=224, batch_size=32, classes=[
     else:
         print(f'mode:{mode} is not valid, use mode = train or test')
         return
+    class_1_path = os.path.join(data_dir, classes[0])
+    class_2_path = os.path.join(data_dir, classes[1])
+    if not os.path.exists(class_1_path) or not os.path.exists(class_2_path):
+        print(f'One or more classes not found in {data_dir}')
+        print (f'Possible class names are {os.listdir(data_dir)}')
     data = spacrDataset(data_dir, classes, transform=transform, shuffle=shuffle, pin_memory=pin_memory)
     num_workers = n_jobs if n_jobs is not None else 0
@@ -2922,7 +2928,8 @@ def generate_training_dataset(settings):
     # Function to get the smallest class size based on the dataset mode
     def get_smallest_class_size(df, settings, dataset_mode):
         if dataset_mode == 'metadata':
-            sizes = [len(df[df['metadata_based_class'] == c]) for c in settings['classes']]
+            sizes = [len(df[df['condition'] == c]) for c in settings['class_metadata']]
+            print(f'Class sizes: {sizes}')
         elif dataset_mode == 'annotation':
             sizes = [len(class_paths) for class_paths in df]
         size = min(sizes)
@@ -2977,15 +2984,29 @@ def generate_training_dataset(settings):
     def metadata_based_selection(db_path, settings):
         class_paths_ls = []
         df = filter_png_list(db_path, settings, tables=settings['tables'])
+        df = annotate_conditions(df,
+                                 cells=None,
+                                 cell_loc=None,
+                                 pathogens=settings['metadata_item_1_name'],
+                                 pathogen_loc=settings['metadata_item_1_value'],
+                                 treatments=settings['metadata_item_2_name'],
+                                 treatment_loc=settings['metadata_item_2_value'])
-        df['metadata_based_class'] = pd.NA
-        for i, class_ in enumerate(settings['classes']):
-            ls = settings['class_metadata'][i]
-            df.loc[df[settings['metadata_type_by']].isin(ls), 'metadata_based_class'] = class_
+        #if settings['metadata_type_by'] == 'condition':
+        df = df.dropna(subset=['condition'])
+        display(df)
+        #df['metadata_based_class'] = pd.NA
+        #for i, class_ in enumerate(settings['classes']):
+        #    ls = settings['class_metadata'][i]
+        #    df.loc[df[settings['metadata_type_by']].isin(ls), 'metadata_based_class'] = class_
         size = get_smallest_class_size(df, settings, 'metadata')
-        for class_ in settings['classes']:
-            class_temp_df = df[df['metadata_based_class'] == class_]
+        for class_ in settings['class_metadata']:
+            class_temp_df = df[df['condition'] == class_]
             print(f'Found {len(class_temp_df)} images for class {class_}')
             class_paths_temp = class_temp_df['png_path'].tolist()
@@ -3001,10 +3022,11 @@ def generate_training_dataset(settings):
     def annotation_based_selection(db_path, dst, settings):
         class_paths_ls = training_dataset_from_annotation(db_path, dst, settings['annotation_column'], annotated_classes=settings['annotated_classes'])
-        #size = get_smallest_class_size(class_paths_ls, settings, 'annotation')
-        #for i, class_paths in enumerate(class_paths_ls):
-        #    if len(class_paths) > size:
-        #        class_paths_ls[i] = random.sample(class_paths, size)
+        return class_paths_ls
+    # Metadata-Annotation-based selection logic
+    def metadata_annotation_based_selection(db_path, dst, settings):
+        class_paths_ls = training_dataset_from_annotation_metadata(db_path, dst, settings['annotation_column'], annotated_classes=settings['annotated_classes'], metadata_type_by=settings['metadata_type_by'], class_metadata=settings['class_metadata'])
         return class_paths_ls
@@ -3053,6 +3075,14 @@ def generate_training_dataset(settings):
         elif settings['dataset_mode'] == 'measurement':
             class_paths_ls = measurement_based_selection(settings, db_path, tables=settings['tables'])
+        elif settings['dataset_mode'] == 'metadata_annotation':
+            class_paths_ls = metadata_annotation_based_selection(db_path, dst, settings)
+        else:
+            print(f"Invalid dataset mode: {settings['dataset_mode']}")
+            print(f"Valid options are: 'annotation', 'metadata', 'measurement', 'metadata_annotation'")
+            return
         if class_path_list is None:
             class_path_list = [[] for _ in range(len(class_paths_ls))]
@@ -3063,7 +3093,7 @@ def generate_training_dataset(settings):
     # Generate and return training and testing directories
     print('class_path_list',len(class_path_list))
-    train_class_dir, test_class_dir = generate_dataset_from_lists(dst, class_data=class_path_list, classes=settings['classes'], test_split=settings['test_split'])
+    train_class_dir, test_class_dir = generate_dataset_from_lists(dst, class_data=class_path_list, classes=settings['class_metadata'], test_split=settings['test_split'])
     return train_class_dir, test_class_dir
@@ -3104,11 +3134,16 @@ def training_dataset_from_annotation(db_path, dst, annotation_column='test', ann
         alt_class_paths = [path for path, annotation in all_paths if annotation != target_class]
         print('Alternative paths available:', len(alt_class_paths))
-        # Randomly sample an equal number of images for the second class
-        sampled_alt_class_paths = random.sample(alt_class_paths, min(count_target_class, len(alt_class_paths)))
-        print(f'Sampled {len(sampled_alt_class_paths)} alternative images for balancing')
+        # Sample the same number of images for both classes
+        balanced_count = min(count_target_class, len(alt_class_paths))
+        print(f'Sampling {balanced_count} images for each class')
+        # Resample target class to match the smaller size
+        sampled_target_class_paths = random.sample(class_paths[0], balanced_count)
+        sampled_alt_class_paths = random.sample(alt_class_paths, balanced_count)
-        # Append this list as the second class
+        # Update class paths
+        class_paths[0] = sampled_target_class_paths
         class_paths.append(sampled_alt_class_paths)
     print(f'Generated a list of lists from annotation of {len(class_paths)} classes')
@@ -3117,7 +3152,7 @@ def training_dataset_from_annotation(db_path, dst, annotation_column='test', ann
     return class_paths
-def training_dataset_from_annotation_v2(db_path, dst, annotation_column='test', annotated_classes=(1, 2)):
+def training_dataset_from_annotation_metadata(db_path, dst, annotation_column='test', annotated_classes=(1, 2), metadata_type_by='column_name', class_metadata=['c1','c2']):
     all_paths = []
     # Connect to the database and retrieve the image paths and annotations
@@ -3125,7 +3160,7 @@ def training_dataset_from_annotation_v2(db_path, dst, annotation_column='test',
     with sqlite3.connect(db_path) as conn:
         cursor = conn.cursor()
         # Retrieve all paths and annotations from the database
-        query = f"SELECT png_path, {annotation_column} FROM png_list"
+        query = f"SELECT png_path, {annotation_column}, row_name, column_name FROM png_list"
         cursor.execute(query)
         while True:
@@ -3137,6 +3172,20 @@ def training_dataset_from_annotation_v2(db_path, dst, annotation_column='test',
     print('Total paths retrieved:', len(all_paths))
+    # Filter all_paths by metadata_type_by and class_metadata
+    filtered_paths = []
+    metadata_index = {'row_name': 2, 'column_name': 3}.get(metadata_type_by, None)
+    if metadata_index is None:
+        raise ValueError(f"Invalid metadata_type_by value: {metadata_type_by}. Must be 'row_name' or 'column_name'. {class_metadata} must be a list formatted as ['c1', 'c2'] or ['r1', 'r2']")
+    for row in all_paths:
+        if row[metadata_index] in class_metadata:
+            filtered_paths.append(row)
+    print('Total filtered paths:', len(filtered_paths))
+    #all_paths = filtered_paths
+    all_paths = [(row[0], row[1]) for row in filtered_paths]
     # Filter paths based on annotated_classes
     class_paths = []
     for class_ in annotated_classes:
@@ -3154,11 +3203,16 @@ def training_dataset_from_annotation_v2(db_path, dst, annotation_column='test',
         alt_class_paths = [path for path, annotation in all_paths if annotation != target_class]
         print('Alternative paths available:', len(alt_class_paths))
-        # Randomly sample an equal number of images for the second class
-        sampled_alt_class_paths = random.sample(alt_class_paths, min(count_target_class, len(alt_class_paths)))
-        print(f'Sampled {len(sampled_alt_class_paths)} alternative images for balancing')
+        # Sample the same number of images for both classes
+        balanced_count = min(count_target_class, len(alt_class_paths))
+        print(f'Sampling {balanced_count} images for each class')
+        # Resample target class to match the smaller size
+        sampled_target_class_paths = random.sample(class_paths[0], balanced_count)
+        sampled_alt_class_paths = random.sample(alt_class_paths, balanced_count)
-        # Append this list as the second class
+        # Update class paths
+        class_paths[0] = sampled_target_class_paths
         class_paths.append(sampled_alt_class_paths)
     print(f'Generated a list of lists from annotation of {len(class_paths)} classes')

spacr/settings.py CHANGED Viewed

@@ -338,7 +338,11 @@ def set_generate_training_dataset_defaults(settings):
     settings.setdefault('dataset_mode','metadata')
     settings.setdefault('annotation_column','test')
     settings.setdefault('annotated_classes',[1,2])
-    settings.setdefault('classes',['nc','pc'])
+    settings.setdefault('class_metadata',['nc','pc'])
+    settings.setdefault('metadata_item_1_name',['nc','pc'])
+    settings.setdefault('metadata_item_1_value',[['c19','c2'],['c3','c4']])
+    settings.setdefault('metadata_item_2_name',['sample1','sample2'])
+    settings.setdefault('metadata_item_2_value',[['r1','r2'],['r3','r4']])
     settings.setdefault('size',224)
     settings.setdefault('test_split',0.1)
     settings.setdefault('class_metadata',[['c1'],['c2']])

spacr/utils.py CHANGED Viewed

@@ -1368,6 +1368,7 @@ def annotate_conditions(df, cells=None, cell_loc=None, pathogens=None, pathogen_
         lambda x: '_'.join([str(v) for v in [x.get('host_cells'), x.get('pathogen'), x.get('treatment')] if pd.notna(v)]),
         axis=1
     )
+    df.loc[df['condition'] == '', 'condition'] = pd.NA
     return df

{spacr-0.3.70.dist-info → spacr-0.3.71.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: spacr
-Version: 0.3.70
+Version: 0.3.71
 Summary: Spatial phenotype analysis of crisp screens (SpaCr)
 Home-page: https://github.com/EinarOlafsson/spacr
 Author: Einar Birnir Olafsson

{spacr-0.3.70.dist-info → spacr-0.3.71.dist-info}/RECORD RENAMED Viewed

@@ -15,7 +15,7 @@ spacr/gui.py,sha256=ARyn9Q_g8HoP-cXh1nzMLVFCKqthY4v2u9yORyaQqQE,8230
 spacr/gui_core.py,sha256=N7R7yvfK_dJhOReM_kW3Ci8Bokhi1OzsxeKqvSGdvV4,41460
 spacr/gui_elements.py,sha256=EKlvEg_4_je7jciEdR3NTgPrcTraowa2e2RUt-xqd6M,138254
 spacr/gui_utils.py,sha256=u9RoIOWpAXFEOnUlLpMQZrc1pWSg6omZsJMIhJdRv_g,41211
-spacr/io.py,sha256=ActzerMS0NC1-MIffGTFBdKcqL1T72d3VjfieTta3O4,143101
+spacr/io.py,sha256=LF6lpphw7GSeuoHQijPykjKNF56wNTFEWFZuDQp3O6Q,145739
 spacr/logger.py,sha256=lJhTqt-_wfAunCPl93xE65Wr9Y1oIHJWaZMjunHUeIw,1538
 spacr/measure.py,sha256=2lK-ZcTxLM-MpXV1oZnucRD9iz5aprwahRKw9IEqshg,55085
 spacr/mediar.py,sha256=FwLvbLQW5LQzPgvJZG8Lw7GniA2vbZx6Jv6vIKu7I5c,14743
@@ -23,13 +23,13 @@ spacr/ml.py,sha256=h0IrXoNnyNzZLPYbtZPFI6c4Qeu1gH8R3iUz_O7-ar0,78114
 spacr/openai.py,sha256=5vBZ3Jl2llYcW3oaTEXgdyCB2aJujMUIO5K038z7w_A,1246
 spacr/plot.py,sha256=gXC7y3uT4sx8KRODeSFWQG_A1CylsuJ5B7HYe_un6so,165177
 spacr/sequencing.py,sha256=ClUfwPPK6rNUbUuiEkzcwakzVyDKKUMv9ricrxT8qQY,25227
-spacr/settings.py,sha256=wZcqdTWaRus27wn9P0EGyftcJn_i0IwlM9pyeCVqxr8,80173
+spacr/settings.py,sha256=14PFxw3YK9tUqbaC6BqfbrWk3sN7gyTZAAI8KNy5KBA,80461
 spacr/sim.py,sha256=1xKhXimNU3ukzIw-3l9cF3Znc_brW8h20yv8fSTzvss,71173
 spacr/stats.py,sha256=mbhwsyIqt5upsSD346qGjdCw7CFBa0tIS7zHU9e0jNI,9536
 spacr/submodules.py,sha256=SK8YEs850LAx30YAiwap7ecLpp1_p-bci6H-Or0GLoA,55500
 spacr/timelapse.py,sha256=KGfG4L4-QnFfgbF7L6C5wL_3gd_rqr05Foje6RsoTBg,39603
 spacr/toxo.py,sha256=z2nT5aAze3NUIlwnBQcnkARihDwoPfqOgQIVoUluyK0,25087
-spacr/utils.py,sha256=zojZlZtGwwDVDY0fgRt5XViVuJLuxadRO1IYctWm_SQ,221885
+spacr/utils.py,sha256=LX2Hu6QC-yG9ZVBiM2dkSN9yytCB0eTTRGfExiZzYzE,221940
 spacr/version.py,sha256=axH5tnGwtgSnJHb5IDhiu4Zjk5GhLyAEDRe-rnaoFOA,409
 spacr/resources/MEDIAR/.gitignore,sha256=Ff1q9Nme14JUd-4Q3jZ65aeQ5X4uttptssVDgBVHYo8,152
 spacr/resources/MEDIAR/LICENSE,sha256=yEj_TRDLUfDpHDNM0StALXIt6mLqSgaV2hcCwa6_TcY,1065
@@ -152,9 +152,9 @@ spacr/resources/icons/umap.png,sha256=dOLF3DeLYy9k0nkUybiZMe1wzHQwLJFRmgccppw-8b
 spacr/resources/images/plate1_E01_T0001F001L01A01Z01C02.tif,sha256=Tl0ZUfZ_AYAbu0up_nO0tPRtF1BxXhWQ3T3pURBCCRo,7958528
 spacr/resources/images/plate1_E01_T0001F001L01A02Z01C01.tif,sha256=m8N-V71rA1TT4dFlENNg8s0Q0YEXXs8slIn7yObmZJQ,7958528
 spacr/resources/images/plate1_E01_T0001F001L01A03Z01C03.tif,sha256=Pbhk7xn-KUP6RSIhJsxQcrHFImBm3GEpLkzx7WOc-5M,7958528
-spacr-0.3.70.dist-info/LICENSE,sha256=SR-2MeGc6SCM1UORJYyarSWY_A-JaOMFDj7ReSs9tRM,1083
-spacr-0.3.70.dist-info/METADATA,sha256=152VlHisIA_E2F9NYFd_pqDqVxqpGZ07qUDzb7BTnPc,6032
-spacr-0.3.70.dist-info/WHEEL,sha256=HiCZjzuy6Dw0hdX5R3LCFPDmFS4BWl8H-8W39XfmgX4,91
-spacr-0.3.70.dist-info/entry_points.txt,sha256=BMC0ql9aNNpv8lUZ8sgDLQMsqaVnX5L535gEhKUP5ho,296
-spacr-0.3.70.dist-info/top_level.txt,sha256=GJPU8FgwRXGzKeut6JopsSRY2R8T3i9lDgya42tLInY,6
-spacr-0.3.70.dist-info/RECORD,,
+spacr-0.3.71.dist-info/LICENSE,sha256=SR-2MeGc6SCM1UORJYyarSWY_A-JaOMFDj7ReSs9tRM,1083
+spacr-0.3.71.dist-info/METADATA,sha256=2qa3TRT-ux_8Gw57GLmzvPJWctnRhfy9H4crTMaqT6Y,6032
+spacr-0.3.71.dist-info/WHEEL,sha256=HiCZjzuy6Dw0hdX5R3LCFPDmFS4BWl8H-8W39XfmgX4,91
+spacr-0.3.71.dist-info/entry_points.txt,sha256=BMC0ql9aNNpv8lUZ8sgDLQMsqaVnX5L535gEhKUP5ho,296
+spacr-0.3.71.dist-info/top_level.txt,sha256=GJPU8FgwRXGzKeut6JopsSRY2R8T3i9lDgya42tLInY,6
+spacr-0.3.71.dist-info/RECORD,,

{spacr-0.3.70.dist-info → spacr-0.3.71.dist-info}/LICENSE RENAMED Viewed

File without changes

{spacr-0.3.70.dist-info → spacr-0.3.71.dist-info}/WHEEL RENAMED Viewed

File without changes

{spacr-0.3.70.dist-info → spacr-0.3.71.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{spacr-0.3.70.dist-info → spacr-0.3.71.dist-info}/top_level.txt RENAMED Viewed

File without changes

spacr 0.3.70__py3-none-any.whl → 0.3.71__py3-none-any.whl

spacr 0.3.70__py3-none-any.whl → 0.3.71__py3-none-any.whl