spacr 0.3.80__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
spacr/utils.py CHANGED
@@ -1372,40 +1372,6 @@ def annotate_conditions(df, cells=None, cell_loc=None, pathogens=None, pathogen_
1372
1372
 
1373
1373
  return df
1374
1374
 
1375
- def _split_data_v1(df, group_by, object_type):
1376
- """
1377
- Splits the input dataframe into numeric and non-numeric parts, groups them by the specified column,
1378
- and returns the grouped dataframes.
1379
-
1380
- Parameters:
1381
- df (pandas.DataFrame): The input dataframe.
1382
- group_by (str): The column name to group the dataframes by.
1383
- object_type (str): The column name to concatenate with 'prcf' to create a new column 'prcfo'.
1384
-
1385
- Returns:
1386
- grouped_numeric (pandas.DataFrame): The grouped dataframe containing numeric columns.
1387
- grouped_non_numeric (pandas.DataFrame): The grouped dataframe containing non-numeric columns.
1388
- """
1389
-
1390
- if 'prcf' not in df.columns:
1391
- try:
1392
- df['prcf'] = df['plate'].astype(str) + '_' + df['row_name'].astype(str) + '_' + df['column_name'].astype(str) + '_' + df['field'].astype(str)
1393
- except Exception as e:
1394
- print(e)
1395
-
1396
- df['prcfo'] = df['prcf'] + '_' + df[object_type]
1397
- df = df.set_index(group_by, inplace=False)
1398
-
1399
- df_numeric = df.select_dtypes(include=np.number)
1400
- df_non_numeric = df.select_dtypes(exclude=np.number)
1401
-
1402
- []
1403
-
1404
- grouped_numeric = df_numeric.groupby(df_numeric.index).mean()
1405
- grouped_non_numeric = df_non_numeric.groupby(df_non_numeric.index).first()
1406
-
1407
- return pd.DataFrame(grouped_numeric), pd.DataFrame(grouped_non_numeric)
1408
-
1409
1375
  def _split_data(df, group_by, object_type):
1410
1376
  """
1411
1377
  Splits the input dataframe into numeric and non-numeric parts, groups them by the specified column,
@@ -5045,19 +5011,22 @@ def generate_cytoplasm_mask(nucleus_mask, cell_mask):
5045
5011
  return cytoplasm_mask
5046
5012
 
5047
5013
  def add_column_to_database(settings):
5048
- """
5049
- Adds a new column to the database table by matching on a common column from the DataFrame.
5050
- If the column already exists in the database, it adds the column with a suffix.
5051
- NaN values will remain as NULL in the database.
5052
-
5053
- Parameters:
5054
- - settings: A dictionary containing the following keys:
5055
- - 'csv_path': Path to the CSV file with the data to be added.
5056
- - 'db_path': Path to the SQLite database (or connection string for other databases).
5057
- - 'table_name': The name of the table in the database.
5058
- - 'update_column': The name of the new column in the DataFrame to add to the database.
5059
- - 'match_column': The common column used to match rows.
5060
- """
5014
+ #"""
5015
+ #Adds a new column to the database table by matching on a common column from the DataFrame.
5016
+ #If the column already exists in the database, it adds the column with a suffix.
5017
+ #NaN values will remain as NULL in the database.
5018
+
5019
+ #Parameters:
5020
+ # settings (dict): A dictionary containing the following keys:
5021
+ # csv_path (str): Path to the CSV file with the data to be added.
5022
+ # db_path (str): Path to the SQLite database (or connection string for other databases).
5023
+ # table_name (str): The name of the table in the database.
5024
+ # update_column (str): The name of the new column in the DataFrame to add to the database.
5025
+ # match_column (str): The common column used to match rows.
5026
+
5027
+ #Returns:
5028
+ # None
5029
+ #"""
5061
5030
 
5062
5031
  # Read the DataFrame from the provided CSV path
5063
5032
  df = pd.read_csv(settings['csv_path'])
@@ -5147,8 +5116,8 @@ def correct_metadata_column_names(df):
5147
5116
  df = df.rename(columns={'plate_name': 'plate'})
5148
5117
  if 'column_name' in df.columns:
5149
5118
  df = df.rename(columns={'column_name': 'column'})
5150
- if 'column_name' in df.columns:
5151
- df = df.rename(columns={'column_name': 'column'})
5119
+ if 'col' in df.columns:
5120
+ df = df.rename(columns={'col': 'column'})
5152
5121
  if 'row_name' in df.columns:
5153
5122
  df = df.rename(columns={'row_name': 'row_name'})
5154
5123
  if 'grna_name' in df.columns:
@@ -5242,3 +5211,129 @@ def group_feature_class(df, feature_groups=['cell', 'cytoplasm', 'nucleus', 'pat
5242
5211
  , ignore_index=True)
5243
5212
 
5244
5213
  return df
5214
+
5215
def delete_intermedeate_files(settings):
    """
    Delete intermediate processing folders once the merged stack is complete.

    Removes the folders 'stack', 'norm_channel_stack' and '1' through '10'
    inside settings['src']. Nothing is deleted unless the 'orig' folder exists
    and the 'merged' folder contains the same non-zero number of entries as
    the 'stack' folder. The 'orig' and 'merged' folders are never removed.

    Parameters:
        settings (dict): Settings dictionary; the 'src' key holds the path of
            the experiment folder.

    Returns:
        None
    """
    # Check for the key before using it so a missing key is reported instead
    # of raising KeyError.
    if 'src' not in settings:
        print("No 'src' key in settings dictionary.")
        return

    src = settings['src']
    if not os.path.exists(src):
        print(f"{src} does not exist.")
        return

    path_orig = os.path.join(src, 'orig')
    if not os.path.exists(path_orig):
        print(f"{path_orig} does not exist.")
        return

    path_stack = os.path.join(src, 'stack')
    merged_stack = os.path.join(src, 'merged')

    def _entry_count(folder):
        # A folder that does not exist is treated as empty.
        return len(os.listdir(folder)) if os.path.isdir(folder) else 0

    # Compare the number of entries in the two folders. Taking len() of the
    # path strings themselves would compare path lengths, which never match.
    stack_len = _entry_count(path_stack)
    merged_len = _entry_count(merged_stack)
    if stack_len == 0 or merged_len != stack_len:
        return

    paths = [path_stack, os.path.join(src, 'norm_channel_stack')]
    paths.extend(os.path.join(src, str(number)) for number in range(1, 11))

    for path in paths:
        if not os.path.exists(path):
            continue
        try:
            shutil.rmtree(path)
            print(f"Deleted {path}")
        except OSError as e:
            # Best effort: report the failure and continue with the rest.
            print(f"{path} could not be deleted: {e}. Delete manually.")
5254
+
5255
def filter_and_save_csv(input_csv, output_csv, column_name, upper_threshold, lower_threshold):
    """
    Keep the rows of a CSV whose value in one column lies outside a band and save them.

    A row is retained when its value in `column_name` is greater than
    `upper_threshold` OR less than `lower_threshold`. The retained rows are
    written to `output_csv` without the index and then displayed.

    Parameters:
        input_csv (str): Path to the input CSV file.
        output_csv (str): Path where the filtered CSV file is written.
        column_name (str): Column the thresholds are applied to.
        upper_threshold (float): Rows with values above this are retained.
        lower_threshold (float): Rows with values below this are retained.

    Returns:
        None
    """
    table = pd.read_csv(input_csv)

    # Build the two tail masks separately, then combine them.
    values = table[column_name]
    above_upper = values > upper_threshold
    below_lower = values < lower_threshold
    filtered_df = table[above_upper | below_lower]

    filtered_df.to_csv(output_csv, index=False)
    display(filtered_df)

    print(f"Filtered DataFrame saved to {output_csv}")
5281
+
5282
def extract_tar_bz2_files(folder_path):
    """
    Extract every .tar.bz2 archive in a folder into a subfolder named after the archive.

    Each archive 'name.tar.bz2' is unpacked into 'folder_path/name'. Extraction
    is best effort: a failure on one archive is printed and the remaining
    archives are still processed.

    Parameters:
        folder_path (str): Path to the folder containing .tar.bz2 files.

    Returns:
        None

    Raises:
        ValueError: If `folder_path` is not an existing directory.
    """
    if not os.path.isdir(folder_path):
        raise ValueError(f"The provided path '{folder_path}' is not a valid folder.")

    # Use the 'data' extraction filter when this Python provides it. It rejects
    # members that would be written outside the destination (absolute paths,
    # '..' components, escaping links). Older interpreters without filter
    # support fall back to the previous unfiltered behaviour.
    extract_kwargs = {'filter': 'data'} if hasattr(tarfile, 'data_filter') else {}

    for file_name in os.listdir(folder_path):
        if not file_name.endswith('.tar.bz2'):
            continue

        file_path = os.path.join(folder_path, file_name)
        # Strip both extensions ('.bz2', then '.tar') to get the subfolder name.
        archive_stem = os.path.splitext(os.path.splitext(file_name)[0])[0]
        extract_folder = os.path.join(folder_path, archive_stem)

        # Create the subfolder for extraction if it doesn't exist
        os.makedirs(extract_folder, exist_ok=True)

        try:
            with tarfile.open(file_path, 'r:bz2') as tar:
                tar.extractall(path=extract_folder, **extract_kwargs)
            print(f"Extracted: {file_name} -> {extract_folder}")
        except Exception as e:
            # Deliberately broad: one bad archive must not stop the batch.
            print(f"Failed to extract {file_name}: {e}")
5308
+
5309
+
5310
def calculate_shortest_distance(df, object1, object2):
    """
    Estimate the edge-to-edge distance between two object types for every row.

    The estimate is the Euclidean distance between the channel-0 weighted
    centroids minus the sum of both objects' radii, where each radius is taken
    as half of the object's maximum Feret diameter. Negative results
    (overlapping objects) are set to 0.

    Parameters:
    - df: Pandas DataFrame containing measurements; it is modified in place.
    - object1: String, name of the first object (e.g., "pathogen")
    - object2: String, name of the second object (e.g., "nucleus")

    Returns:
    - df: The same DataFrame with the added column
      '<object1>_<object2>_shortest_distance'.
    """
    centroid_key = '{}_channel_0_centroid_weighted-{}'
    feret_key = '{}_feret_diameter_max'

    # Per-axis centroid offsets between the two objects
    offset_axis0 = df[centroid_key.format(object1, 0)] - df[centroid_key.format(object2, 0)]
    offset_axis1 = df[centroid_key.format(object1, 1)] - df[centroid_key.format(object2, 1)]
    centroid_gap = np.sqrt(offset_axis0**2 + offset_axis1**2)

    # Half of the maximum Feret diameter stands in for each object's radius
    radius_first = df[feret_key.format(object1)] / 2
    radius_second = df[feret_key.format(object2)] / 2

    edge_gap = centroid_gap - (radius_first + radius_second)

    # Overlapping objects would give a negative gap; floor those at zero
    result_column = f'{object1}_{object2}_shortest_distance'
    df[result_column] = np.maximum(edge_gap, 0)

    return df
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: spacr
3
- Version: 0.3.80
3
+ Version: 0.4.0
4
4
  Summary: Spatial phenotype analysis of crisp screens (SpaCr)
5
5
  Home-page: https://github.com/EinarOlafsson/spacr
6
6
  Author: Einar Birnir Olafsson
@@ -32,6 +32,7 @@ Requires-Dist: monai>=1.3.0
32
32
  Requires-Dist: captum<1.0,>=0.7.0
33
33
  Requires-Dist: seaborn<1.0,>=0.13.2
34
34
  Requires-Dist: matplotlib<4.0,>=3.8.3
35
+ Requires-Dist: matplotlib-venn<2.0,>=1.1
35
36
  Requires-Dist: adjustText<2.0,>=1.2.0
36
37
  Requires-Dist: bottleneck<2.0,>=1.3.6
37
38
  Requires-Dist: numexpr<3.0,>=2.8.4
@@ -1,4 +1,4 @@
1
- spacr/__init__.py,sha256=fvk5JfLpOqUA1W0yPcsVZnS9qbpXFOceFk09LKolVfw,1627
1
+ spacr/__init__.py,sha256=KgkkUyqbm4kh8bwxWeFpp4rilNE0y0RkeylPvS-cTLY,1395
2
2
  spacr/__main__.py,sha256=bkAJJD2kjIqOP-u1kLvct9jQQCeUXzlEjdgitwi1Lm8,75
3
3
  spacr/app_annotate.py,sha256=W9eLPa_LZIvXsXx_-0iDFEU938LBDvRy6prXo0qF4KQ,2533
4
4
  spacr/app_classify.py,sha256=urTP_wlZ58hSyM5a19slYlBxN0PdC-9-ga0hvq8CGWc,165
@@ -9,27 +9,27 @@ spacr/app_sequencing.py,sha256=DjG26jy4cpddnV8WOOAIiExtOe9MleVMY4MFa5uTo5w,157
9
9
  spacr/app_umap.py,sha256=ZWAmf_OsIKbYvolYuWPMYhdlVe-n2CADoJulAizMiEo,153
10
10
  spacr/cellpose.py,sha256=RBHMs2vwXcfkj0xqAULpALyzJYXddSRycgZSzmwI7v0,14755
11
11
  spacr/chat_bot.py,sha256=n3Fhqg3qofVXHmh3H9sUcmfYy9MmgRnr48663MVdY9E,1244
12
- spacr/core.py,sha256=3u2qKmPmTlswvE1uKTF4gi7KQ3sJBHV9No_ysgk7JCU,48487
13
- spacr/deep_spacr.py,sha256=V3diLyxX-0_F5UxhX_b94ROOvL9eoLvnoUmF3nMBqPQ,43250
12
+ spacr/core.py,sha256=lKeqmsVrGQ8cPU_WkoNGNBWrk-gtR1RkRkwDdnJ0u64,48829
13
+ spacr/deep_spacr.py,sha256=AsAbehxPChAq65YVPuFqCt5JabdeO8AwVWZmcmyLeFw,58020
14
14
  spacr/gui.py,sha256=ARyn9Q_g8HoP-cXh1nzMLVFCKqthY4v2u9yORyaQqQE,8230
15
- spacr/gui_core.py,sha256=6NKv8ebqC9Zuior4f2-L1By_Pjtt-RPCrEgnRuE9P54,45576
16
- spacr/gui_elements.py,sha256=I_eSYF1RkAG0zsa-ZiQT0EaaVvUpucULCuWCowO6t4E,138248
17
- spacr/gui_utils.py,sha256=u9RoIOWpAXFEOnUlLpMQZrc1pWSg6omZsJMIhJdRv_g,41211
18
- spacr/io.py,sha256=LF6lpphw7GSeuoHQijPykjKNF56wNTFEWFZuDQp3O6Q,145739
15
+ spacr/gui_core.py,sha256=U0A7waKgWq_Es9fMwcZbXUZYGzCqt2bgfY3HbxiFXnw,47466
16
+ spacr/gui_elements.py,sha256=HmITDncklKwtdFhxLhtYXOwndsRfgwWIPVi83VlXHB4,146419
17
+ spacr/gui_utils.py,sha256=0rDF23BUGcmjSJvfCiLoxhlGJdHkio1jTxyCzrMXr-g,41211
18
+ spacr/io.py,sha256=oqJwDJWksVdWE0bRAwytTOsjlL0o-J9lr_pQaw2cQ4Y,138288
19
19
  spacr/logger.py,sha256=lJhTqt-_wfAunCPl93xE65Wr9Y1oIHJWaZMjunHUeIw,1538
20
- spacr/measure.py,sha256=2lK-ZcTxLM-MpXV1oZnucRD9iz5aprwahRKw9IEqshg,55085
20
+ spacr/measure.py,sha256=jmOnLBudq3TuY723Cfo1EJBn67P6rlEvL6I-2FSkUgI,55315
21
21
  spacr/mediar.py,sha256=FwLvbLQW5LQzPgvJZG8Lw7GniA2vbZx6Jv6vIKu7I5c,14743
22
- spacr/ml.py,sha256=x19S8OsR5omb8e6MU9I99Nz95J_QvM5siyk-zaAU3p8,82866
22
+ spacr/ml.py,sha256=MrIAtUUxMOibWVL1SjCUnYlizawCp3l3SeY4Y9yEsPw,97251
23
23
  spacr/openai.py,sha256=5vBZ3Jl2llYcW3oaTEXgdyCB2aJujMUIO5K038z7w_A,1246
24
- spacr/plot.py,sha256=gXC7y3uT4sx8KRODeSFWQG_A1CylsuJ5B7HYe_un6so,165177
24
+ spacr/plot.py,sha256=p4AY5EWmVNPMqFD0I4NARJkvJA5gTErbDDYqDMU18pc,169479
25
25
  spacr/sequencing.py,sha256=ClUfwPPK6rNUbUuiEkzcwakzVyDKKUMv9ricrxT8qQY,25227
26
- spacr/settings.py,sha256=xTFTD04H8uXRJ5m4Pnr4Znhx0f_FxdgStMPXol3apxM,80888
26
+ spacr/settings.py,sha256=e6QWZ5o6Im02_t-3GQh3H4kksMTQmIZ1Rbh3BeQmmsw,84000
27
27
  spacr/sim.py,sha256=1xKhXimNU3ukzIw-3l9cF3Znc_brW8h20yv8fSTzvss,71173
28
28
  spacr/stats.py,sha256=mbhwsyIqt5upsSD346qGjdCw7CFBa0tIS7zHU9e0jNI,9536
29
- spacr/submodules.py,sha256=SK8YEs850LAx30YAiwap7ecLpp1_p-bci6H-Or0GLoA,55500
29
+ spacr/submodules.py,sha256=e_JNMGBHakEra_2pstHFmgI1NhF9TybfvTNDAegVsl0,67626
30
30
  spacr/timelapse.py,sha256=KGfG4L4-QnFfgbF7L6C5wL_3gd_rqr05Foje6RsoTBg,39603
31
- spacr/toxo.py,sha256=z2nT5aAze3NUIlwnBQcnkARihDwoPfqOgQIVoUluyK0,25087
32
- spacr/utils.py,sha256=SiUcctyUETEX_GZ-Nflba5whZiEjJynncaH-xcZPK1k,222242
31
+ spacr/toxo.py,sha256=TmuhejSIPLBvsgeblsUgSvBFCR1gOkApyTKidooJ5Us,26044
32
+ spacr/utils.py,sha256=m4MZiNbmQpZD78eu5DNmxk4cU_tk2VeVLCe_R_7YShY,226287
33
33
  spacr/version.py,sha256=axH5tnGwtgSnJHb5IDhiu4Zjk5GhLyAEDRe-rnaoFOA,409
34
34
  spacr/resources/MEDIAR/.gitignore,sha256=Ff1q9Nme14JUd-4Q3jZ65aeQ5X4uttptssVDgBVHYo8,152
35
35
  spacr/resources/MEDIAR/LICENSE,sha256=yEj_TRDLUfDpHDNM0StALXIt6mLqSgaV2hcCwa6_TcY,1065
@@ -152,9 +152,9 @@ spacr/resources/icons/umap.png,sha256=dOLF3DeLYy9k0nkUybiZMe1wzHQwLJFRmgccppw-8b
152
152
  spacr/resources/images/plate1_E01_T0001F001L01A01Z01C02.tif,sha256=Tl0ZUfZ_AYAbu0up_nO0tPRtF1BxXhWQ3T3pURBCCRo,7958528
153
153
  spacr/resources/images/plate1_E01_T0001F001L01A02Z01C01.tif,sha256=m8N-V71rA1TT4dFlENNg8s0Q0YEXXs8slIn7yObmZJQ,7958528
154
154
  spacr/resources/images/plate1_E01_T0001F001L01A03Z01C03.tif,sha256=Pbhk7xn-KUP6RSIhJsxQcrHFImBm3GEpLkzx7WOc-5M,7958528
155
- spacr-0.3.80.dist-info/LICENSE,sha256=SR-2MeGc6SCM1UORJYyarSWY_A-JaOMFDj7ReSs9tRM,1083
156
- spacr-0.3.80.dist-info/METADATA,sha256=Q0YV4N-C8XyUHH8HFW_k9ryAftcU8v9oMxNhgzvU8cA,6032
157
- spacr-0.3.80.dist-info/WHEEL,sha256=HiCZjzuy6Dw0hdX5R3LCFPDmFS4BWl8H-8W39XfmgX4,91
158
- spacr-0.3.80.dist-info/entry_points.txt,sha256=BMC0ql9aNNpv8lUZ8sgDLQMsqaVnX5L535gEhKUP5ho,296
159
- spacr-0.3.80.dist-info/top_level.txt,sha256=GJPU8FgwRXGzKeut6JopsSRY2R8T3i9lDgya42tLInY,6
160
- spacr-0.3.80.dist-info/RECORD,,
155
+ spacr-0.4.0.dist-info/LICENSE,sha256=SR-2MeGc6SCM1UORJYyarSWY_A-JaOMFDj7ReSs9tRM,1083
156
+ spacr-0.4.0.dist-info/METADATA,sha256=uloYFy8DrWtHZvy5W47jBRNUEF_SeX4nLZ0OL2wMizc,6072
157
+ spacr-0.4.0.dist-info/WHEEL,sha256=HiCZjzuy6Dw0hdX5R3LCFPDmFS4BWl8H-8W39XfmgX4,91
158
+ spacr-0.4.0.dist-info/entry_points.txt,sha256=BMC0ql9aNNpv8lUZ8sgDLQMsqaVnX5L535gEhKUP5ho,296
159
+ spacr-0.4.0.dist-info/top_level.txt,sha256=GJPU8FgwRXGzKeut6JopsSRY2R8T3i9lDgya42tLInY,6
160
+ spacr-0.4.0.dist-info/RECORD,,
File without changes