spacr 0.0.80__py3-none-any.whl → 0.0.82__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
spacr/core.py CHANGED
@@ -77,7 +77,7 @@ def analyze_plaques(folder):
 def train_cellpose(settings):
 
     from .io import _load_normalized_images_and_labels, _load_images_and_labels
-    from .utils import resize_images_and_labels
+    #from .utils import resize_images_and_labels
 
     img_src = settings['img_src']
     mask_src = os.path.join(img_src, 'masks')
@@ -99,7 +99,6 @@ def train_cellpose(settings):
     Signal_to_noise = settings.setdefault( 'Signal_to_noise', 10)
     verbose = settings.setdefault( 'verbose', False)
 
-
     channels = settings.setdefault( 'channels', [0,0])
     normalize = settings.setdefault( 'normalize', True)
     percentiles = settings.setdefault( 'percentiles', None)
@@ -119,7 +118,7 @@ def train_cellpose(settings):
     test_img_src = os.path.join(os.path.dirname(img_src), 'test')
     test_mask_src = os.path.join(test_img_src, 'mask')
 
-    test_images, test_masks, test_image_names, test_mask_names = None,None,None,None,
+    test_images, test_masks, test_image_names, test_mask_names = None,None,None,None
     print(settings)
 
     if from_scratch:
@@ -147,13 +146,13 @@ def train_cellpose(settings):
 
     image_files = [os.path.join(img_src, f) for f in os.listdir(img_src) if f.endswith('.tif')]
     label_files = [os.path.join(mask_src, f) for f in os.listdir(mask_src) if f.endswith('.tif')]
-    images, masks, image_names, mask_names = _load_normalized_images_and_labels(image_files, label_files, channels, percentiles, circular, invert, verbose, remove_background, background, Signal_to_noise)
+    images, masks, image_names, mask_names = _load_normalized_images_and_labels(image_files, label_files, channels, percentiles, circular, invert, verbose, remove_background, background, Signal_to_noise, target_height, target_width)
     images = [np.squeeze(img) if img.shape[-1] == 1 else img for img in images]
 
     if test:
         test_image_files = [os.path.join(test_img_src, f) for f in os.listdir(test_img_src) if f.endswith('.tif')]
         test_label_files = [os.path.join(test_mask_src, f) for f in os.listdir(test_mask_src) if f.endswith('.tif')]
-        test_images, test_masks, test_image_names, test_mask_names = _load_normalized_images_and_labels(test_image_files, test_label_files, channels, percentiles, circular, invert, verbose, remove_background, background, Signal_to_noise)
+        test_images, test_masks, test_image_names, test_mask_names = _load_normalized_images_and_labels(test_image_files, test_label_files, channels, percentiles, circular, invert, verbose, remove_background, background, Signal_to_noise, target_height, target_width)
         test_images = [np.squeeze(img) if img.shape[-1] == 1 else img for img in test_images]
 
     else:
@@ -164,8 +163,8 @@ def train_cellpose(settings):
         test_images, test_masks, test_image_names, test_mask_names = _load_images_and_labels(img_src=test_img_src, mask_src=test_mask_src, circular=circular, invert=invert)
         test_images = [np.squeeze(img) if img.shape[-1] == 1 else img for img in test_images]
 
-    if resize:
-        images, masks = resize_images_and_labels(images, masks, target_height, target_width, show_example=True)
+    #if resize:
+    #    images, masks = resize_images_and_labels(images, masks, target_height, target_width, show_example=True)
 
     if model_type == 'cyto':
         cp_channels = [0,1]
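
Note: with this change, resizing moves out of `train_cellpose` and into `_load_normalized_images_and_labels` via the new `target_height`/`target_width` arguments, replacing the old `resize_images_and_labels` step. A minimal, illustrative sketch of calling `train_cellpose` follows; the keys shown are the ones referenced in this diff, while the values (paths, sizes) and the assumption that `target_height`/`target_width` are read from the same settings dict are placeholders, not values taken from the package.

    # Illustrative only: keys mirror those referenced in the hunks above,
    # values are placeholders; 'target_height'/'target_width' are assumed
    # to be supplied through settings like the other options.
    from spacr.core import train_cellpose

    settings = {
        'img_src': '/data/cellpose_training',   # expects a 'masks' subfolder
        'model_type': 'cyto',
        'channels': [0, 0],
        'normalize': True,
        'percentiles': None,
        'Signal_to_noise': 10,
        'verbose': False,
        'target_height': 512,                   # now forwarded to the loader
        'target_width': 512,
    }
    # train_cellpose(settings)  # uncomment once img_src points at real data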
spacr/io.py CHANGED
@@ -87,7 +87,7 @@ def _load_images_and_labels(image_files, label_files, circular=False, invert=Fal
     print(f'image shape: {images[0].shape}, image type: images[0].shape mask shape: {labels[0].shape}, image type: labels[0].shape')
     return images, labels, image_names, label_names
 
-def _load_normalized_images_and_labels(image_files, label_files, channels=None, percentiles=None, circular=False, invert=False, visualize=False, remove_background=False, background=0, Signal_to_noise=10):
+def _load_normalized_images_and_labels_v1(image_files, label_files, channels=None, percentiles=None, circular=False, invert=False, visualize=False, remove_background=False, background=0, Signal_to_noise=10):
 
     from .plot import normalize_and_visualize
     from .utils import invert_image, apply_mask
@@ -182,6 +182,113 @@ def _load_normalized_images_and_labels(image_files, label_files, channels=None,
 
     return normalized_images, labels, image_names, label_names
 
+def _load_normalized_images_and_labels(image_files, label_files, channels=None, percentiles=None, circular=False, invert=False, visualize=False, remove_background=False, background=0, Signal_to_noise=10, target_height=None, target_width=None):
+
+    from .plot import normalize_and_visualize, plot_resize
+    from .utils import invert_image, apply_mask
+    from skimage.transform import resize as resizescikit
+
+    signal_thresholds = background * Signal_to_noise
+    lower_percentile = 2
+
+    images = []
+    labels = []
+
+    num_channels = 4
+    percentiles_1 = [[] for _ in range(num_channels)]
+    percentiles_99 = [[] for _ in range(num_channels)]
+
+    image_names = [os.path.basename(f) for f in image_files]
+    image_dir = os.path.dirname(image_files[0])
+
+    if label_files is not None:
+        label_names = [os.path.basename(f) for f in label_files]
+        label_dir = os.path.dirname(label_files[0])
+
+    # Load, normalize, and resize images
+    for i, img_file in enumerate(image_files):
+        image = cellpose.io.imread(img_file)
+        if invert:
+            image = invert_image(image)
+        if circular:
+            image = apply_mask(image, output_value=0)
+
+        # If specific channels are specified, select them
+        if channels is not None and image.ndim == 3:
+            image = image[..., channels]
+
+        if remove_background:
+            image[image < background] = 0
+
+        if image.ndim < 3:
+            image = np.expand_dims(image, axis=-1)
+
+        if percentiles is None:
+            for c in range(image.shape[-1]):
+                p1 = np.percentile(image[..., c], lower_percentile)
+                percentiles_1[c].append(p1)
+                for percentile in [98, 99, 99.9, 99.99, 99.999]:
+                    p = np.percentile(image[..., c], percentile)
+                    if p > signal_thresholds:
+                        percentiles_99[c].append(p)
+                        break
+
+        # Resize image
+        if target_height is not None and target_width is not None:
+            if image.ndim == 2:
+                image_shape = (target_height, target_width)
+            elif image.ndim == 3:
+                image_shape = (target_height, target_width, image.shape[-1])
+
+            image = resizescikit(image, image_shape, preserve_range=True, anti_aliasing=True).astype(image.dtype)
+
+        images.append(image)
+
+    if percentiles is None:
+        # Calculate average percentiles for normalization
+        avg_p1 = [np.mean(p) for p in percentiles_1]
+        avg_p99 = [np.mean(p) if len(p) > 0 else np.mean(percentiles_1[i]) for i, p in enumerate(percentiles_99)]
+
+        print(f'Average 1st percentiles: {avg_p1}, Average 99th percentiles: {avg_p99}')
+
+        normalized_images = []
+        for image in images:
+            normalized_image = np.zeros_like(image, dtype=np.float32)
+            for c in range(image.shape[-1]):
+                normalized_image[..., c] = rescale_intensity(image[..., c], in_range=(avg_p1[c], avg_p99[c]), out_range=(0, 1))
+            normalized_images.append(normalized_image)
+            if visualize:
+                normalize_and_visualize(image, normalized_image, title=f"Channel {c+1} Normalized")
+    else:
+        normalized_images = []
+        for image in images:
+            normalized_image = np.zeros_like(image, dtype=np.float32)
+            for c in range(image.shape[-1]):
+                low_p = np.percentile(image[..., c], percentiles[0])
+                high_p = np.percentile(image[..., c], percentiles[1])
+                normalized_image[..., c] = rescale_intensity(image[..., c], in_range=(low_p, high_p), out_range=(0, 1))
+            normalized_images.append(normalized_image)
+            if visualize:
+                normalize_and_visualize(image, normalized_image, title=f"Channel {c+1} Normalized")
+
+    if label_files is not None:
+        for lbl_file in label_files:
+            label = cellpose.io.imread(lbl_file)
+            # Resize label
+            if target_height is not None and target_width is not None:
+                label = resizescikit(label, (target_height, target_width), order=0, preserve_range=True, anti_aliasing=False).astype(label.dtype)
+            labels.append(label)
+    else:
+        label_names = []
+        label_dir = None
+
+    print(f'Loaded and normalized {len(normalized_images)} images and {len(labels)} labels from {image_dir} and {label_dir}')
+
+    if visualize and images and labels:
+        plot_resize(images, normalized_images, labels, labels)
+
+    return normalized_images, labels, image_names, label_names
+
 class CombineLoaders:
 
     """
spacr/sequencing.py CHANGED
@@ -7,10 +7,18 @@ import matplotlib.pyplot as plt
7
7
  import seaborn as sns
8
8
  from Bio import pairwise2
9
9
  import statsmodels.api as sm
10
- import statsmodels.formula.api as smf
10
+ from statsmodels.regression.mixed_linear_model import MixedLM
11
+ from statsmodels.stats.outliers_influence import variance_inflation_factor
11
12
  from scipy.stats import gmean
12
13
  from difflib import SequenceMatcher
13
14
  from collections import Counter
15
+ from IPython.display import display
16
+
17
+ from sklearn.linear_model import LinearRegression, Lasso, Ridge
18
+ from sklearn.preprocessing import FunctionTransformer, MinMaxScaler
19
+
20
+ from scipy.stats import shapiro
21
+ from patsy import dmatrices
14
22
 
15
23
  def analyze_reads(settings):
16
24
  """
@@ -28,7 +36,7 @@ def analyze_reads(settings):
28
36
  None
29
37
  """
30
38
 
31
- def save_chunk_to_hdf5(output_file_path, data_chunk, chunk_counter):
39
+ def save_chunk_to_hdf5_v1(output_file_path, data_chunk, chunk_counter):
32
40
  """
33
41
  Save a data chunk to an HDF5 file.
34
42
 
@@ -44,6 +52,28 @@ def analyze_reads(settings):
44
52
  with pd.HDFStore(output_file_path, mode='a', complevel=5, complib='blosc') as store:
45
53
  store.put(f'reads/chunk_{chunk_counter}', df, format='table', append=True)
46
54
 
55
+ def save_chunk_to_hdf5(output_file_path, data_chunk, chunk_counter):
56
+ """
57
+ Save a data chunk to an HDF5 file.
58
+
59
+ Parameters:
60
+ - output_file_path (str): The path to the output HDF5 file.
61
+ - data_chunk (list): The data chunk to be saved.
62
+ - chunk_counter (int): The counter for the current chunk.
63
+
64
+ Returns:
65
+ None
66
+ """
67
+ df = pd.DataFrame(data_chunk, columns=['combined_read', 'grna', 'plate_row', 'column', 'sample'])
68
+ with pd.HDFStore(output_file_path, mode='a', complevel=5, complib='blosc') as store:
69
+ store.put(
70
+ f'reads/chunk_{chunk_counter}',
71
+ df,
72
+ format='table',
73
+ append=True,
74
+ min_itemsize={'combined_read': 300, 'grna': 50, 'plate_row': 20, 'column': 20, 'sample': 50}
75
+ )
76
+
47
77
  def reverse_complement(seq):
48
78
  """
49
79
  Returns the reverse complement of a DNA sequence.
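
Note: the rewritten `save_chunk_to_hdf5` pins the string column widths with `min_itemsize`. With `format='table'` and `append=True`, the first chunk written to a `pd.HDFStore` otherwise fixes each string column's width, and a later chunk containing a longer read would fail to append. An illustrative standalone sketch (file name and row values are made up; requires the `tables` package):

    import pandas as pd

    df = pd.DataFrame({'combined_read': ['ACGT' * 10], 'grna': ['TGGT1_220950_1'],
                       'plate_row': ['p1_r1'], 'column': ['c2'], 'sample': ['EO1']})
    with pd.HDFStore('reads.h5', mode='a', complevel=5, complib='blosc') as store:
        # reserve generous widths up front so later, longer strings still append
        store.put('reads/chunk_0', df, format='table', append=True,
                  min_itemsize={'combined_read': 300, 'grna': 50,
                                'plate_row': 20, 'column': 20, 'sample': 50})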
@@ -139,7 +169,7 @@ def analyze_reads(settings):
139
169
  best_alignment = alignments[0]
140
170
  return best_alignment
141
171
 
142
- def combine_reads(samples_dict, src, chunk_size, barecode_length, upstream, downstream):
172
+ def combine_reads(samples_dict, src, chunk_size, barecode_length_1, barecode_length_2, upstream, downstream):
143
173
  """
144
174
  Combine reads from paired-end sequencing files and save the combined reads to a new file.
145
175
 
@@ -186,7 +216,7 @@ def analyze_reads(settings):
186
216
  r1_size_est = os.path.getsize(r1_path) // (avg_read_length * 4) if r1_path else 0
187
217
  r2_size_est = os.path.getsize(r2_path) // (avg_read_length * 4) if r2_path else 0
188
218
  max_size = max(r1_size_est, r2_size_est) * 10
189
-
219
+ test10 =0
190
220
  with tqdm(total=max_size, desc=f"Processing {sample}") as pbar:
191
221
  total_length_processed = 0
192
222
  read_count = 0
@@ -229,12 +259,26 @@ def analyze_reads(settings):
229
259
  combo_split_index_1 = read_combo.find(upstream)
230
260
  combo_split_index_2 = read_combo.find(downstream)
231
261
 
232
- barcode_1 = read_combo[combo_split_index_1 - barecode_length:combo_split_index_1]
262
+ barcode_1 = read_combo[combo_split_index_1 - barecode_length_1:combo_split_index_1]
233
263
  grna = read_combo[combo_split_index_1 + len(upstream):combo_split_index_2]
234
- barcode_2 = read_combo[combo_split_index_2 + len(downstream):combo_split_index_2 + len(downstream) + barecode_length]
264
+ barcode_2 = read_combo[combo_split_index_2 + len(downstream):combo_split_index_2 + len(downstream) + barecode_length_2]
235
265
  barcode_2 = reverse_complement(barcode_2)
236
266
  data_chunk.append((read_combo, grna, barcode_1, barcode_2, sample))
237
267
 
268
+ if settings['test']:
269
+ if read_count % 1000 == 0:
270
+ print(f"Read count: {read_count}")
271
+ print(f"Read 1: {r1_read_rc}")
272
+ print(f"Read 2: {r2_read}")
273
+ print(f"Read combo: {read_combo}")
274
+ print(f"Barcode 1: {barcode_1}")
275
+ print(f"gRNA: {grna}")
276
+ print(f"Barcode 2: {barcode_2}")
277
+ print()
278
+ test10 += 1
279
+ if test10 == 10:
280
+ break
281
+
238
282
  read_count += 1
239
283
  total_length_processed += len(r1_read) + len(r2_read)
240
284
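
Note: the two barcodes are now sliced with separate lengths (`barecode_length_1` for the barcode upstream of the gRNA, `barecode_length_2` for the reverse-complemented barcode downstream). A standalone sketch of that slicing with made-up sequences (the anchor sequences are the defaults set later in this diff; the read itself is invented):

    def reverse_complement(seq):
        comp = {'A': 'T', 'T': 'A', 'G': 'C', 'C': 'G', 'N': 'N'}
        return ''.join(comp[b] for b in reversed(seq))

    upstream = 'CTTCTGGTAAATGGGGATGTCAAGTT'
    downstream = 'GTTTAAGAGCTATGCTGGAAACAGCAG'
    barcode_len_1, barcode_len_2 = 8, 7

    # toy combined read: [barcode_1][upstream][gRNA][downstream][barcode_2]
    read_combo = 'AAGGCCTT' + upstream + 'ACGTACGTACGTACGTACGT' + downstream + 'TTCCGGA'
    i1 = read_combo.find(upstream)
    i2 = read_combo.find(downstream)
    barcode_1 = read_combo[i1 - barcode_len_1:i1]
    grna = read_combo[i1 + len(upstream):i2]
    barcode_2 = reverse_complement(read_combo[i2 + len(downstream):i2 + len(downstream) + barcode_len_2])
    print(barcode_1, grna, barcode_2)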
 
@@ -261,13 +305,15 @@ def analyze_reads(settings):
261
305
  qc_df = pd.DataFrame([qc])
262
306
  qc_df.to_csv(qc_file_path, index=False)
263
307
 
264
- settings.setdefault('upstream', 'CTTCTGGTAAATGGGGATGTCAAGTT')
265
- settings.setdefault('downstream', 'GTTTAAGAGCTATGCTGGAAACAGCA')
266
- settings.setdefault('barecode_length', 8)
308
+ settings.setdefault('upstream', 'CTTCTGGTAAATGGGGATGTCAAGTT')
309
+ settings.setdefault('downstream', 'GTTTAAGAGCTATGCTGGAAACAGCAG') #This is the reverse complement of the column primer starting from the end #TGCTGTTTAAGAGCTATGCTGGAAACAGCA
310
+ settings.setdefault('barecode_length_1', 8)
311
+ settings.setdefault('barecode_length_2', 7)
267
312
  settings.setdefault('chunk_size', 1000000)
313
+ settings.setdefault('test', False)
268
314
 
269
315
  samples_dict = parse_gz_files(settings['src'])
270
- combine_reads(samples_dict, settings['src'], settings['chunk_size'], settings['barecode_length'], settings['upstream'], settings['downstream'])
316
+ combine_reads(samples_dict, settings['src'], settings['chunk_size'], settings['barecode_length_1'], settings['barecode_length_2'], settings['upstream'], settings['downstream'])
271
317
 
272
318
  def map_barcodes(h5_file_path, settings={}):
273
319
  """
@@ -280,27 +326,20 @@ def map_barcodes(h5_file_path, settings={}):
280
326
  Returns:
281
327
  None
282
328
  """
283
- def get_read_qc(df, df_cleaned):
329
+ def get_read_qc(df, settings):
284
330
  """
285
331
  Calculate quality control metrics for sequencing reads.
286
332
 
287
333
  Parameters:
288
334
  - df: DataFrame containing the sequencing reads.
289
- - df_cleaned: DataFrame containing the cleaned sequencing reads.
290
335
 
291
336
  Returns:
292
- - qc_dict: Dictionary containing the following quality control metrics:
293
- - 'reads': Total number of reads.
294
- - 'cleaned_reads': Total number of cleaned reads.
295
- - 'NaN_grna': Number of reads with missing 'grna_metadata'.
296
- - 'NaN_plate_row': Number of reads with missing 'plate_row_metadata'.
297
- - 'NaN_column': Number of reads with missing 'column_metadata'.
298
- - 'NaN_plate': Number of reads with missing 'plate_metadata'.
299
- - 'unique_grna': Counter object containing the count of unique 'grna_metadata' values.
300
- - 'unique_plate_row': Counter object containing the count of unique 'plate_row_metadata' values.
301
- - 'unique_column': Counter object containing the count of unique 'column_metadata' values.
302
- - 'unique_plate': Counter object containing the count of unique 'plate_metadata' values.
337
+ - df_cleaned: DataFrame containing the cleaned sequencing reads.
338
+ - qc_dict: Dictionary containing the quality control metrics.
303
339
  """
340
+
341
+ df_cleaned = df.dropna()
342
+
304
343
  qc_dict = {}
305
344
  qc_dict['reads'] = len(df)
306
345
  qc_dict['cleaned_reads'] = len(df_cleaned)
@@ -312,9 +351,56 @@ def map_barcodes(h5_file_path, settings={}):
312
351
  qc_dict['unique_plate_row'] = Counter(df['plate_row_metadata'].dropna().tolist())
313
352
  qc_dict['unique_column'] = Counter(df['column_metadata'].dropna().tolist())
314
353
  qc_dict['unique_plate'] = Counter(df['plate_metadata'].dropna().tolist())
354
+
355
+ # Calculate control error rates using cleaned DataFrame
356
+ total_pc_non_nan = df_cleaned[(df_cleaned['column_metadata'] == settings['pc_loc'])].shape[0]
357
+ total_nc_non_nan = df_cleaned[(df_cleaned['column_metadata'] == settings['nc_loc'])].shape[0]
315
358
 
316
- return qc_dict
317
-
359
+ pc_count_pc = df_cleaned[(df_cleaned['column_metadata'] == settings['pc_loc']) & (df_cleaned['grna_metadata'] == settings['pc'])].shape[0]
360
+ nc_count_nc = df_cleaned[(df_cleaned['column_metadata'] == settings['nc_loc']) & (df_cleaned['grna_metadata'] == settings['nc'])].shape[0]
361
+
362
+ pc_error_count = df_cleaned[(df_cleaned['column_metadata'] == settings['pc_loc']) & (df_cleaned['grna_metadata'] != settings['pc'])].shape[0]
363
+ nc_error_count = df_cleaned[(df_cleaned['column_metadata'] == settings['nc_loc']) & (df_cleaned['grna_metadata'] != settings['nc'])].shape[0]
364
+
365
+ pc_in_nc_loc_count = df_cleaned[(df_cleaned['column_metadata'] == settings['nc_loc']) & (df_cleaned['grna_metadata'] == settings['pc'])].shape[0]
366
+ nc_in_pc_loc_count = df_cleaned[(df_cleaned['column_metadata'] == settings['pc_loc']) & (df_cleaned['grna_metadata'] == settings['nc'])].shape[0]
367
+
368
+ # Collect QC metrics into a dictionary
369
+ # PC
370
+ qc_dict['pc_total_count'] = total_pc_non_nan
371
+ qc_dict['pc_count_pc'] = pc_count_pc
372
+ qc_dict['nc_count_pc'] = pc_in_nc_loc_count
373
+ qc_dict['pc_error_count'] = pc_error_count
374
+ # NC
375
+ qc_dict['nc_total_count'] = total_nc_non_nan
376
+ qc_dict['nc_count_nc'] = nc_count_nc
377
+ qc_dict['pc_count_nc'] = nc_in_pc_loc_count
378
+ qc_dict['nc_error_count'] = nc_error_count
379
+
380
+ return df_cleaned, qc_dict
381
+
382
+ def get_per_row_qc(df, settings):
383
+ """
384
+ Calculate quality control metrics for each unique row in the control columns.
385
+
386
+ Parameters:
387
+ - df: DataFrame containing the sequencing reads.
388
+ - settings: Dictionary containing the settings for control values.
389
+
390
+ Returns:
391
+ - dict: Dictionary containing the quality control metrics for each unique row.
392
+ """
393
+ qc_dict_per_row = {}
394
+ unique_rows = df['plate_row_metadata'].dropna().unique().tolist()
395
+ unique_rows = list(set(unique_rows)) # Remove duplicates
396
+
397
+ for row in unique_rows:
398
+ df_row = df[(df['plate_row_metadata'] == row)]
399
+ _, qc_dict_row = get_read_qc(df_row, settings)
400
+ qc_dict_per_row[row] = qc_dict_row
401
+
402
+ return qc_dict_per_row
403
+
318
404
  def mapping_dicts(df, settings):
319
405
  """
320
406
  Maps the values in the DataFrame columns to corresponding metadata using dictionaries.
@@ -339,22 +425,94 @@ def map_barcodes(h5_file_path, settings={}):
339
425
  df['plate_row_metadata'] = df['plate_row'].map(plate_row_dict)
340
426
  df['column_metadata'] = df['column'].map(column_dict)
341
427
  df['plate_metadata'] = df['sample'].map(plate_dict)
342
-
428
+
343
429
  return df
344
430
 
345
- settings.setdefault('grna', '/home/carruthers/Documents/grna_barecodes.csv')
346
- settings.setdefault('barcodes', '/home/carruthers/Documents/SCREEN_BARECODES.csv')
431
+ def filter_combinations(df, settings):
432
+ """
433
+ Takes the combination counts Data Frame, filters the rows based on specific conditions,
434
+ and removes rows with a count lower than the highest value of max_count_c1 and max_count_c2.
435
+
436
+ Args:
437
+ combination_counts_file_path (str): The file path to the CSV file containing the combination counts.
438
+ pc (str, optional): The positive control sequence. Defaults to 'TGGT1_220950_1'.
439
+ nc (str, optional): The negative control sequence. Defaults to 'TGGT1_233460_4'.
440
+
441
+ Returns:
442
+ pd.DataFrame: The filtered DataFrame.
443
+ """
444
+
445
+ pc = settings['pc']
446
+ nc = settings['nc']
447
+ pc_loc = settings['pc_loc']
448
+ nc_loc = settings['nc_loc']
449
+
450
+ filtered_c1 = df[(df['column'] == nc_loc) & (df['grna'] != nc)]
451
+ max_count_c1 = filtered_c1['count'].max()
452
+
453
+ filtered_c2 = df[(df['column'] == pc_loc) & (df['grna'] != pc)]
454
+ max_count_c2 = filtered_c2['count'].max()
455
+
456
+ #filtered_c3 = df[(df['column'] != nc_loc) & (df['grna'] == nc)]
457
+ #max_count_c3 = filtered_c3['count'].max()
458
+
459
+ #filtered_c4 = df[(df['column'] != pc_loc) & (df['grna'] == pc)]
460
+ #max_count_c4 = filtered_c4['count'].max()
461
+
462
+ # Find the highest value between max_count_c1 and max_count_c2
463
+ highest_max_count = max(max_count_c1, max_count_c2)
464
+
465
+ # Filter the DataFrame to remove rows with a count lower than the highest_max_count
466
+ filtered_df = df[df['count'] >= highest_max_count]
467
+
468
+ # Calculate total read counts for each unique combination of plate_row and column
469
+ filtered_df['total_reads'] = filtered_df.groupby(['plate_row', 'column'])['count'].transform('sum')
470
+
471
+ # Calculate read fraction for each row
472
+ filtered_df['read_fraction'] = filtered_df['count'] / filtered_df['total_reads']
473
+
474
+ if settings['verbose']:
475
+ print(f"Max count for non {nc} in {nc_loc}: {max_count_c1}")
476
+ print(f"Max count for non {pc} in {pc_loc}: {max_count_c2}")
477
+ #print(f"Max count for {nc} in other columns: {max_count_c3}")
478
+
479
+ return filtered_df
480
+
481
+ settings.setdefault('grna', '/home/carruthers/Documents/grna_barcodes.csv')
482
+ settings.setdefault('barcodes', '/home/carruthers/Documents/SCREEN_BARCODES.csv')
347
483
  settings.setdefault('plate_dict', {'EO1': 'plate1', 'EO2': 'plate2', 'EO3': 'plate3', 'EO4': 'plate4', 'EO5': 'plate5', 'EO6': 'plate6', 'EO7': 'plate7', 'EO8': 'plate8'})
348
484
  settings.setdefault('test', False)
349
485
  settings.setdefault('verbose', True)
350
- settings.setdefault('min_itemsize', 1000)
351
-
352
- qc_file_path = os.path.splitext(h5_file_path)[0] + '_qc_step_2.csv'
353
- unique_grna_file_path = os.path.splitext(h5_file_path)[0] + '_unique_grna.csv'
354
- unique_plate_row_file_path = os.path.splitext(h5_file_path)[0] + '_unique_plate_row.csv'
355
- unique_column_file_path = os.path.splitext(h5_file_path)[0] + '_unique_column.csv'
356
- unique_plate_file_path = os.path.splitext(h5_file_path)[0] + '_unique_plate.csv'
357
- new_h5_file_path = os.path.splitext(h5_file_path)[0] + '_cleaned.h5'
486
+
487
+ settings.setdefault('pc', 'TGGT1_220950_1')
488
+ settings.setdefault('pc_loc', 'c2')
489
+ settings.setdefault('nc', 'TGGT1_233460_4')
490
+ settings.setdefault('nc_loc', 'c1')
491
+
492
+ fldr = os.path.splitext(h5_file_path)[0]
493
+ file_name = os.path.basename(fldr)
494
+
495
+ if settings['test']:
496
+ fldr = os.path.join(fldr, 'test')
497
+ os.makedirs(fldr, exist_ok=True)
498
+
499
+ qc_file_path = os.path.join(fldr, f'{file_name}_qc_step_2.csv')
500
+ unique_grna_file_path = os.path.join(fldr, f'{file_name}_unique_grna.csv')
501
+ unique_plate_row_file_path = os.path.join(fldr, f'{file_name}_unique_plate_row.csv')
502
+ unique_column_file_path = os.path.join(fldr, f'{file_name}_unique_column.csv')
503
+ unique_plate_file_path = os.path.join(fldr, f'{file_name}_unique_plate.csv')
504
+ new_h5_file_path = os.path.join(fldr, f'{file_name}_cleaned.h5')
505
+ combination_counts_file_path = os.path.join(fldr, f'{file_name}_combination_counts.csv')
506
+ combination_counts_file_path_cleaned = os.path.join(fldr, f'{file_name}_combination_counts_cleaned.csv')
507
+
508
+ #qc_file_path = os.path.splitext(h5_file_path)[0] + '_qc_step_2.csv'
509
+ #unique_grna_file_path = os.path.splitext(h5_file_path)[0] + '_unique_grna.csv'
510
+ #unique_plate_row_file_path = os.path.splitext(h5_file_path)[0] + '_unique_plate_row.csv'
511
+ #unique_column_file_path = os.path.splitext(h5_file_path)[0] + '_unique_column.csv'
512
+ #unique_plate_file_path = os.path.splitext(h5_file_path)[0] + '_unique_plate.csv'
513
+ #new_h5_file_path = os.path.splitext(h5_file_path)[0] + '_cleaned.h5'
514
+ #combination_counts_file_path = os.path.splitext(h5_file_path)[0] + '_combination_counts.csv'
515
+ #combination_counts_file_path_cleaned = os.path.splitext(h5_file_path)[0] + '_combination_counts_cleaned.csv'
358
516
 
359
517
  # Initialize the HDF5 store for cleaned data
360
518
  store_cleaned = pd.HDFStore(new_h5_file_path, mode='a', complevel=5, complib='blosc')
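
Note: the new `filter_combinations` helper adds per-well `total_reads` and `read_fraction` columns before filtering on control-derived count thresholds. A minimal sketch of that groupby/transform step with invented counts (not package data):

    import pandas as pd

    counts = pd.DataFrame({
        'plate_row': ['p1_r1', 'p1_r1', 'p1_r2'],
        'column':    ['c1',    'c1',    'c2'],
        'grna':      ['TGGT1_233460_4', 'TGGT1_220950_1', 'TGGT1_220950_1'],
        'count':     [900, 15, 800],
    })
    # total reads per plate_row/column well, broadcast back onto every row
    counts['total_reads'] = counts.groupby(['plate_row', 'column'])['count'].transform('sum')
    counts['read_fraction'] = counts['count'] / counts['total_reads']
    print(counts)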
@@ -370,38 +528,89 @@ def map_barcodes(h5_file_path, settings={}):
370
528
  'unique_grna': Counter(),
371
529
  'unique_plate_row': Counter(),
372
530
  'unique_column': Counter(),
373
- 'unique_plate': Counter()
531
+ 'unique_plate': Counter(),
532
+ 'pc_total_count': 0,
533
+ 'pc_count_pc': 0,
534
+ 'nc_total_count': 0,
535
+ 'nc_count_nc': 0,
536
+ 'pc_count_nc': 0,
537
+ 'nc_count_pc': 0,
538
+ 'pc_error_count': 0,
539
+ 'nc_error_count': 0,
540
+ 'pc_fraction_pc': 0,
541
+ 'nc_fraction_nc': 0,
542
+ 'pc_fraction_nc': 0,
543
+ 'nc_fraction_pc': 0
374
544
  }
375
545
 
546
+ per_row_qc = {}
547
+ combination_counts = Counter()
548
+
376
549
  with pd.HDFStore(h5_file_path, mode='r') as store:
377
550
  keys = [key for key in store.keys() if key.startswith('/reads/chunk_')]
378
-
551
+
552
+ if settings['test']:
553
+ keys = keys[:3] # Only read the first chunks if in test mode
554
+
379
555
  for key in keys:
380
556
  df = store.get(key)
381
557
  df = mapping_dicts(df, settings)
382
- df_cleaned = df.dropna()
383
- qc_dict = get_read_qc(df, df_cleaned)
384
-
385
- # Accumulate QC metrics
386
- overall_qc['reads'] += qc_dict['reads']
387
- overall_qc['cleaned_reads'] += qc_dict['cleaned_reads']
388
- overall_qc['NaN_grna'] += qc_dict['NaN_grna']
389
- overall_qc['NaN_plate_row'] += qc_dict['NaN_plate_row']
390
- overall_qc['NaN_column'] += qc_dict['NaN_column']
391
- overall_qc['NaN_plate'] += qc_dict['NaN_plate']
392
- overall_qc['unique_grna'].update(qc_dict['unique_grna'])
393
- overall_qc['unique_plate_row'].update(qc_dict['unique_plate_row'])
394
- overall_qc['unique_column'].update(qc_dict['unique_column'])
395
- overall_qc['unique_plate'].update(qc_dict['unique_plate'])
558
+ df_cleaned, qc_dict = get_read_qc(df, settings)
559
+
560
+ # Accumulate counts for unique combinations
561
+ combinations = df_cleaned[['plate_row_metadata', 'column_metadata', 'grna_metadata']].apply(tuple, axis=1)
396
562
 
397
- df_cleaned = df_cleaned[df_cleaned['grna_length'] >= 30]
398
-
563
+ combination_counts.update(combinations)
564
+
565
+ if settings['test'] and settings['verbose']:
566
+ os.makedirs(os.path.join(os.path.splitext(h5_file_path)[0],'test'), exist_ok=True)
567
+ df.to_csv(os.path.join(os.path.splitext(h5_file_path)[0],'test','chunk_1_df.csv'), index=False)
568
+ df_cleaned.to_csv(os.path.join(os.path.splitext(h5_file_path)[0],'test','chunk_1_df_cleaned.csv'), index=False)
569
+
570
+ # Accumulate QC metrics for all rows
571
+ for metric in qc_dict:
572
+ if isinstance(overall_qc[metric], Counter):
573
+ overall_qc[metric].update(qc_dict[metric])
574
+ else:
575
+ overall_qc[metric] += qc_dict[metric]
576
+
577
+ # Update per_row_qc dictionary
578
+ chunk_per_row_qc = get_per_row_qc(df, settings)
579
+ for row in chunk_per_row_qc:
580
+ if row not in per_row_qc:
581
+ per_row_qc[row] = chunk_per_row_qc[row]
582
+ else:
583
+ for metric in chunk_per_row_qc[row]:
584
+ if isinstance(per_row_qc[row][metric], Counter):
585
+ per_row_qc[row][metric].update(chunk_per_row_qc[row][metric])
586
+ else:
587
+ per_row_qc[row][metric] += chunk_per_row_qc[row][metric]
588
+
589
+ # Ensure the DataFrame columns are in the desired order
590
+ df_cleaned = df_cleaned[['grna', 'plate_row', 'column', 'sample', 'grna_metadata', 'plate_row_metadata', 'column_metadata', 'plate_metadata']]
591
+
399
592
  # Save cleaned data to the new HDF5 store
400
593
  store_cleaned.put('reads/cleaned_data', df_cleaned, format='table', append=True)
401
-
594
+
402
595
  del df_cleaned, df
403
596
  gc.collect()
404
597
 
598
+ # Calculate overall fractions after accumulating all metrics
599
+ overall_qc['pc_fraction_pc'] = overall_qc['pc_count_pc'] / overall_qc['pc_total_count'] if overall_qc['pc_total_count'] else 0
600
+ overall_qc['nc_fraction_nc'] = overall_qc['nc_count_nc'] / overall_qc['nc_total_count'] if overall_qc['nc_total_count'] else 0
601
+ overall_qc['pc_fraction_nc'] = overall_qc['pc_count_nc'] / overall_qc['nc_total_count'] if overall_qc['nc_total_count'] else 0
602
+ overall_qc['nc_fraction_pc'] = overall_qc['nc_count_pc'] / overall_qc['pc_total_count'] if overall_qc['pc_total_count'] else 0
603
+
604
+ for row in per_row_qc:
605
+ if row != 'all_rows':
606
+ per_row_qc[row]['pc_fraction_pc'] = per_row_qc[row]['pc_count_pc'] / per_row_qc[row]['pc_total_count'] if per_row_qc[row]['pc_total_count'] else 0
607
+ per_row_qc[row]['nc_fraction_nc'] = per_row_qc[row]['nc_count_nc'] / per_row_qc[row]['nc_total_count'] if per_row_qc[row]['nc_total_count'] else 0
608
+ per_row_qc[row]['pc_fraction_nc'] = per_row_qc[row]['pc_count_nc'] / per_row_qc[row]['nc_total_count'] if per_row_qc[row]['nc_total_count'] else 0
609
+ per_row_qc[row]['nc_fraction_pc'] = per_row_qc[row]['nc_count_pc'] / per_row_qc[row]['pc_total_count'] if per_row_qc[row]['pc_total_count'] else 0
610
+
611
+ # Add overall_qc to per_row_qc with the key 'all_rows'
612
+ per_row_qc['all_rows'] = overall_qc
613
+
405
614
  # Convert the Counter objects to DataFrames and save them to CSV files
406
615
  unique_grna_df = pd.DataFrame(overall_qc['unique_grna'].items(), columns=['key', 'value'])
407
616
  unique_plate_row_df = pd.DataFrame(overall_qc['unique_plate_row'].items(), columns=['key', 'value'])
@@ -422,89 +631,128 @@ def map_barcodes(h5_file_path, settings={}):
422
631
  # Combine all remaining QC metrics into a single DataFrame and save it to CSV
423
632
  qc_df = pd.DataFrame([overall_qc])
424
633
  qc_df.to_csv(qc_file_path, index=False)
634
+
635
+ # Convert per_row_qc to a DataFrame and save it to CSV
636
+ per_row_qc_df = pd.DataFrame.from_dict(per_row_qc, orient='index')
637
+ per_row_qc_df = per_row_qc_df.sort_values(by='reads', ascending=False)
638
+ per_row_qc_df = per_row_qc_df.drop(['unique_grna', 'unique_plate_row', 'unique_column', 'unique_plate'], axis=1, errors='ignore')
639
+ per_row_qc_df = per_row_qc_df.dropna(subset=['reads'])
640
+ per_row_qc_df.to_csv(os.path.splitext(h5_file_path)[0] + '_per_row_qc.csv', index=True)
641
+
642
+ if settings['verbose']:
643
+ display(per_row_qc_df)
644
+
645
+ # Save the combination counts to a CSV file
646
+ try:
647
+ combination_counts_df = pd.DataFrame(combination_counts.items(), columns=['combination', 'count'])
648
+ combination_counts_df[['plate_row', 'column', 'grna']] = pd.DataFrame(combination_counts_df['combination'].tolist(), index=combination_counts_df.index)
649
+ combination_counts_df = combination_counts_df.drop('combination', axis=1)
650
+ combination_counts_df.to_csv(combination_counts_file_path, index=False)
651
+
652
+ grna_plate_heatmap(combination_counts_file_path, specific_grna=None)
653
+ grna_plate_heatmap(combination_counts_file_path, specific_grna=settings['pc'])
654
+ grna_plate_heatmap(combination_counts_file_path, specific_grna=settings['nc'])
655
+
656
+ combination_counts_df_cleaned = filter_combinations(combination_counts_df, settings)
657
+ combination_counts_df_cleaned.to_csv(combination_counts_file_path_cleaned, index=False)
658
+
659
+ grna_plate_heatmap(combination_counts_file_path_cleaned, specific_grna=None)
660
+ grna_plate_heatmap(combination_counts_file_path_cleaned, specific_grna=settings['pc'])
661
+ grna_plate_heatmap(combination_counts_file_path_cleaned, specific_grna=settings['nc'])
662
+ except Exception as e:
663
+ print(e)
425
664
 
426
665
  # Close the HDF5 store
427
666
  store_cleaned.close()
428
-
429
667
  gc.collect()
430
668
  return
431
669
 
432
- def map_barcodes_v1(h5_file_path, settings={}):
670
+ def grna_plate_heatmap(path, specific_grna=None, min_max='all', cmap='viridis', min_count=0, save=True):
671
+ """
672
+ Generate a heatmap of gRNA plate data.
433
673
 
434
- def get_read_qc(df, df_cleaned):
435
- qc_dict = {}
436
- qc_dict['reads'] = len(df)
437
- qc_dict['cleaned_reads'] = len(df_cleaned)
438
- qc_dict['NaN_grna'] = df['grna_metadata'].isna().sum()
439
- qc_dict['NaN_plate_row'] = df['plate_row_metadata'].isna().sum()
440
- qc_dict['NaN_column'] = df['column_metadata'].isna().sum()
441
- qc_dict['NaN_plate'] = df['plate_metadata'].isna().sum()
442
-
443
-
444
- qc_dict['unique_grna'] = len(df['grna_metadata'].dropna().unique().tolist())
445
- qc_dict['unique_plate_row'] = len(df['plate_row_metadata'].dropna().unique().tolist())
446
- qc_dict['unique_column'] = len(df['column_metadata'].dropna().unique().tolist())
447
- qc_dict['unique_plate'] = len(df['plate_metadata'].dropna().unique().tolist())
448
- qc_dict['value_counts_grna'] = df['grna_metadata'].value_counts(dropna=True)
449
- qc_dict['value_counts_plate_row'] = df['plate_row_metadata'].value_counts(dropna=True)
450
- qc_dict['value_counts_column'] = df['column_metadata'].value_counts(dropna=True)
674
+ Args:
675
+ path (str): The path to the CSV file containing the gRNA plate data.
676
+ specific_grna (str, optional): The specific gRNA to filter the data for. Defaults to None.
677
+ min_max (str or list or tuple, optional): The range of values to use for the color scale.
678
+ If 'all', the range will be determined by the minimum and maximum values in the data.
679
+ If 'allq', the range will be determined by the 2nd and 98th percentiles of the data.
680
+ If a list or tuple of two values, the range will be determined by those values.
681
+ Defaults to 'all'.
682
+ cmap (str, optional): The colormap to use for the heatmap. Defaults to 'viridis'.
683
+ min_count (int, optional): The minimum count threshold for including a gRNA in the heatmap.
684
+ Defaults to 0.
685
+ save (bool, optional): Whether to save the heatmap as a PDF file. Defaults to True.
686
+
687
+ Returns:
688
+ matplotlib.figure.Figure: The generated heatmap figure.
689
+ """
690
+ def generate_grna_plate_heatmap(df, plate_number, min_max, min_count, specific_grna=None):
691
+ df = df.copy() # Work on a copy to avoid SettingWithCopyWarning
451
692
 
452
- return qc_dict
453
-
454
- def mapping_dicts(df, settings):
455
- grna_df = pd.read_csv(settings['grna'])
456
- barcode_df = pd.read_csv(settings['barcodes'])
693
+ # Filtering the dataframe based on the plate_number and specific gRNA if provided
694
+ df = df[df['plate_row'].str.startswith(plate_number)]
695
+ if specific_grna:
696
+ df = df[df['grna'] == specific_grna]
457
697
 
458
- grna_dict = {row['sequence']: row['name'] for _, row in grna_df.iterrows()}
459
- plate_row_dict = {row['sequence']: row['name'] for _, row in barcode_df.iterrows() if row['name'].startswith('p')}
460
- column_dict = {row['sequence']: row['name'] for _, row in barcode_df.iterrows() if row['name'].startswith('c')}
461
- plate_dict = settings['plate_dict']
698
+ # Split plate_row into plate and row
699
+ df[['plate', 'row']] = df['plate_row'].str.split('_', expand=True)
462
700
 
463
- df['grna_metadata'] = df['grna'].map(grna_dict)
464
- df['grna_length'] = df['grna'].apply(len)
465
- df['plate_row_metadata'] = df['plate_row'].map(plate_row_dict)
466
- df['column_metadata'] = df['column'].map(column_dict)
467
- df['plate_metadata'] = df['sample'].map(plate_dict)
468
-
469
- return df
470
-
471
- settings.setdefault('grna', '/home/carruthers/Documents/grna_barcodes.csv')
472
- settings.setdefault('barcodes', '/home/carruthers/Documents/SCREEN_BARCODES.csv')
473
- settings.setdefault('plate_dict', {'EO1': 'plate1', 'EO2': 'plate2', 'EO3': 'plate3', 'EO4': 'plate4', 'EO5': 'plate5', 'EO6': 'plate6', 'EO7': 'plate7', 'EO8': 'plate8'})
474
- settings.setdefault('test', False)
475
- settings.setdefault('verbose', True)
476
- settings.setdefault('min_itemsize', 1000)
701
+ # Ensure proper ordering
702
+ row_order = [f'r{i}' for i in range(1, 17)]
703
+ col_order = [f'c{i}' for i in range(1, 28)]
477
704
 
478
- qc_file_path = os.path.splitext(h5_file_path)[0] + '_qc_step_2.csv'
479
- new_h5_file_path = os.path.splitext(h5_file_path)[0] + '_cleaned.h5'
480
-
481
- # Initialize the HDF5 store for cleaned data
482
- store_cleaned = pd.HDFStore(new_h5_file_path, mode='a', complevel=5, complib='blosc')
705
+ df['row'] = pd.Categorical(df['row'], categories=row_order, ordered=True)
706
+ df['column'] = pd.Categorical(df['column'], categories=col_order, ordered=True)
707
+
708
+ # Group by row and column, summing counts
709
+ grouped = df.groupby(['row', 'column'], observed=True)['count'].sum().reset_index()
710
+
711
+ plate_map = pd.pivot_table(grouped, values='count', index='row', columns='column').fillna(0)
712
+
713
+ if min_max == 'all':
714
+ min_max = [plate_map.min().min(), plate_map.max().max()]
715
+ elif min_max == 'allq':
716
+ min_max = np.quantile(plate_map.values, [0.02, 0.98])
717
+ elif isinstance(min_max, (list, tuple)) and len(min_max) == 2:
718
+ if isinstance(min_max[0], (float)) and isinstance(min_max[1], (float)):
719
+ min_max = np.quantile(plate_map.values, [min_max[0], min_max[1]])
720
+ if isinstance(min_max[0], (int)) and isinstance(min_max[1], (int)):
721
+ min_max = [min_max[0], min_max[1]]
722
+
723
+ return plate_map, min_max
483
724
 
484
- # Initialize the DataFrame for QC metrics
485
- qc_df_list = []
725
+ if isinstance(path, pd.DataFrame):
726
+ df = path
727
+ else:
728
+ df = pd.read_csv(path)
486
729
 
487
- with pd.HDFStore(h5_file_path, mode='r') as store:
488
- keys = [key for key in store.keys() if key.startswith('/reads/chunk_')]
489
-
490
- for key in keys:
491
- df = store.get(key)
492
- df = mapping_dicts(df, settings)
493
- df_cleaned = df.dropna()
494
- qc_dict = get_read_qc(df, df_cleaned)
495
- qc_df_list.append(qc_dict)
496
- df_cleaned = df_cleaned[df_cleaned['grna_length'] >= 30]
497
-
498
- # Save cleaned data to the new HDF5 store
499
- store_cleaned.put('reads/cleaned_data', df_cleaned, format='table', append=True)
730
+ plates = df['plate_row'].str.split('_', expand=True)[0].unique()
731
+ n_rows, n_cols = (len(plates) + 3) // 4, 4
732
+ fig, ax = plt.subplots(n_rows, n_cols, figsize=(40, 5 * n_rows))
733
+ ax = ax.flatten()
500
734
 
501
- # Combine all QC metrics into a single DataFrame and save it to CSV
502
- qc_df = pd.DataFrame(qc_df_list)
503
- qc_df.to_csv(qc_file_path, index=False)
735
+ for index, plate in enumerate(plates):
736
+ plate_map, min_max_values = generate_grna_plate_heatmap(df, plate, min_max, min_count, specific_grna)
737
+ sns.heatmap(plate_map, cmap=cmap, vmin=min_max_values[0], vmax=min_max_values[1], ax=ax[index])
738
+ ax[index].set_title(plate)
739
+
740
+ for i in range(len(plates), n_rows * n_cols):
741
+ fig.delaxes(ax[i])
504
742
 
505
- # Close the HDF5 store
506
- store_cleaned.close()
507
- return
743
+ plt.subplots_adjust(wspace=0.1, hspace=0.4)
744
+
745
+ # Save the figure
746
+ if save:
747
+ filename = path.replace('.csv', '')
748
+ if specific_grna:
749
+ filename += f'_{specific_grna}'
750
+ filename += '.pdf'
751
+ plt.savefig(filename)
752
+ print(f'saved {filename}')
753
+ plt.show()
754
+
755
+ return fig
508
756
 
509
757
  def map_barcodes_folder(src, settings={}):
510
758
  for file in os.listdir(src):
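
Note: `grna_plate_heatmap` pivots the per-well counts into a row-by-column matrix before drawing each plate. A reduced sketch of the pivot-and-plot step with invented data (column names follow the code above):

    import pandas as pd
    import seaborn as sns
    import matplotlib.pyplot as plt

    grouped = pd.DataFrame({'row': ['r1', 'r1', 'r2'],
                            'column': ['c1', 'c2', 'c1'],
                            'count': [120, 45, 300]})
    plate_map = pd.pivot_table(grouped, values='count', index='row',
                               columns='column').fillna(0)
    sns.heatmap(plate_map, cmap='viridis',
                vmin=plate_map.min().min(), vmax=plate_map.max().max())
    plt.show()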
@@ -1144,4 +1392,427 @@ def generate_fraction_map(df, gene_column, min_=10, plates=['p1','p2','p3','p4']
1144
1392
  independent_variables = independent_variables.drop('sum', axis=1)
1145
1393
  independent_variables.index.name = 'prc'
1146
1394
  independent_variables = independent_variables.loc[:, (independent_variables.sum() != 0)]
1147
- return independent_variables
1395
+ return independent_variables
1396
+
1397
+
1398
+ def plot_histogram(df, dependent_variable):
1399
+ # Plot histogram of the dependent variable
1400
+ plt.figure(figsize=(10, 6))
1401
+ sns.histplot(df[dependent_variable], kde=True)
1402
+ plt.title(f'Histogram of {dependent_variable}')
1403
+ plt.xlabel(dependent_variable)
1404
+ plt.ylabel('Frequency')
1405
+ plt.show()
1406
+
1407
+ def precess_reads(csv_path, fraction_threshold, plate):
1408
+ # Read the CSV file into a DataFrame
1409
+ csv_df = pd.read_csv(csv_path)
1410
+
1411
+ # Ensure the necessary columns are present
1412
+ if not all(col in csv_df.columns for col in ['grna', 'count', 'column']):
1413
+ raise ValueError("The CSV file must contain 'grna', 'count', 'plate_row', and 'column' columns.")
1414
+
1415
+ if 'plate_row' in csv_df.columns:
1416
+ csv_df[['plate', 'row']] = csv_df['plate_row'].str.split('_', expand=True)
1417
+ if plate is not None:
1418
+ csv_df = csv_df.drop(columns=['plate'])
1419
+ csv_df['plate'] = plate
1420
+
1421
+ if plate is not None:
1422
+ csv_df['plate'] = plate
1423
+
1424
+ # Create the prc column
1425
+ csv_df['prc'] = csv_df['plate'] + '_' + csv_df['row'] + '_' + csv_df['column']
1426
+
1427
+ # Group by prc and calculate the sum of counts
1428
+ grouped_df = csv_df.groupby('prc')['count'].sum().reset_index()
1429
+ grouped_df = grouped_df.rename(columns={'count': 'total_counts'})
1430
+ merged_df = pd.merge(csv_df, grouped_df, on='prc')
1431
+ merged_df['fraction'] = merged_df['count'] / merged_df['total_counts']
1432
+
1433
+ # Filter rows with fraction under the threshold
1434
+ if fraction_threshold is not None:
1435
+ observations_before = len(merged_df)
1436
+ merged_df = merged_df[merged_df['fraction'] >= fraction_threshold]
1437
+ observations_after = len(merged_df)
1438
+ removed = observations_before - observations_after
1439
+ print(f'Removed {removed} observation below fraction threshold: {fraction_threshold}')
1440
+
1441
+ merged_df = merged_df[['prc', 'grna', 'fraction']]
1442
+
1443
+ if not all(col in merged_df.columns for col in ['grna', 'gene']):
1444
+ try:
1445
+ merged_df[['org', 'gene', 'grna']] = merged_df['grna'].str.split('_', expand=True)
1446
+ merged_df = merged_df.drop(columns=['org'])
1447
+ merged_df['grna'] = merged_df['gene'] + '_' + merged_df['grna']
1448
+ except:
1449
+ print('Error splitting grna into org, gene, grna.')
1450
+
1451
+ return merged_df
1452
+
1453
+ def apply_transformation(X, transform):
1454
+ if transform == 'log':
1455
+ transformer = FunctionTransformer(np.log1p, validate=True)
1456
+ elif transform == 'sqrt':
1457
+ transformer = FunctionTransformer(np.sqrt, validate=True)
1458
+ elif transform == 'square':
1459
+ transformer = FunctionTransformer(np.square, validate=True)
1460
+ else:
1461
+ transformer = None
1462
+ return transformer
1463
+
1464
+ def check_normality(data, variable_name, verbose=False):
1465
+ """Check if the data is normally distributed using the Shapiro-Wilk test."""
1466
+ stat, p_value = shapiro(data)
1467
+ if verbose:
1468
+ print(f"Shapiro-Wilk Test for {variable_name}:\nStatistic: {stat}, P-value: {p_value}")
1469
+ if p_value > 0.05:
1470
+ if verbose:
1471
+ print(f"The data for {variable_name} is normally distributed.")
1472
+ return True
1473
+ else:
1474
+ if verbose:
1475
+ print(f"The data for {variable_name} is not normally distributed.")
1476
+ return False
1477
+
1478
+ def process_scores(df, dependent_variable, plate, min_cell_count=25, agg_type='mean', transform=None):
1479
+
1480
+ if plate is not None:
1481
+ df['plate'] = plate
1482
+
1483
+ df['prc'] = df['plate'] + '_' + df['row'] + '_' + df['col']
1484
+ df = df[['prc', dependent_variable]]
1485
+
1486
+ # Group by prc and calculate the mean and count of the dependent_variable
1487
+ grouped = df.groupby('prc')[dependent_variable]
1488
+
1489
+ print(f'Using agg_type: {agg_type}')
1490
+ if agg_type == 'median':
1491
+ dependent_df = grouped.median().reset_index()
1492
+ elif agg_type == 'mean':
1493
+ dependent_df = grouped.mean().reset_index()
1494
+ elif agg_type == 'quantile':
1495
+ dependent_df = grouped.quantile(0.75).reset_index()
1496
+ elif agg_type == None:
1497
+ dependent_df = df.reset_index()
1498
+ if 'prcfo' in dependent_df.columns:
1499
+ dependent_df = dependent_df.drop(columns=['prcfo'])
1500
+
1501
+ else:
1502
+ raise ValueError(f"Unsupported aggregation type {agg_type}")
1503
+
1504
+ # Calculate cell_count for all cases
1505
+ cell_count = grouped.size().reset_index(name='cell_count')
1506
+
1507
+ if agg_type is None:
1508
+ dependent_df = pd.merge(dependent_df, cell_count, on='prc')
1509
+ else:
1510
+ dependent_df['cell_count'] = cell_count['cell_count']
1511
+
1512
+ dependent_df = dependent_df[dependent_df['cell_count'] >= min_cell_count]
1513
+
1514
+ is_normal = check_normality(dependent_df[dependent_variable], dependent_variable)
1515
+
1516
+ if not transform is None:
1517
+ transformer = apply_transformation(dependent_df[dependent_variable], transform=transform)
1518
+ transformed_var = f'{transform}_{dependent_variable}'
1519
+ df[transformed_var] = transformer.fit_transform(dependent_df[[dependent_variable]])
1520
+ dependent_variable = transformed_var
1521
+ is_normal = check_normality(dependent_df[transformed_var], transformed_var)
1522
+
1523
+ if not is_normal:
1524
+ print(f'{dependent_variable} is not normally distributed')
1525
+ else:
1526
+ print(f'{dependent_variable} is normally distributed')
1527
+
1528
+ return dependent_df, dependent_variable
1529
+
1530
+ def perform_mixed_model(y, X, groups, alpha=1.0):
1531
+ # Ensure groups are defined correctly and check for multicollinearity
1532
+ if groups is None:
1533
+ raise ValueError("Groups must be defined for mixed model regression")
1534
+
1535
+ # Check for multicollinearity by calculating the VIF for each feature
1536
+ X_np = X.values
1537
+ vif = [variance_inflation_factor(X_np, i) for i in range(X_np.shape[1])]
1538
+ print(f"VIF: {vif}")
1539
+ if any(v > 10 for v in vif):
1540
+ print(f"Multicollinearity detected with VIF: {vif}. Applying Ridge regression to the fixed effects.")
1541
+ ridge = Ridge(alpha=alpha)
1542
+ ridge.fit(X, y)
1543
+ X_ridge = ridge.coef_ * X # Adjust X with Ridge coefficients
1544
+ model = MixedLM(y, X_ridge, groups=groups)
1545
+ else:
1546
+ model = MixedLM(y, X, groups=groups)
1547
+
1548
+ result = model.fit()
1549
+ return result
1550
+
1551
+ def regression_model(X, y, regression_type='ols', groups=None, alpha=1.0, remove_row_column_effect=True):
1552
+
1553
+ if regression_type == 'ols':
1554
+ model = sm.OLS(y, X).fit()
1555
+
1556
+ elif regression_type == 'gls':
1557
+ model = sm.GLS(y, X).fit()
1558
+
1559
+ elif regression_type == 'wls':
1560
+ model = sm.WLS(y, X, weights=weights).fit()
1561
+
1562
+ elif regression_type == 'rlm':
1563
+ model = sm.RLM(y, X, M=sm.robust.norms.HuberT()).fit()
1564
+ #model = sm.RLM(y, X, M=sm.robust.norms.TukeyBiweight()).fit()
1565
+ #model = sm.RLM(y, X, M=sm.robust.norms.Hampel()).fit()
1566
+ #model = sm.RLM(y, X, M=sm.robust.norms.LeastSquares()).fit()
1567
+ #model = sm.RLM(y, X, M=sm.robust.norms.RamsayE()).fit()
1568
+ #model = sm.RLM(y, X, M=sm.robust.norms.TrimmedMean()).fit()
1569
+
1570
+ elif regression_type == 'glm':
1571
+ model = sm.GLM(y, X, family=sm.families.Gaussian()).fit() # Gaussian: Used for continuous data, similar to OLS regression.
1572
+ #model = sm.GLM(y, X, family=sm.families.Binomial()).fit() # Binomial: Used for binary data, modeling the probability of success.
1573
+ #model = sm.GLM(y, X, family=sm.families.Poisson()).fit() # Poisson: Used for count data.
1574
+ #model = sm.GLM(y, X, family=sm.families.Gamma()).fit() # Gamma: Used for continuous, positive data, often for modeling waiting times or life data.
1575
+ #model = sm.GLM(y, X, family=sm.families.InverseGaussian()).fit() # Inverse Gaussian: Used for positive continuous data with a variance that increases with the
1576
+ #model = sm.GLM(y, X, family=sm.families.NegativeBinomial()).fit() # Negative Binomial: Used for count data with overdispersion (variance greater than the mean).
1577
+ #model = sm.GLM(y, X, family=sm.families.Tweedie()).fit() # Tweedie: Used for data that can take both positive continuous and count values, allowing for a mixture of distributions.
1578
+
1579
+ elif regression_type == 'mixed':
1580
+ model = perform_mixed_model(y, X, groups, alpha=alpha)
1581
+
1582
+ elif regression_type == 'quantile':
1583
+ model = sm.QuantReg(y, X).fit(q=alpha)
1584
+
1585
+ elif regression_type == 'logit':
1586
+ model = sm.Logit(y, X).fit()
1587
+
1588
+ elif regression_type == 'probit':
1589
+ model = sm.Probit(y, X).fit()
1590
+
1591
+ elif regression_type == 'poisson':
1592
+ model_poisson = sm.Poisson(y, X).fit()
1593
+
1594
+ elif regression_type == 'lasso':
1595
+ model = Lasso(alpha=alpha).fit(X, y)
1596
+
1597
+ elif regression_type == 'ridge':
1598
+ model = Ridge(alpha=alpha).fit(X, y)
1599
+
1600
+ else:
1601
+ raise ValueError(f"Unsupported regression type {regression_type}")
1602
+
1603
+ if regression_type in ['lasso', 'ridge']:
1604
+ y_pred = model.predict(X)
1605
+ plt.scatter(X.iloc[:, 1], y, color='blue', label='Data')
1606
+ plt.plot(X.iloc[:, 1], y_pred, color='red', label='Regression line')
1607
+ plt.xlabel('Features')
1608
+ plt.ylabel('Dependent Variable')
1609
+ plt.legend()
1610
+ plt.show()
1611
+
1612
+ return model
1613
+
1614
+ def volcano_plot(coef_df, filename='volcano_plot.pdf'):
1615
+ # Create the volcano plot
1616
+ plt.figure(figsize=(10, 6))
1617
+ sns.scatterplot(
1618
+ data=coef_df,
1619
+ x='coefficient',
1620
+ y='-log10(p_value)',
1621
+ hue='highlight',
1622
+ palette={True: 'red', False: 'blue'}
1623
+ )
1624
+ plt.title('Volcano Plot of Coefficients')
1625
+ plt.xlabel('Coefficient')
1626
+ plt.ylabel('-log10(p-value)')
1627
+ plt.axhline(y=-np.log10(0.05), color='red', linestyle='--')
1628
+ plt.legend().remove()
1629
+ plt.savefig(filename, format='pdf')
1630
+ print(f'Saved Volcano plot: {filename}')
1631
+ plt.show()
1632
+
1633
+ def clean_controls(df,pc,nc,other):
1634
+ if 'col' in df.columns:
1635
+ df['column'] = df['col']
1636
+ if nc != None:
1637
+ df = df[~df['column'].isin([nc])]
1638
+ if pc != None:
1639
+ df = df[~df['column'].isin([pc])]
1640
+ if other != None:
1641
+ df = df[~df['column'].isin([other])]
1642
+ print(f'Removed data from {nc, pc, other}')
1643
+ return df
1644
+
1645
+ def regression(df, csv_path, dependent_variable='predictions', regression_type=None, alpha=1.0, remove_row_column_effect=False):
1646
+
1647
+ volcano_filename = os.path.splitext(os.path.basename(csv_path))[0] + '_volcano_plot.pdf'
1648
+ volcano_filename = regression_type+'_'+volcano_filename
1649
+ if regression_type == 'quantile':
1650
+ volcano_filename = str(alpha)+'_'+volcano_filename
1651
+ volcano_path=os.path.join(os.path.dirname(csv_path), volcano_filename)
1652
+
1653
+ if regression_type is None:
1654
+ if is_normal:
1655
+ regression_type = 'ols'
1656
+ else:
1657
+ regression_type = 'glm'
1658
+
1659
+ if remove_row_column_effect:
1660
+
1661
+ ## 1. Fit the initial model with row and column to estimate their effects
1662
+ ## 2. Fit the initial model using the specified regression type
1663
+ ## 3. Calculate the residuals
1664
+ ### Residual calculation: Residuals are the differences between the observed and predicted values. This step checks if the initial_model has an attribute resid (residuals). If it does, it directly uses them. Otherwise, it calculates residuals manually by subtracting the predicted values from the observed values (y_with_row_col).
1665
+ ## 4. Use the residuals as the new dependent variable in the final regression model without row and column
1666
+ ### Formula creation: A new regression formula is created, excluding row and column effects, with residuals as the new dependent variable.
1667
+ ### Matrix creation: dmatrices is used again to create new design matrices (X for independent variables and y for the new dependent variable, residuals) based on the new formula and the dataframe df.
1668
+ #### Remove Confounding Effects:Variables like row and column can introduce systematic biases or confounding effects that might obscure the relationships between the dependent variable and the variables of interest (fraction:gene and fraction:grna).
1669
+ #### By first estimating the effects of row and column and then using the residuals (the part of the dependent variable that is not explained by row and column), we can focus the final regression model on the relationships of interest without the interference from row and column.
1670
+
1671
+ #### Reduce Multicollinearity: Including variables like row and column along with other predictors can sometimes lead to multicollinearity, where predictors are highly correlated with each other. This can make it difficult to determine the individual effect of each predictor.
1672
+ #### By regressing out the effects of row and column first, we reduce potential multicollinearity issues in the final model.
1673
+
1674
+ # Fit the initial model with row and column to estimate their effects
1675
+ formula_with_row_col = f'{dependent_variable} ~ row + column'
1676
+ y_with_row_col, X_with_row_col = dmatrices(formula_with_row_col, data=df, return_type='dataframe')
1677
+
1678
+ # Fit the initial model using the specified regression type
1679
+ initial_model = regression_model(X_with_row_col, y_with_row_col, regression_type=regression_type, alpha=alpha)
1680
+
1681
+ # Calculate the residuals manually
1682
+ if hasattr(initial_model, 'resid'):
1683
+ df['residuals'] = initial_model.resid
1684
+ else:
1685
+ df['residuals'] = y_with_row_col.values.ravel() - initial_model.predict(X_with_row_col)
1686
+
1687
+ # Use the residuals as the new dependent variable in the final regression model without row and column
1688
+ formula_without_row_col = 'residuals ~ fraction:gene + fraction:grna'
1689
+ y, X = dmatrices(formula_without_row_col, data=df, return_type='dataframe')
1690
+
1691
+ # Plot histogram of the residuals
1692
+ plot_histogram(df, 'residuals')
1693
+
1694
+ # Scale the independent variables and residuals
1695
+ scaler_X = MinMaxScaler()
1696
+ scaler_y = MinMaxScaler()
1697
+ X = pd.DataFrame(scaler_X.fit_transform(X), columns=X.columns)
1698
+ y = scaler_y.fit_transform(y)
1699
+
1700
+ else:
1701
+ formula = f'{dependent_variable} ~ fraction:gene + fraction:grna + row + column'
1702
+ y, X = dmatrices(formula, data=df, return_type='dataframe')
1703
+
1704
+ plot_histogram(y, dependent_variable)
1705
+
1706
+ # Scale the independent variables and dependent variable
1707
+ scaler_X = MinMaxScaler()
1708
+ scaler_y = MinMaxScaler()
1709
+ X = pd.DataFrame(scaler_X.fit_transform(X), columns=X.columns)
1710
+ y = scaler_y.fit_transform(y)
1711
+
1712
+ groups = df['prc'] if regression_type == 'mixed' else None
1713
+ print(f'performing {regression_type} regression')
1714
+ model = regression_model(X, y, regression_type=regression_type, groups=groups, alpha=alpha, remove_row_column_effect=remove_row_column_effect)
1715
+
1716
+ # Get the model coefficients and p-values
1717
+ if regression_type in ['ols','gls','wls','rlm','glm','mixed','quantile','logit','probit','poisson','lasso','ridge']:
1718
+ coefs = model.params
1719
+ p_values = model.pvalues
1720
+
1721
+ coef_df = pd.DataFrame({
1722
+ 'feature': coefs.index,
1723
+ 'coefficient': coefs.values,
1724
+ 'p_value': p_values.values
1725
+ })
1726
+ else:
1727
+ coefs = model.coef_
1728
+ intercept = model.intercept_
1729
+ feature_names = X.design_info.column_names
1730
+
1731
+ coef_df = pd.DataFrame({
1732
+ 'feature': feature_names,
1733
+ 'coefficient': coefs
1734
+ })
1735
+ coef_df.loc[0, 'coefficient'] += intercept
1736
+ coef_df['p_value'] = np.nan # Placeholder since sklearn doesn't provide p-values
1737
+
1738
+ coef_df['-log10(p_value)'] = -np.log10(coef_df['p_value'])
1739
+ coef_df_v = coef_df[coef_df['feature'] != 'Intercept']
1740
+
1741
+ # Create the highlight column
1742
+ coef_df['highlight'] = coef_df['feature'].apply(lambda x: '220950' in x)
1743
+ coef_df = coef_df[~coef_df['feature'].str.contains('row|column')]
1744
+ volcano_plot(coef_df, volcano_path)
1745
+
1746
+ return model, coef_df
1747
+
1748
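The two-stage approach described in the "Reduce multicollinearity" comments above — fit the dependent variable on row + column first, then regress the residuals on fraction:gene + fraction:grna — can be reproduced with plain patsy and statsmodels. The snippet below is a minimal sketch on a made-up DataFrame, not the spacr API; the column names and data are illustrative assumptions.

    # Minimal sketch of the two-stage row/column residualization (toy data, not the spacr API).
    import numpy as np
    import pandas as pd
    import statsmodels.api as sm
    from patsy import dmatrices

    rng = np.random.default_rng(0)
    df = pd.DataFrame({
        'predictions': rng.random(40),
        'row': rng.choice(['r1', 'r2', 'r3', 'r4'], 40),
        'column': rng.choice(['c1', 'c2', 'c3'], 40),
        'fraction': rng.random(40),
        'gene': rng.choice(['geneA', 'geneB'], 40),
        'grna': rng.choice(['g1', 'g2', 'g3'], 40),
    })

    # Stage 1: estimate plate-position (row/column) effects and keep the residuals.
    y1, X1 = dmatrices('predictions ~ row + column', data=df, return_type='dataframe')
    df['residuals'] = sm.OLS(y1, X1).fit().resid

    # Stage 2: model the residuals with the biological terms only, so plate-position
    # effects no longer compete with the gene and grna terms.
    y2, X2 = dmatrices('residuals ~ fraction:gene + fraction:grna', data=df, return_type='dataframe')
    stage2 = sm.OLS(y2, X2).fit()
    print(stage2.params)
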
+ def set_regression_defaults(settings):
1749
+ settings.setdefault('gene_weights_csv', '/nas_mnt/carruthers/Einar/mitoscreen/sequencing/combined_reads/EO1_combined/EO1_combined_combination_counts.csv')
1750
+ settings.setdefault('dependent_variable','predictions')
1751
+ settings.setdefault('transform',None)
1752
+ settings.setdefault('agg_type','mean')
1753
+ settings.setdefault('min_cell_count',25)
1754
+ settings.setdefault('regression_type','ols')
1755
+ settings.setdefault('remove_row_column_effect',False)
1756
+ settings.setdefault('alpha',1)
1757
+ settings.setdefault('fraction_threshold',0.1)
1758
+ settings.setdefault('nc','c1')
1759
+ settings.setdefault('pc','c2')
1760
+ settings.setdefault('other','c3')
1761
+ settings.setdefault('plate','plate1')
1762
+
1763
+ if settings['regression_type'] == 'quantile':
1764
+ print(f"Using alpha as quantile for quantile regression, alpha: {settings['alpha']}")
1765
+ settings['agg_type'] = None
1766
+ print(f'agg_type set to None for quantile regression')
1767
+ return settings
1768
+
1769
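set_regression_defaults fills in only the keys the caller has not supplied, because dict.setdefault leaves existing entries untouched; for quantile regression it additionally reuses alpha as the quantile and disables per-well aggregation. A small stand-alone illustration of that behaviour (caller-supplied values are hypothetical):

    # dict.setdefault fills only missing keys (hypothetical caller-supplied values).
    settings = {'regression_type': 'quantile', 'alpha': 0.25}
    settings.setdefault('regression_type', 'ols')  # already present -> stays 'quantile'
    settings.setdefault('alpha', 1)                # already present -> stays 0.25
    settings.setdefault('min_cell_count', 25)      # missing -> added with default 25

    if settings['regression_type'] == 'quantile':
        # alpha doubles as the quantile to fit; aggregation is switched off.
        settings['agg_type'] = None

    print(settings)
    # {'regression_type': 'quantile', 'alpha': 0.25, 'min_cell_count': 25, 'agg_type': None}
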
+ def perform_regression(df, settings):
1770
+
1771
+ from spacr.plot import _plot_plates
1772
+
1773
+ results_filename = os.path.splitext(os.path.basename(settings['gene_weights_csv']))[0] + '_results.csv'
1774
+ hits_filename = os.path.splitext(os.path.basename(settings['gene_weights_csv']))[0] + '_results_significant.csv'
1775
+
1776
+ results_filename = settings['regression_type']+'_'+results_filename
1777
+ hits_filename = settings['regression_type']+'_'+hits_filename
1778
+ if settings['regression_type'] == 'quantile':
1779
+ results_filename = str(settings['alpha'])+'_'+results_filename
1780
+ hits_filename = str(settings['alpha'])+'_'+hits_filename
1781
+ results_path=os.path.join(os.path.dirname(settings['gene_weights_csv']), results_filename)
1782
+ hits_path=os.path.join(os.path.dirname(settings['gene_weights_csv']), hits_filename)
1783
+
1784
+ settings = set_regression_defaults(settings)
1785
+
1786
+ df = clean_controls(df,settings['pc'],settings['nc'],settings['other'])
1787
+ dependent_df, dependent_variable = process_scores(df, settings['dependent_variable'], settings['plate'], settings['min_cell_count'], settings['agg_type'], settings['transform'])
1788
+ display(dependent_df)
1789
+
1790
+ independent_df = precess_reads(settings['gene_weights_csv'], settings['fraction_threshold'], settings['plate'])
1791
+ display(independent_df)
1792
+
1793
+ merged_df = pd.merge(independent_df, dependent_df, on='prc')
1794
+
1795
+ merged_df[['plate', 'row', 'column']] = merged_df['prc'].str.split('_', expand=True)
1796
+
1797
+ plate_heatmap = _plot_plates(df, variable=dependent_variable, grouping='mean', min_max='allq', cmap='viridis', min_count=settings['min_cell_count'])
1798
+
1799
+ model, coef_df = regression(merged_df, settings['gene_weights_csv'], dependent_variable, settings['regression_type'], settings['alpha'], settings['remove_row_column_effect'])
1800
+
1801
+ coef_df.to_csv(results_path, index=False)
1802
+
1803
+ if settings['regression_type'] == 'lasso':
1804
+ significant = coef_df[coef_df['coefficient'] > 0]
1805
+
1806
+ else:
1807
+ significant = coef_df[coef_df['p_value']<= 0.05]
1808
+ #significant = significant[significant['coefficient'] > 0.1]
1809
+ significant.sort_values(by='coefficient', ascending=False, inplace=True)
1810
+ significant = significant[~significant['feature'].str.contains('row|column')]
1811
+
1812
+ if settings['regression_type'] == 'ols':
1813
+ print(model.summary())
1814
+
1815
+ significant.to_csv(hits_path, index=False)
1816
+ print('Significant Genes')
1817
+ display(significant)
1818
+ return coef_df
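
The final hit selection in perform_regression keeps coefficients with p <= 0.05 (or, for lasso, any positive coefficient), drops row/column terms, and sorts by effect size before writing the *_results_significant.csv file. Taken in isolation, with a hypothetical coef_df standing in for the real output, that step looks like:

    # Stand-alone sketch of the hit-selection step (hypothetical coef_df contents).
    import pandas as pd

    coef_df = pd.DataFrame({
        'feature': ['fraction:gene[geneA]', 'fraction:grna[g1]', 'row[T.r2]'],
        'coefficient': [0.8, -0.3, 0.5],
        'p_value': [0.01, 0.20, 0.001],
    })

    significant = coef_df[coef_df['p_value'] <= 0.05]                               # p-value filter
    significant = significant[~significant['feature'].str.contains('row|column')]   # drop plate-position terms
    significant = significant.sort_values(by='coefficient', ascending=False)        # strongest effects first
    print(significant)
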
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: spacr
3
- Version: 0.0.80
3
+ Version: 0.0.82
4
4
  Summary: Spatial phenotype analysis of crisp screens (SpaCr)
5
5
  Home-page: https://github.com/EinarOlafsson/spacr
6
6
  Author: Einar Birnir Olafsson
@@ -38,6 +38,7 @@ Requires-Dist: PyWavelets <2.0,>=1.6.0
38
38
  Requires-Dist: torchcam <1.0,>=0.4.0
39
39
  Requires-Dist: ttf-opensans >=2020.10.30
40
40
  Requires-Dist: customtkinter <6.0,>=5.2.2
41
+ Requires-Dist: biopython <2.0,>=1.80
41
42
  Requires-Dist: lxml <6.0,>=5.1.0
42
43
  Provides-Extra: dev
43
44
  Requires-Dist: pytest >=3.9 ; extra == 'dev'
@@ -4,7 +4,7 @@ spacr/alpha.py,sha256=Y95sLEfpK2OSYKRn3M8eUOU33JJeXfV8zhrC4KnwSTY,35244
4
4
  spacr/annotate_app.py,sha256=w7t7Zilu31FSIRDKtIPae8X4MZGez3cJugFM3rOmnlQ,20617
5
5
  spacr/chris.py,sha256=YlBjSgeZaY8HPy6jkrT_ISAnCMAKVfvCxF0I9eAZLFM,2418
6
6
  spacr/cli.py,sha256=507jfOOEV8BoL4eeUcblvH-iiDHdBrEVJLu1ghAAPSc,1800
7
- spacr/core.py,sha256=CHtBCYnx-oIU7f78X8QBMrVtHtaU0Dwu12zpYouUa7E,155454
7
+ spacr/core.py,sha256=L2z9HmB0TjrwTQ-iDfoacQ9BClqfCeVEJQQbKkP3Yas,155517
8
8
  spacr/deep_spacr.py,sha256=ljIakns6q74an5QwDU7j0xoj6jRCAz-ejY0QHj9X0d8,33193
9
9
  spacr/foldseek.py,sha256=YIP1d4Ci6CeA9jSyiv-HTDbNmAmcSM9Y_DaOs7wYzLY,33546
10
10
  spacr/get_alfafold_structures.py,sha256=ehx_MQgb12k3hFecP6cYVlm5TLO8iWjgevy8ESyS3cw,3544
@@ -16,21 +16,21 @@ spacr/gui_mask_app.py,sha256=WKkAH0jv-SnfaZdJ8MkC7mkUIVSSrNE8lUfH3QBvUak,9747
16
16
  spacr/gui_measure_app.py,sha256=5vjjds5NFaOcE8XeuWDug9k-NI4jbTrwp54sJ7DNaNI,9625
17
17
  spacr/gui_sim_app.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
18
18
  spacr/gui_utils.py,sha256=JRWwmGEEVSPgs0UtZRukdNwIUJepbP675_Fvs5qocPk,49718
19
- spacr/io.py,sha256=Ix0nzh-4n4f4mIayxDF6YVBAmP_mTckrueCJ81uCP7s,105040
19
+ spacr/io.py,sha256=WOKkFA-Npc22EeWJVxYPhCrbqjyEGz4cTih53eAxhMM,109690
20
20
  spacr/logger.py,sha256=7Zqr3TuuOQLWT32gYr2q1qvv7x0a2JhLANmZcnBXAW8,670
21
21
  spacr/mask_app.py,sha256=jlKmj_evveIkkyH3PYEcAshcLXN0DOPWB1oc4hAwq9E,44201
22
22
  spacr/measure.py,sha256=-pR43dO1MPiwIa7zACcWyNTBpHYDyiYFV_6sTo3qqRk,54975
23
23
  spacr/old_code.py,sha256=jw67DAGoLBd7mWofVzRJSEmCI1Qrff26zIo65SEkV00,13817
24
24
  spacr/plot.py,sha256=fnswxUXHwSLmxRpqSAmoUl5ln-_ueYPeYQlDmiYSwzQ,63299
25
- spacr/sequencing.py,sha256=TWQtylArdWZCYcjYrvfy7AAZdVprCMwXc1WMEavw10E,50987
25
+ spacr/sequencing.py,sha256=xS-0n_Du_zK0jIt2HE5GUCXij9CpWquXdk8E19xiMWo,82310
26
26
  spacr/sim.py,sha256=FveaVgBi3eypO2oVB5Dx-v0CC1Ny7UPfXkJiiRRodAk,71212
27
27
  spacr/timelapse.py,sha256=5TNmkzR_urMxy0eVB4quGdjNj2QduyiwrLL2I-udlAg,39614
28
28
  spacr/utils.py,sha256=3cA3qUNf7l_VEeuhype2kI7B5IoYK0hb6Y31Q6Si3ds,184107
29
29
  spacr/version.py,sha256=axH5tnGwtgSnJHb5IDhiu4Zjk5GhLyAEDRe-rnaoFOA,409
30
30
  spacr/models/cp/toxo_pv_lumen.CP_model,sha256=2y_CindYhmTvVwBH39SNILF3rI3x9SsRn6qrMxHy3l0,26562451
31
- spacr-0.0.80.dist-info/LICENSE,sha256=SR-2MeGc6SCM1UORJYyarSWY_A-JaOMFDj7ReSs9tRM,1083
32
- spacr-0.0.80.dist-info/METADATA,sha256=ZvBlLVEEUqE0JiUTbokhpMPI33nHAzvY2Ahmg1WueLk,5121
33
- spacr-0.0.80.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
34
- spacr-0.0.80.dist-info/entry_points.txt,sha256=xncHsqD9MI5wj0_p4mgZlrB8dHm_g_qF0Ggo1c78LqY,315
35
- spacr-0.0.80.dist-info/top_level.txt,sha256=GJPU8FgwRXGzKeut6JopsSRY2R8T3i9lDgya42tLInY,6
36
- spacr-0.0.80.dist-info/RECORD,,
31
+ spacr-0.0.82.dist-info/LICENSE,sha256=SR-2MeGc6SCM1UORJYyarSWY_A-JaOMFDj7ReSs9tRM,1083
32
+ spacr-0.0.82.dist-info/METADATA,sha256=iiIvFLIDU5M7F8VIuGMWah3zpzjbQv_a8kIlIikp174,5158
33
+ spacr-0.0.82.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
34
+ spacr-0.0.82.dist-info/entry_points.txt,sha256=xncHsqD9MI5wj0_p4mgZlrB8dHm_g_qF0Ggo1c78LqY,315
35
+ spacr-0.0.82.dist-info/top_level.txt,sha256=GJPU8FgwRXGzKeut6JopsSRY2R8T3i9lDgya42tLInY,6
36
+ spacr-0.0.82.dist-info/RECORD,,