tol-sdk 1.7.0__py3-none-any.whl → 1.7.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
tol/api_client/client.py CHANGED
@@ -392,7 +392,7 @@ class JsonApiClient(HttpClient):
392
392
  return r.json()
393
393
 
394
394
  def __detail_url(self, object_type: str, object_id: str) -> str:
395
- return f'{self.__data_url}/{object_type}/{quote(object_id)}'
395
+ return f'{self.__data_url}/{object_type}/{quote(str(object_id))}'
396
396
 
397
397
  def __list_url(self, object_type: str) -> str:
398
398
  return f'{self.__data_url}/{object_type}'
@@ -424,7 +424,7 @@ class JsonApiClient(HttpClient):
424
424
 
425
425
  hop_string = '/'.join(relationship_hops)
426
426
  base_url = (
427
- f'{self.__data_url}/{object_type}:to-one/{quote(object_id)}'
427
+ f'{self.__data_url}/{object_type}:to-one/{quote(str(object_id))}'
428
428
  )
429
429
  return f'{base_url}/{hop_string}'
430
430
 
@@ -434,9 +434,8 @@ class JsonApiClient(HttpClient):
434
434
  object_id: str,
435
435
  relationship_name: str
436
436
  ) -> str:
437
-
438
437
  base_url = (
439
- f'{self.__data_url}/{object_type}:to-many/{quote(object_id)}'
438
+ f'{self.__data_url}/{object_type}:to-many/{quote(str(object_id))}'
440
439
  )
441
440
  return f'{base_url}/{relationship_name}'
442
441
 
@@ -23,11 +23,11 @@ Output: Table with cols:
23
23
  3) submission_sample_id: [character] Foreign key to other entities and results in Benchling. Origin: BWH
24
24
  4) eln_file_registry_id: [character] id in Benchling Registry. Origin: BWH
25
25
  5) extraction_id: [character] Original DNA extract entity name. For pooled samples, the first DNA extract pooled. Origin: BWH
26
- 6) pooled_sample_id: [character] DNA pooled sample id. Present only if the submitted sample was pooled. Origin: BWH
27
- 7) submission_sample_name: [character] Entity name. Origin: BWH
28
- 8) fluidx_id: [character] Container barcode of the DNA fluidx tube. Origin: BWH
29
- 9) programme_id: [character] ToLID. Origin: BWH
30
- 10) specimen_id: [character] Specimen ID. Origin: STS
26
+ 6) submission_sample_name: [character] Entity name. Origin: BWH
27
+ 7) fluidx_id: [character] Container barcode of the DNA fluidx tube. Origin: BWH
28
+ 8) programme_id: [character] ToLID. Origin: BWH
29
+ 9) specimen_id: [character] Specimen ID. Origin: STS
30
+ 10) tube_name: [character] Name of the submission tube/container.
31
31
  11) sanger_sample_id: [character] Sanger Sample ID or Sanger UUID of the PacBio submission.
32
32
  12) plate_name: [character] Name of submission plate.
33
33
  13) pipeline: [character] name of the submission pipeline.
@@ -47,12 +47,12 @@ Output: Table with cols:
47
47
  27) priority: [character]
48
48
  28) completion_date: [Date]
49
49
  29) sequencing_platform: [character] Sequencing platform: pacbio.
50
- 30) source: [character] Data source: v1, v2, legacy_bnt or v1_pooled
50
+ 30) source: [character] Data source: v1, v1_pooled, v2, v2_pooled, legacy_bnt
51
51
 
52
52
  NOTES:
53
53
 
54
54
  1) Data types were casted explicitly to conserved the data type stored in BWH.
55
- 2) To add the Fluidx ID of the origininal DNA extract a few filters were applied to
55
+ 2) To add the Fluidx ID of the original DNA extract a few filters were applied to
56
56
  delete Vouchers, tubes archived because they were made in error, and
57
57
  invalid container names.
58
58
  3) Pooled samples must be added as an independent CTE because the filters for DNA fluidx tubes
@@ -76,7 +76,12 @@ pacbio_submissions_container_routine AS (
76
76
  c_dna.barcode AS fluidx_id,
77
77
  t.programme_id,
78
78
  t.specimen_id,
79
- con.name AS sanger_sample_id,
79
+ con.name AS tube_name,
80
+ CASE
81
+ WHEN pbsum.submission_date < DATE '2025-09-01'
82
+ THEN con.name
83
+ ELSE ssid.sanger_sample_id
84
+ END AS sanger_sample_id,
80
85
  NULL::varchar AS plate_name,
81
86
  NULL::varchar AS pipeline,
82
87
  pbsum.sequencing_type_please_fill AS library_type,
@@ -119,17 +124,19 @@ pacbio_submissions_container_routine AS (
119
124
  ON subsam.project_id$ = proj.id
120
125
  LEFT JOIN folder$raw AS f
121
126
  ON subsam.folder_id$ = f.id
127
+ LEFT JOIN sanger_sample_id$raw AS ssid
128
+ ON con.id = ssid.sample_tube
122
129
  WHERE pbsum.archived$ = FALSE -- Excluding archived submission containers
123
130
  -- Filters to add DNA extract fluidx tubes
124
131
  AND tube.type IS NULL -- Selecting non-Voucher containers
125
132
  AND (c_dna.archive_purpose$ != ('Made in error') OR c_dna.archive_purpose$ IS NULL) -- Excluding containers made by mistake
126
133
  AND c_dna.barcode LIKE 'F%' -- Selecting only valid FluidX IDs
127
- AND proj.name = 'ToL Core Lab' -- Selecting ToL Core Lab sbmissions only
134
+ AND proj.name = 'ToL Core Lab' -- Selecting ToL Core Lab submissions only
128
135
  AND f.name IN ('Routine Throughput', 'PacBio prep', 'Submissions', 'Core Lab Entities', 'Benchling MS Project Move')
129
136
  ),
130
137
 
131
- pacbio_submissions_container_pooled_deprecated AS (
132
-
138
+ pacbio_submissions_container_pooled AS (
139
+
133
140
  SELECT DISTINCT
134
141
  t.sts_id,
135
142
  t.taxon_id,
@@ -140,8 +147,13 @@ pacbio_submissions_container_pooled_deprecated AS (
140
147
  subsam.name$ AS eln_submission_sample_name,
141
148
  c_pool.barcode AS fluidx_id,
142
149
  t.programme_id,
143
- t.specimen_id,
144
- con.name AS sanger_sample_id,
150
+ t.specimen_id,
151
+ con.name AS tube_name,
152
+ CASE
153
+ WHEN pbsum.submission_date < DATE '2025-09-01'
154
+ THEN con.name
155
+ ELSE ssid.sanger_sample_id
156
+ END AS sanger_sample_id,
145
157
  NULL::varchar AS plate_name,
146
158
  NULL::varchar AS pipeline,
147
159
  pbsum.sequencing_type_please_fill AS library_type,
@@ -186,6 +198,8 @@ pacbio_submissions_container_pooled_deprecated AS (
186
198
  ON subsam.project_id$ = proj.id
187
199
  LEFT JOIN folder$raw AS f
188
200
  ON subsam.folder_id$ = f.id
201
+ LEFT JOIN sanger_sample_id$raw AS ssid
202
+ ON con.id = ssid.sample_tube
189
203
  WHERE pbsum.archived$ = FALSE -- Excluding archived submission containers
190
204
  -- Filters to add DNA extract fluidx tubes
191
205
  AND tube.type IS NULL -- Selecting non-Voucher containers
@@ -195,70 +209,6 @@ pacbio_submissions_container_pooled_deprecated AS (
195
209
  AND f.name IN ('Routine Throughput', 'PacBio prep', 'Submissions', 'Core Lab Entities', 'Benchling MS Project Move')
196
210
  ),
197
211
 
198
- pacbio_submissions_container_pooled AS (
199
-
200
- SELECT DISTINCT
201
- t.sts_id,
202
- t.taxon_id,
203
- tp.id AS tissue_prep_id,
204
- subsam.id AS eln_submission_sample_id,
205
- subsam.file_registry_id$ AS eln_file_registry_id,
206
- subsam.pooled_sample AS extraction_id,
207
- subsam.name$ AS submission_sample_name,
208
- c_pool.barcode AS fluidx_id,
209
- t.programme_id,
210
- t.specimen_id,
211
- con.name AS sanger_sample_id,
212
- plt.name AS plate_name,
213
- pbsubm_p.pipeline,
214
- pbsubm_p.library_type,
215
- pbsubm_p.retention_instructions,
216
- pbsubm_p.gb_yield_of_ccs_data_required,
217
- pbsubm_p.number_of_smrt_cells_required,
218
- pbsubm_p.sheared_femto_fragment_size_bp,
219
- pbsubm_p.post_spri_concentration_ngul,
220
- pbsubm_p.post_spri_volume_ul,
221
- pbsubm_p.nanodrop_260280,
222
- pbsubm_p.nanodrop_260230,
223
- pbsubm_p.nanodrop_concentration_ngul,
224
- pbsubm_p.sample_prep_additional_requirements,
225
- pbsubm_p.include_5mc_cells_in_cpg_motifs,
226
- pbsubm_p.cc5_output_includes_kinetics_information,
227
- pbsubm_p.priority,
228
- DATE(pbsubm_p.created_at$) AS completion_date,
229
- 'pacbio'::varchar AS sequencing_platform,
230
- 'v1_pooled'::varchar AS source
231
- FROM pacbio_submission_plate_output$raw AS pbsubm_p
232
- LEFT JOIN submission_samples$raw AS subsam
233
- ON pbsubm_p.sample_name = subsam.id
234
- LEFT JOIN pooled_samples$raw AS pool
235
- ON subsam.pooled_sample = pool.id
236
- LEFT JOIN dna_extract$raw AS dna -- Chunk to add Tissue metadata
237
- ON pool.samples ->> 0 = dna.id
238
- LEFT JOIN tissue_prep$raw AS tp
239
- ON dna.tissue_prep = tp.id
240
- LEFT JOIN tissue$raw AS t
241
- ON tp.tissue = t.id -- End of Tissue metadata Chunk
242
- LEFT JOIN container_content$raw AS cc_pool -- Chunk to add DNA fluidx id
243
- ON pool.id = cc_pool.entity_id
244
- LEFT JOIN container$raw AS c_pool
245
- ON cc_pool.container_id = c_pool.id
246
- LEFT JOIN tube$raw AS tube
247
- ON c_pool.id = tube.id -- End of DNA fluidx id Chunk
248
- LEFT JOIN container$raw AS con -- To add sanger uuid
249
- ON pbsubm_p.sanger_uuid ->> 0 = con.id
250
- LEFT JOIN plate$raw AS plt
251
- ON con.plate_id = plt.id
252
- LEFT JOIN project$raw AS proj
253
- ON subsam.project_id$ = proj.id
254
- LEFT JOIN folder$raw AS f
255
- ON subsam.folder_id$ = f.id
256
- WHERE subsam.pooled_sample IS NOT NULL
257
- AND proj.name = 'ToL Core Lab'
258
- AND f.name IN ('Routine Throughput', 'PacBio prep', 'Submissions', 'Core Lab Entities', 'Benchling MS Project Move', 'R&D Sample Processing Requests')
259
- AND pbsubm_p.archived$ = FALSE
260
- ),
261
-
262
212
  pacbio_submissions_container_legacy_deprecated AS (
263
213
 
264
214
  SELECT DISTINCT
@@ -272,6 +222,7 @@ pacbio_submissions_container_legacy_deprecated AS (
272
222
  c_dna.barcode AS fluidx_id,
273
223
  t.programme_id,
274
224
  t.specimen_id,
225
+ con.name AS tube_name,
275
226
  con.name AS sanger_sample_id,
276
227
  NULL::varchar AS plate_name,
277
228
  NULL::varchar AS pipeline,
@@ -335,10 +286,11 @@ pacbio_submissions_plate_automated_manifest AS (
335
286
  c_dna.barcode AS fluidx_id,
336
287
  t.programme_id,
337
288
  t.specimen_id,
289
+ con.name AS tube_name,
338
290
  con.name AS sanger_sample_id,
339
291
  plt.name AS plate_name,
340
292
  pbsubm_p.pipeline,
341
- NULL::varchar AS library_type,
293
+ pbsubm_p.library_type,
342
294
  pbsubm_p.retention_instructions,
343
295
  pbsubm_p.gb_yield_of_ccs_data_required,
344
296
  pbsubm_p.number_of_smrt_cells_required,
@@ -352,7 +304,7 @@ pacbio_submissions_plate_automated_manifest AS (
352
304
  pbsubm_p.include_5mc_cells_in_cpg_motifs,
353
305
  pbsubm_p.cc5_output_includes_kinetics_information,
354
306
  pbsubm_p.priority,
355
- pbsubm_p.created_at$ AS completion_date,
307
+ DATE(pbsubm_p.created_at$) AS completion_date,
356
308
  'pacbio'::varchar AS sequencing_platform,
357
309
  'v2'::varchar AS source
358
310
  FROM pacbio_submission_plate_output$raw AS pbsubm_p
@@ -388,7 +340,73 @@ pacbio_submissions_plate_automated_manifest AS (
388
340
  AND f.name IN ('Routine Throughput', 'PacBio prep', 'Submissions', 'Core Lab Entities', 'Benchling MS Project Move', 'R&D Sample Processing Requests')
389
341
  ),
390
342
 
343
+ pacbio_submissions_plate_automated_manifest_pooled AS (
344
+
345
+ SELECT DISTINCT
346
+ t.sts_id,
347
+ t.taxon_id,
348
+ tp.id AS tissue_prep_id,
349
+ subsam.id AS eln_submission_sample_id,
350
+ subsam.file_registry_id$ AS eln_file_registry_id,
351
+ subsam.pooled_sample AS extraction_id,
352
+ subsam.name$ AS submission_sample_name,
353
+ c_pool.barcode AS fluidx_id,
354
+ t.programme_id,
355
+ t.specimen_id,
356
+ con.name AS tube_name,
357
+ con.name AS sanger_sample_id,
358
+ plt.name AS plate_name,
359
+ pbsubm_p.pipeline,
360
+ pbsubm_p.library_type,
361
+ pbsubm_p.retention_instructions,
362
+ pbsubm_p.gb_yield_of_ccs_data_required,
363
+ pbsubm_p.number_of_smrt_cells_required,
364
+ pbsubm_p.sheared_femto_fragment_size_bp,
365
+ pbsubm_p.post_spri_concentration_ngul,
366
+ pbsubm_p.post_spri_volume_ul,
367
+ pbsubm_p.nanodrop_260280,
368
+ pbsubm_p.nanodrop_260230,
369
+ pbsubm_p.nanodrop_concentration_ngul,
370
+ pbsubm_p.sample_prep_additional_requirements,
371
+ pbsubm_p.include_5mc_cells_in_cpg_motifs,
372
+ pbsubm_p.cc5_output_includes_kinetics_information,
373
+ pbsubm_p.priority,
374
+ DATE(pbsubm_p.created_at$) AS completion_date,
375
+ 'pacbio'::varchar AS sequencing_platform,
376
+ 'v2_pooled'::varchar AS source
377
+ FROM pacbio_submission_plate_output$raw AS pbsubm_p
378
+ LEFT JOIN submission_samples$raw AS subsam
379
+ ON pbsubm_p.sample_name = subsam.id
380
+ LEFT JOIN pooled_samples$raw AS pool
381
+ ON subsam.pooled_sample = pool.id
382
+ LEFT JOIN dna_extract$raw AS dna -- Chunk to add Tissue metadata
383
+ ON pool.samples ->> 0 = dna.id
384
+ LEFT JOIN tissue_prep$raw AS tp
385
+ ON dna.tissue_prep = tp.id
386
+ LEFT JOIN tissue$raw AS t
387
+ ON tp.tissue = t.id -- End of Tissue metadata Chunk
388
+ LEFT JOIN container_content$raw AS cc_pool -- Chunk to add DNA fluidx id
389
+ ON pool.id = cc_pool.entity_id
390
+ LEFT JOIN container$raw AS c_pool
391
+ ON cc_pool.container_id = c_pool.id
392
+ LEFT JOIN tube$raw AS tube
393
+ ON c_pool.id = tube.id -- End of DNA fluidx id Chunk
394
+ LEFT JOIN container$raw AS con -- To add sanger uuid
395
+ ON pbsubm_p.sanger_uuid ->> 0 = con.id
396
+ LEFT JOIN plate$raw AS plt
397
+ ON con.plate_id = plt.id
398
+ LEFT JOIN project$raw AS proj
399
+ ON subsam.project_id$ = proj.id
400
+ LEFT JOIN folder$raw AS f
401
+ ON subsam.folder_id$ = f.id
402
+ WHERE subsam.pooled_sample IS NOT NULL
403
+ AND proj.name = 'ToL Core Lab'
404
+ AND f.name IN ('Routine Throughput', 'PacBio prep', 'Submissions', 'Core Lab Entities', 'Benchling MS Project Move', 'R&D Sample Processing Requests')
405
+ AND pbsubm_p.archived$ = FALSE
406
+ ),
407
+
391
408
  pacbio_submissions_plate_routine AS (
409
+
392
410
  SELECT
393
411
  t.sts_id,
394
412
  t.taxon_id,
@@ -397,10 +415,11 @@ pacbio_submissions_plate_routine AS (
397
415
  subsam.file_registry_id$ AS eln_file_registry_id,
398
416
  subsam.original_dna_extract AS extraction_id,
399
417
  subsam.name$ AS submission_sample_name,
400
- NULL::varchar AS fluidx_id,
418
+ c_dna.barcode AS fluidx_id,
401
419
  t.programme_id,
402
420
  t.specimen_id,
403
- CAST(pbsubm_p.sanger_sample_id ->>0 AS varchar) AS sanger_sample_id,
421
+ c_subsam.name AS tube_name,
422
+ ssid.sanger_sample_id AS sanger_sample_id,
404
423
  plate.name$ AS plate_name,
405
424
  NULL::varchar AS pipeline,
406
425
  pbsubm_p.sequencing_type AS library_type,
@@ -423,6 +442,10 @@ pacbio_submissions_plate_routine AS (
423
442
  FROM pacbio_sequencing_submission_plate_output$raw AS pbsubm_p
424
443
  LEFT JOIN submission_samples$raw AS subsam
425
444
  ON pbsubm_p.submission_sample = subsam.id
445
+ LEFT JOIN container_content$raw AS cc_subsam -- Chunk to connect SubSam to the well
446
+ ON subsam.id = cc_subsam.entity_id
447
+ LEFT JOIN container$raw AS c_subsam
448
+ ON cc_subsam.container_id = c_subsam.id -- End of connecting SubSam to well
426
449
  LEFT JOIN dna_extract$raw AS dna
427
450
  ON subsam.original_dna_extract = dna.id
428
451
  LEFT JOIN tissue_prep$raw AS tp
@@ -431,17 +454,100 @@ pacbio_submissions_plate_routine AS (
431
454
  ON tp.tissue = t.id
432
455
  LEFT JOIN container$raw AS con
433
456
  ON pbsubm_p.plate_well_id ->>0 = con.id
457
+ LEFT JOIN container_content$raw AS cc_dna -- Chunk to add DNA fluidx id
458
+ ON dna.id = cc_dna.entity_id
459
+ LEFT JOIN container$raw AS c_dna
460
+ ON cc_dna.container_id = c_dna.id
461
+ LEFT JOIN tube$raw AS tube
462
+ ON c_dna.id = tube.id -- End of DNA fluidx id Chunk
434
463
  LEFT JOIN "_96w_pacbio_plate$raw" AS plate
435
464
  ON con.plate_id = plate.id
465
+ LEFT JOIN sanger_sample_id$raw AS ssid
466
+ ON con.id = ssid.sample_tube
467
+ LEFT JOIN project$raw AS proj
468
+ ON subsam.project_id$ = proj.id
469
+ LEFT JOIN folder$raw AS f
470
+ ON subsam.folder_id$ = f.id
471
+ WHERE pbsubm_p.archived$ = FALSE -- Excluding archived submissions
472
+ AND tube.type IS NULL -- Selecting non-Voucher containers
473
+ AND (c_dna.archive_purpose$ != ('Made in error') OR c_dna.archive_purpose$ IS NULL) -- Excluding containers made by mistake
474
+ AND c_dna.barcode LIKE 'F%' -- Selecting only valid FluidX IDs
475
+ AND proj.name = 'ToL Core Lab' -- Selecting ToL Core Lab submissions only
476
+ AND f.name IN ('Routine Throughput', 'PacBio prep', 'Submissions', 'Core Lab Entities', 'Benchling MS Project Move')
477
+ ),
478
+
479
+ pacbio_submissions_plate_routine_pooled AS (
480
+ SELECT
481
+ t.sts_id,
482
+ t.taxon_id,
483
+ tp.id AS tissue_prep_id,
484
+ subsam.id AS submission_sample_id,
485
+ subsam.file_registry_id$ AS eln_file_registry_id,
486
+ subsam.pooled_sample AS extraction_id,
487
+ subsam.name$ AS submission_sample_name,
488
+ c_pool.barcode AS fluidx_id,
489
+ t.programme_id,
490
+ t.specimen_id,
491
+ c_subsam.name AS tube_name,
492
+ ssid.sanger_sample_id AS sanger_sample_id,
493
+ plate.name$ AS plate_name,
494
+ NULL::varchar AS pipeline,
495
+ pbsubm_p.sequencing_type AS library_type,
496
+ NULL::varchar AS retention_instructions,
497
+ NULL::float8 AS gb_yield_of_ccs_data_required,
498
+ pbsubm_p.number_of_smrt_cells_required,
499
+ NULL::float8 AS sheared_femto_fragment_size_bp,
500
+ NULL::float8 AS post_spri_concentration_ngul,
501
+ NULL::JSONB AS post_spri_volume_ul,
502
+ NULL::float8 AS nanodrop_260280,
503
+ NULL::float8 AS nanodrop_260230,
504
+ NULL::float8 AS nanodrop_concentration_ngul,
505
+ NULL::varchar AS sample_prep_additional_requirements,
506
+ NULL::varchar AS include_5mc_cells_in_cpg_motifs,
507
+ NULL::varchar AS cc5_output_includes_kinetics_information,
508
+ NULL::varchar AS priority,
509
+ pbsubm_p.created_at$ AS completion_date,
510
+ 'pacbio'::varchar AS sequencing_platform,
511
+ 'v2'::varchar AS SOURCE
512
+ FROM pacbio_sequencing_submission_plate_output$raw AS pbsubm_p
513
+ LEFT JOIN submission_samples$raw AS subsam
514
+ ON pbsubm_p.submission_sample = subsam.id
515
+ LEFT JOIN container_content$raw AS cc_subsam -- Connect SubSam to the well
516
+ ON subsam.id = cc_subsam.entity_id
517
+ LEFT JOIN container$raw AS c_subsam
518
+ ON cc_subsam.container_id = c_subsam.id -- End of chunk to connect subsam to the well
519
+ LEFT JOIN container$raw AS con -- Chunk to get plate ID
520
+ ON pbsubm_p.plate_well_id ->>0 = con.id
521
+ LEFT JOIN "_96w_pacbio_plate$raw" AS plate
522
+ ON con.plate_id = plate.id -- End of chunk to get the plate ID
523
+ LEFT JOIN sanger_sample_id$raw AS ssid
524
+ ON con.id = ssid.sample_tube
525
+ LEFT JOIN pooled_samples$raw AS pool
526
+ ON subsam.pooled_sample = pool.id
527
+ LEFT JOIN container_content$raw AS cc_pool -- Chunk to connect pooled sample to the FluidX tube
528
+ ON pool.id = cc_pool.entity_id
529
+ LEFT JOIN container$raw AS c_pool
530
+ ON cc_pool.container_id = c_pool.id -- End of chunk to connect pooled sample to the FluidX tube
531
+ LEFT JOIN dna_extract$raw AS dna -- Chunk to add Tissue metadata
532
+ ON pool.samples ->> 0 = dna.id
533
+ LEFT JOIN tissue_prep$raw AS tp
534
+ ON dna.tissue_prep = tp.id
535
+ LEFT JOIN tissue$raw AS t
536
+ ON tp.tissue = t.id -- End of Tissue metadata Chunk
537
+ LEFT JOIN project$raw AS proj
538
+ ON subsam.project_id$ = proj.id
539
+ LEFT JOIN folder$raw AS f
540
+ ON subsam.folder_id$ = f.id
541
+ WHERE subsam.pooled_sample IS NOT NULL
542
+ AND pbsubm_p.archived$ = FALSE
543
+ AND proj.name = 'ToL Core Lab' -- Selecting ToL Core Lab submissions only
544
+ AND f.name IN ('Routine Throughput', 'PacBio prep', 'Submissions', 'Core Lab Entities', 'Benchling MS Project Move')
436
545
  )
437
546
 
438
547
  SELECT *
439
548
  FROM pacbio_submissions_container_routine
440
549
  UNION
441
550
  SELECT *
442
- FROM pacbio_submissions_container_pooled_deprecated
443
- UNION
444
- SELECT *
445
551
  FROM pacbio_submissions_container_pooled
446
552
  UNION
447
553
  SELECT *
@@ -449,7 +555,13 @@ FROM pacbio_submissions_container_legacy_deprecated
449
555
  UNION
450
556
  SELECT *
451
557
  FROM pacbio_submissions_plate_automated_manifest
558
+ UNION
559
+ SELECT *
560
+ FROM pacbio_submissions_plate_automated_manifest_pooled
452
561
  UNION
453
562
  SELECT *
454
563
  FROM pacbio_submissions_plate_routine
564
+ UNION
565
+ SELECT *
566
+ FROM pacbio_submissions_plate_routine_pooled
455
567
  ORDER BY source DESC
@@ -462,7 +462,7 @@ class MlwhDataSource(DataSource, DetailGetter, ListGetter):
462
462
  return "','".join([str(s) for s in values])
463
463
 
464
464
  def _conditions_string(self, platform_type: str, in_list: Dict):
465
- if in_list is None:
465
+ if not in_list:
466
466
  return '1=1' # Something to go with the where clause
467
467
  sql_conditions = []
468
468
  if platform_type.lower() == 'illumina':
@@ -484,7 +484,7 @@ class MlwhDataSource(DataSource, DetailGetter, ListGetter):
484
484
  def _execute_query(self, query, object_type):
485
485
  cur_mlwh = self.mlwh.cursor(dictionary=True)
486
486
  cur_mlwh.execute(query)
487
- for row in cur_mlwh.fetchall():
487
+ for row in cur_mlwh:
488
488
  yield self._format_mlwh_row(object_type, row)
489
489
 
490
490
  def __get_in_lists(self, f: DataSourceFilter):
@@ -4,4 +4,6 @@
4
4
 
5
5
  from .allowed_values import AllowedValues, AllowedValuesValidator # noqa
6
6
  from .allowed_keys import AllowedKeysValidator # noqa
7
+ from .regex import Regex, RegexValidator # noqa
8
+ from .regex_by_value import RegexByValueValidator # noqa
7
9
  from .unique_values import UniqueValuesValidator # noqa
@@ -0,0 +1,109 @@
1
+ # SPDX-FileCopyrightText: 2025 Genome Research Ltd.
2
+ #
3
+ # SPDX-License-Identifier: MIT
4
+
5
+ import re
6
+ from dataclasses import dataclass
7
+ from typing import Any
8
+
9
+ from tol.core import DataObject
10
+ from tol.core.validate import Validator
11
+
12
+
13
+ @dataclass(frozen=True, kw_only=True)
14
+ class Regex:
15
+ key: str
16
+ regex: str
17
+
18
+ is_error: bool = True
19
+ detail: str = 'Value is not allowed for given key'
20
+
21
+ def is_allowed(self, __v: Any) -> bool:
22
+ # Check regex
23
+ return re.search(self.regex, str(__v or ''))
24
+
25
+
26
+ RegexDict = dict[
27
+ str,
28
+ str | bool | list[Any],
29
+ ]
30
+ """Can also specify `Regex` as a `dict`"""
31
+
32
+
33
+ class RegexValidator(Validator):
34
+ """
35
+ Validates an incoming stream of `DataObject` instances
36
+ according to the specified allowed values for a given
37
+ key.
38
+ """
39
+
40
+ def __init__(
41
+ self,
42
+ config: list[Regex | RegexDict]
43
+ ) -> None:
44
+
45
+ super().__init__()
46
+
47
+ self.__config = self.__get_config(config)
48
+
49
+ def _validate_data_object(
50
+ self,
51
+ obj: DataObject
52
+ ) -> None:
53
+
54
+ for k, v in obj.attributes.items():
55
+ self.__validate_attribute(obj, k, v)
56
+
57
+ def __get_config(
58
+ self,
59
+ config: list[Regex | RegexDict],
60
+ ) -> list[Regex]:
61
+
62
+ # Ensure config is in Regex format
63
+ # (as you can either pass in a list of Regex or a RegexDict,
64
+ # which can be used to initialize a Regex)
65
+ return [
66
+ c if isinstance(c, Regex) else Regex(**c)
67
+ for c in config
68
+ ]
69
+
70
+ def __validate_attribute(
71
+ self,
72
+ obj: DataObject,
73
+ key: str,
74
+ value: Any,
75
+ ) -> None:
76
+
77
+ config = self.__filter_config(key)
78
+
79
+ for c in config:
80
+ if not c.is_allowed(value):
81
+ self.__add_result(obj, c)
82
+
83
+ def __filter_config(
84
+ self,
85
+ key: str,
86
+ ) -> list[Regex]:
87
+ return [
88
+ a for a in self.__config
89
+ if a.key == key
90
+ ]
91
+
92
+ def __add_result(
93
+ self,
94
+ obj: DataObject,
95
+ c: Regex,
96
+ ) -> None:
97
+
98
+ if c.is_error:
99
+ self.add_error(
100
+ object_id=obj.id,
101
+ detail=c.detail,
102
+ field=c.key
103
+ )
104
+ else:
105
+ self.add_warning(
106
+ object_id=obj.id,
107
+ detail=c.detail,
108
+ field=c.key,
109
+ )
@@ -0,0 +1,99 @@
1
+ # SPDX-FileCopyrightText: 2025 Genome Research Ltd.
2
+ #
3
+ # SPDX-License-Identifier: MIT
4
+
5
+ from typing import Any
6
+
7
+ from tol.core import DataObject
8
+ from tol.core.validate import Validator
9
+
10
+ from .regex import Regex
11
+
12
+ RegexDict = dict[
13
+ str,
14
+ str | bool | list[Any],
15
+ ]
16
+ Config = dict[str, str | dict[str, list[Regex | RegexDict]]]
17
+
18
+ """Can also specify `Regex` as a `dict`"""
19
+
20
+
21
+ class RegexByValueValidator(Validator):
22
+ """
23
+ Validates an incoming stream of `DataObject` instances
24
+ according to the specified allowed values for a given
25
+ key.
26
+ """
27
+
28
+ def __init__(
29
+ self,
30
+ config: dict[str, str | list[str]]
31
+ ) -> None:
32
+
33
+ super().__init__()
34
+
35
+ self.__config = self.__get_config(config)
36
+
37
+ def __get_config(
38
+ self,
39
+ config: Config,
40
+ ) -> Config:
41
+
42
+ return {
43
+ 'key_column': config['key_column'],
44
+ 'regexes': {
45
+ k: [
46
+ # Ensure they're all in Regex format
47
+ # (as you can either pass in a list of Regex or a RegexDict,
48
+ # which can be used to initialize a Regex)
49
+ c if isinstance(c, Regex) else Regex(**c)
50
+ for c in v
51
+ ]
52
+ for k, v in config['regexes'].items()
53
+ }
54
+ }
55
+
56
+ def _validate_data_object(
57
+ self,
58
+ obj: DataObject
59
+ ) -> None:
60
+ # Pull out value of the 'key_column' attribute
61
+ key_column_value = obj.attributes.get(self.__config['key_column'])
62
+ if not key_column_value:
63
+ return
64
+
65
+ # Pull out relevant regex list based on this value: {[{'name': 'regex'}]}
66
+ regex_list = self.__config['regexes'].get(key_column_value)
67
+ if not regex_list:
68
+ return
69
+ self.__validate_attribute(obj, regex_list)
70
+
71
+ def __validate_attribute(
72
+ self,
73
+ obj: DataObject,
74
+ regexes: list[Regex],
75
+ ) -> None:
76
+ for r in regexes:
77
+ attribute_name = r.key
78
+ value = obj.attributes.get(attribute_name)
79
+ if not r.is_allowed(value):
80
+ self.__add_result(obj, r)
81
+
82
+ def __add_result(
83
+ self,
84
+ obj: DataObject,
85
+ c: Regex,
86
+ ) -> None:
87
+
88
+ if c.is_error:
89
+ self.add_error(
90
+ object_id=obj.id,
91
+ detail=c.detail,
92
+ field=c.key
93
+ )
94
+ else:
95
+ self.add_warning(
96
+ object_id=obj.id,
97
+ detail=c.detail,
98
+ field=c.key,
99
+ )
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: tol-sdk
3
- Version: 1.7.0
3
+ Version: 1.7.2
4
4
  Summary: SDK for interaction with ToL, Sanger and external services
5
5
  Author-email: ToL Platforms Team <tol-platforms@sanger.ac.uk>
6
6
  License: MIT
@@ -37,7 +37,7 @@ Requires-Dist: atlassian-python-api==3.41.14; extra == "jira"
37
37
  Provides-Extra: json
38
38
  Requires-Dist: minio==7.2.15; extra == "json"
39
39
  Provides-Extra: mysql
40
- Requires-Dist: mysql-connector-python; extra == "mysql"
40
+ Requires-Dist: mysql-connector-python==9.5.0; extra == "mysql"
41
41
  Provides-Extra: postgresql
42
42
  Requires-Dist: SQLAlchemy==2.0.35; extra == "postgresql"
43
43
  Requires-Dist: psycopg2-binary==2.9.9; extra == "postgresql"
@@ -31,7 +31,7 @@ tol/api_base/misc/relation_url.py,sha256=qfo-okp8Gv9-PEDghMfGZ2pHdYbHRhohvA9v3Go
31
31
  tol/api_base/misc/stats_parameters.py,sha256=IVpHqUeGQyjuih59jwqT-fIQMCBeESi2T9b4r9i4J28,1721
32
32
  tol/api_client/__init__.py,sha256=58SAywuMrIUCBAY9us_d_RLTMnaUTYWWts0LRQC5wLo,187
33
33
  tol/api_client/api_datasource.py,sha256=GOHvAmFzrHdux2wxY-MwUbp6eWbbS01L7FvmVyXJVZM,14330
34
- tol/api_client/client.py,sha256=hy1MT1iDCDDOOZfhsD5-8vA7ksf56BIoe96UCiqQ06Y,13965
34
+ tol/api_client/client.py,sha256=gcnX4iCZtjnCC6qylizXxLe3l6xLhME6LEJH0UeW7V4,13979
35
35
  tol/api_client/converter.py,sha256=X6VPk4nrvmXF8EOXy36sn1nvPvYTBYKZ66ofxyQbaY8,4681
36
36
  tol/api_client/exception.py,sha256=MkvJaIyRVCzQ2rKOYnCOcT747mpOeQwGJJl3Kkb1BsQ,3999
37
37
  tol/api_client/factory.py,sha256=DIYFmFsQlwOCrUfexiEMBN3ovnreMqUFYNU8hcNvSao,3405
@@ -59,7 +59,7 @@ tol/benchling/sql/results_pacbio_prep.sql,sha256=a1tGu9irtsyPlCA0_FxBrqYj_uFiYPM
59
59
  tol/benchling/sql/results_pacbio_prep_pooled.sql,sha256=WZfMZbOeOfD55iDQQEwPNc7mF0gSJfMj94A1m9whLtw,6291
60
60
  tol/benchling/sql/sample.sql,sha256=ZFRXWabV9jjivAECCJz-zi05a5oSzTUQSbT2ssy4sGU,4174
61
61
  tol/benchling/sql/sequencing_request_sequencing_platform_hic.sql,sha256=W5VCnWvR16CJHnljzqdcQKJ8GWoci9QQhYVA2SbNyKk,3044
62
- tol/benchling/sql/sequencing_request_sequencing_platform_pacbio.sql,sha256=I-qx-nZuGRybojEVqIfCpIsyT58nzTUA-3pIcUMQeLo,18373
62
+ tol/benchling/sql/sequencing_request_sequencing_platform_pacbio.sql,sha256=ecJvV_qvZwnWKvr5tS0kZVHySNTEgU0irbklsE6tUfQ,22958
63
63
  tol/benchling/sql/sequencing_request_sequencing_platform_rnaseq.sql,sha256=zZ3d_VLXMHAp3n6agX-Y-Oj6HIzk32IDq5UssJuYPMs,3420
64
64
  tol/benchling/sql/sequencing_request_sequencing_platform_wgs.sql,sha256=RVSR2y_ZYT0j-NDYYAzGSkLwofXy6A_9AfI0RFuztbQ,7062
65
65
  tol/benchling/sql/tissue_prep.sql,sha256=8JAOUaXDc0nu0qJeIYLdTXY5APklSighDoESHtzZ8vw,6141
@@ -230,7 +230,7 @@ tol/labwhere/factory.py,sha256=33ljl5jLZ8bMTXLZauVyKQNxPX5UHgtMyb-NI_9Vemg,2327
230
230
  tol/labwhere/labwhere_datasource.py,sha256=z8h1781yM_zJQXXHEXrGzbSnQmIHEZ3v7gMv67xhpvI,4079
231
231
  tol/labwhere/parser.py,sha256=7C5ZHMqk0gDOUoPM-5KLeQa79pr1Hx69kgzHDY2gc-M,2815
232
232
  tol/mlwh/__init__.py,sha256=fLh6NTRmDi63IpXfUCs9NOc_hLVAkGkoRozjGh36GBU,125
233
- tol/mlwh/mlwh_datasource.py,sha256=IKw0-lRMhGycWnSfWOvWRXkVAS3Ab7qE0-NXDFR_Ue8,25072
233
+ tol/mlwh/mlwh_datasource.py,sha256=TTnPEm1-vGc1qRYAJVN4X2skJcAIowGkd5wzMKIAyus,25057
234
234
  tol/prefect/__init__.py,sha256=VhGEUNR-0Fi5SmLPZzxJt7GYbaIjCFK6GGe786ezNj8,199
235
235
  tol/prefect/converter.py,sha256=YCWgb01QtRPoAgI6C6Gav1Ti69k_TfIXgfMEsrXQLOA,4321
236
236
  tol/prefect/factory.py,sha256=mO4KVnaEYMv-ojGJuiencNQMq6PAMU8cIc4QN5Kq8Gw,2208
@@ -319,13 +319,15 @@ tol/treeval/treeval_datasource.py,sha256=GzY6JwH67b5QdV-UVdCFJfgGAIuZ96J2nl53YxZ
319
319
  tol/utils/__init__.py,sha256=764-Na1OaNGUDWpMIu51ZtXG7n_nB5MccUFK6LmkWRI,138
320
320
  tol/utils/csv.py,sha256=mihww25fSn72c4h-RFeqD_pFIG6KHZP4v1_C0rx81ws,421
321
321
  tol/utils/s3.py,sha256=aoYCwJ-qcMqFrpxmViFqPa0O1jgp0phtztO3-0CSNjw,491
322
- tol/validators/__init__.py,sha256=XXwCt8JPQ5-w2kN1bVjJPLXbS9F4s1nJUnY9jaKdmVk,272
322
+ tol/validators/__init__.py,sha256=bIMjfuRd358nUPLp6fMG9nTs43gM9aA9oY_AINgxkWU,379
323
323
  tol/validators/allowed_keys.py,sha256=BJMomJtaQdxsdGsueDtLewv75TlwdIXiQipLGFcJ7_c,1331
324
324
  tol/validators/allowed_values.py,sha256=yJ5SdiUlV7PSKORtsBJ9hYSqwvlx_esbFmFL_Gxh-p4,2262
325
+ tol/validators/regex.py,sha256=dKodGH0sv6DbqWeV6QXE6-GYjnG4rMO0rg8IEIaQG60,2364
326
+ tol/validators/regex_by_value.py,sha256=o99NJlWPgQ0GrpVnep8-cHfjWnc9F2rChmXHIxjrMrk,2543
325
327
  tol/validators/unique_values.py,sha256=stI-1i006WEbERcjSMapRggJkEF-RFDzw2uUtXBAE_M,1885
326
- tol_sdk-1.7.0.dist-info/licenses/LICENSE,sha256=RF9Jacy-9BpUAQQ20INhTgtaNBkmdTolYCHtrrkM2-8,1077
327
- tol_sdk-1.7.0.dist-info/METADATA,sha256=I8e_3R5_nYW6u3e3CFZgUT-H1Cyo3BwlmIUa6cLlUEI,3072
328
- tol_sdk-1.7.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
329
- tol_sdk-1.7.0.dist-info/entry_points.txt,sha256=jH3HfTwxjzog7E3lq8CKpUWGIRY9FSXbyL6CpUmv6D0,36
330
- tol_sdk-1.7.0.dist-info/top_level.txt,sha256=PwKMQLphyZNvagBoriVbl8uwHXQl8IC1niawVG0iXMM,10
331
- tol_sdk-1.7.0.dist-info/RECORD,,
328
+ tol_sdk-1.7.2.dist-info/licenses/LICENSE,sha256=RF9Jacy-9BpUAQQ20INhTgtaNBkmdTolYCHtrrkM2-8,1077
329
+ tol_sdk-1.7.2.dist-info/METADATA,sha256=FOII5eZYn_0x2XpUiv_dTnTUBz7_ZArof7jrD5NZHms,3079
330
+ tol_sdk-1.7.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
331
+ tol_sdk-1.7.2.dist-info/entry_points.txt,sha256=jH3HfTwxjzog7E3lq8CKpUWGIRY9FSXbyL6CpUmv6D0,36
332
+ tol_sdk-1.7.2.dist-info/top_level.txt,sha256=PwKMQLphyZNvagBoriVbl8uwHXQl8IC1niawVG0iXMM,10
333
+ tol_sdk-1.7.2.dist-info/RECORD,,