tol-sdk 1.7.5b4__py3-none-any.whl → 1.8.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,173 @@
1
+ /*
2
+ ## SQL Query: DNA Extraction Containers (Benchling Warehouse)
3
+
4
+ This SQL query retrieves detailed information about DNA extraction containers managed by the ToL Core Laboratory, including metadata, container details, and the latest QC measurements.
5
+
6
+ The resulting table includes identifiers for tissues, tissue preps, extractions, containers, and locations, as well as the most recent QC results (Nanodrop, Qubit, Femto, Yield, and Decision Making).
7
+
8
+ Output: Table with columns:
9
+
10
+ 1) taxon_id: [character] Tissue metadata. Origin: STS
11
+ 2) eln_tissue_id: [character] Benchling ID for the tissue the extraction is derived from.
12
+ 3) eln_tissue_prep_id: [character] Benchling ID for the tissue prep the extraction is derived from.
13
+ 4) extraction_id: [character] DNA extraction entity ID (Benchling).
14
+ 5) programme_id: [character] ToLID. Origin: BWH.
15
+ 6) specimen_id: [character] Specimen ID. Origin: STS.
16
+ 7) creation_date: [date] Date the container was created.
17
+ 8) fluidx_container_id: [character] Primary key for the FluidX container.
18
+ 9) fluidx_id: [character] FluidX barcode.
19
+ 10) tube_type: [character] Type of tube/container.
20
+ 11) volume_ul: [numeric] Volume in microliters (0 if archived as 'Retired' or 'Expended').
21
+ 12) location: [character] Storage location name.
22
+ 13) rack: [character] Box/rack barcode.
23
+ 14) archive_purpose: [character] Reason for archiving the DNA extraction.
24
+ 15) nanodrop_concentration_ngul: [numeric] Latest Nanodrop concentration (ng/µL).
25
+ 16) dna_260_280_ratio: [numeric] Latest Nanodrop 260/280 ratio.
26
+ 17) dna_260_230_ratio: [numeric] Latest Nanodrop 260/230 ratio.
27
+ 18) qubit_concentration_ngul: [numeric] Latest Qubit concentration (ng/µL).
28
+ 19) yield_ng: [numeric] Latest yield (ng).
29
+ 20) femto_date_code: [character] Latest Femto date code.
30
+ 21) femto_description: [character] Latest Femto profile description.
31
+ 22) gqn_index: [numeric] Latest GQN index from Femto.
32
+ 23) next_step: [character] Latest decision making next step.
33
+ 24) extraction_qc_result: [character] Latest extraction QC result.
34
+
35
+ NOTES:
36
+ 1) Only extractions from the 'ToL Core Lab' project and relevant folders are included.
37
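+ 2) Containers archived as 'Made in error' are excluded, as are extractions whose originating entry name matches '%Nuclei isolation and tagmentation%'. Because the entry is LEFT JOINed and filtered with NOT LIKE, extractions with no originating entry (NULL name) are excluded as well.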
38
+ 3) Latest QC results are joined from their respective measurement tables.
39
+ 4) Volume is set to 0 for containers whose archive purpose is 'Retired' or 'Expended'.
40
+ 5) Data types are preserved as in the Benchling Warehouse.
41
+
42
+ */
43
+
44
+ WITH latest_nanodrop_conc AS (
45
+ SELECT
46
+ nanod.sample_id,
47
+ nanod.nanodrop_concentration_ngul,
48
+ nanod._260_280_ratio AS "dna_260_280_ratio",
49
+ nanod._260_230_ratio AS "dna_260_230_ratio"
50
+ FROM nanodrop_measurements_v2$raw AS nanod
51
+ WHERE nanod.created_at$ = (
52
+ SELECT MAX(sub.created_at$)
53
+ FROM nanodrop_measurements_v2$raw AS sub
54
+ WHERE sub.sample_id = nanod.sample_id
55
+ )
56
+ ),
57
+
58
+ latest_qubit_conc AS (
59
+ SELECT
60
+ qbit.sample_id,
61
+ qbit.qubit_concentration_ngul
62
+ FROM qubit_measurements_v2$raw as qbit
63
+ WHERE qbit.created_at$ = (
64
+ SELECT MAX(sub.created_at$)
65
+ FROM qubit_measurements_v2$raw AS sub
66
+ WHERE sub.sample_id = qbit.sample_id
67
+ )
68
+ ),
69
+
70
+ latest_yield AS (
71
+ SELECT
72
+ dnay.sample_id,
73
+ dnay.yield
74
+ FROM yield_v2$raw as dnay
75
+ WHERE dnay.created_at$ = (
76
+ SELECT MAX(sub.created_at$)
77
+ FROM yield_v2$raw AS sub
78
+ WHERE sub.sample_id = dnay.sample_id
79
+ )
80
+ ),
81
+
82
+ latest_femto AS (
83
+ SELECT
84
+ femto.sample_id,
85
+ femto.femto_date_code,
86
+ femto.femto_profile_description AS femto_description,
87
+ femto.gqn_dnaex
88
+ FROM femto_dna_extract_v2$raw AS femto
89
+ WHERE femto.created_at$ = (
90
+ SELECT MAX(sub.created_at$)
91
+ FROM femto_dna_extract_v2$raw as sub
92
+ WHERE sub.sample_id = femto.sample_id
93
+ )
94
+ ),
95
+
96
+ latest_decision_making AS (
97
+ SELECT
98
+ dnad.sample_id,
99
+ dnad.next_step,
100
+ qc_passfail AS extraction_qc_result
101
+ FROM dna_decision_making_v2$raw AS dnad
102
+ WHERE dnad.created_at$ = (
103
+ SELECT MAX(sub.created_at$)
104
+ FROM dna_decision_making_v2$raw AS sub
105
+ WHERE sub.sample_id = dnad.sample_id
106
+ )
107
+ )
108
+
109
+ SELECT DISTINCT
110
+ t.taxon_id,
111
+ t.id AS eln_tissue_id,
112
+ tp.id AS eln_tissue_prep_id,
113
+ dna.id AS extraction_id,
114
+ t.programme_id,
115
+ t.specimen_id,
116
+ DATE(con.created_at) AS creation_date,
117
+ con.id AS fluidx_container_id, -- primary key
118
+ con.barcode AS fluidx_id,
119
+ tube.type AS tube_type,
120
+ CASE
121
+ WHEN con.archive_purpose$ IN ('Retired', 'Expended') THEN 0 -- Retired or expended DNA extractions have a weight of 0
122
+ ELSE con.volume_si * 1000000
123
+ END AS volume_ul,
124
+ loc.name AS location,
125
+ box.barcode AS rack,
126
+ con.archive_purpose$ AS archive_purpose,
127
+ latest_nanodrop_conc.nanodrop_concentration_ngul,
128
+ latest_nanodrop_conc.dna_260_280_ratio,
129
+ latest_nanodrop_conc.dna_260_230_ratio,
130
+ latest_qubit_conc.qubit_concentration_ngul,
131
+ latest_yield.yield AS yield_ng,
132
+ latest_femto.femto_date_code,
133
+ latest_femto.femto_description,
134
+ latest_femto.gqn_dnaex AS gqn_index,
135
+ latest_decision_making.next_step,
136
+ latest_decision_making.extraction_qc_result
137
+ FROM dna_extract$raw AS dna
138
+ INNER JOIN container_content$raw AS cc -- Start of container/tube join
139
+ ON cc.entity_id = dna.id
140
+ LEFT JOIN container$raw AS con
141
+ ON con.id = cc.container_id
142
+ LEFT JOIN tube$raw AS tube
143
+ ON cc.container_id = tube.id -- End of container/tube join
144
+ LEFT JOIN box$raw AS box -- Location chunk
145
+ ON con.box_id = box.id
146
+ LEFT JOIN location$raw AS loc
147
+ ON loc.id = box.location_id -- End of location chunk
148
+ LEFT JOIN tissue_prep$raw AS tp
149
+ ON tp.id = dna.tissue_prep
150
+ LEFT JOIN tissue$raw AS t
151
+ ON t.id = tp.tissue
152
+ LEFT JOIN latest_nanodrop_conc -- Results chunk
153
+ ON dna.id = latest_nanodrop_conc.sample_id
154
+ LEFT JOIN latest_qubit_conc
155
+ ON dna.id = latest_qubit_conc.sample_id
156
+ LEFT JOIN latest_yield
157
+ ON dna.id = latest_yield.sample_id
158
+ LEFT JOIN latest_femto
159
+ ON dna.id = latest_femto.sample_id
160
+ LEFT JOIN latest_decision_making
161
+ ON dna.id = latest_decision_making.sample_id -- End Results chunk
162
+ LEFT JOIN folder$raw AS f
163
+ ON dna.folder_id$ = f.id
164
+ LEFT JOIN project$raw AS proj
165
+ ON dna.project_id$ = proj.id
166
+ LEFT JOIN registration_origin$raw AS reg
167
+ ON reg.entity_id = dna.id
168
+ LEFT JOIN entry$raw AS ent
169
+ ON reg.origin_entry_id = ent.id
170
+ WHERE proj.name = 'ToL Core Lab'
171
+ AND (f.name IN ('Routine Throughput', 'DNA', 'Core Lab Entities', 'Benchling MS Project Move') OR f.name IS NULL)
172
+ AND (con.archive_purpose$ != ('Made in error') OR con.archive_purpose$ IS NULL)
173
+ AND ent.name NOT LIKE '%Nuclei isolation and tagmentation%'
@@ -11,34 +11,20 @@ Output: Table with cols:
11
11
 
12
12
  1) sts_id: [integer] Tissue metadata. Origin: STS
13
13
  2) taxon_id: [character] Tissue metadata. Origin: STS
14
- 3) eln_tissue_id: [character] Benchling id for the tissue the extractions is derived from.
15
- 4) eln_tissue_prep_id: [character] Benchling id for the tissue prep the extractions is derived from.
14
+ 3) eln_tissue_id: [character] Benchling id for the tissue the extraction is derived from.
15
+ 4) eln_tissue_prep_id: [character] Benchling id for the tissue prep the extraction is derived from.
16
16
  5) eln_file_registry_id: [character] id in Benchling Registry.
17
17
  6) extraction_id: [character] Primary key.
18
18
  7) programme_id: [character] ToLID. Origin: BWH
19
19
  8) specimen_id: [character] Specimen ID. Origin: STS
20
20
  9) completion_date: [date] Extraction date. This field coalesces created_at$ and created_on fields. Created_on is for bnt legacy data.
21
- 10) extraction_name: [character] Entity name.
22
- 11) fluidx_id: [character] Fluidx ID.
23
- 12) volume_ul: [double] volume of DNA available in the fluidx tube.
24
- 13) location: [character] Physical locationo of the DNA extraction. Freezer shelf.
25
- 14) rack: [character] Physical locationo of the DNA extraction. Rack barcode.
26
- 15) bnt_id: [character] Batches and Tracking legacy id.
27
- 16) manual_vs_automatic: [character].
28
- 17) extraction_protocol: [character] DNA extraction protocol as recorded at the time of extraction
29
- 18) tube_type: [character] Type of tube. Marked NULL or voucher.
30
- 19) extraction_type: [character] dna.
31
- 20) name: [character] Folder name.
32
- 21) archive_purpose: [character] Reason for archiving the DNA extraction.
33
- 22) nanodrop_concentration_ngul: [double] Concentration of DNA as measured by Nanodrop.
34
- 23) dna_260_280_ratio: [double] Ratio of absorbance at 260:280nm as measured by spectrophotometer.
35
- 24) dna_260_230_ratio: [double] Ratio of absorbance at 260:230nm as measured by spectrophotometer.
36
- 25) qubit_concentration_ngul: [double] Concentration of DNA as measured by Qubit.
37
- 26) yield_ng: [double] DNA yield after extraction.
38
- 27) femto_date_code: [character] Femto date code.
39
- 28) femto_description:[character] Categorical description of the femto pulse profile.
40
- 29) gqn_index: [character] Genomic Quality Number (GQN) index, calculated by the Femto software.
41
- 30) extraction_qc_result: [character] QC result: Yes = Extraction passed; No = Extraction failed.
21
+ 10) extraction_name: [character] Entity name.
22
+ 11) bnt_id: [character] Batches and Tracking legacy id.
23
+ 12) manual_vs_automatic: [character] Extraction method indicator.
24
+ 13) extraction_protocol: [character] DNA extraction protocol as recorded at the time of extraction.
25
+ 14) extraction_type: [character] Type of extraction, set to 'dna'.
26
+ 15) folder_name: [character] Folder name.
27
+ 16) archive_purpose: [character] Reason for archiving the DNA extraction.
42
28
 
43
29
  NOTES:
44
30
  1) Data types were cast explicitly to conserve the data types stored in BWH.
@@ -46,71 +32,6 @@ NOTES:
46
32
 
47
33
  */
48
34
 
49
- WITH latest_nanodrop_conc AS (
50
- SELECT
51
- nanod.sample_id,
52
- nanod.nanodrop_concentration_ngul,
53
- nanod._260_280_ratio AS "dna_260_280_ratio",
54
- nanod._260_230_ratio AS "dna_260_230_ratio"
55
- FROM nanodrop_measurements_v2$raw AS nanod
56
- WHERE nanod.created_at$ = (
57
- SELECT MAX(sub.created_at$)
58
- FROM nanodrop_measurements_v2$raw AS sub
59
- WHERE sub.sample_id = nanod.sample_id
60
- )
61
- ),
62
-
63
- latest_qubit_conc AS (
64
- SELECT
65
- qbit.sample_id,
66
- qbit.qubit_concentration_ngul
67
- FROM qubit_measurements_v2$raw as qbit
68
- WHERE qbit.created_at$ = (
69
- SELECT MAX(sub.created_at$)
70
- FROM qubit_measurements_v2$raw AS sub
71
- WHERE sub.sample_id = qbit.sample_id
72
- )
73
- ),
74
-
75
- latest_yield AS (
76
- SELECT
77
- dnay.sample_id,
78
- dnay.yield
79
- FROM yield_v2$raw as dnay
80
- WHERE dnay.created_at$ = (
81
- SELECT MAX(sub.created_at$)
82
- FROM yield_v2$raw AS sub
83
- WHERE sub.sample_id = dnay.sample_id
84
- )
85
- ),
86
-
87
- latest_femto AS (
88
- SELECT
89
- femto.sample_id,
90
- femto.femto_date_code,
91
- femto.femto_profile_description AS femto_description,
92
- femto.gqn_dnaex
93
- FROM femto_dna_extract_v2$raw AS femto
94
- WHERE femto.created_at$ = (
95
- SELECT MAX(sub.created_at$)
96
- FROM femto_dna_extract_v2$raw as sub
97
- WHERE sub.sample_id = femto.sample_id
98
- )
99
- ),
100
-
101
- latest_decision_making AS (
102
- SELECT
103
- dnad.sample_id,
104
- dnad.next_step,
105
- qc_passfail AS extraction_qc_result
106
- FROM dna_decision_making_v2$raw AS dnad
107
- WHERE dnad.created_at$ = (
108
- SELECT MAX(sub.created_at$)
109
- FROM dna_decision_making_v2$raw AS sub
110
- WHERE sub.sample_id = dnad.sample_id
111
- )
112
- )
113
-
114
35
  SELECT DISTINCT
115
36
  t.sts_id,
116
37
  t.taxon_id,
@@ -122,62 +43,26 @@ SELECT DISTINCT
122
43
  t.specimen_id,
123
44
  COALESCE(DATE(dna.created_on), DATE(dna.created_at$)) AS completion_date, -- Homogenising BnT and Benchling dates
124
45
  dna.name$ AS extraction_name,
125
- con.barcode AS fluidx_id,
126
- con.id AS fluidx_container_id,
127
- CASE
128
- WHEN con.archive_purpose$ IN ('Retired', 'Expended') THEN 0 -- Retired or expended DNA extractions have a weight of 0
129
- ELSE con.volume_si * 1000000
130
- END AS volume_ul,
131
- loc.name AS location,
132
- box.barcode AS rack,
133
46
  dna.bt_id AS bnt_id,
134
- dna.manual_vs_automatic AS manual_vs_automatic,
47
+ dna.manual_vs_automatic AS manual_vs_automatic,
135
48
  dna.extraction_protocol,
136
- tube.type AS tube_type,
137
49
  'dna'::varchar AS extraction_type,
138
- f.name, dna.archive_purpose$,
139
- latest_nanodrop_conc.nanodrop_concentration_ngul,
140
- latest_nanodrop_conc.dna_260_280_ratio,
141
- latest_nanodrop_conc.dna_260_230_ratio,
142
- latest_qubit_conc.qubit_concentration_ngul,
143
- latest_yield.yield AS yield_ng,
144
- latest_femto.femto_date_code,
145
- latest_femto.femto_description,
146
- latest_femto.gqn_dnaex AS gqn_index,
147
- latest_decision_making.next_step,
148
- latest_decision_making.extraction_qc_result
50
+ f.name AS folder_name
149
51
  FROM dna_extract$raw AS dna
150
- LEFT JOIN container_content$raw AS cc
151
- ON cc.entity_id = dna.id
152
- LEFT JOIN container$raw AS con
153
- ON con.id = cc.container_id
154
52
  LEFT JOIN tissue_prep$raw AS tp
155
53
  ON tp.id = dna.tissue_prep
156
54
  LEFT JOIN tissue$raw AS t
157
55
  ON t.id = tp.tissue
158
- LEFT JOIN tube$raw AS tube
159
- ON cc.container_id = tube.id
160
56
  LEFT JOIN folder$raw AS f
161
57
  ON dna.folder_id$ = f.id
162
58
  LEFT JOIN project$raw AS proj
163
59
  ON dna.project_id$ = proj.id
164
- LEFT JOIN latest_nanodrop_conc -- Results chunk
165
- ON dna.id = latest_nanodrop_conc.sample_id
166
- LEFT JOIN latest_qubit_conc
167
- ON dna.id = latest_qubit_conc.sample_id
168
- LEFT JOIN latest_yield
169
- ON dna.id = latest_yield.sample_id
170
- LEFT JOIN latest_femto
171
- ON dna.id = latest_femto.sample_id
172
- LEFT JOIN latest_decision_making
173
- ON dna.id = latest_decision_making.sample_id -- End Results chunk
174
- LEFT JOIN box$raw AS box -- Location chunk
175
- ON con.box_id = box.id
176
- LEFT JOIN location$raw AS loc
177
- ON loc.id = box.location_id -- End of location chunk
60
+ LEFT JOIN registration_origin$raw AS reg
61
+ ON reg.entity_id = dna.id
62
+ LEFT JOIN entry$raw AS ent
63
+ ON reg.origin_entry_id = ent.id
178
64
  WHERE proj.name = 'ToL Core Lab'
179
65
  AND (f.name IN ('Routine Throughput', 'DNA', 'Core Lab Entities', 'Benchling MS Project Move') OR f.name IS NULL)
180
66
  AND (dna.archive_purpose$ != ('Made in error') OR dna.archive_purpose$ IS NULL)
181
- AND (con.archive_purpose$ != ('Made in error') OR con.archive_purpose$ IS NULL)
182
- AND con.barcode NOT LIKE 'CON%'
67
+ AND ent.name NOT LIKE '%Nuclei isolation and tagmentation%'
183
68
  ORDER BY completion_date DESC
@@ -28,8 +28,8 @@ SELECT DISTINCT
28
28
  tp.name$ AS eln_tissue_prep_name,
29
29
  ssid.sanger_sample_id,
30
30
  ssid.sanger_sample_id AS extraction_id,
31
- c.barcode AS fluidx_id,
32
- c.id AS fluidx_container_id,
31
+ sub_con.barcode AS fluidx_id,
32
+ sub_con.id AS fluidx_container_id,
33
33
  DATE(tpsub.submitted_submission_date) AS completion_date,
34
34
  'lres'::varchar AS extraction_type
35
35
  FROM tissue_prep$raw AS tp
@@ -41,8 +41,16 @@ LEFT JOIN container$raw AS c
41
41
  ON cc.container_id = c.id
42
42
  LEFT JOIN tissue_prep_submission_workflow_output$raw AS tpsub
43
43
  ON c.id = tpsub.sample_tube_id
44
+ LEFT JOIN container$raw AS sub_con
45
+ ON tpsub.sample_tube_id = sub_con.id
44
46
  LEFT JOIN storage$raw AS stor
45
47
  ON c.location_id = stor.id
46
48
  LEFT JOIN sanger_sample_id$raw AS ssid
47
49
  ON c.id = ssid.sample_tube
48
- WHERE stor.name$ = 'SciOps ToL Lab'
50
+ LEFT JOIN project$raw AS proj
51
+ ON tp.project_id$ = proj.id
52
+ LEFT JOIN folder$raw AS f
53
+ ON tp.folder_id$ = f.id
54
+ WHERE sub_con.id IS NOT NULL
55
+ AND proj.name = 'ToL Core Lab'
56
+ AND f.name = 'Sample Prep'
@@ -3,6 +3,7 @@
3
3
  # SPDX-License-Identifier: MIT
4
4
 
5
5
  from abc import ABC, abstractmethod
6
+ from dataclasses import dataclass
6
7
  from typing import Iterable
7
8
 
8
9
  from more_itertools import flatten
@@ -95,9 +96,14 @@ class DefaultDataObjectToDataObjectConverter(DataObjectToDataObjectOrUpdateConve
95
96
 
96
97
  class SanitisingConverter(DataObjectToDataObjectOrUpdateConverter):
97
98
 
99
+ @dataclass(slots=True, frozen=True, kw_only=True)
100
+ class Config:
101
+ pass
102
+
98
103
  def __init__(
99
104
  self,
100
105
  data_object_factory: DataObjectFactory,
106
+ config: Config,
101
107
  **kwargs
102
108
  ):
103
109
  super().__init__(data_object_factory)
@@ -65,7 +65,7 @@ class ExcelDataSource(
65
65
  ) -> Iterable[DataObject]:
66
66
 
67
67
  return (
68
- self.__marshal_row(row_index + 1, row)
68
+ self.__marshal_row(row_index + 2, row) # Add 1 for header, 1 for 1-based ID
69
69
  for row_index, row
70
70
  in self.__df.iterrows()
71
71
  )
@@ -115,6 +115,14 @@ class ExcelDataSource(
115
115
  __v: Any,
116
116
  ) -> Any:
117
117
 
118
+ # Convert pandas Timestamp to Python datetime
119
+ if isinstance(__v, pd.Timestamp):
120
+ __v = datetime.fromtimestamp(__v.timestamp())
121
+
122
+ # If float and is whole number, convert to int
123
+ if isinstance(__v, float) and __v.is_integer():
124
+ __v = int(__v)
125
+
118
126
  if __k not in self.__mappings:
119
127
  return __v
120
128
 
@@ -3,6 +3,7 @@
3
3
 
4
4
  import re
5
5
  from dataclasses import dataclass
6
+ from datetime import datetime
6
7
  from typing import Iterable
7
8
 
8
9
  from tol.core import DataObject, DataObjectToDataObjectOrUpdateConverter
@@ -30,8 +31,10 @@ class IncomingSampleToEnaSampleConverter(DataObjectToDataObjectOrUpdateConverter
30
31
  s = data_object
31
32
  attributes = {
32
33
  'ENA-CHECKLIST': self.__config.ena_checklist_id,
33
- 'organism part': self.__replace_underscores(
34
- s.attributes.get('ORGANISM_PART')),
34
+ 'organism part': self.__join_list([
35
+ self.__replace_underscores(v)
36
+ for v in s.attributes.get('ORGANISM_PART', [])
37
+ ]),
35
38
  'lifestage': (
36
39
  'spore-bearing structure'
37
40
  if s.attributes.get('LIFESTAGE') == 'SPORE_BEARING_STRUCTURE'
@@ -40,35 +43,38 @@ class IncomingSampleToEnaSampleConverter(DataObjectToDataObjectOrUpdateConverter
40
43
  ),
41
44
  'project name':
42
45
  self.__config.project_name,
43
- 'collected by':
44
- self.__replace_underscores(
45
- s.attributes.get('COLLECTED_BY')),
46
+ 'collected_by': self.__join_list([
47
+ self.__replace_underscores(v)
48
+ for v in s.attributes.get('COLLECTED_BY', [])
49
+ ]),
46
50
  'collection date':
47
- self.__replace_underscores(
48
- s.attributes.get('DATE_OF_COLLECTION')).lower(),
51
+ self.__format_date(
52
+ s.attributes.get('DATE_OF_COLLECTION')),
49
53
  'geographic location (country and/or sea)':
50
54
  self.__collection_country(s).replace('_', ' '),
51
55
  'geographic location (latitude)':
52
56
  self.__replace_underscores(
53
- s.attributes.get('DECIMAL_LATITUDE')).lower(),
57
+ str(s.attributes.get('DECIMAL_LATITUDE'))).lower(),
54
58
  'geographic location (latitude) units':
55
59
  'DD',
56
60
  'geographic location (longitude)':
57
61
  self.__replace_underscores(
58
- s.attributes.get('DECIMAL_LONGITUDE')).lower(),
62
+ str(s.attributes.get('DECIMAL_LONGITUDE'))).lower(),
59
63
  'geographic location (longitude) units':
60
64
  'DD',
61
65
  'geographic location (region and locality)':
62
66
  self.__collection_region(s).replace('_', ' '),
63
- 'identified_by':
64
- self.__replace_underscores(
65
- s.attributes.get('IDENTIFIED_BY')),
67
+ 'identified_by': self.__join_list([
68
+ self.__replace_underscores(v)
69
+ for v in s.attributes.get('IDENTIFIED_BY', [])
70
+ ]),
66
71
  'habitat':
67
72
  self.__replace_underscores(
68
73
  s.attributes.get('HABITAT')),
69
- 'identifier_affiliation':
70
- self.__replace_underscores(
71
- s.attributes.get('IDENTIFIER_AFFILIATION')),
74
+ 'identifier_affiliation': self.__join_list([
75
+ self.__replace_underscores(v)
76
+ for v in s.attributes.get('IDENTIFIER_AFFILIATION', [])
77
+ ]),
72
78
  'sex':
73
79
  self.__replace_underscores(
74
80
  s.attributes.get('SEX')),
@@ -77,9 +83,10 @@ class IncomingSampleToEnaSampleConverter(DataObjectToDataObjectOrUpdateConverter
77
83
  s.attributes.get('RELATIONSHIP')),
78
84
  'SYMBIONT':
79
85
  'Y' if s.attributes.get('SYMBIONT') == 'SYMBIONT' else 'N',
80
- 'collecting institution':
81
- self.__replace_underscores(
82
- s.attributes.get('COLLECTOR_AFFILIATION'))
86
+ 'collecting institution': self.__join_list([
87
+ self.__replace_underscores(v)
88
+ for v in s.attributes.get('COLLECTOR_AFFILIATION', [])
89
+ ]),
83
90
  }
84
91
  if self.__sanitise(s.attributes.get('DEPTH')) != '':
85
92
  attributes['geographic location (depth)'] = s.attributes.get('DEPTH')
@@ -88,9 +95,11 @@ class IncomingSampleToEnaSampleConverter(DataObjectToDataObjectOrUpdateConverter
88
95
  attributes['geographic location (elevation)'] = s.attributes.get('ELEVATION')
89
96
  attributes['geographic location (elevation) units'] = 'm'
90
97
  if self.__sanitise(s.attributes.get('ORIGINAL_COLLECTION_DATE')) != '':
91
- attributes['original collection date'] = s.attributes.get('ORIGINAL_COLLECTION_DATE')
98
+ attributes['original collection date'] = \
99
+ self.__format_date(s.attributes.get('ORIGINAL_COLLECTION_DATE'))
92
100
  if self.__sanitise(s.attributes.get('ORIGINAL_GEOGRAPHIC_LOCATION')) != '':
93
- attributes['original geographic location'] = self.__replace_underscores(s.attributes.get('ORIGINAL_GEOGRAPHIC_LOCATION')) # noqa
101
+ attributes['original geographic location'] = \
102
+ self.__replace_underscores(s.attributes.get('ORIGINAL_GEOGRAPHIC_LOCATION'))
94
103
  if s.attributes.get('GAL') is not None:
95
104
  attributes['GAL'] = s.attributes.get('GAL')
96
105
  if s.attributes.get('VOUCHER_ID') is not None:
@@ -103,7 +112,7 @@ class IncomingSampleToEnaSampleConverter(DataObjectToDataObjectOrUpdateConverter
103
112
  attributes['culture_or_strain_id'] = s.attributes.get('CULTURE_OR_STRAIN_ID')
104
113
 
105
114
  ret = self._data_object_factory(
106
- 'sample',
115
+ data_object.type,
107
116
  s.id,
108
117
  attributes=attributes,
109
118
  )
@@ -128,3 +137,19 @@ class IncomingSampleToEnaSampleConverter(DataObjectToDataObjectOrUpdateConverter
128
137
  if value is None:
129
138
  return default_value
130
139
  return value
140
+
141
+ def __join_list(self, value_list):
142
+ if value_list is None:
143
+ return ''
144
+ if not isinstance(value_list, list):
145
+ return str(value_list)
146
+ return ' | '.join(str(v) for v in value_list)
147
+
148
+ def __format_date(self, value):
149
+ """Format date to YYYY-mm-dd format"""
150
+ if value is None:
151
+ return ''
152
+ if isinstance(value, datetime):
153
+ return value.strftime('%Y-%m-%d')
154
+
155
+ return str(value)
@@ -15,6 +15,7 @@ from .regex_by_value import RegexByValueValidator # noqa
15
15
  from .specimens_have_same_taxon import SpecimensHaveSameTaxonValidator # noqa
16
16
  from .sts_fields import StsFieldsValidator # noqa
17
17
  from .tolid import TolidValidator # noqa
18
+ from .types import TypesValidator # noqa
18
19
  from .unique_values import UniqueValuesValidator # noqa
19
20
  from .unique_whole_organisms import UniqueWholeOrganismsValidator # noqa
20
21
  from .interfaces import Condition # noqa
@@ -19,12 +19,12 @@ class ConverterAndValidateValidator(Validator):
19
19
  "converters": [{
20
20
  "module": "<path.to.module>",
21
21
  "class_name": "<path.to.ConverterClass>",
22
- "config": { ... }
22
+ "config_details": { ... }
23
23
  }],
24
24
  "validators": [{
25
25
  "module": "<path.to.module>",
26
26
  "class_name": "<path.to.ValidatorClass>",
27
- "config": { ... }
27
+ "config_details": { ... }
28
28
  }]
29
29
  }
30
30
 
@@ -54,7 +54,7 @@ class ConverterAndValidateValidator(Validator):
54
54
  converter_class = getattr(__module, conv.get('class_name'))
55
55
 
56
56
  converter_conf = converter_class.Config(
57
- **conv.get('config')
57
+ **conv.get('config_details')
58
58
  )
59
59
  self.__converters.append(converter_class(
60
60
  data_object_factory=data_object_factory,
@@ -65,7 +65,7 @@ class ConverterAndValidateValidator(Validator):
65
65
  validator_class = getattr(__module, val.get('class_name'))
66
66
 
67
67
  validator_conf = validator_class.Config(
68
- **val.get('config')
68
+ **val.get('config_details')
69
69
  )
70
70
  self.__validators.append(validator_class(
71
71
  data_object_factory=data_object_factory,
@@ -27,31 +27,31 @@ class EnaChecklistValidator(Validator):
27
27
  super().__init__()
28
28
  self.__config = config
29
29
  self._datasource = datasource
30
+ self.__ena_checklist = datasource.get_one(
31
+ 'checklist',
32
+ self.__config.ena_checklist_id
33
+ ).checklist
30
34
 
31
35
  def _validate_data_object(self, obj: DataObject) -> None:
32
- ena_datasource = self._datasource
33
- ena_checklist = ena_datasource.get_one('checklist', self.__config.ena_checklist_id)
34
-
35
- validations = ena_checklist.attributes['checklist']
36
- for key in validations:
36
+ for key, validation in self.__ena_checklist.items():
37
37
  field_name = key
38
- if 'field' in validations[key]:
39
- field_name = validations[key]['field']
40
- if 'mandatory' in validations[key] and key not in obj.attributes:
38
+ if 'field' in validation:
39
+ field_name = validation['field']
40
+ if 'mandatory' in validation and key not in obj.attributes:
41
41
  self.add_error(object_id=obj.id, detail='Must be given', field=[field_name])
42
42
  continue
43
- if 'mandatory' in validations[key] and obj.attributes[key] is None:
43
+ if 'mandatory' in validation and obj.attributes[key] is None:
44
44
  self.add_error(object_id=obj.id, detail='Must be given', field=[field_name])
45
45
  continue
46
- if 'mandatory' in validations[key] and obj.attributes.get(key) == '':
46
+ if 'mandatory' in validation and obj.attributes.get(key) == '':
47
47
  self.add_error(
48
48
  object_id=obj.id,
49
49
  detail='Must not be empty', field=[field_name]
50
50
  )
51
51
 
52
- if 'restricted text' in validations[key] and key in obj.attributes:
53
- for condition in validations[key]:
54
- if type(condition) == str and '(' in condition:
52
+ if 'restricted text' in validation and key in obj.attributes:
53
+ for condition in validation:
54
+ if isinstance(condition, str) and '(' in condition:
55
55
  regex = condition
56
56
  compiled_re = re.compile(regex)
57
57
  if not compiled_re.search(obj.attributes.get(key)):
@@ -61,9 +61,9 @@ class EnaChecklistValidator(Validator):
61
61
  )
62
62
 
63
63
  # Check against allowed values
64
- if 'text choice' in validations[key] and key in obj.attributes:
65
- for condition in validations[key]:
66
- if type(condition) == list:
64
+ if 'text choice' in validation and key in obj.attributes:
65
+ for condition in validation:
66
+ if isinstance(condition, list):
67
67
  allowed_values = condition
68
68
  if obj.attributes.get(key).lower() not in \
69
69
  [x.lower() for x in allowed_values]:
tol/validators/regex.py CHANGED
@@ -26,9 +26,9 @@ class Regex:
26
26
 
27
27
  def is_allowed(self, __v: Any) -> bool:
28
28
  # Check regex
29
- return bool(re.search(
29
+ return __v is None or __v == '' or bool(re.search(
30
30
  self.regex,
31
- str(__v) if __v is not None else ''
31
+ str(__v)
32
32
  ))
33
33
 
34
34
 
@@ -90,7 +90,6 @@ class RegexValidator(Validator):
90
90
  obj: DataObject,
91
91
  c: Regex,
92
92
  ) -> None:
93
-
94
93
  if c.is_error:
95
94
  self.add_error(
96
95
  object_id=obj.id,
@@ -3,6 +3,7 @@
3
3
  # SPDX-License-Identifier: MIT
4
4
 
5
5
  from dataclasses import dataclass
6
+ from datetime import datetime, timedelta
6
7
  from typing import List
7
8
 
8
9
  from tol.core import DataObject, DataSource
@@ -51,39 +52,178 @@ class StsFieldsValidator(Validator):
51
52
  obj: DataObject
52
53
  ) -> None:
53
54
  for field in self.__fields.values():
55
+ # Ignore inactive fields
56
+ if field.get('status') == 'Inactive':
57
+ continue
54
58
  # Get the value from the data object
55
59
  field_value = obj.get_field_by_name(field.get('data_input_key'))
56
- if field.get('mandatory_input') and (field_value is None or field_value == ''):
60
+ if isinstance(field_value, list):
61
+ field_value = ' | '.join(str(v) for v in field_value)
62
+
63
+ # mandatory_input fields must be present
64
+ if field.get('mandatory_input') and field.get('data_input_key') not in obj.attributes:
57
65
  self.add_error(
58
66
  object_id=obj.id,
59
67
  detail=f'Field {field.get("data_input_key")} is required '
60
68
  f'for project {self.__config.project_code}',
61
69
  field=field.get('data_input_key'),
62
70
  )
63
- elif field.get('allowed_values') and field_value not in field.get('allowed_values'):
64
- self.add_error(
65
- object_id=obj.id,
66
- detail=f'Field {field.get("data_input_key")} value '
67
- f'"{field_value}" not found in allowed values '
68
- f'{field.get("allowed_values")} for project '
69
- f'{self.__config.project_code}',
70
- field=field.get('data_input_key'),
71
- )
72
- elif field.get('min') and field_value < field.get('min'):
71
+ continue
72
+
73
+ # Skip further validations if validation is not mandatory
74
+ if not field.get('mandatory_validation'):
75
+ continue
76
+
77
+ # Mandatory validation fields must have a value
78
+ if field_value is None or field_value == '':
73
79
  self.add_error(
74
80
  object_id=obj.id,
75
- detail=f'Field {field.get("data_input_key")} value '
76
- f'"{field_value}" is less than minimum value '
77
- f'"{field.get("min")}" for project '
78
- f'{self.__config.project_code}',
81
+ detail=f'Field {field.get("data_input_key")} is required to have a value '
82
+ f'for project {self.__config.project_code}',
79
83
  field=field.get('data_input_key'),
80
84
  )
81
- elif field.get('max') and field_value > field.get('max'):
85
+ continue
86
+
87
+ # Allowed values
88
+ if field.get('allowed_values'):
89
+ allowed_values = [
90
+ value.get('value') for value in field.get('allowed_values', [])
91
+ ]
92
+ if field_value not in allowed_values:
93
+ self.add_error(
94
+ object_id=obj.id,
95
+ detail=f'Field {field.get("data_input_key")} value '
96
+ f'"{field_value}" not found in allowed values '
97
+ f'{allowed_values} for project '
98
+ f'{self.__config.project_code}',
99
+ field=field.get('data_input_key'),
100
+ )
101
+
102
+ if field.get('type') in ['String', 'TextArea']:
103
+ self.__validate_string(obj, field, field_value)
104
+
105
+ if field.get('type') in ['Integer', 'Decimal', 'Percentage']:
106
+ self.__validate_number(obj, field, field_value)
107
+
108
+ if field.get('type') in ['Boolean']:
109
+ self.__validate_boolean(obj, field, field_value)
110
+
111
+ if field.get('type') in ['Date']:
112
+ self.__validate_date(obj, field, field_value)
113
+
114
def __validate_string(
    self,
    obj: DataObject,
    field: dict,
    field_value: str | int | float | None
) -> None:
    """
    Check the length of a String/TextArea field value against the
    field's optional ``min`` and ``max`` settings.

    An error is added for each bound that is violated. ``None`` is
    skipped, and a value that is not already a string (the signature
    admits numbers) is measured by the length of its ``str()`` form,
    so ``len()`` is never applied to an unsized value.

    :param obj: the data object being validated (its id is reported)
    :param field: the field definition; ``data_input_key``, ``min``
        and ``max`` are read from it
    :param field_value: the value held by ``obj`` for this field
    """
    if field_value is None:
        return

    key = field.get('data_input_key')
    text = field_value if isinstance(field_value, str) else str(field_value)
    length = len(text)

    # A bound that is unset (or zero) imposes no length restriction
    minimum = field.get('min')
    if minimum and length < minimum:
        self.add_error(
            object_id=obj.id,
            detail=f'Field {key} value '
                   f'"{field_value}" is shorter than minimum length '
                   f'"{minimum}" for project '
                   f'{self.__config.project_code}',
            field=key,
        )

    maximum = field.get('max')
    if maximum and length > maximum:
        self.add_error(
            object_id=obj.id,
            detail=f'Field {key} value '
                   f'"{field_value}" is longer than maximum length '
                   f'"{maximum}" for project '
                   f'{self.__config.project_code}',
            field=key,
        )
150
+
151
def __validate_number(
    self,
    obj: DataObject,
    field: dict,
    field_value: str | int | float | None
) -> None:
    """
    Check that a numeric field value is an ``int`` or ``float`` and
    lies within the field's optional ``min`` and ``max`` bounds.

    A non-numeric value produces a single error and no range checks
    are attempted. Otherwise an error is added for each bound that
    is violated.

    :param obj: the data object being validated (its id is reported)
    :param field: the field definition; ``data_input_key``, ``min``
        and ``max`` are read from it
    :param field_value: the value held by ``obj`` for this field
    """
    key = field.get('data_input_key')

    # bool is a subclass of int, so it has to be excluded explicitly;
    # otherwise True/False would be accepted as the numbers 1/0
    is_number = (
        isinstance(field_value, (int, float))
        and not isinstance(field_value, bool)
    )
    if not is_number:
        self.add_error(
            object_id=obj.id,
            detail=f'Field {key} value '
                   f'"{field_value}" is not a number for project '
                   f'{self.__config.project_code}',
            field=key,
        )
        return

    # Compare against None so that a bound of 0 is still enforced
    minimum = field.get('min')
    if minimum is not None and field_value < minimum:
        self.add_error(
            object_id=obj.id,
            detail=f'Field {key} value '
                   f'"{field_value}" is less than minimum value '
                   f'"{minimum}" for project '
                   f'{self.__config.project_code}',
            field=key,
        )

    maximum = field.get('max')
    if maximum is not None and field_value > maximum:
        self.add_error(
            object_id=obj.id,
            detail=f'Field {key} value '
                   f'"{field_value}" is greater than maximum value '
                   f'"{maximum}" for project '
                   f'{self.__config.project_code}',
            field=key,
        )
187
+
188
def __validate_boolean(
    self,
    obj: DataObject,
    field: dict,
    field_value: str | int | float | None
) -> None:
    """
    Check that a Boolean field holds one of the two accepted flags,
    'Y' or 'N', adding an error for any other value.

    :param obj: the data object being validated (its id is reported)
    :param field: the field definition; ``data_input_key`` is read
    :param field_value: the value held by ``obj`` for this field
    """
    accepted_flags = ['Y', 'N']
    if field_value in accepted_flags:
        return

    input_key = field.get('data_input_key')
    message = (
        f'Field {input_key} value '
        f'"{field_value}" is not a boolean (Y/N) for project '
        f'{self.__config.project_code}'
    )
    self.add_error(
        object_id=obj.id,
        detail=message,
        field=input_key,
    )
203
+
204
def __validate_date(
    self,
    obj: DataObject,
    field: dict,
    field_value: str | int | float | None
) -> None:
    """
    Check that a Date field holds a ``datetime`` and, when the field
    has ``range_limit`` set, that it falls inside the allowed window
    around the current time.

    The window runs from ``min`` days before now to ``max`` days
    after now. A bound that is not configured is treated as open
    rather than raising.

    :param obj: the data object being validated (its id is reported)
    :param field: the field definition; ``data_input_key``,
        ``range_limit``, ``min`` and ``max`` are read from it
    :param field_value: the value held by ``obj`` for this field
    """
    key = field.get('data_input_key')

    if not isinstance(field_value, datetime):
        self.add_error(
            object_id=obj.id,
            detail=f'Field {key} value '
                   f'"{field_value}" is not a date string for project '
                   f'{self.__config.project_code}',
            field=key,
        )
        return

    if not field.get('range_limit'):
        return

    # Take "now" once so both bounds are measured from the same instant,
    # and in the value's own timezone so that aware and naive datetimes
    # are never compared with each other (which raises TypeError)
    now = datetime.now(tz=field_value.tzinfo)

    min_days = field.get('min')
    max_days = field.get('max')
    too_early = (
        min_days is not None
        and field_value < now - timedelta(days=min_days)
    )
    too_late = (
        max_days is not None
        and field_value > now + timedelta(days=max_days)
    )

    if too_early or too_late:
        self.add_error(
            object_id=obj.id,
            detail=f'Field {key} value '
                   f'"{field_value}" is not within the allowed date '
                   f'range for project {self.__config.project_code}',
            field=key,
        )
tol/validators/tolid.py CHANGED
@@ -103,7 +103,7 @@ class TolidValidator(Validator):
103
103
 
104
104
  if str(obj.get_field_by_name(self.__config.species_id_field)) not in taxons:
105
105
  self.add_error(
106
- object_id=obj.id,
106
+ object_id=obj.id + 1,
107
107
  detail=f'Specimen ID {specimen_id} does not match Taxon ID '
108
108
  f'{obj.get_field_by_name(self.__config.species_id_field)}'
109
109
  'in TolID source',
@@ -0,0 +1,90 @@
1
+ # SPDX-FileCopyrightText: 2025 Genome Research Ltd.
2
+ #
3
+ # SPDX-License-Identifier: MIT
4
+
5
+ from dataclasses import dataclass
6
+ from datetime import datetime, time
7
+
8
+ from tol.core import DataObject
9
+ from tol.core.validate import Validator
10
+
11
+
12
class TypesValidator(Validator):
    """
    Validates an incoming stream of `DataObject` instances,
    ensuring that the value of each configured attribute is an
    instance of the Python type named for it in
    `Config.allowed_types`.

    Attributes that are absent from the object, or whose value is
    `None`, are not checked. A type name that is not one of the
    supported names is ignored.
    """
    @dataclass(slots=True, frozen=True, kw_only=True)
    class Config:
        # Maps an attribute name to the name of its expected type:
        # 'str', 'int', 'float', 'bool', 'list', 'dict', 'datetime'
        # or 'time'
        allowed_types: dict[str, str]
        # Report mismatches as errors (True) or as warnings (False)
        is_error: bool = True
        # Message used when no more specific detail is available
        detail: str = 'Value is of incorrect type'

    __slots__ = ['__config']
    __config: Config

    # Supported type names and the classes they stand for. Built once
    # for the class instead of once per validated object.
    _TYPE_MAP = {
        'str': str,
        'int': int,
        'float': float,
        'bool': bool,
        'list': list,
        'dict': dict,
        'datetime': datetime,
        'time': time,
    }

    def __init__(
        self,
        config: Config,
        **kwargs
    ) -> None:

        super().__init__()
        self.__config = config

    def _validate_data_object(
        self,
        obj: DataObject
    ) -> None:
        """
        Compare every configured attribute present on ``obj`` with its
        expected type and record a result for each mismatch.
        """
        for key, expected_type in self.__config.allowed_types.items():
            if key not in obj.attributes:
                continue

            actual_value = obj.get_field_by_name(key)
            if actual_value is None:
                continue

            type_class = self._TYPE_MAP.get(expected_type)
            if type_class is None:
                # Unsupported type name: nothing to check against
                continue

            matches = isinstance(actual_value, type_class)
            # bool is a subclass of int, so isinstance(True, int) is
            # True; a bool must not satisfy an expected 'int'
            if expected_type == 'int' and isinstance(actual_value, bool):
                matches = False

            if not matches:
                self.__add_result(
                    obj,
                    key,
                    detail=f'Field {key} value "{actual_value}" is not of type '
                           f'"{expected_type}"',
                )

    def __add_result(
        self,
        obj: DataObject,
        key: str,
        detail: str | None = None,
    ) -> None:
        """
        Record a mismatch for ``key`` on ``obj`` as an error or as a
        warning, according to ``Config.is_error``. ``detail`` falls
        back to ``Config.detail`` when it is not given.
        """
        message = detail or self.__config.detail

        if self.__config.is_error:
            self.add_error(
                object_id=obj.id,
                detail=message,
                field=key,
            )
        else:
            self.add_warning(
                object_id=obj.id,
                detail=message,
                field=key,
            )
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: tol-sdk
3
- Version: 1.7.5b4
3
+ Version: 1.8.1
4
4
  Summary: SDK for interaction with ToL, Sanger and external services
5
5
  Author-email: ToL Platforms Team <tol-platforms@sanger.ac.uk>
6
6
  License: MIT
@@ -50,6 +50,8 @@ Requires-Dist: openpyxl>=3.0.10; extra == "sheets"
50
50
  Requires-Dist: XlsxWriter==3.1.9; extra == "sheets"
51
51
  Requires-Dist: xlrd==2.0.1; extra == "sheets"
52
52
  Requires-Dist: gspread>=5.12.0; extra == "sheets"
53
+ Provides-Extra: s3
54
+ Requires-Dist: minio==7.2.15; extra == "s3"
53
55
  Provides-Extra: all
54
56
  Requires-Dist: tol-sdk[api-base]; extra == "all"
55
57
  Requires-Dist: tol-sdk[benchling]; extra == "all"
@@ -47,8 +47,9 @@ tol/benchling/benchling_converter.py,sha256=CO7BMvMAM52uIFjQWZFedRve0XNSmC9YtumJ
47
47
  tol/benchling/benchling_datasource.py,sha256=aHoGOJYX_dsL3G-9lXlY0bQQl4pMXf4a852sAkl-sKs,35112
48
48
  tol/benchling/benchling_warehouse_datasource.py,sha256=opsdvHz8l06NTmt84HrIgUJxV_DsurVgFtGs3_5PMoM,4635
49
49
  tol/benchling/sql/__init__.py,sha256=4LbvDIZOOG7p-ebbvivP7NvrJeApUvGEIcDL58ahQJE,85
50
- tol/benchling/sql/extraction_extraction_type_dna.sql,sha256=hd6-Qt4FyNMh42aT3Oxph4W4A5Cv1mwsEBT9uPfHDPg,7291
51
- tol/benchling/sql/extraction_extraction_type_lres.sql,sha256=3FHMz_8aw-N1aaBip9bC7G0f8K-xkD3Gc_L91rY8c9o,1306
50
+ tol/benchling/sql/extraction_containers_dna.sql,sha256=YRQ0W1d-BjXB9gcMpf5ZyjHbPVp2VU0KkYi4e0JvYtA,6680
51
+ tol/benchling/sql/extraction_extraction_type_dna.sql,sha256=UvxboWBoXXp7RHUdRKNiQTS-AXdLdz8bFEXCS6q9SoE,3094
52
+ tol/benchling/sql/extraction_extraction_type_lres.sql,sha256=7Y6a8v0V-jjU5Kg3czuZjcPLvGSfrnUDekpHo2mUgnc,1556
52
53
  tol/benchling/sql/extraction_extraction_type_pooled_dna.sql,sha256=fNjCJPaViGrR6D8sLwZK2Zg5LqQqh16HB0s7ZeqTqdg,4480
53
54
  tol/benchling/sql/extraction_extraction_type_rna.sql,sha256=Vy3uV_ns4uO9CwuOFo1KPhI0yK6dsSO47wObcwJbHXQ,3861
54
55
  tol/benchling/sql/pacbio_prep.sql,sha256=a3dR-kcp8fT3ZZkbX8pV4StnweWGvcVl4fZNMVNCvbQ,4070
@@ -89,7 +90,7 @@ tol/core/attribute_metadata.py,sha256=wYD3NXDdStrpkUZoyTUiEpp7c14f7MLIcyooT1G4GE
89
90
  tol/core/core_converter.py,sha256=Gn4J507BtqDjnOWV2MFRYGz8YElJAKQItmnCrD72s7k,4504
90
91
  tol/core/data_loader.py,sha256=k-ET1nIohIz6PcADbEn9Y7k9TupoiBYxKDkcAl_9pGY,14710
91
92
  tol/core/data_object.py,sha256=GxG04JMcICaiHU1rufkhoD8jb9YQLhE0QWlFU2ZkQsM,4241
92
- tol/core/data_object_converter.py,sha256=FUNUXGi5FIdIe34B0g32hhRf8GGzWTHW7vLhb1GXG6E,3783
93
+ tol/core/data_object_converter.py,sha256=GESpLvwrAEwmCfBwy3GxcSCuHz0xt7ECCBPE2stxBdI,3927
93
94
  tol/core/data_source_attribute_metadata.py,sha256=NHvJ_Gmw7-Oej1MoFCohvq4f6emDJ2HF483UmW2Qd_c,4407
94
95
  tol/core/data_source_dict.py,sha256=d-hSmoWTwG6IOc0cQTLap1EBslsxYIWGUd3ScSoeH_Q,1705
95
96
  tol/core/datasource.py,sha256=e9GaeDPfO_Gs7cgQhmNxCiSDlRNf64reegzFebcMNkA,6303
@@ -142,7 +143,7 @@ tol/ena/filter.py,sha256=UzOx5ivXvA0TY2QuNzFmS-zDPVNnaAx07DMVkAwVsAE,3370
142
143
  tol/ena/parser.py,sha256=Z4YmUnpfLKng4QwmZkLEj1hUfwYb_bqr-DWgF1Gw-EY,3253
143
144
  tol/excel/__init__.py,sha256=M0xL9w9Au8kYOLWzFGuijJ7WoZENOMkZ1XV1ephhlDY,229
144
145
  tol/excel/excel.py,sha256=rcA-wfXY9R14OfNKS-NX2sn__9gmQ_G8LoUgWseF1Gk,2124
145
- tol/excel/excel_datasource.py,sha256=nIMvkCZ1edx8djqsVsPTRi6yCfyKc_dIokTpLy16rwY,3091
146
+ tol/excel/excel_datasource.py,sha256=WhkqIk4Qg-iDBczI4l1OFoLNDX32riwgj4SdGhr4DIs,3423
146
147
  tol/excel/s3_factory.py,sha256=4lGyKrSvarPXWndyvm7K-tel0FoM0My8wnz-Mzwt0yQ,1245
147
148
  tol/flows/__init__.py,sha256=M7iSvnBJs6fJ8M38cW0bYQa9WW0TN8FHAMjIHPDNAJ4,166
148
149
  tol/flows/logger.py,sha256=rWXbaknGcPEZRFvC1CiB1qkhFRZsQk435w7VyJ3cpyw,170
@@ -180,7 +181,7 @@ tol/flows/converters/gap_assembly_to_elastic_assembly_converter.py,sha256=XK-es-
180
181
  tol/flows/converters/genome_notes_genome_note_to_elastic_genome_note_converter.py,sha256=AaUWbVTaWU-NXnUQPaPwI41TE7a-nC4zlg-jrWpPT2s,1166
181
182
  tol/flows/converters/goat_taxon_to_elastic_species_converter.py,sha256=1NGs9427OdXGsBaMB467nOF7aTlJsUKYCuoSoABw9L4,1074
182
183
  tol/flows/converters/grit_issue_to_elastic_curation_converter.py,sha256=XpRpoRn589MxTqEk6zPWGn6tamJiqY9Ctxk8v0q-dvA,3953
183
- tol/flows/converters/incoming_sample_to_ena_sample_converter.py,sha256=HmGsg-VCE4W9Dl3lAlcNhWfkVYp1d22DZlFoTaFzeqA,5560
184
+ tol/flows/converters/incoming_sample_to_ena_sample_converter.py,sha256=SAVYWENG3GS7B1rM6rYwxfLQH75nZl7mEzphH5CBxRw,6353
184
185
  tol/flows/converters/incoming_sample_to_incoming_sample_with_lists_converter.py,sha256=5Fp1_ojsYqvRcKTgXJbyWqetPisi_vtWFcWr6RtGZoA,1504
185
186
  tol/flows/converters/informatics_tolid_to_elastic_tolid_converter.py,sha256=VrvtsDTPlc5Xa3K4rcAMHwV4n71zOH7q5EfALLLQ1tI,587
186
187
  tol/flows/converters/labwhere_location_to_elastic_sample_update_converter.py,sha256=NJNmG9sCc2WXc-2J5XfCKXhb2sDH82nZUBekd16PHcw,656
@@ -321,28 +322,29 @@ tol/treeval/treeval_datasource.py,sha256=GzY6JwH67b5QdV-UVdCFJfgGAIuZ96J2nl53YxZ
321
322
  tol/utils/__init__.py,sha256=764-Na1OaNGUDWpMIu51ZtXG7n_nB5MccUFK6LmkWRI,138
322
323
  tol/utils/csv.py,sha256=mihww25fSn72c4h-RFeqD_pFIG6KHZP4v1_C0rx81ws,421
323
324
  tol/utils/s3.py,sha256=aoYCwJ-qcMqFrpxmViFqPa0O1jgp0phtztO3-0CSNjw,491
324
- tol/validators/__init__.py,sha256=mJDlsI_W2y5jxazwOlyf-COl_Vlj1Xk1yC5xASouGH8,1134
325
+ tol/validators/__init__.py,sha256=QI5ykFzsTLsIQXcL4vF_aaVGdSr2l0X0Qkssbnxumss,1176
325
326
  tol/validators/allowed_keys.py,sha256=RJcHBiguL84B8hjSRaXLNES21yZqaKFwJNp2Tz9zvh0,1506
326
327
  tol/validators/allowed_values.py,sha256=-Yy3Sqo1WYacGKlot_dn3M2o7Oj5MXOioJrJmrWCCxs,1536
327
328
  tol/validators/allowed_values_from_datasource.py,sha256=ICFO6FcYXDN7M2Cv1OwpyN38CdhmY7oU-njzIatA3-w,3185
328
329
  tol/validators/assert_on_condition.py,sha256=eBGgSVfIQ6e45SheM-ZDg7daXJjyZxRVS5L8AWvbXag,2027
329
- tol/validators/converter_and_validate.py,sha256=YjhLsh0qMcyZEnHK2GJFotJfZssOtr8qU4uszcPQmrg,2960
330
- tol/validators/ena_checklist.py,sha256=VGJeDrHH-XzueforuyyCEgEi6y9NurhvuOSL-gSDoOE,2885
330
+ tol/validators/converter_and_validate.py,sha256=O1uYdrU4YDZ8eZjb7Koots4-8fMVOkJFXESg-LVw2o8,2992
331
+ tol/validators/ena_checklist.py,sha256=M10VAFGpaxnm7rWO4jmFhTWkYRlCmU0Ox2IUEDFGKbo,2812
331
332
  tol/validators/ena_submittable.py,sha256=CujF9t4mA4N3Wm_5rA5MRp401aW19kbioOZpfWVXg6I,1965
332
333
  tol/validators/min_one_valid_value.py,sha256=gZUHtfRA-Lvpw0d1FJoAA31cRJpLbbxAJCC9DCt5lCY,1442
333
334
  tol/validators/mutually_exclusive.py,sha256=6blZK-2IY4Eq79fHKKrm-pxsQ6B5DNH5ldtxOFVCPhU,4492
334
- tol/validators/regex.py,sha256=YdFHPcvEo6jNbXxDPTnpAQeOv3kSX4OUZUKfWmFFWl0,2602
335
+ tol/validators/regex.py,sha256=dLAi_vQt9_DsT6wQZmbYC7X5-Wp15l0leUE6XkPaItg,2602
335
336
  tol/validators/regex_by_value.py,sha256=XM5EnT4vgD17rfpR3bUE9I56IemSw26BI9MZtMakd4E,2582
336
337
  tol/validators/specimens_have_same_taxon.py,sha256=m2LLRIZMdhPj1fzyioDJOraI6UHXgy1l963xhezgk7E,2177
337
- tol/validators/sts_fields.py,sha256=A_NkQFn2TMNFv2yU_ercs7CXlh-oib33ZmDZtc6SuKQ,3459
338
- tol/validators/tolid.py,sha256=kgo-OWW3at6jK4DQtdgVFjO06sDvqG4QulGkI-jjVRU,3893
338
+ tol/validators/sts_fields.py,sha256=aYbzy15btEg4-ocDT1qrspe7-atoWRrOJ_KmuPU6J14,8936
339
+ tol/validators/tolid.py,sha256=yODebLYbKtlem3IpVcv8XImvq90r-AK68asH9JEawqo,3897
340
+ tol/validators/types.py,sha256=KDBNqx5isJG5XI1l2V9Wmi9135ZwDace3MU6Qij3J6E,2612
339
341
  tol/validators/unique_values.py,sha256=o5IrfUNLEmlEp8kpInTtFnTq-FqiHSC9TItKdf-LI1o,3114
340
342
  tol/validators/unique_whole_organisms.py,sha256=RdqA1GzIf3LTdrmNGGdxv0aW2udDY2P9EaqZb40hhik,5735
341
343
  tol/validators/interfaces/__init__.py,sha256=jtOxnwnwqV_29xjmmMcS_kvlt-pQiWwQYJn2YRP07_w,172
342
344
  tol/validators/interfaces/condition_evaluator.py,sha256=nj8Cb8hi47OBy6OVNfeLhF-Pjwtr8MiOSymYL6hfVes,3766
343
- tol_sdk-1.7.5b4.dist-info/licenses/LICENSE,sha256=RF9Jacy-9BpUAQQ20INhTgtaNBkmdTolYCHtrrkM2-8,1077
344
- tol_sdk-1.7.5b4.dist-info/METADATA,sha256=ZR7pMG-jGHoasPgY1zqvDMJMc4rm4rbqRi0hiC5pyN4,3081
345
- tol_sdk-1.7.5b4.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
346
- tol_sdk-1.7.5b4.dist-info/entry_points.txt,sha256=jH3HfTwxjzog7E3lq8CKpUWGIRY9FSXbyL6CpUmv6D0,36
347
- tol_sdk-1.7.5b4.dist-info/top_level.txt,sha256=PwKMQLphyZNvagBoriVbl8uwHXQl8IC1niawVG0iXMM,10
348
- tol_sdk-1.7.5b4.dist-info/RECORD,,
345
+ tol_sdk-1.8.1.dist-info/licenses/LICENSE,sha256=RF9Jacy-9BpUAQQ20INhTgtaNBkmdTolYCHtrrkM2-8,1077
346
+ tol_sdk-1.8.1.dist-info/METADATA,sha256=wXZU-uS5CIiiWYa0EGwD8YiK2Z9eOJz5x78yTUPFMmk,3142
347
+ tol_sdk-1.8.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
348
+ tol_sdk-1.8.1.dist-info/entry_points.txt,sha256=jH3HfTwxjzog7E3lq8CKpUWGIRY9FSXbyL6CpUmv6D0,36
349
+ tol_sdk-1.8.1.dist-info/top_level.txt,sha256=PwKMQLphyZNvagBoriVbl8uwHXQl8IC1niawVG0iXMM,10
350
+ tol_sdk-1.8.1.dist-info/RECORD,,