tol-sdk 1.7.5b4__py3-none-any.whl → 1.8.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tol/benchling/sql/extraction_containers_dna.sql +173 -0
- tol/benchling/sql/extraction_extraction_type_dna.sql +16 -131
- tol/benchling/sql/extraction_extraction_type_lres.sql +11 -3
- tol/core/data_object_converter.py +6 -0
- tol/excel/excel_datasource.py +9 -1
- tol/flows/converters/incoming_sample_to_ena_sample_converter.py +46 -21
- tol/validators/__init__.py +1 -0
- tol/validators/converter_and_validate.py +4 -4
- tol/validators/ena_checklist.py +16 -16
- tol/validators/regex.py +2 -3
- tol/validators/sts_fields.py +159 -19
- tol/validators/tolid.py +1 -1
- tol/validators/types.py +90 -0
- {tol_sdk-1.7.5b4.dist-info → tol_sdk-1.8.1.dist-info}/METADATA +3 -1
- {tol_sdk-1.7.5b4.dist-info → tol_sdk-1.8.1.dist-info}/RECORD +19 -17
- {tol_sdk-1.7.5b4.dist-info → tol_sdk-1.8.1.dist-info}/WHEEL +0 -0
- {tol_sdk-1.7.5b4.dist-info → tol_sdk-1.8.1.dist-info}/entry_points.txt +0 -0
- {tol_sdk-1.7.5b4.dist-info → tol_sdk-1.8.1.dist-info}/licenses/LICENSE +0 -0
- {tol_sdk-1.7.5b4.dist-info → tol_sdk-1.8.1.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,173 @@
|
|
|
1
|
+
/*
|
|
2
|
+
## SQL Query: DNA Extraction Containers (Benchling Warehouse)
|
|
3
|
+
|
|
4
|
+
This SQL query retrieves detailed information about DNA extraction containers managed by the ToL Core Laboratory, including metadata, container details, and the latest QC measurements.
|
|
5
|
+
|
|
6
|
+
The resulting table includes identifiers for tissues, tissue preps, extractions, containers, and locations, as well as the most recent QC results (Nanodrop, Qubit, Femto, Yield, and Decision Making).
|
|
7
|
+
|
|
8
|
+
Output: Table with columns:
|
|
9
|
+
|
|
10
|
+
1) taxon_id: [character] Tissue metadata. Origin: STS
|
|
11
|
+
2) eln_tissue_id: [character] Benchling ID for the tissue the extraction is derived from.
|
|
12
|
+
3) eln_tissue_prep_id: [character] Benchling ID for the tissue prep the extraction is derived from.
|
|
13
|
+
4) extraction_id: [character] DNA extraction entity ID (Benchling).
|
|
14
|
+
5) programme_id: [character] ToLID. Origin: BWH.
|
|
15
|
+
6) specimen_id: [character] Specimen ID. Origin: STS.
|
|
16
|
+
7) creation_date: [date] Date the container was created.
|
|
17
|
+
8) fluidx_container_id: [character] Primary key for the FluidX container.
|
|
18
|
+
9) fluidx_id: [character] FluidX barcode.
|
|
19
|
+
10) tube_type: [character] Type of tube/container.
|
|
20
|
+
11) volume_ul: [numeric] Volume in microliters (0 if archived as 'Retired' or 'Expended').
|
|
21
|
+
12) location: [character] Storage location name.
|
|
22
|
+
13) rack: [character] Box/rack barcode.
|
|
23
|
+
14) archive_purpose: [character] Reason for archiving the DNA extraction.
|
|
24
|
+
15) nanodrop_concentration_ngul: [numeric] Latest Nanodrop concentration (ng/µL).
|
|
25
|
+
16) dna_260_280_ratio: [numeric] Latest Nanodrop 260/280 ratio.
|
|
26
|
+
17) dna_260_230_ratio: [numeric] Latest Nanodrop 260/230 ratio.
|
|
27
|
+
18) qubit_concentration_ngul: [numeric] Latest Qubit concentration (ng/µL).
|
|
28
|
+
19) yield_ng: [numeric] Latest yield (ng).
|
|
29
|
+
20) femto_date_code: [character] Latest Femto date code.
|
|
30
|
+
21) femto_description: [character] Latest Femto profile description.
|
|
31
|
+
22) gqn_index: [numeric] Latest GQN index from Femto.
|
|
32
|
+
23) next_step: [character] Latest decision making next step.
|
|
33
|
+
24) extraction_qc_result: [character] Latest extraction QC result.
|
|
34
|
+
|
|
35
|
+
NOTES:
|
|
36
|
+
1) Only extractions from the 'ToL Core Lab' project and relevant folders are included.
|
|
37
|
+
2) Containers archived as 'Made in error' are excluded, as are extractions whose originating entry (entry$raw, linked through registration_origin$raw) has a name matching '%Nuclei isolation and tagmentation%'.
|
|
38
|
+
3) Latest QC results are joined from their respective measurement tables.
|
|
39
|
+
4) Volume is set to 0 for containers whose archive purpose is 'Retired' or 'Expended'.
|
|
40
|
+
5) Data types are preserved as in the Benchling Warehouse.
|
|
41
|
+
|
|
42
|
+
*/
|
|
43
|
+
|
|
44
|
+
WITH latest_nanodrop_conc AS (
|
|
45
|
+
SELECT
|
|
46
|
+
nanod.sample_id,
|
|
47
|
+
nanod.nanodrop_concentration_ngul,
|
|
48
|
+
nanod._260_280_ratio AS "dna_260_280_ratio",
|
|
49
|
+
nanod._260_230_ratio AS "dna_260_230_ratio"
|
|
50
|
+
FROM nanodrop_measurements_v2$raw AS nanod
|
|
51
|
+
WHERE nanod.created_at$ = (
|
|
52
|
+
SELECT MAX(sub.created_at$)
|
|
53
|
+
FROM nanodrop_measurements_v2$raw AS sub
|
|
54
|
+
WHERE sub.sample_id = nanod.sample_id
|
|
55
|
+
)
|
|
56
|
+
),
|
|
57
|
+
|
|
58
|
+
latest_qubit_conc AS (
|
|
59
|
+
SELECT
|
|
60
|
+
qbit.sample_id,
|
|
61
|
+
qbit.qubit_concentration_ngul
|
|
62
|
+
FROM qubit_measurements_v2$raw as qbit
|
|
63
|
+
WHERE qbit.created_at$ = (
|
|
64
|
+
SELECT MAX(sub.created_at$)
|
|
65
|
+
FROM qubit_measurements_v2$raw AS sub
|
|
66
|
+
WHERE sub.sample_id = qbit.sample_id
|
|
67
|
+
)
|
|
68
|
+
),
|
|
69
|
+
|
|
70
|
+
latest_yield AS (
|
|
71
|
+
SELECT
|
|
72
|
+
dnay.sample_id,
|
|
73
|
+
dnay.yield
|
|
74
|
+
FROM yield_v2$raw as dnay
|
|
75
|
+
WHERE dnay.created_at$ = (
|
|
76
|
+
SELECT MAX(sub.created_at$)
|
|
77
|
+
FROM yield_v2$raw AS sub
|
|
78
|
+
WHERE sub.sample_id = dnay.sample_id
|
|
79
|
+
)
|
|
80
|
+
),
|
|
81
|
+
|
|
82
|
+
latest_femto AS (
|
|
83
|
+
SELECT
|
|
84
|
+
femto.sample_id,
|
|
85
|
+
femto.femto_date_code,
|
|
86
|
+
femto.femto_profile_description AS femto_description,
|
|
87
|
+
femto.gqn_dnaex
|
|
88
|
+
FROM femto_dna_extract_v2$raw AS femto
|
|
89
|
+
WHERE femto.created_at$ = (
|
|
90
|
+
SELECT MAX(sub.created_at$)
|
|
91
|
+
FROM femto_dna_extract_v2$raw as sub
|
|
92
|
+
WHERE sub.sample_id = femto.sample_id
|
|
93
|
+
)
|
|
94
|
+
),
|
|
95
|
+
|
|
96
|
+
latest_decision_making AS (
|
|
97
|
+
SELECT
|
|
98
|
+
dnad.sample_id,
|
|
99
|
+
dnad.next_step,
|
|
100
|
+
qc_passfail AS extraction_qc_result
|
|
101
|
+
FROM dna_decision_making_v2$raw AS dnad
|
|
102
|
+
WHERE dnad.created_at$ = (
|
|
103
|
+
SELECT MAX(sub.created_at$)
|
|
104
|
+
FROM dna_decision_making_v2$raw AS sub
|
|
105
|
+
WHERE sub.sample_id = dnad.sample_id
|
|
106
|
+
)
|
|
107
|
+
)
|
|
108
|
+
|
|
109
|
+
SELECT DISTINCT
|
|
110
|
+
t.taxon_id,
|
|
111
|
+
t.id AS eln_tissue_id,
|
|
112
|
+
tp.id AS eln_tissue_prep_id,
|
|
113
|
+
dna.id AS extraction_id,
|
|
114
|
+
t.programme_id,
|
|
115
|
+
t.specimen_id,
|
|
116
|
+
DATE(con.created_at) AS creation_date,
|
|
117
|
+
con.id AS fluidx_container_id, -- primary key
|
|
118
|
+
con.barcode AS fluidx_id,
|
|
119
|
+
tube.type AS tube_type,
|
|
120
|
+
CASE
|
|
121
|
+
WHEN con.archive_purpose$ IN ('Retired', 'Expended') THEN 0 -- Retired or expended DNA extractions are reported with a volume of 0
|
|
122
|
+
ELSE con.volume_si * 1000000
|
|
123
|
+
END AS volume_ul,
|
|
124
|
+
loc.name AS location,
|
|
125
|
+
box.barcode AS rack,
|
|
126
|
+
con.archive_purpose$ AS archive_purpose,
|
|
127
|
+
latest_nanodrop_conc.nanodrop_concentration_ngul,
|
|
128
|
+
latest_nanodrop_conc.dna_260_280_ratio,
|
|
129
|
+
latest_nanodrop_conc.dna_260_230_ratio,
|
|
130
|
+
latest_qubit_conc.qubit_concentration_ngul,
|
|
131
|
+
latest_yield.yield AS yield_ng,
|
|
132
|
+
latest_femto.femto_date_code,
|
|
133
|
+
latest_femto.femto_description,
|
|
134
|
+
latest_femto.gqn_dnaex AS gqn_index,
|
|
135
|
+
latest_decision_making.next_step,
|
|
136
|
+
latest_decision_making.extraction_qc_result
|
|
137
|
+
FROM dna_extract$raw AS dna
|
|
138
|
+
INNER JOIN container_content$raw AS cc -- Start of container/tube join
|
|
139
|
+
ON cc.entity_id = dna.id
|
|
140
|
+
LEFT JOIN container$raw AS con
|
|
141
|
+
ON con.id = cc.container_id
|
|
142
|
+
LEFT JOIN tube$raw AS tube
|
|
143
|
+
ON cc.container_id = tube.id -- End of container/tube join
|
|
144
|
+
LEFT JOIN box$raw AS box -- Location chunk
|
|
145
|
+
ON con.box_id = box.id
|
|
146
|
+
LEFT JOIN location$raw AS loc
|
|
147
|
+
ON loc.id = box.location_id -- End of location chunk
|
|
148
|
+
LEFT JOIN tissue_prep$raw AS tp
|
|
149
|
+
ON tp.id = dna.tissue_prep
|
|
150
|
+
LEFT JOIN tissue$raw AS t
|
|
151
|
+
ON t.id = tp.tissue
|
|
152
|
+
LEFT JOIN latest_nanodrop_conc -- Results chunk
|
|
153
|
+
ON dna.id = latest_nanodrop_conc.sample_id
|
|
154
|
+
LEFT JOIN latest_qubit_conc
|
|
155
|
+
ON dna.id = latest_qubit_conc.sample_id
|
|
156
|
+
LEFT JOIN latest_yield
|
|
157
|
+
ON dna.id = latest_yield.sample_id
|
|
158
|
+
LEFT JOIN latest_femto
|
|
159
|
+
ON dna.id = latest_femto.sample_id
|
|
160
|
+
LEFT JOIN latest_decision_making
|
|
161
|
+
ON dna.id = latest_decision_making.sample_id -- End Results chunk
|
|
162
|
+
LEFT JOIN folder$raw AS f
|
|
163
|
+
ON dna.folder_id$ = f.id
|
|
164
|
+
LEFT JOIN project$raw AS proj
|
|
165
|
+
ON dna.project_id$ = proj.id
|
|
166
|
+
LEFT JOIN registration_origin$raw AS reg
|
|
167
|
+
ON reg.entity_id = dna.id
|
|
168
|
+
LEFT JOIN entry$raw AS ent
|
|
169
|
+
ON reg.origin_entry_id = ent.id
|
|
170
|
+
WHERE proj.name = 'ToL Core Lab'
|
|
171
|
+
AND (f.name IN ('Routine Throughput', 'DNA', 'Core Lab Entities', 'Benchling MS Project Move') OR f.name IS NULL)
|
|
172
|
+
AND (con.archive_purpose$ != ('Made in error') OR con.archive_purpose$ IS NULL)
|
|
173
|
+
AND ent.name NOT LIKE '%Nuclei isolation and tagmentation%'
|
|
@@ -11,34 +11,20 @@ Output: Table with cols:
|
|
|
11
11
|
|
|
12
12
|
1) sts_id: [integer] Tissue metadata. Origin: STS
|
|
13
13
|
2) taxon_id: [character] Tissue metadata. Origin: STS
|
|
14
|
-
3) eln_tissue_id: [character] Benchling id for the tissue the
|
|
15
|
-
4) eln_tissue_prep_id: [character] Benchling id for the tissue prep the
|
|
14
|
+
3) eln_tissue_id: [character] Benchling id for the tissue the extraction is derived from.
|
|
15
|
+
4) eln_tissue_prep_id: [character] Benchling id for the tissue prep the extraction is derived from.
|
|
16
16
|
5) eln_file_registry_id: [character] id in Benchling Registry.
|
|
17
17
|
6) extraction_id: [character] Primary key.
|
|
18
18
|
7) programme_id: [character] ToLID. Origin: BWH
|
|
19
19
|
8) specimen_id: [character] Specimen ID. Origin: STS
|
|
20
20
|
9) completion_date: [date] Extraction date. This field coalesces the created_on and created_at$ fields, preferring created_on, which holds dates for legacy Batches and Tracking (BnT) data.
|
|
21
|
-
10) extraction_name: [character] Entity name.
|
|
22
|
-
11)
|
|
23
|
-
12)
|
|
24
|
-
13)
|
|
25
|
-
14)
|
|
26
|
-
15)
|
|
27
|
-
16)
|
|
28
|
-
17) extraction_protocol: [character] DNA extraction protocol as recorded at the time of extraction
|
|
29
|
-
18) tube_type: [character] Type of tube. Marked NULL or voucher.
|
|
30
|
-
19) extraction_type: [character] dna.
|
|
31
|
-
20) name: [character] Folder name.
|
|
32
|
-
21) archive_purpose: [character] Reason for archiving the DNA extraction.
|
|
33
|
-
22) nanodrop_concentration_ngul: [double] Concentration of DNA as measured by Nanodrop.
|
|
34
|
-
23) dna_260_280_ratio: [double] Ratio of absorbance at 260:280nm as measured by spectrophotometer.
|
|
35
|
-
24) dna_260_230_ratio: [double] Ratio of absorbance at 260:230nm as measured by spectrophotometer.
|
|
36
|
-
25) qubit_concentration_ngul: [double] Concentration of DNA as measured by Qubit.
|
|
37
|
-
26) yield_ng: [double] DNA yield after extraction.
|
|
38
|
-
27) femto_date_code: [character] Femto date code.
|
|
39
|
-
28) femto_description:[character] Categorical description of the femto pulse profile.
|
|
40
|
-
29) gqn_index: [character] Genomic Quality Number (GQN) index, calculated by the Femto software.
|
|
41
|
-
30) extraction_qc_result: [character] QC result: Yes = Extraction passed; No = Extraction failed.
|
|
21
|
+
10) extraction_name: [character] Entity name.
|
|
22
|
+
11) bnt_id: [character] Batches and Tracking legacy id.
|
|
23
|
+
12) manual_vs_automatic: [character] Extraction method indicator.
|
|
24
|
+
13) extraction_protocol: [character] DNA extraction protocol as recorded at the time of extraction.
|
|
25
|
+
14) extraction_type: [character] Type of extraction, set to 'dna'.
|
|
26
|
+
15) folder_name: [character] Folder name.
|
|
27
|
+
16) archive_purpose: [character] Reason for archiving the DNA extraction.
|
|
42
28
|
|
|
43
29
|
NOTES:
|
|
44
30
|
1) Data types were cast explicitly to preserve the data types stored in BWH.
|
|
@@ -46,71 +32,6 @@ NOTES:
|
|
|
46
32
|
|
|
47
33
|
*/
|
|
48
34
|
|
|
49
|
-
WITH latest_nanodrop_conc AS (
|
|
50
|
-
SELECT
|
|
51
|
-
nanod.sample_id,
|
|
52
|
-
nanod.nanodrop_concentration_ngul,
|
|
53
|
-
nanod._260_280_ratio AS "dna_260_280_ratio",
|
|
54
|
-
nanod._260_230_ratio AS "dna_260_230_ratio"
|
|
55
|
-
FROM nanodrop_measurements_v2$raw AS nanod
|
|
56
|
-
WHERE nanod.created_at$ = (
|
|
57
|
-
SELECT MAX(sub.created_at$)
|
|
58
|
-
FROM nanodrop_measurements_v2$raw AS sub
|
|
59
|
-
WHERE sub.sample_id = nanod.sample_id
|
|
60
|
-
)
|
|
61
|
-
),
|
|
62
|
-
|
|
63
|
-
latest_qubit_conc AS (
|
|
64
|
-
SELECT
|
|
65
|
-
qbit.sample_id,
|
|
66
|
-
qbit.qubit_concentration_ngul
|
|
67
|
-
FROM qubit_measurements_v2$raw as qbit
|
|
68
|
-
WHERE qbit.created_at$ = (
|
|
69
|
-
SELECT MAX(sub.created_at$)
|
|
70
|
-
FROM qubit_measurements_v2$raw AS sub
|
|
71
|
-
WHERE sub.sample_id = qbit.sample_id
|
|
72
|
-
)
|
|
73
|
-
),
|
|
74
|
-
|
|
75
|
-
latest_yield AS (
|
|
76
|
-
SELECT
|
|
77
|
-
dnay.sample_id,
|
|
78
|
-
dnay.yield
|
|
79
|
-
FROM yield_v2$raw as dnay
|
|
80
|
-
WHERE dnay.created_at$ = (
|
|
81
|
-
SELECT MAX(sub.created_at$)
|
|
82
|
-
FROM yield_v2$raw AS sub
|
|
83
|
-
WHERE sub.sample_id = dnay.sample_id
|
|
84
|
-
)
|
|
85
|
-
),
|
|
86
|
-
|
|
87
|
-
latest_femto AS (
|
|
88
|
-
SELECT
|
|
89
|
-
femto.sample_id,
|
|
90
|
-
femto.femto_date_code,
|
|
91
|
-
femto.femto_profile_description AS femto_description,
|
|
92
|
-
femto.gqn_dnaex
|
|
93
|
-
FROM femto_dna_extract_v2$raw AS femto
|
|
94
|
-
WHERE femto.created_at$ = (
|
|
95
|
-
SELECT MAX(sub.created_at$)
|
|
96
|
-
FROM femto_dna_extract_v2$raw as sub
|
|
97
|
-
WHERE sub.sample_id = femto.sample_id
|
|
98
|
-
)
|
|
99
|
-
),
|
|
100
|
-
|
|
101
|
-
latest_decision_making AS (
|
|
102
|
-
SELECT
|
|
103
|
-
dnad.sample_id,
|
|
104
|
-
dnad.next_step,
|
|
105
|
-
qc_passfail AS extraction_qc_result
|
|
106
|
-
FROM dna_decision_making_v2$raw AS dnad
|
|
107
|
-
WHERE dnad.created_at$ = (
|
|
108
|
-
SELECT MAX(sub.created_at$)
|
|
109
|
-
FROM dna_decision_making_v2$raw AS sub
|
|
110
|
-
WHERE sub.sample_id = dnad.sample_id
|
|
111
|
-
)
|
|
112
|
-
)
|
|
113
|
-
|
|
114
35
|
SELECT DISTINCT
|
|
115
36
|
t.sts_id,
|
|
116
37
|
t.taxon_id,
|
|
@@ -122,62 +43,26 @@ SELECT DISTINCT
|
|
|
122
43
|
t.specimen_id,
|
|
123
44
|
COALESCE(DATE(dna.created_on), DATE(dna.created_at$)) AS completion_date, -- Homogenising BnT and Benchling dates
|
|
124
45
|
dna.name$ AS extraction_name,
|
|
125
|
-
con.barcode AS fluidx_id,
|
|
126
|
-
con.id AS fluidx_container_id,
|
|
127
|
-
CASE
|
|
128
|
-
WHEN con.archive_purpose$ IN ('Retired', 'Expended') THEN 0 -- Retired or expended DNA extractions have a weight of 0
|
|
129
|
-
ELSE con.volume_si * 1000000
|
|
130
|
-
END AS volume_ul,
|
|
131
|
-
loc.name AS location,
|
|
132
|
-
box.barcode AS rack,
|
|
133
46
|
dna.bt_id AS bnt_id,
|
|
134
|
-
|
|
47
|
+
dna.manual_vs_automatic AS manual_vs_automatic,
|
|
135
48
|
dna.extraction_protocol,
|
|
136
|
-
tube.type AS tube_type,
|
|
137
49
|
'dna'::varchar AS extraction_type,
|
|
138
|
-
f.name
|
|
139
|
-
latest_nanodrop_conc.nanodrop_concentration_ngul,
|
|
140
|
-
latest_nanodrop_conc.dna_260_280_ratio,
|
|
141
|
-
latest_nanodrop_conc.dna_260_230_ratio,
|
|
142
|
-
latest_qubit_conc.qubit_concentration_ngul,
|
|
143
|
-
latest_yield.yield AS yield_ng,
|
|
144
|
-
latest_femto.femto_date_code,
|
|
145
|
-
latest_femto.femto_description,
|
|
146
|
-
latest_femto.gqn_dnaex AS gqn_index,
|
|
147
|
-
latest_decision_making.next_step,
|
|
148
|
-
latest_decision_making.extraction_qc_result
|
|
50
|
+
f.name AS folder_name
|
|
149
51
|
FROM dna_extract$raw AS dna
|
|
150
|
-
LEFT JOIN container_content$raw AS cc
|
|
151
|
-
ON cc.entity_id = dna.id
|
|
152
|
-
LEFT JOIN container$raw AS con
|
|
153
|
-
ON con.id = cc.container_id
|
|
154
52
|
LEFT JOIN tissue_prep$raw AS tp
|
|
155
53
|
ON tp.id = dna.tissue_prep
|
|
156
54
|
LEFT JOIN tissue$raw AS t
|
|
157
55
|
ON t.id = tp.tissue
|
|
158
|
-
LEFT JOIN tube$raw AS tube
|
|
159
|
-
ON cc.container_id = tube.id
|
|
160
56
|
LEFT JOIN folder$raw AS f
|
|
161
57
|
ON dna.folder_id$ = f.id
|
|
162
58
|
LEFT JOIN project$raw AS proj
|
|
163
59
|
ON dna.project_id$ = proj.id
|
|
164
|
-
LEFT JOIN
|
|
165
|
-
|
|
166
|
-
LEFT JOIN
|
|
167
|
-
|
|
168
|
-
LEFT JOIN latest_yield
|
|
169
|
-
ON dna.id = latest_yield.sample_id
|
|
170
|
-
LEFT JOIN latest_femto
|
|
171
|
-
ON dna.id = latest_femto.sample_id
|
|
172
|
-
LEFT JOIN latest_decision_making
|
|
173
|
-
ON dna.id = latest_decision_making.sample_id -- End Results chunk
|
|
174
|
-
LEFT JOIN box$raw AS box -- Location chunk
|
|
175
|
-
ON con.box_id = box.id
|
|
176
|
-
LEFT JOIN location$raw AS loc
|
|
177
|
-
ON loc.id = box.location_id -- End of location chunk
|
|
60
|
+
LEFT JOIN registration_origin$raw AS reg
|
|
61
|
+
ON reg.entity_id = dna.id
|
|
62
|
+
LEFT JOIN entry$raw AS ent
|
|
63
|
+
ON reg.origin_entry_id = ent.id
|
|
178
64
|
WHERE proj.name = 'ToL Core Lab'
|
|
179
65
|
AND (f.name IN ('Routine Throughput', 'DNA', 'Core Lab Entities', 'Benchling MS Project Move') OR f.name IS NULL)
|
|
180
66
|
AND (dna.archive_purpose$ != ('Made in error') OR dna.archive_purpose$ IS NULL)
|
|
181
|
-
AND
|
|
182
|
-
AND con.barcode NOT LIKE 'CON%'
|
|
67
|
+
AND ent.name NOT LIKE '%Nuclei isolation and tagmentation%'
|
|
183
68
|
ORDER BY completion_date DESC
|
|
@@ -28,8 +28,8 @@ SELECT DISTINCT
|
|
|
28
28
|
tp.name$ AS eln_tissue_prep_name,
|
|
29
29
|
ssid.sanger_sample_id,
|
|
30
30
|
ssid.sanger_sample_id AS extraction_id,
|
|
31
|
-
|
|
32
|
-
|
|
31
|
+
sub_con.barcode AS fluidx_id,
|
|
32
|
+
sub_con.id AS fluidx_container_id,
|
|
33
33
|
DATE(tpsub.submitted_submission_date) AS completion_date,
|
|
34
34
|
'lres'::varchar AS extraction_type
|
|
35
35
|
FROM tissue_prep$raw AS tp
|
|
@@ -41,8 +41,16 @@ LEFT JOIN container$raw AS c
|
|
|
41
41
|
ON cc.container_id = c.id
|
|
42
42
|
LEFT JOIN tissue_prep_submission_workflow_output$raw AS tpsub
|
|
43
43
|
ON c.id = tpsub.sample_tube_id
|
|
44
|
+
LEFT JOIN container$raw AS sub_con
|
|
45
|
+
ON tpsub.sample_tube_id = sub_con.id
|
|
44
46
|
LEFT JOIN storage$raw AS stor
|
|
45
47
|
ON c.location_id = stor.id
|
|
46
48
|
LEFT JOIN sanger_sample_id$raw AS ssid
|
|
47
49
|
ON c.id = ssid.sample_tube
|
|
48
|
-
|
|
50
|
+
LEFT JOIN project$raw AS proj
|
|
51
|
+
ON tp.project_id$ = proj.id
|
|
52
|
+
LEFT JOIN folder$raw AS f
|
|
53
|
+
ON tp.folder_id$ = f.id
|
|
54
|
+
WHERE sub_con.id IS NOT NULL
|
|
55
|
+
AND proj.name = 'ToL Core Lab'
|
|
56
|
+
AND f.name = 'Sample Prep'
|
|
@@ -3,6 +3,7 @@
|
|
|
3
3
|
# SPDX-License-Identifier: MIT
|
|
4
4
|
|
|
5
5
|
from abc import ABC, abstractmethod
|
|
6
|
+
from dataclasses import dataclass
|
|
6
7
|
from typing import Iterable
|
|
7
8
|
|
|
8
9
|
from more_itertools import flatten
|
|
@@ -95,9 +96,14 @@ class DefaultDataObjectToDataObjectConverter(DataObjectToDataObjectOrUpdateConve
|
|
|
95
96
|
|
|
96
97
|
class SanitisingConverter(DataObjectToDataObjectOrUpdateConverter):
|
|
97
98
|
|
|
99
|
+
@dataclass(slots=True, frozen=True, kw_only=True)
|
|
100
|
+
class Config:
|
|
101
|
+
pass
|
|
102
|
+
|
|
98
103
|
def __init__(
|
|
99
104
|
self,
|
|
100
105
|
data_object_factory: DataObjectFactory,
|
|
106
|
+
config: Config,
|
|
101
107
|
**kwargs
|
|
102
108
|
):
|
|
103
109
|
super().__init__(data_object_factory)
|
tol/excel/excel_datasource.py
CHANGED
|
@@ -65,7 +65,7 @@ class ExcelDataSource(
|
|
|
65
65
|
) -> Iterable[DataObject]:
|
|
66
66
|
|
|
67
67
|
return (
|
|
68
|
-
self.__marshal_row(row_index +
|
|
68
|
+
self.__marshal_row(row_index + 2, row) # Add 1 for header, 1 for 1-based ID
|
|
69
69
|
for row_index, row
|
|
70
70
|
in self.__df.iterrows()
|
|
71
71
|
)
|
|
@@ -115,6 +115,14 @@ class ExcelDataSource(
|
|
|
115
115
|
__v: Any,
|
|
116
116
|
) -> Any:
|
|
117
117
|
|
|
118
|
+
# Convert pandas Timestamp to Python datetime
|
|
119
|
+
if isinstance(__v, pd.Timestamp):
|
|
120
|
+
__v = datetime.fromtimestamp(__v.timestamp())
|
|
121
|
+
|
|
122
|
+
# If float and is whole number, convert to int
|
|
123
|
+
if isinstance(__v, float) and __v.is_integer():
|
|
124
|
+
__v = int(__v)
|
|
125
|
+
|
|
118
126
|
if __k not in self.__mappings:
|
|
119
127
|
return __v
|
|
120
128
|
|
|
@@ -3,6 +3,7 @@
|
|
|
3
3
|
|
|
4
4
|
import re
|
|
5
5
|
from dataclasses import dataclass
|
|
6
|
+
from datetime import datetime
|
|
6
7
|
from typing import Iterable
|
|
7
8
|
|
|
8
9
|
from tol.core import DataObject, DataObjectToDataObjectOrUpdateConverter
|
|
@@ -30,8 +31,10 @@ class IncomingSampleToEnaSampleConverter(DataObjectToDataObjectOrUpdateConverter
|
|
|
30
31
|
s = data_object
|
|
31
32
|
attributes = {
|
|
32
33
|
'ENA-CHECKLIST': self.__config.ena_checklist_id,
|
|
33
|
-
'organism part': self.
|
|
34
|
-
|
|
34
|
+
'organism part': self.__join_list([
|
|
35
|
+
self.__replace_underscores(v)
|
|
36
|
+
for v in s.attributes.get('ORGANISM_PART', [])
|
|
37
|
+
]),
|
|
35
38
|
'lifestage': (
|
|
36
39
|
'spore-bearing structure'
|
|
37
40
|
if s.attributes.get('LIFESTAGE') == 'SPORE_BEARING_STRUCTURE'
|
|
@@ -40,35 +43,38 @@ class IncomingSampleToEnaSampleConverter(DataObjectToDataObjectOrUpdateConverter
|
|
|
40
43
|
),
|
|
41
44
|
'project name':
|
|
42
45
|
self.__config.project_name,
|
|
43
|
-
'
|
|
44
|
-
self.__replace_underscores(
|
|
45
|
-
|
|
46
|
+
'collected_by': self.__join_list([
|
|
47
|
+
self.__replace_underscores(v)
|
|
48
|
+
for v in s.attributes.get('COLLECTED_BY', [])
|
|
49
|
+
]),
|
|
46
50
|
'collection date':
|
|
47
|
-
self.
|
|
48
|
-
s.attributes.get('DATE_OF_COLLECTION'))
|
|
51
|
+
self.__format_date(
|
|
52
|
+
s.attributes.get('DATE_OF_COLLECTION')),
|
|
49
53
|
'geographic location (country and/or sea)':
|
|
50
54
|
self.__collection_country(s).replace('_', ' '),
|
|
51
55
|
'geographic location (latitude)':
|
|
52
56
|
self.__replace_underscores(
|
|
53
|
-
s.attributes.get('DECIMAL_LATITUDE')).lower(),
|
|
57
|
+
str(s.attributes.get('DECIMAL_LATITUDE'))).lower(),
|
|
54
58
|
'geographic location (latitude) units':
|
|
55
59
|
'DD',
|
|
56
60
|
'geographic location (longitude)':
|
|
57
61
|
self.__replace_underscores(
|
|
58
|
-
s.attributes.get('DECIMAL_LONGITUDE')).lower(),
|
|
62
|
+
str(s.attributes.get('DECIMAL_LONGITUDE'))).lower(),
|
|
59
63
|
'geographic location (longitude) units':
|
|
60
64
|
'DD',
|
|
61
65
|
'geographic location (region and locality)':
|
|
62
66
|
self.__collection_region(s).replace('_', ' '),
|
|
63
|
-
'identified_by':
|
|
64
|
-
self.__replace_underscores(
|
|
65
|
-
|
|
67
|
+
'identified_by': self.__join_list([
|
|
68
|
+
self.__replace_underscores(v)
|
|
69
|
+
for v in s.attributes.get('IDENTIFIED_BY', [])
|
|
70
|
+
]),
|
|
66
71
|
'habitat':
|
|
67
72
|
self.__replace_underscores(
|
|
68
73
|
s.attributes.get('HABITAT')),
|
|
69
|
-
'identifier_affiliation':
|
|
70
|
-
self.__replace_underscores(
|
|
71
|
-
|
|
74
|
+
'identifier_affiliation': self.__join_list([
|
|
75
|
+
self.__replace_underscores(v)
|
|
76
|
+
for v in s.attributes.get('IDENTIFIER_AFFILIATION', [])
|
|
77
|
+
]),
|
|
72
78
|
'sex':
|
|
73
79
|
self.__replace_underscores(
|
|
74
80
|
s.attributes.get('SEX')),
|
|
@@ -77,9 +83,10 @@ class IncomingSampleToEnaSampleConverter(DataObjectToDataObjectOrUpdateConverter
|
|
|
77
83
|
s.attributes.get('RELATIONSHIP')),
|
|
78
84
|
'SYMBIONT':
|
|
79
85
|
'Y' if s.attributes.get('SYMBIONT') == 'SYMBIONT' else 'N',
|
|
80
|
-
'collecting institution':
|
|
81
|
-
self.__replace_underscores(
|
|
82
|
-
|
|
86
|
+
'collecting institution': self.__join_list([
|
|
87
|
+
self.__replace_underscores(v)
|
|
88
|
+
for v in s.attributes.get('COLLECTOR_AFFILIATION', [])
|
|
89
|
+
]),
|
|
83
90
|
}
|
|
84
91
|
if self.__sanitise(s.attributes.get('DEPTH')) != '':
|
|
85
92
|
attributes['geographic location (depth)'] = s.attributes.get('DEPTH')
|
|
@@ -88,9 +95,11 @@ class IncomingSampleToEnaSampleConverter(DataObjectToDataObjectOrUpdateConverter
|
|
|
88
95
|
attributes['geographic location (elevation)'] = s.attributes.get('ELEVATION')
|
|
89
96
|
attributes['geographic location (elevation) units'] = 'm'
|
|
90
97
|
if self.__sanitise(s.attributes.get('ORIGINAL_COLLECTION_DATE')) != '':
|
|
91
|
-
attributes['original collection date'] =
|
|
98
|
+
attributes['original collection date'] = \
|
|
99
|
+
self.__format_date(s.attributes.get('ORIGINAL_COLLECTION_DATE'))
|
|
92
100
|
if self.__sanitise(s.attributes.get('ORIGINAL_GEOGRAPHIC_LOCATION')) != '':
|
|
93
|
-
attributes['original geographic location'] =
|
|
101
|
+
attributes['original geographic location'] = \
|
|
102
|
+
self.__replace_underscores(s.attributes.get('ORIGINAL_GEOGRAPHIC_LOCATION'))
|
|
94
103
|
if s.attributes.get('GAL') is not None:
|
|
95
104
|
attributes['GAL'] = s.attributes.get('GAL')
|
|
96
105
|
if s.attributes.get('VOUCHER_ID') is not None:
|
|
@@ -103,7 +112,7 @@ class IncomingSampleToEnaSampleConverter(DataObjectToDataObjectOrUpdateConverter
|
|
|
103
112
|
attributes['culture_or_strain_id'] = s.attributes.get('CULTURE_OR_STRAIN_ID')
|
|
104
113
|
|
|
105
114
|
ret = self._data_object_factory(
|
|
106
|
-
|
|
115
|
+
data_object.type,
|
|
107
116
|
s.id,
|
|
108
117
|
attributes=attributes,
|
|
109
118
|
)
|
|
@@ -128,3 +137,19 @@ class IncomingSampleToEnaSampleConverter(DataObjectToDataObjectOrUpdateConverter
|
|
|
128
137
|
if value is None:
|
|
129
138
|
return default_value
|
|
130
139
|
return value
|
|
140
|
+
|
|
141
|
+
def __join_list(self, value_list):
|
|
142
|
+
if value_list is None:
|
|
143
|
+
return ''
|
|
144
|
+
if not isinstance(value_list, list):
|
|
145
|
+
return str(value_list)
|
|
146
|
+
return ' | '.join(str(v) for v in value_list)
|
|
147
|
+
|
|
148
|
+
def __format_date(self, value):
|
|
149
|
+
"""Format date to YYYY-mm-dd format"""
|
|
150
|
+
if value is None:
|
|
151
|
+
return ''
|
|
152
|
+
if isinstance(value, datetime):
|
|
153
|
+
return value.strftime('%Y-%m-%d')
|
|
154
|
+
|
|
155
|
+
return str(value)
|
tol/validators/__init__.py
CHANGED
|
@@ -15,6 +15,7 @@ from .regex_by_value import RegexByValueValidator # noqa
|
|
|
15
15
|
from .specimens_have_same_taxon import SpecimensHaveSameTaxonValidator # noqa
|
|
16
16
|
from .sts_fields import StsFieldsValidator # noqa
|
|
17
17
|
from .tolid import TolidValidator # noqa
|
|
18
|
+
from .types import TypesValidator # noqa
|
|
18
19
|
from .unique_values import UniqueValuesValidator # noqa
|
|
19
20
|
from .unique_whole_organisms import UniqueWholeOrganismsValidator # noqa
|
|
20
21
|
from .interfaces import Condition # noqa
|
|
@@ -19,12 +19,12 @@ class ConverterAndValidateValidator(Validator):
|
|
|
19
19
|
"converters": [{
|
|
20
20
|
"module": "<path.to.module>",
|
|
21
21
|
"class_name": "<path.to.ConverterClass>",
|
|
22
|
-
"
|
|
22
|
+
"config_details": { ... }
|
|
23
23
|
}],
|
|
24
24
|
"validators": [{
|
|
25
25
|
"module": "<path.to.module>",
|
|
26
26
|
"class_name": "<path.to.ValidatorClass>",
|
|
27
|
-
"
|
|
27
|
+
"config_details": { ... }
|
|
28
28
|
}]
|
|
29
29
|
}
|
|
30
30
|
|
|
@@ -54,7 +54,7 @@ class ConverterAndValidateValidator(Validator):
|
|
|
54
54
|
converter_class = getattr(__module, conv.get('class_name'))
|
|
55
55
|
|
|
56
56
|
converter_conf = converter_class.Config(
|
|
57
|
-
**conv.get('
|
|
57
|
+
**conv.get('config_details')
|
|
58
58
|
)
|
|
59
59
|
self.__converters.append(converter_class(
|
|
60
60
|
data_object_factory=data_object_factory,
|
|
@@ -65,7 +65,7 @@ class ConverterAndValidateValidator(Validator):
|
|
|
65
65
|
validator_class = getattr(__module, val.get('class_name'))
|
|
66
66
|
|
|
67
67
|
validator_conf = validator_class.Config(
|
|
68
|
-
**val.get('
|
|
68
|
+
**val.get('config_details')
|
|
69
69
|
)
|
|
70
70
|
self.__validators.append(validator_class(
|
|
71
71
|
data_object_factory=data_object_factory,
|
tol/validators/ena_checklist.py
CHANGED
|
@@ -27,31 +27,31 @@ class EnaChecklistValidator(Validator):
|
|
|
27
27
|
super().__init__()
|
|
28
28
|
self.__config = config
|
|
29
29
|
self._datasource = datasource
|
|
30
|
+
self.__ena_checklist = datasource.get_one(
|
|
31
|
+
'checklist',
|
|
32
|
+
self.__config.ena_checklist_id
|
|
33
|
+
).checklist
|
|
30
34
|
|
|
31
35
|
def _validate_data_object(self, obj: DataObject) -> None:
|
|
32
|
-
|
|
33
|
-
ena_checklist = ena_datasource.get_one('checklist', self.__config.ena_checklist_id)
|
|
34
|
-
|
|
35
|
-
validations = ena_checklist.attributes['checklist']
|
|
36
|
-
for key in validations:
|
|
36
|
+
for key, validation in self.__ena_checklist.items():
|
|
37
37
|
field_name = key
|
|
38
|
-
if 'field' in
|
|
39
|
-
field_name =
|
|
40
|
-
if 'mandatory' in
|
|
38
|
+
if 'field' in validation:
|
|
39
|
+
field_name = validation['field']
|
|
40
|
+
if 'mandatory' in validation and key not in obj.attributes:
|
|
41
41
|
self.add_error(object_id=obj.id, detail='Must be given', field=[field_name])
|
|
42
42
|
continue
|
|
43
|
-
if 'mandatory' in
|
|
43
|
+
if 'mandatory' in validation and obj.attributes[key] is None:
|
|
44
44
|
self.add_error(object_id=obj.id, detail='Must be given', field=[field_name])
|
|
45
45
|
continue
|
|
46
|
-
if 'mandatory' in
|
|
46
|
+
if 'mandatory' in validation and obj.attributes.get(key) == '':
|
|
47
47
|
self.add_error(
|
|
48
48
|
object_id=obj.id,
|
|
49
49
|
detail='Must not be empty', field=[field_name]
|
|
50
50
|
)
|
|
51
51
|
|
|
52
|
-
if 'restricted text' in
|
|
53
|
-
for condition in
|
|
54
|
-
if
|
|
52
|
+
if 'restricted text' in validation and key in obj.attributes:
|
|
53
|
+
for condition in validation:
|
|
54
|
+
if isinstance(condition, str) and '(' in condition:
|
|
55
55
|
regex = condition
|
|
56
56
|
compiled_re = re.compile(regex)
|
|
57
57
|
if not compiled_re.search(obj.attributes.get(key)):
|
|
@@ -61,9 +61,9 @@ class EnaChecklistValidator(Validator):
|
|
|
61
61
|
)
|
|
62
62
|
|
|
63
63
|
# Check against allowed values
|
|
64
|
-
if 'text choice' in
|
|
65
|
-
for condition in
|
|
66
|
-
if
|
|
64
|
+
if 'text choice' in validation and key in obj.attributes:
|
|
65
|
+
for condition in validation:
|
|
66
|
+
if isinstance(condition, list):
|
|
67
67
|
allowed_values = condition
|
|
68
68
|
if obj.attributes.get(key).lower() not in \
|
|
69
69
|
[x.lower() for x in allowed_values]:
|
tol/validators/regex.py
CHANGED
|
@@ -26,9 +26,9 @@ class Regex:
|
|
|
26
26
|
|
|
27
27
|
def is_allowed(self, __v: Any) -> bool:
|
|
28
28
|
# Check regex
|
|
29
|
-
return bool(re.search(
|
|
29
|
+
return __v is None or __v == '' or bool(re.search(
|
|
30
30
|
self.regex,
|
|
31
|
-
str(__v)
|
|
31
|
+
str(__v)
|
|
32
32
|
))
|
|
33
33
|
|
|
34
34
|
|
|
@@ -90,7 +90,6 @@ class RegexValidator(Validator):
|
|
|
90
90
|
obj: DataObject,
|
|
91
91
|
c: Regex,
|
|
92
92
|
) -> None:
|
|
93
|
-
|
|
94
93
|
if c.is_error:
|
|
95
94
|
self.add_error(
|
|
96
95
|
object_id=obj.id,
|
tol/validators/sts_fields.py
CHANGED
|
@@ -3,6 +3,7 @@
|
|
|
3
3
|
# SPDX-License-Identifier: MIT
|
|
4
4
|
|
|
5
5
|
from dataclasses import dataclass
|
|
6
|
+
from datetime import datetime, timedelta
|
|
6
7
|
from typing import List
|
|
7
8
|
|
|
8
9
|
from tol.core import DataObject, DataSource
|
|
@@ -51,39 +52,178 @@ class StsFieldsValidator(Validator):
|
|
|
51
52
|
obj: DataObject
|
|
52
53
|
) -> None:
|
|
53
54
|
for field in self.__fields.values():
|
|
55
|
+
# Ignore inactive fields
|
|
56
|
+
if field.get('status') == 'Inactive':
|
|
57
|
+
continue
|
|
54
58
|
# Get the value from the data object
|
|
55
59
|
field_value = obj.get_field_by_name(field.get('data_input_key'))
|
|
56
|
-
if
|
|
60
|
+
if isinstance(field_value, list):
|
|
61
|
+
field_value = ' | '.join(str(v) for v in field_value)
|
|
62
|
+
|
|
63
|
+
# mandatory_input fields must be present
|
|
64
|
+
if field.get('mandatory_input') and field.get('data_input_key') not in obj.attributes:
|
|
57
65
|
self.add_error(
|
|
58
66
|
object_id=obj.id,
|
|
59
67
|
detail=f'Field {field.get("data_input_key")} is required '
|
|
60
68
|
f'for project {self.__config.project_code}',
|
|
61
69
|
field=field.get('data_input_key'),
|
|
62
70
|
)
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
)
|
|
72
|
-
elif field.get('min') and field_value < field.get('min'):
|
|
71
|
+
continue
|
|
72
|
+
|
|
73
|
+
# Skip further validations if validation is not mandatory
|
|
74
|
+
if not field.get('mandatory_validation'):
|
|
75
|
+
continue
|
|
76
|
+
|
|
77
|
+
# Mandatory validation fields must have a value
|
|
78
|
+
if field_value is None or field_value == '':
|
|
73
79
|
self.add_error(
|
|
74
80
|
object_id=obj.id,
|
|
75
|
-
detail=f'Field {field.get("data_input_key")} value '
|
|
76
|
-
f'
|
|
77
|
-
f'"{field.get("min")}" for project '
|
|
78
|
-
f'{self.__config.project_code}',
|
|
81
|
+
detail=f'Field {field.get("data_input_key")} is required to have a value '
|
|
82
|
+
f'for project {self.__config.project_code}',
|
|
79
83
|
field=field.get('data_input_key'),
|
|
80
84
|
)
|
|
81
|
-
|
|
85
|
+
continue
|
|
86
|
+
|
|
87
|
+
# Allowed values
|
|
88
|
+
if field.get('allowed_values'):
|
|
89
|
+
allowed_values = [
|
|
90
|
+
value.get('value') for value in field.get('allowed_values', [])
|
|
91
|
+
]
|
|
92
|
+
if field_value not in allowed_values:
|
|
93
|
+
self.add_error(
|
|
94
|
+
object_id=obj.id,
|
|
95
|
+
detail=f'Field {field.get("data_input_key")} value '
|
|
96
|
+
f'"{field_value}" not found in allowed values '
|
|
97
|
+
f'{allowed_values} for project '
|
|
98
|
+
f'{self.__config.project_code}',
|
|
99
|
+
field=field.get('data_input_key'),
|
|
100
|
+
)
|
|
101
|
+
|
|
102
|
+
if field.get('type') in ['String', 'TextArea']:
|
|
103
|
+
self.__validate_string(obj, field, field_value)
|
|
104
|
+
|
|
105
|
+
if field.get('type') in ['Integer', 'Decimal', 'Percentage']:
|
|
106
|
+
self.__validate_number(obj, field, field_value)
|
|
107
|
+
|
|
108
|
+
if field.get('type') in ['Boolean']:
|
|
109
|
+
self.__validate_boolean(obj, field, field_value)
|
|
110
|
+
|
|
111
|
+
if field.get('type') in ['Date']:
|
|
112
|
+
self.__validate_date(obj, field, field_value)
|
|
113
|
+
|
|
114
|
+
def __validate_string(
|
|
115
|
+
self,
|
|
116
|
+
obj: DataObject,
|
|
117
|
+
field: dict,
|
|
118
|
+
field_value: str | int | float | None
|
|
119
|
+
) -> None:
|
|
120
|
+
# Check type is a string
|
|
121
|
+
# if not isinstance(field_value, str):
|
|
122
|
+
# self.add_error(
|
|
123
|
+
# object_id=obj.id,
|
|
124
|
+
# detail=f'Field {field.get("data_input_key")} value '
|
|
125
|
+
# f'"{field_value}" is not a string for project '
|
|
126
|
+
# f'{self.__config.project_code}',
|
|
127
|
+
# field=field.get('data_input_key'),
|
|
128
|
+
# )
|
|
129
|
+
# return
|
|
130
|
+
|
|
131
|
+
# Min/Max validations for string
|
|
132
|
+
if field.get('min') and len(field_value) < field.get('min'):
|
|
133
|
+
self.add_error(
|
|
134
|
+
object_id=obj.id,
|
|
135
|
+
detail=f'Field {field.get("data_input_key")} value '
|
|
136
|
+
f'"{field_value}" is shorter than minimum length '
|
|
137
|
+
f'"{field.get("min")}" for project '
|
|
138
|
+
f'{self.__config.project_code}',
|
|
139
|
+
field=field.get('data_input_key'),
|
|
140
|
+
)
|
|
141
|
+
if field.get('max') and len(field_value) > field.get('max'):
|
|
142
|
+
self.add_error(
|
|
143
|
+
object_id=obj.id,
|
|
144
|
+
detail=f'Field {field.get("data_input_key")} value '
|
|
145
|
+
f'"{field_value}" is longer than maximum length '
|
|
146
|
+
f'"{field.get("max")}" for project '
|
|
147
|
+
f'{self.__config.project_code}',
|
|
148
|
+
field=field.get('data_input_key'),
|
|
149
|
+
)
|
|
150
|
+
|
|
151
|
+
def __validate_number(
|
|
152
|
+
self,
|
|
153
|
+
obj: DataObject,
|
|
154
|
+
field: dict,
|
|
155
|
+
field_value: str | int | float | None
|
|
156
|
+
) -> None:
|
|
157
|
+
# Check type is a number
|
|
158
|
+
if not isinstance(field_value, (int, float)):
|
|
159
|
+
self.add_error(
|
|
160
|
+
object_id=obj.id,
|
|
161
|
+
detail=f'Field {field.get("data_input_key")} value '
|
|
162
|
+
f'"{field_value}" is not a number for project '
|
|
163
|
+
f'{self.__config.project_code}',
|
|
164
|
+
field=field.get('data_input_key'),
|
|
165
|
+
)
|
|
166
|
+
return
|
|
167
|
+
|
|
168
|
+
# Min/Max validations for number
|
|
169
|
+
if field.get('min') is not None and field_value < field.get('min'):
|
|
170
|
+
self.add_error(
|
|
171
|
+
object_id=obj.id,
|
|
172
|
+
detail=f'Field {field.get("data_input_key")} value '
|
|
173
|
+
f'"{field_value}" is less than minimum value '
|
|
174
|
+
f'"{field.get("min")}" for project '
|
|
175
|
+
f'{self.__config.project_code}',
|
|
176
|
+
field=field.get('data_input_key'),
|
|
177
|
+
)
|
|
178
|
+
if field.get('max') is not None and field_value > field.get('max'):
|
|
179
|
+
self.add_error(
|
|
180
|
+
object_id=obj.id,
|
|
181
|
+
detail=f'Field {field.get("data_input_key")} value '
|
|
182
|
+
f'"{field_value}" is greater than maximum value '
|
|
183
|
+
f'"{field.get("max")}" for project '
|
|
184
|
+
f'{self.__config.project_code}',
|
|
185
|
+
field=field.get('data_input_key'),
|
|
186
|
+
)
|
|
187
|
+
|
|
188
|
+
def __validate_boolean(
|
|
189
|
+
self,
|
|
190
|
+
obj: DataObject,
|
|
191
|
+
field: dict,
|
|
192
|
+
field_value: str | int | float | None
|
|
193
|
+
) -> None:
|
|
194
|
+
# Check type is a boolean
|
|
195
|
+
if field_value not in ['Y', 'N']:
|
|
196
|
+
self.add_error(
|
|
197
|
+
object_id=obj.id,
|
|
198
|
+
detail=f'Field {field.get("data_input_key")} value '
|
|
199
|
+
f'"{field_value}" is not a boolean (Y/N) for project '
|
|
200
|
+
f'{self.__config.project_code}',
|
|
201
|
+
field=field.get('data_input_key'),
|
|
202
|
+
)
|
|
203
|
+
|
|
204
|
+
def __validate_date(
|
|
205
|
+
self,
|
|
206
|
+
obj: DataObject,
|
|
207
|
+
field: dict,
|
|
208
|
+
field_value: str | int | float | None
|
|
209
|
+
) -> None:
|
|
210
|
+
if not isinstance(field_value, datetime):
|
|
211
|
+
self.add_error(
|
|
212
|
+
object_id=obj.id,
|
|
213
|
+
detail=f'Field {field.get("data_input_key")} value '
|
|
214
|
+
f'"{field_value}" is not a date string for project '
|
|
215
|
+
f'{self.__config.project_code}',
|
|
216
|
+
field=field.get('data_input_key'),
|
|
217
|
+
)
|
|
218
|
+
return
|
|
219
|
+
if field.get('range_limit'):
|
|
220
|
+
earliest_date = datetime.now() - timedelta(days=field.get('min'))
|
|
221
|
+
latest_date = datetime.now() + timedelta(days=field.get('max'))
|
|
222
|
+
if field_value < earliest_date or field_value > latest_date:
|
|
82
223
|
self.add_error(
|
|
83
224
|
object_id=obj.id,
|
|
84
225
|
detail=f'Field {field.get("data_input_key")} value '
|
|
85
|
-
f'"{field_value}" is
|
|
86
|
-
f'
|
|
87
|
-
f'{self.__config.project_code}',
|
|
226
|
+
f'"{field_value}" is not within the allowed date '
|
|
227
|
+
f'range for project {self.__config.project_code}',
|
|
88
228
|
field=field.get('data_input_key'),
|
|
89
229
|
)
|
tol/validators/tolid.py
CHANGED
|
@@ -103,7 +103,7 @@ class TolidValidator(Validator):
|
|
|
103
103
|
|
|
104
104
|
if str(obj.get_field_by_name(self.__config.species_id_field)) not in taxons:
|
|
105
105
|
self.add_error(
|
|
106
|
-
object_id=obj.id,
|
|
106
|
+
object_id=obj.id + 1,
|
|
107
107
|
detail=f'Specimen ID {specimen_id} does not match Taxon ID '
|
|
108
108
|
f'{obj.get_field_by_name(self.__config.species_id_field)}'
|
|
109
109
|
'in TolID source',
|
tol/validators/types.py
ADDED
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: 2025 Genome Research Ltd.
|
|
2
|
+
#
|
|
3
|
+
# SPDX-License-Identifier: MIT
|
|
4
|
+
|
|
5
|
+
from dataclasses import dataclass
|
|
6
|
+
from datetime import datetime, time
|
|
7
|
+
|
|
8
|
+
from tol.core import DataObject
|
|
9
|
+
from tol.core.validate import Validator
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class TypesValidator(Validator):
|
|
13
|
+
"""
|
|
14
|
+
Validates an incoming stream of `DataObject` instances,
|
|
15
|
+
ensuring that they only have attributes of the given
|
|
16
|
+
allowed keys.
|
|
17
|
+
"""
|
|
18
|
+
@dataclass(slots=True, frozen=True, kw_only=True)
|
|
19
|
+
class Config:
|
|
20
|
+
allowed_types: dict[str, str]
|
|
21
|
+
is_error: bool = True
|
|
22
|
+
detail: str = 'Value is of incorrect type'
|
|
23
|
+
|
|
24
|
+
__slots__ = ['__config']
|
|
25
|
+
__config: Config
|
|
26
|
+
|
|
27
|
+
def __init__(
|
|
28
|
+
self,
|
|
29
|
+
config: Config,
|
|
30
|
+
**kwargs
|
|
31
|
+
) -> None:
|
|
32
|
+
|
|
33
|
+
super().__init__()
|
|
34
|
+
self.__config = config
|
|
35
|
+
|
|
36
|
+
def _validate_data_object(
|
|
37
|
+
self,
|
|
38
|
+
obj: DataObject
|
|
39
|
+
) -> None:
|
|
40
|
+
|
|
41
|
+
type_map = {
|
|
42
|
+
'str': str,
|
|
43
|
+
'int': int,
|
|
44
|
+
'float': float,
|
|
45
|
+
'bool': bool,
|
|
46
|
+
'list': list,
|
|
47
|
+
'dict': dict,
|
|
48
|
+
'datetime': datetime,
|
|
49
|
+
'time': time
|
|
50
|
+
}
|
|
51
|
+
for key, expected_type in self.__config.allowed_types.items():
|
|
52
|
+
if key in obj.attributes:
|
|
53
|
+
actual_value = obj.get_field_by_name(key)
|
|
54
|
+
if actual_value is None:
|
|
55
|
+
continue
|
|
56
|
+
type_class = type_map.get(expected_type)
|
|
57
|
+
if type_class and not isinstance(actual_value, type_class):
|
|
58
|
+
self.__add_result(
|
|
59
|
+
obj,
|
|
60
|
+
key,
|
|
61
|
+
detail=f'Field {key} value "{actual_value}" is not of type '
|
|
62
|
+
f'"{expected_type}"',
|
|
63
|
+
)
|
|
64
|
+
if type_class and isinstance(actual_value, type_class):
|
|
65
|
+
# Special case for bool since isinstance(True, int) is True
|
|
66
|
+
if expected_type == 'int' and isinstance(actual_value, bool):
|
|
67
|
+
self.__add_result(
|
|
68
|
+
obj,
|
|
69
|
+
key,
|
|
70
|
+
)
|
|
71
|
+
|
|
72
|
+
def __add_result(
|
|
73
|
+
self,
|
|
74
|
+
obj: DataObject,
|
|
75
|
+
key: str,
|
|
76
|
+
detail: str = None,
|
|
77
|
+
) -> None:
|
|
78
|
+
|
|
79
|
+
if self.__config.is_error:
|
|
80
|
+
self.add_error(
|
|
81
|
+
object_id=obj.id,
|
|
82
|
+
detail=detail or self.__config.detail,
|
|
83
|
+
field=key,
|
|
84
|
+
)
|
|
85
|
+
else:
|
|
86
|
+
self.add_warning(
|
|
87
|
+
object_id=obj.id,
|
|
88
|
+
detail=self.__config.detail,
|
|
89
|
+
field=key,
|
|
90
|
+
)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: tol-sdk
|
|
3
|
-
Version: 1.
|
|
3
|
+
Version: 1.8.1
|
|
4
4
|
Summary: SDK for interaction with ToL, Sanger and external services
|
|
5
5
|
Author-email: ToL Platforms Team <tol-platforms@sanger.ac.uk>
|
|
6
6
|
License: MIT
|
|
@@ -50,6 +50,8 @@ Requires-Dist: openpyxl>=3.0.10; extra == "sheets"
|
|
|
50
50
|
Requires-Dist: XlsxWriter==3.1.9; extra == "sheets"
|
|
51
51
|
Requires-Dist: xlrd==2.0.1; extra == "sheets"
|
|
52
52
|
Requires-Dist: gspread>=5.12.0; extra == "sheets"
|
|
53
|
+
Provides-Extra: s3
|
|
54
|
+
Requires-Dist: minio==7.2.15; extra == "s3"
|
|
53
55
|
Provides-Extra: all
|
|
54
56
|
Requires-Dist: tol-sdk[api-base]; extra == "all"
|
|
55
57
|
Requires-Dist: tol-sdk[benchling]; extra == "all"
|
|
@@ -47,8 +47,9 @@ tol/benchling/benchling_converter.py,sha256=CO7BMvMAM52uIFjQWZFedRve0XNSmC9YtumJ
|
|
|
47
47
|
tol/benchling/benchling_datasource.py,sha256=aHoGOJYX_dsL3G-9lXlY0bQQl4pMXf4a852sAkl-sKs,35112
|
|
48
48
|
tol/benchling/benchling_warehouse_datasource.py,sha256=opsdvHz8l06NTmt84HrIgUJxV_DsurVgFtGs3_5PMoM,4635
|
|
49
49
|
tol/benchling/sql/__init__.py,sha256=4LbvDIZOOG7p-ebbvivP7NvrJeApUvGEIcDL58ahQJE,85
|
|
50
|
-
tol/benchling/sql/
|
|
51
|
-
tol/benchling/sql/
|
|
50
|
+
tol/benchling/sql/extraction_containers_dna.sql,sha256=YRQ0W1d-BjXB9gcMpf5ZyjHbPVp2VU0KkYi4e0JvYtA,6680
|
|
51
|
+
tol/benchling/sql/extraction_extraction_type_dna.sql,sha256=UvxboWBoXXp7RHUdRKNiQTS-AXdLdz8bFEXCS6q9SoE,3094
|
|
52
|
+
tol/benchling/sql/extraction_extraction_type_lres.sql,sha256=7Y6a8v0V-jjU5Kg3czuZjcPLvGSfrnUDekpHo2mUgnc,1556
|
|
52
53
|
tol/benchling/sql/extraction_extraction_type_pooled_dna.sql,sha256=fNjCJPaViGrR6D8sLwZK2Zg5LqQqh16HB0s7ZeqTqdg,4480
|
|
53
54
|
tol/benchling/sql/extraction_extraction_type_rna.sql,sha256=Vy3uV_ns4uO9CwuOFo1KPhI0yK6dsSO47wObcwJbHXQ,3861
|
|
54
55
|
tol/benchling/sql/pacbio_prep.sql,sha256=a3dR-kcp8fT3ZZkbX8pV4StnweWGvcVl4fZNMVNCvbQ,4070
|
|
@@ -89,7 +90,7 @@ tol/core/attribute_metadata.py,sha256=wYD3NXDdStrpkUZoyTUiEpp7c14f7MLIcyooT1G4GE
|
|
|
89
90
|
tol/core/core_converter.py,sha256=Gn4J507BtqDjnOWV2MFRYGz8YElJAKQItmnCrD72s7k,4504
|
|
90
91
|
tol/core/data_loader.py,sha256=k-ET1nIohIz6PcADbEn9Y7k9TupoiBYxKDkcAl_9pGY,14710
|
|
91
92
|
tol/core/data_object.py,sha256=GxG04JMcICaiHU1rufkhoD8jb9YQLhE0QWlFU2ZkQsM,4241
|
|
92
|
-
tol/core/data_object_converter.py,sha256=
|
|
93
|
+
tol/core/data_object_converter.py,sha256=GESpLvwrAEwmCfBwy3GxcSCuHz0xt7ECCBPE2stxBdI,3927
|
|
93
94
|
tol/core/data_source_attribute_metadata.py,sha256=NHvJ_Gmw7-Oej1MoFCohvq4f6emDJ2HF483UmW2Qd_c,4407
|
|
94
95
|
tol/core/data_source_dict.py,sha256=d-hSmoWTwG6IOc0cQTLap1EBslsxYIWGUd3ScSoeH_Q,1705
|
|
95
96
|
tol/core/datasource.py,sha256=e9GaeDPfO_Gs7cgQhmNxCiSDlRNf64reegzFebcMNkA,6303
|
|
@@ -142,7 +143,7 @@ tol/ena/filter.py,sha256=UzOx5ivXvA0TY2QuNzFmS-zDPVNnaAx07DMVkAwVsAE,3370
|
|
|
142
143
|
tol/ena/parser.py,sha256=Z4YmUnpfLKng4QwmZkLEj1hUfwYb_bqr-DWgF1Gw-EY,3253
|
|
143
144
|
tol/excel/__init__.py,sha256=M0xL9w9Au8kYOLWzFGuijJ7WoZENOMkZ1XV1ephhlDY,229
|
|
144
145
|
tol/excel/excel.py,sha256=rcA-wfXY9R14OfNKS-NX2sn__9gmQ_G8LoUgWseF1Gk,2124
|
|
145
|
-
tol/excel/excel_datasource.py,sha256=
|
|
146
|
+
tol/excel/excel_datasource.py,sha256=WhkqIk4Qg-iDBczI4l1OFoLNDX32riwgj4SdGhr4DIs,3423
|
|
146
147
|
tol/excel/s3_factory.py,sha256=4lGyKrSvarPXWndyvm7K-tel0FoM0My8wnz-Mzwt0yQ,1245
|
|
147
148
|
tol/flows/__init__.py,sha256=M7iSvnBJs6fJ8M38cW0bYQa9WW0TN8FHAMjIHPDNAJ4,166
|
|
148
149
|
tol/flows/logger.py,sha256=rWXbaknGcPEZRFvC1CiB1qkhFRZsQk435w7VyJ3cpyw,170
|
|
@@ -180,7 +181,7 @@ tol/flows/converters/gap_assembly_to_elastic_assembly_converter.py,sha256=XK-es-
|
|
|
180
181
|
tol/flows/converters/genome_notes_genome_note_to_elastic_genome_note_converter.py,sha256=AaUWbVTaWU-NXnUQPaPwI41TE7a-nC4zlg-jrWpPT2s,1166
|
|
181
182
|
tol/flows/converters/goat_taxon_to_elastic_species_converter.py,sha256=1NGs9427OdXGsBaMB467nOF7aTlJsUKYCuoSoABw9L4,1074
|
|
182
183
|
tol/flows/converters/grit_issue_to_elastic_curation_converter.py,sha256=XpRpoRn589MxTqEk6zPWGn6tamJiqY9Ctxk8v0q-dvA,3953
|
|
183
|
-
tol/flows/converters/incoming_sample_to_ena_sample_converter.py,sha256=
|
|
184
|
+
tol/flows/converters/incoming_sample_to_ena_sample_converter.py,sha256=SAVYWENG3GS7B1rM6rYwxfLQH75nZl7mEzphH5CBxRw,6353
|
|
184
185
|
tol/flows/converters/incoming_sample_to_incoming_sample_with_lists_converter.py,sha256=5Fp1_ojsYqvRcKTgXJbyWqetPisi_vtWFcWr6RtGZoA,1504
|
|
185
186
|
tol/flows/converters/informatics_tolid_to_elastic_tolid_converter.py,sha256=VrvtsDTPlc5Xa3K4rcAMHwV4n71zOH7q5EfALLLQ1tI,587
|
|
186
187
|
tol/flows/converters/labwhere_location_to_elastic_sample_update_converter.py,sha256=NJNmG9sCc2WXc-2J5XfCKXhb2sDH82nZUBekd16PHcw,656
|
|
@@ -321,28 +322,29 @@ tol/treeval/treeval_datasource.py,sha256=GzY6JwH67b5QdV-UVdCFJfgGAIuZ96J2nl53YxZ
|
|
|
321
322
|
tol/utils/__init__.py,sha256=764-Na1OaNGUDWpMIu51ZtXG7n_nB5MccUFK6LmkWRI,138
|
|
322
323
|
tol/utils/csv.py,sha256=mihww25fSn72c4h-RFeqD_pFIG6KHZP4v1_C0rx81ws,421
|
|
323
324
|
tol/utils/s3.py,sha256=aoYCwJ-qcMqFrpxmViFqPa0O1jgp0phtztO3-0CSNjw,491
|
|
324
|
-
tol/validators/__init__.py,sha256=
|
|
325
|
+
tol/validators/__init__.py,sha256=QI5ykFzsTLsIQXcL4vF_aaVGdSr2l0X0Qkssbnxumss,1176
|
|
325
326
|
tol/validators/allowed_keys.py,sha256=RJcHBiguL84B8hjSRaXLNES21yZqaKFwJNp2Tz9zvh0,1506
|
|
326
327
|
tol/validators/allowed_values.py,sha256=-Yy3Sqo1WYacGKlot_dn3M2o7Oj5MXOioJrJmrWCCxs,1536
|
|
327
328
|
tol/validators/allowed_values_from_datasource.py,sha256=ICFO6FcYXDN7M2Cv1OwpyN38CdhmY7oU-njzIatA3-w,3185
|
|
328
329
|
tol/validators/assert_on_condition.py,sha256=eBGgSVfIQ6e45SheM-ZDg7daXJjyZxRVS5L8AWvbXag,2027
|
|
329
|
-
tol/validators/converter_and_validate.py,sha256=
|
|
330
|
-
tol/validators/ena_checklist.py,sha256=
|
|
330
|
+
tol/validators/converter_and_validate.py,sha256=O1uYdrU4YDZ8eZjb7Koots4-8fMVOkJFXESg-LVw2o8,2992
|
|
331
|
+
tol/validators/ena_checklist.py,sha256=M10VAFGpaxnm7rWO4jmFhTWkYRlCmU0Ox2IUEDFGKbo,2812
|
|
331
332
|
tol/validators/ena_submittable.py,sha256=CujF9t4mA4N3Wm_5rA5MRp401aW19kbioOZpfWVXg6I,1965
|
|
332
333
|
tol/validators/min_one_valid_value.py,sha256=gZUHtfRA-Lvpw0d1FJoAA31cRJpLbbxAJCC9DCt5lCY,1442
|
|
333
334
|
tol/validators/mutually_exclusive.py,sha256=6blZK-2IY4Eq79fHKKrm-pxsQ6B5DNH5ldtxOFVCPhU,4492
|
|
334
|
-
tol/validators/regex.py,sha256=
|
|
335
|
+
tol/validators/regex.py,sha256=dLAi_vQt9_DsT6wQZmbYC7X5-Wp15l0leUE6XkPaItg,2602
|
|
335
336
|
tol/validators/regex_by_value.py,sha256=XM5EnT4vgD17rfpR3bUE9I56IemSw26BI9MZtMakd4E,2582
|
|
336
337
|
tol/validators/specimens_have_same_taxon.py,sha256=m2LLRIZMdhPj1fzyioDJOraI6UHXgy1l963xhezgk7E,2177
|
|
337
|
-
tol/validators/sts_fields.py,sha256=
|
|
338
|
-
tol/validators/tolid.py,sha256=
|
|
338
|
+
tol/validators/sts_fields.py,sha256=aYbzy15btEg4-ocDT1qrspe7-atoWRrOJ_KmuPU6J14,8936
|
|
339
|
+
tol/validators/tolid.py,sha256=yODebLYbKtlem3IpVcv8XImvq90r-AK68asH9JEawqo,3897
|
|
340
|
+
tol/validators/types.py,sha256=KDBNqx5isJG5XI1l2V9Wmi9135ZwDace3MU6Qij3J6E,2612
|
|
339
341
|
tol/validators/unique_values.py,sha256=o5IrfUNLEmlEp8kpInTtFnTq-FqiHSC9TItKdf-LI1o,3114
|
|
340
342
|
tol/validators/unique_whole_organisms.py,sha256=RdqA1GzIf3LTdrmNGGdxv0aW2udDY2P9EaqZb40hhik,5735
|
|
341
343
|
tol/validators/interfaces/__init__.py,sha256=jtOxnwnwqV_29xjmmMcS_kvlt-pQiWwQYJn2YRP07_w,172
|
|
342
344
|
tol/validators/interfaces/condition_evaluator.py,sha256=nj8Cb8hi47OBy6OVNfeLhF-Pjwtr8MiOSymYL6hfVes,3766
|
|
343
|
-
tol_sdk-1.
|
|
344
|
-
tol_sdk-1.
|
|
345
|
-
tol_sdk-1.
|
|
346
|
-
tol_sdk-1.
|
|
347
|
-
tol_sdk-1.
|
|
348
|
-
tol_sdk-1.
|
|
345
|
+
tol_sdk-1.8.1.dist-info/licenses/LICENSE,sha256=RF9Jacy-9BpUAQQ20INhTgtaNBkmdTolYCHtrrkM2-8,1077
|
|
346
|
+
tol_sdk-1.8.1.dist-info/METADATA,sha256=wXZU-uS5CIiiWYa0EGwD8YiK2Z9eOJz5x78yTUPFMmk,3142
|
|
347
|
+
tol_sdk-1.8.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
348
|
+
tol_sdk-1.8.1.dist-info/entry_points.txt,sha256=jH3HfTwxjzog7E3lq8CKpUWGIRY9FSXbyL6CpUmv6D0,36
|
|
349
|
+
tol_sdk-1.8.1.dist-info/top_level.txt,sha256=PwKMQLphyZNvagBoriVbl8uwHXQl8IC1niawVG0iXMM,10
|
|
350
|
+
tol_sdk-1.8.1.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|