tol-sdk 1.8.0-py3-none-any.whl → 1.8.1-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tol/benchling/sql/extraction_containers_dna.sql +173 -0
- tol/benchling/sql/extraction_extraction_type_dna.sql +16 -131
- tol/benchling/sql/extraction_extraction_type_lres.sql +11 -3
- tol/excel/excel_datasource.py +4 -0
- tol/validators/sts_fields.py +151 -17
- tol/validators/types.py +4 -1
- {tol_sdk-1.8.0.dist-info → tol_sdk-1.8.1.dist-info}/METADATA +1 -1
- {tol_sdk-1.8.0.dist-info → tol_sdk-1.8.1.dist-info}/RECORD +12 -11
- {tol_sdk-1.8.0.dist-info → tol_sdk-1.8.1.dist-info}/WHEEL +0 -0
- {tol_sdk-1.8.0.dist-info → tol_sdk-1.8.1.dist-info}/entry_points.txt +0 -0
- {tol_sdk-1.8.0.dist-info → tol_sdk-1.8.1.dist-info}/licenses/LICENSE +0 -0
- {tol_sdk-1.8.0.dist-info → tol_sdk-1.8.1.dist-info}/top_level.txt +0 -0
tol/benchling/sql/extraction_containers_dna.sql
ADDED
@@ -0,0 +1,173 @@
+/*
+## SQL Query: DNA Extraction Containers (Benchling Warehouse)
+
+This SQL query retrieves detailed information about DNA extraction containers managed by the ToL Core Laboratory, including metadata, container details, and the latest QC measurements.
+
+The resulting table includes identifiers for tissues, tissue preps, extractions, containers, and locations, as well as the most recent QC results (Nanodrop, Qubit, Femto, Yield, and Decision Making).
+
+Output: Table with columns:
+
+1) taxon_id: [character] Tissue metadata. Origin: STS
+2) eln_tissue_id: [character] Benchling ID for the tissue the extraction is derived from.
+3) eln_tissue_prep_id: [character] Benchling ID for the tissue prep the extraction is derived from.
+4) extraction_id: [character] DNA extraction entity ID (Benchling).
+5) programme_id: [character] ToLID. Origin: BWH.
+6) specimen_id: [character] Specimen ID. Origin: STS.
+7) creation_date: [date] Date the container was created.
+8) fluidx_container_id: [character] Primary key for the FluidX container.
+9) fluidx_id: [character] FluidX barcode.
+10) tube_type: [character] Type of tube/container.
+11) volume_ul: [numeric] Volume in microliters (0 if archived as 'Retired' or 'Expended').
+12) location: [character] Storage location name.
+13) rack: [character] Box/rack barcode.
+14) archive_purpose: [character] Reason for archiving the DNA extraction.
+15) nanodrop_concentration_ngul: [numeric] Latest Nanodrop concentration (ng/µL).
+16) dna_260_280_ratio: [numeric] Latest Nanodrop 260/280 ratio.
+17) dna_260_230_ratio: [numeric] Latest Nanodrop 260/230 ratio.
+18) qubit_concentration_ngul: [numeric] Latest Qubit concentration (ng/µL).
+19) yield_ng: [numeric] Latest yield (ng).
+20) femto_date_code: [character] Latest Femto date code.
+21) femto_description: [character] Latest Femto profile description.
+22) gqn_index: [numeric] Latest GQN index from Femto.
+23) next_step: [character] Latest decision making next step.
+24) extraction_qc_result: [character] Latest extraction QC result.
+
+NOTES:
+1) Only extractions from the 'ToL Core Lab' project and relevant folders are included.
+2) Containers archived as 'Made in error' or with names matching '%Nuclei isolation and tagmentation%' are excluded.
+3) Latest QC results are joined from their respective measurement tables.
+4) Volume is set to 0 for archived/expended extractions.
+5) Data types are preserved as in the Benchling Warehouse.
+
+*/
+
+WITH latest_nanodrop_conc AS (
+    SELECT
+        nanod.sample_id,
+        nanod.nanodrop_concentration_ngul,
+        nanod._260_280_ratio AS "dna_260_280_ratio",
+        nanod._260_230_ratio AS "dna_260_230_ratio"
+    FROM nanodrop_measurements_v2$raw AS nanod
+    WHERE nanod.created_at$ = (
+        SELECT MAX(sub.created_at$)
+        FROM nanodrop_measurements_v2$raw AS sub
+        WHERE sub.sample_id = nanod.sample_id
+    )
+),
+
+latest_qubit_conc AS (
+    SELECT
+        qbit.sample_id,
+        qbit.qubit_concentration_ngul
+    FROM qubit_measurements_v2$raw as qbit
+    WHERE qbit.created_at$ = (
+        SELECT MAX(sub.created_at$)
+        FROM qubit_measurements_v2$raw AS sub
+        WHERE sub.sample_id = qbit.sample_id
+    )
+),
+
+latest_yield AS (
+    SELECT
+        dnay.sample_id,
+        dnay.yield
+    FROM yield_v2$raw as dnay
+    WHERE dnay.created_at$ = (
+        SELECT MAX(sub.created_at$)
+        FROM yield_v2$raw AS sub
+        WHERE sub.sample_id = dnay.sample_id
+    )
+),
+
+latest_femto AS (
+    SELECT
+        femto.sample_id,
+        femto.femto_date_code,
+        femto.femto_profile_description AS femto_description,
+        femto.gqn_dnaex
+    FROM femto_dna_extract_v2$raw AS femto
+    WHERE femto.created_at$ = (
+        SELECT MAX(sub.created_at$)
+        FROM femto_dna_extract_v2$raw as sub
+        WHERE sub.sample_id = femto.sample_id
+    )
+),
+
+latest_decision_making AS (
+    SELECT
+        dnad.sample_id,
+        dnad.next_step,
+        qc_passfail AS extraction_qc_result
+    FROM dna_decision_making_v2$raw AS dnad
+    WHERE dnad.created_at$ = (
+        SELECT MAX(sub.created_at$)
+        FROM dna_decision_making_v2$raw AS sub
+        WHERE sub.sample_id = dnad.sample_id
+    )
+)
+
+SELECT DISTINCT
+    t.taxon_id,
+    t.id AS eln_tissue_id,
+    tp.id AS eln_tissue_prep_id,
+    dna.id AS extraction_id,
+    t.programme_id,
+    t.specimen_id,
+    DATE(con.created_at) AS creation_date,
+    con.id AS fluidx_container_id, -- primary key
+    con.barcode AS fluidx_id,
+    tube.type AS tube_type,
+    CASE
+        WHEN con.archive_purpose$ IN ('Retired', 'Expended') THEN 0 -- Retired or expended DNA extractions have a weight of 0
+        ELSE con.volume_si * 1000000
+    END AS volume_ul,
+    loc.name AS location,
+    box.barcode AS rack,
+    con.archive_purpose$ AS archive_purpose,
+    latest_nanodrop_conc.nanodrop_concentration_ngul,
+    latest_nanodrop_conc.dna_260_280_ratio,
+    latest_nanodrop_conc.dna_260_230_ratio,
+    latest_qubit_conc.qubit_concentration_ngul,
+    latest_yield.yield AS yield_ng,
+    latest_femto.femto_date_code,
+    latest_femto.femto_description,
+    latest_femto.gqn_dnaex AS gqn_index,
+    latest_decision_making.next_step,
+    latest_decision_making.extraction_qc_result
+FROM dna_extract$raw AS dna
+INNER JOIN container_content$raw AS cc -- Start of container/tube join
+    ON cc.entity_id = dna.id
+LEFT JOIN container$raw AS con
+    ON con.id = cc.container_id
+LEFT JOIN tube$raw AS tube
+    ON cc.container_id = tube.id -- End of container/tube join
+LEFT JOIN box$raw AS box -- Location chunk
+    ON con.box_id = box.id
+LEFT JOIN location$raw AS loc
+    ON loc.id = box.location_id -- End of location chunk
+LEFT JOIN tissue_prep$raw AS tp
+    ON tp.id = dna.tissue_prep
+LEFT JOIN tissue$raw AS t
+    ON t.id = tp.tissue
+LEFT JOIN latest_nanodrop_conc -- Results chunk
+    ON dna.id = latest_nanodrop_conc.sample_id
+LEFT JOIN latest_qubit_conc
+    ON dna.id = latest_qubit_conc.sample_id
+LEFT JOIN latest_yield
+    ON dna.id = latest_yield.sample_id
+LEFT JOIN latest_femto
+    ON dna.id = latest_femto.sample_id
+LEFT JOIN latest_decision_making
+    ON dna.id = latest_decision_making.sample_id -- End Results chunk
+LEFT JOIN folder$raw AS f
+    ON dna.folder_id$ = f.id
+LEFT JOIN project$raw AS proj
+    ON dna.project_id$ = proj.id
+LEFT JOIN registration_origin$raw AS reg
+    ON reg.entity_id = dna.id
+LEFT JOIN entry$raw AS ent
+    ON reg.origin_entry_id = ent.id
+WHERE proj.name = 'ToL Core Lab'
+    AND (f.name IN ('Routine Throughput', 'DNA', 'Core Lab Entities', 'Benchling MS Project Move') OR f.name IS NULL)
+    AND (con.archive_purpose$ != ('Made in error') OR con.archive_purpose$ IS NULL)
+    AND ent.name NOT LIKE '%Nuclei isolation and tagmentation%'
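The query above ships as a package data file under tol/benchling/sql/. The diff does not show tol-sdk's own loader for these resources, so the following is only a rough sketch of how a bundled .sql file like this one can be read and executed; the connection handling and pandas usage are assumptions, not part of the package.

from importlib.resources import files

import pandas as pd


def load_extraction_containers_dna_sql() -> str:
    # Read the .sql file bundled inside the installed wheel
    # (tol/benchling/sql is a package, so importlib.resources can locate it).
    return (
        files('tol.benchling.sql')
        .joinpath('extraction_containers_dna.sql')
        .read_text(encoding='utf-8')
    )


def fetch_extraction_containers(conn) -> pd.DataFrame:
    # `conn` is assumed to be a SQLAlchemy engine/connection with access to
    # the Benchling warehouse tables referenced by the query; it is not
    # provided by this snippet.
    return pd.read_sql(load_extraction_containers_dna_sql(), conn)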
tol/benchling/sql/extraction_extraction_type_dna.sql
CHANGED
@@ -11,34 +11,20 @@ Output: Table with cols:
 
 1) sts_id: [integer] Tissue metadata. Origin: STS
 2) taxon_id: [character] Tissue metadata. Origin: STS
-3) eln_tissue_id: [character] Benchling id for the tissue the
-4) eln_tissue_prep_id: [character] Benchling id for the tissue prep the
+3) eln_tissue_id: [character] Benchling id for the tissue the extraction is derived from.
+4) eln_tissue_prep_id: [character] Benchling id for the tissue prep the extraction is derived from.
 5) eln_file_registry_id: [character] id in Benchling Registry.
 6) extraction_id: [character] Primary key.
 7) programme_id: [character] ToLID. Origin: BWH
 8) specimen_id: [character] Specimen ID. Origin: STS
 9) completion_date: [date] Extraction date. This field coalesces created_at$ and created_on fields. Created_on is for bnt legacy data.
-10) extraction_name: [character] Entity name.
-11)
-12)
-13)
-14)
-15)
-16)
-17) extraction_protocol: [character] DNA extraction protocol as recorded at the time of extraction
-18) tube_type: [character] Type of tube. Marked NULL or voucher.
-19) extraction_type: [character] dna.
-20) name: [character] Folder name.
-21) archive_purpose: [character] Reason for archiving the DNA extraction.
-22) nanodrop_concentration_ngul: [double] Concentration of DNA as measured by Nanodrop.
-23) dna_260_280_ratio: [double] Ratio of absorbance at 260:280nm as measured by spectrophotometer.
-24) dna_260_230_ratio: [double] Ratio of absorbance at 260:230nm as measured by spectrophotometer.
-25) qubit_concentration_ngul: [double] Concentration of DNA as measured by Qubit.
-26) yield_ng: [double] DNA yield after extraction.
-27) femto_date_code: [character] Femto date code.
-28) femto_description:[character] Categorical description of the femto pulse profile.
-29) gqn_index: [character] Genomic Quality Number (GQN) index, calculated by the Femto software.
-30) extraction_qc_result: [character] QC result: Yes = Extraction passed; No = Extraction failed.
+10) extraction_name: [character] Entity name.
+11) bnt_id: [character] Batches and Tracking legacy id.
+12) manual_vs_automatic: [character] Extraction method indicator.
+13) extraction_protocol: [character] DNA extraction protocol as recorded at the time of extraction.
+14) extraction_type: [character] Type of extraction, set to 'dna'.
+15) folder_name: [character] Folder name.
+16) archive_purpose: [character] Reason for archiving the DNA extraction.
 
 NOTES:
 1) Data types were casted explicitly to conserved the data type stored in BWH.
@@ -46,71 +32,6 @@ NOTES:
 
 */
 
-WITH latest_nanodrop_conc AS (
-    SELECT
-        nanod.sample_id,
-        nanod.nanodrop_concentration_ngul,
-        nanod._260_280_ratio AS "dna_260_280_ratio",
-        nanod._260_230_ratio AS "dna_260_230_ratio"
-    FROM nanodrop_measurements_v2$raw AS nanod
-    WHERE nanod.created_at$ = (
-        SELECT MAX(sub.created_at$)
-        FROM nanodrop_measurements_v2$raw AS sub
-        WHERE sub.sample_id = nanod.sample_id
-    )
-),
-
-latest_qubit_conc AS (
-    SELECT
-        qbit.sample_id,
-        qbit.qubit_concentration_ngul
-    FROM qubit_measurements_v2$raw as qbit
-    WHERE qbit.created_at$ = (
-        SELECT MAX(sub.created_at$)
-        FROM qubit_measurements_v2$raw AS sub
-        WHERE sub.sample_id = qbit.sample_id
-    )
-),
-
-latest_yield AS (
-    SELECT
-        dnay.sample_id,
-        dnay.yield
-    FROM yield_v2$raw as dnay
-    WHERE dnay.created_at$ = (
-        SELECT MAX(sub.created_at$)
-        FROM yield_v2$raw AS sub
-        WHERE sub.sample_id = dnay.sample_id
-    )
-),
-
-latest_femto AS (
-    SELECT
-        femto.sample_id,
-        femto.femto_date_code,
-        femto.femto_profile_description AS femto_description,
-        femto.gqn_dnaex
-    FROM femto_dna_extract_v2$raw AS femto
-    WHERE femto.created_at$ = (
-        SELECT MAX(sub.created_at$)
-        FROM femto_dna_extract_v2$raw as sub
-        WHERE sub.sample_id = femto.sample_id
-    )
-),
-
-latest_decision_making AS (
-    SELECT
-        dnad.sample_id,
-        dnad.next_step,
-        qc_passfail AS extraction_qc_result
-    FROM dna_decision_making_v2$raw AS dnad
-    WHERE dnad.created_at$ = (
-        SELECT MAX(sub.created_at$)
-        FROM dna_decision_making_v2$raw AS sub
-        WHERE sub.sample_id = dnad.sample_id
-    )
-)
-
 SELECT DISTINCT
     t.sts_id,
     t.taxon_id,
@@ -122,62 +43,26 @@ SELECT DISTINCT
     t.specimen_id,
     COALESCE(DATE(dna.created_on), DATE(dna.created_at$)) AS completion_date, -- Homogenising BnT and Benchling dates
     dna.name$ AS extraction_name,
-    con.barcode AS fluidx_id,
-    con.id AS fluidx_container_id,
-    CASE
-        WHEN con.archive_purpose$ IN ('Retired', 'Expended') THEN 0 -- Retired or expended DNA extractions have a weight of 0
-        ELSE con.volume_si * 1000000
-    END AS volume_ul,
-    loc.name AS location,
-    box.barcode AS rack,
     dna.bt_id AS bnt_id,
-
+    dna.manual_vs_automatic AS manual_vs_automatic,
     dna.extraction_protocol,
-    tube.type AS tube_type,
     'dna'::varchar AS extraction_type,
-    f.name
-    latest_nanodrop_conc.nanodrop_concentration_ngul,
-    latest_nanodrop_conc.dna_260_280_ratio,
-    latest_nanodrop_conc.dna_260_230_ratio,
-    latest_qubit_conc.qubit_concentration_ngul,
-    latest_yield.yield AS yield_ng,
-    latest_femto.femto_date_code,
-    latest_femto.femto_description,
-    latest_femto.gqn_dnaex AS gqn_index,
-    latest_decision_making.next_step,
-    latest_decision_making.extraction_qc_result
+    f.name AS folder_name
 FROM dna_extract$raw AS dna
-LEFT JOIN container_content$raw AS cc
-    ON cc.entity_id = dna.id
-LEFT JOIN container$raw AS con
-    ON con.id = cc.container_id
 LEFT JOIN tissue_prep$raw AS tp
     ON tp.id = dna.tissue_prep
 LEFT JOIN tissue$raw AS t
     ON t.id = tp.tissue
-LEFT JOIN tube$raw AS tube
-    ON cc.container_id = tube.id
 LEFT JOIN folder$raw AS f
     ON dna.folder_id$ = f.id
 LEFT JOIN project$raw AS proj
    ON dna.project_id$ = proj.id
-LEFT JOIN
-
-LEFT JOIN
-
-LEFT JOIN latest_yield
-    ON dna.id = latest_yield.sample_id
-LEFT JOIN latest_femto
-    ON dna.id = latest_femto.sample_id
-LEFT JOIN latest_decision_making
-    ON dna.id = latest_decision_making.sample_id -- End Results chunk
-LEFT JOIN box$raw AS box -- Location chunk
-    ON con.box_id = box.id
-LEFT JOIN location$raw AS loc
-    ON loc.id = box.location_id -- End of location chunk
+LEFT JOIN registration_origin$raw AS reg
+    ON reg.entity_id = dna.id
+LEFT JOIN entry$raw AS ent
+    ON reg.origin_entry_id = ent.id
 WHERE proj.name = 'ToL Core Lab'
     AND (f.name IN ('Routine Throughput', 'DNA', 'Core Lab Entities', 'Benchling MS Project Move') OR f.name IS NULL)
     AND (dna.archive_purpose$ != ('Made in error') OR dna.archive_purpose$ IS NULL)
-    AND
-    AND con.barcode NOT LIKE 'CON%'
+    AND ent.name NOT LIKE '%Nuclei isolation and tagmentation%'
 ORDER BY completion_date DESC
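All of the latest_* CTEs removed above (and re-created in the new extraction_containers_dna.sql) implement the same rule: keep only the row whose created_at$ equals the per-sample maximum, i.e. the most recent measurement for each sample_id. For illustration only, here is the same "latest row per sample" selection expressed in pandas, on a toy frame standing in for nanodrop_measurements_v2$raw (ties on the timestamp aside, this matches the correlated subquery):

import pandas as pd

# Toy stand-in for nanodrop_measurements_v2$raw
nanodrop = pd.DataFrame({
    'sample_id': ['s1', 's1', 's2'],
    'nanodrop_concentration_ngul': [10.2, 11.5, 8.0],
    'created_at': pd.to_datetime(['2024-01-01', '2024-02-01', '2024-01-15']),
})

# Equivalent of:
#   WHERE created_at$ = (SELECT MAX(created_at$) ... WHERE sub.sample_id = nanod.sample_id)
latest = nanodrop.loc[nanodrop.groupby('sample_id')['created_at'].idxmax()]
print(latest)  # s1 keeps its 2024-02-01 row (11.5 ng/µL); s2 keeps its only row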
tol/benchling/sql/extraction_extraction_type_lres.sql
CHANGED
@@ -28,8 +28,8 @@ SELECT DISTINCT
     tp.name$ AS eln_tissue_prep_name,
     ssid.sanger_sample_id,
     ssid.sanger_sample_id AS extraction_id,
-
-
+    sub_con.barcode AS fluidx_id,
+    sub_con.id AS fluidx_container_id,
     DATE(tpsub.submitted_submission_date) AS completion_date,
     'lres'::varchar AS extraction_type
 FROM tissue_prep$raw AS tp
@@ -41,8 +41,16 @@ LEFT JOIN container$raw AS c
     ON cc.container_id = c.id
 LEFT JOIN tissue_prep_submission_workflow_output$raw AS tpsub
     ON c.id = tpsub.sample_tube_id
+LEFT JOIN container$raw AS sub_con
+    ON tpsub.sample_tube_id = sub_con.id
 LEFT JOIN storage$raw AS stor
     ON c.location_id = stor.id
 LEFT JOIN sanger_sample_id$raw AS ssid
     ON c.id = ssid.sample_tube
-
+LEFT JOIN project$raw AS proj
+    ON tp.project_id$ = proj.id
+LEFT JOIN folder$raw AS f
+    ON tp.folder_id$ = f.id
+WHERE sub_con.id IS NOT NULL
+    AND proj.name = 'ToL Core Lab'
+    AND f.name = 'Sample Prep'
tol/excel/excel_datasource.py
CHANGED
@@ -119,6 +119,10 @@ class ExcelDataSource(
         if isinstance(__v, pd.Timestamp):
             __v = datetime.fromtimestamp(__v.timestamp())
 
+        # If float and is whole number, convert to int
+        if isinstance(__v, float) and __v.is_integer():
+            __v = int(__v)
+
         if __k not in self.__mappings:
             return __v
 
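Context for the excel_datasource.py change above: pandas often reads integer-valued spreadsheet cells back as floats (for example when the column also contains blanks), so a value of 12345 can surface as 12345.0 and then fail downstream matching or validation. The added lines convert whole-number floats to int before mapping. A standalone sketch of the same normalisation; `normalise_cell` is an illustrative name, not a tol-sdk function:

from datetime import datetime

import pandas as pd


def normalise_cell(value):
    # Timestamps become plain datetimes (mirrors the existing handling above)
    if isinstance(value, pd.Timestamp):
        value = datetime.fromtimestamp(value.timestamp())

    # Whole-number floats (e.g. 12345.0 read from Excel) become ints,
    # mirroring the lines added in this release
    if isinstance(value, float) and value.is_integer():
        value = int(value)

    return value


print(normalise_cell(12345.0))  # 12345 (int)
print(normalise_cell(0.5))      # 0.5 (non-integral floats are left alone)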
tol/validators/sts_fields.py
CHANGED
@@ -3,6 +3,7 @@
 # SPDX-License-Identifier: MIT
 
 from dataclasses import dataclass
+from datetime import datetime, timedelta
 from typing import List
 
 from tol.core import DataObject, DataSource
@@ -51,16 +52,40 @@ class StsFieldsValidator(Validator):
         obj: DataObject
     ) -> None:
         for field in self.__fields.values():
+            # Ignore inactive fields
+            if field.get('status') == 'Inactive':
+                continue
             # Get the value from the data object
             field_value = obj.get_field_by_name(field.get('data_input_key'))
-            if
+            if isinstance(field_value, list):
+                field_value = ' | '.join(str(v) for v in field_value)
+
+            # mandatory_input fields must be present
+            if field.get('mandatory_input') and field.get('data_input_key') not in obj.attributes:
                 self.add_error(
                     object_id=obj.id,
                     detail=f'Field {field.get("data_input_key")} is required '
                            f'for project {self.__config.project_code}',
                     field=field.get('data_input_key'),
                 )
-
+                continue
+
+            # Skip further validations if validation is not mandatory
+            if not field.get('mandatory_validation'):
+                continue
+
+            # Mandatory validation fields must have a value
+            if field_value is None or field_value == '':
+                self.add_error(
+                    object_id=obj.id,
+                    detail=f'Field {field.get("data_input_key")} is required to have a value '
+                           f'for project {self.__config.project_code}',
+                    field=field.get('data_input_key'),
+                )
+                continue
+
+            # Allowed values
+            if field.get('allowed_values'):
                 allowed_values = [
                     value.get('value') for value in field.get('allowed_values', [])
                 ]
@@ -73,23 +98,132 @@ class StsFieldsValidator(Validator):
                            f'{self.__config.project_code}',
                     field=field.get('data_input_key'),
                 )
-
-
-            self.
-
-
-
-
-
-
-
-
-
+
+            if field.get('type') in ['String', 'TextArea']:
+                self.__validate_string(obj, field, field_value)
+
+            if field.get('type') in ['Integer', 'Decimal', 'Percentage']:
+                self.__validate_number(obj, field, field_value)
+
+            if field.get('type') in ['Boolean']:
+                self.__validate_boolean(obj, field, field_value)
+
+            if field.get('type') in ['Date']:
+                self.__validate_date(obj, field, field_value)
+
+    def __validate_string(
+        self,
+        obj: DataObject,
+        field: dict,
+        field_value: str | int | float | None
+    ) -> None:
+        # Check type is a string
+        # if not isinstance(field_value, str):
+        #     self.add_error(
+        #         object_id=obj.id,
+        #         detail=f'Field {field.get("data_input_key")} value '
+        #                f'"{field_value}" is not a string for project '
+        #                f'{self.__config.project_code}',
+        #         field=field.get('data_input_key'),
+        #     )
+        #     return
+
+        # Min/Max validations for string
+        if field.get('min') and len(field_value) < field.get('min'):
+            self.add_error(
+                object_id=obj.id,
+                detail=f'Field {field.get("data_input_key")} value '
+                       f'"{field_value}" is shorter than minimum length '
+                       f'"{field.get("min")}" for project '
+                       f'{self.__config.project_code}',
+                field=field.get('data_input_key'),
+            )
+        if field.get('max') and len(field_value) > field.get('max'):
+            self.add_error(
+                object_id=obj.id,
+                detail=f'Field {field.get("data_input_key")} value '
+                       f'"{field_value}" is longer than maximum length '
+                       f'"{field.get("max")}" for project '
+                       f'{self.__config.project_code}',
+                field=field.get('data_input_key'),
+            )
+
+    def __validate_number(
+        self,
+        obj: DataObject,
+        field: dict,
+        field_value: str | int | float | None
+    ) -> None:
+        # Check type is a number
+        if not isinstance(field_value, (int, float)):
+            self.add_error(
+                object_id=obj.id,
+                detail=f'Field {field.get("data_input_key")} value '
+                       f'"{field_value}" is not a number for project '
+                       f'{self.__config.project_code}',
+                field=field.get('data_input_key'),
+            )
+            return
+
+        # Min/Max validations for number
+        if field.get('min') is not None and field_value < field.get('min'):
+            self.add_error(
+                object_id=obj.id,
+                detail=f'Field {field.get("data_input_key")} value '
+                       f'"{field_value}" is less than minimum value '
+                       f'"{field.get("min")}" for project '
+                       f'{self.__config.project_code}',
+                field=field.get('data_input_key'),
+            )
+        if field.get('max') is not None and field_value > field.get('max'):
+            self.add_error(
+                object_id=obj.id,
+                detail=f'Field {field.get("data_input_key")} value '
+                       f'"{field_value}" is greater than maximum value '
+                       f'"{field.get("max")}" for project '
+                       f'{self.__config.project_code}',
+                field=field.get('data_input_key'),
+            )
+
+    def __validate_boolean(
+        self,
+        obj: DataObject,
+        field: dict,
+        field_value: str | int | float | None
+    ) -> None:
+        # Check type is a boolean
+        if field_value not in ['Y', 'N']:
+            self.add_error(
+                object_id=obj.id,
+                detail=f'Field {field.get("data_input_key")} value '
+                       f'"{field_value}" is not a boolean (Y/N) for project '
+                       f'{self.__config.project_code}',
+                field=field.get('data_input_key'),
+            )
+
+    def __validate_date(
+        self,
+        obj: DataObject,
+        field: dict,
+        field_value: str | int | float | None
+    ) -> None:
+        if not isinstance(field_value, datetime):
+            self.add_error(
+                object_id=obj.id,
+                detail=f'Field {field.get("data_input_key")} value '
+                       f'"{field_value}" is not a date string for project '
+                       f'{self.__config.project_code}',
+                field=field.get('data_input_key'),
+            )
+            return
+        if field.get('range_limit'):
+            earliest_date = datetime.now() - timedelta(days=field.get('min'))
+            latest_date = datetime.now() + timedelta(days=field.get('max'))
+            if field_value < earliest_date or field_value > latest_date:
                 self.add_error(
                     object_id=obj.id,
                     detail=f'Field {field.get("data_input_key")} value '
-                           f'"{field_value}" is
-                           f'
-                           f'{self.__config.project_code}',
+                           f'"{field_value}" is not within the allowed date '
+                           f'range for project {self.__config.project_code}',
                     field=field.get('data_input_key'),
                 )
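Summary of the sts_fields.py change above: inactive field definitions are skipped, list values are flattened to a ' | '-joined string, mandatory_input/mandatory_validation are enforced, and the remaining checks now dispatch on the field's type (String/TextArea, Integer/Decimal/Percentage, Boolean as 'Y'/'N', Date). For Date fields with range_limit set, min and max are read as days before and after today. A minimal standalone sketch of that date-window rule, using a hypothetical field definition rather than tol-sdk's validator API:

from datetime import datetime, timedelta


def date_in_allowed_window(value: datetime, field: dict) -> bool:
    # Mirrors the added __validate_date logic: with range_limit set,
    # `min`/`max` are interpreted as days before/after now.
    if not field.get('range_limit'):
        return True
    earliest = datetime.now() - timedelta(days=field.get('min'))
    latest = datetime.now() + timedelta(days=field.get('max'))
    return earliest <= value <= latest


# Hypothetical definition: a date no older than a year and not in the future
collection_date_field = {'range_limit': True, 'min': 365, 'max': 0}
print(date_in_allowed_window(datetime.now() - timedelta(days=30), collection_date_field))   # True
print(date_in_allowed_window(datetime.now() - timedelta(days=400), collection_date_field))  # False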
tol/validators/types.py
CHANGED
@@ -58,6 +58,8 @@ class TypesValidator(Validator):
             self.__add_result(
                 obj,
                 key,
+                detail=f'Field {key} value "{actual_value}" is not of type '
+                       f'"{expected_type}"',
             )
         if type_class and isinstance(actual_value, type_class):
             # Special case for bool since isinstance(True, int) is True
@@ -71,12 +73,13 @@ class TypesValidator(Validator):
         self,
         obj: DataObject,
         key: str,
+        detail: str = None,
     ) -> None:
 
         if self.__config.is_error:
             self.add_error(
                 object_id=obj.id,
-                detail=self.__config.detail,
+                detail=detail or self.__config.detail,
                 field=key,
             )
         else:
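The types.py change passes a per-field message into __add_result and falls back to the configured default (`detail or self.__config.detail`) when none is given. The surrounding context line also hints at the corner case this validator handles: bool is a subclass of int, so isinstance(True, int) is True. A small sketch of that special case, as a general illustration rather than the validator's exact implementation (which the diff only partially shows):

def matches_type(value, type_class) -> bool:
    # Booleans must not be accepted where an int is expected, because
    # isinstance(True, int) is True in Python.
    if type_class is int and isinstance(value, bool):
        return False
    return isinstance(value, type_class)


print(matches_type(True, int))   # False: bools are rejected as ints
print(matches_type(3, int))      # True
print(matches_type(True, bool))  # True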
{tol_sdk-1.8.0.dist-info → tol_sdk-1.8.1.dist-info}/RECORD
CHANGED
@@ -47,8 +47,9 @@ tol/benchling/benchling_converter.py,sha256=CO7BMvMAM52uIFjQWZFedRve0XNSmC9YtumJ
 tol/benchling/benchling_datasource.py,sha256=aHoGOJYX_dsL3G-9lXlY0bQQl4pMXf4a852sAkl-sKs,35112
 tol/benchling/benchling_warehouse_datasource.py,sha256=opsdvHz8l06NTmt84HrIgUJxV_DsurVgFtGs3_5PMoM,4635
 tol/benchling/sql/__init__.py,sha256=4LbvDIZOOG7p-ebbvivP7NvrJeApUvGEIcDL58ahQJE,85
-tol/benchling/sql/
-tol/benchling/sql/
+tol/benchling/sql/extraction_containers_dna.sql,sha256=YRQ0W1d-BjXB9gcMpf5ZyjHbPVp2VU0KkYi4e0JvYtA,6680
+tol/benchling/sql/extraction_extraction_type_dna.sql,sha256=UvxboWBoXXp7RHUdRKNiQTS-AXdLdz8bFEXCS6q9SoE,3094
+tol/benchling/sql/extraction_extraction_type_lres.sql,sha256=7Y6a8v0V-jjU5Kg3czuZjcPLvGSfrnUDekpHo2mUgnc,1556
 tol/benchling/sql/extraction_extraction_type_pooled_dna.sql,sha256=fNjCJPaViGrR6D8sLwZK2Zg5LqQqh16HB0s7ZeqTqdg,4480
 tol/benchling/sql/extraction_extraction_type_rna.sql,sha256=Vy3uV_ns4uO9CwuOFo1KPhI0yK6dsSO47wObcwJbHXQ,3861
 tol/benchling/sql/pacbio_prep.sql,sha256=a3dR-kcp8fT3ZZkbX8pV4StnweWGvcVl4fZNMVNCvbQ,4070
@@ -142,7 +143,7 @@ tol/ena/filter.py,sha256=UzOx5ivXvA0TY2QuNzFmS-zDPVNnaAx07DMVkAwVsAE,3370
 tol/ena/parser.py,sha256=Z4YmUnpfLKng4QwmZkLEj1hUfwYb_bqr-DWgF1Gw-EY,3253
 tol/excel/__init__.py,sha256=M0xL9w9Au8kYOLWzFGuijJ7WoZENOMkZ1XV1ephhlDY,229
 tol/excel/excel.py,sha256=rcA-wfXY9R14OfNKS-NX2sn__9gmQ_G8LoUgWseF1Gk,2124
-tol/excel/excel_datasource.py,sha256=
+tol/excel/excel_datasource.py,sha256=WhkqIk4Qg-iDBczI4l1OFoLNDX32riwgj4SdGhr4DIs,3423
 tol/excel/s3_factory.py,sha256=4lGyKrSvarPXWndyvm7K-tel0FoM0My8wnz-Mzwt0yQ,1245
 tol/flows/__init__.py,sha256=M7iSvnBJs6fJ8M38cW0bYQa9WW0TN8FHAMjIHPDNAJ4,166
 tol/flows/logger.py,sha256=rWXbaknGcPEZRFvC1CiB1qkhFRZsQk435w7VyJ3cpyw,170
@@ -334,16 +335,16 @@ tol/validators/mutually_exclusive.py,sha256=6blZK-2IY4Eq79fHKKrm-pxsQ6B5DNH5ldtx
 tol/validators/regex.py,sha256=dLAi_vQt9_DsT6wQZmbYC7X5-Wp15l0leUE6XkPaItg,2602
 tol/validators/regex_by_value.py,sha256=XM5EnT4vgD17rfpR3bUE9I56IemSw26BI9MZtMakd4E,2582
 tol/validators/specimens_have_same_taxon.py,sha256=m2LLRIZMdhPj1fzyioDJOraI6UHXgy1l963xhezgk7E,2177
-tol/validators/sts_fields.py,sha256=
+tol/validators/sts_fields.py,sha256=aYbzy15btEg4-ocDT1qrspe7-atoWRrOJ_KmuPU6J14,8936
 tol/validators/tolid.py,sha256=yODebLYbKtlem3IpVcv8XImvq90r-AK68asH9JEawqo,3897
-tol/validators/types.py,sha256=
+tol/validators/types.py,sha256=KDBNqx5isJG5XI1l2V9Wmi9135ZwDace3MU6Qij3J6E,2612
 tol/validators/unique_values.py,sha256=o5IrfUNLEmlEp8kpInTtFnTq-FqiHSC9TItKdf-LI1o,3114
 tol/validators/unique_whole_organisms.py,sha256=RdqA1GzIf3LTdrmNGGdxv0aW2udDY2P9EaqZb40hhik,5735
 tol/validators/interfaces/__init__.py,sha256=jtOxnwnwqV_29xjmmMcS_kvlt-pQiWwQYJn2YRP07_w,172
 tol/validators/interfaces/condition_evaluator.py,sha256=nj8Cb8hi47OBy6OVNfeLhF-Pjwtr8MiOSymYL6hfVes,3766
-tol_sdk-1.8.
-tol_sdk-1.8.
-tol_sdk-1.8.
-tol_sdk-1.8.
-tol_sdk-1.8.
-tol_sdk-1.8.
+tol_sdk-1.8.1.dist-info/licenses/LICENSE,sha256=RF9Jacy-9BpUAQQ20INhTgtaNBkmdTolYCHtrrkM2-8,1077
+tol_sdk-1.8.1.dist-info/METADATA,sha256=wXZU-uS5CIiiWYa0EGwD8YiK2Z9eOJz5x78yTUPFMmk,3142
+tol_sdk-1.8.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+tol_sdk-1.8.1.dist-info/entry_points.txt,sha256=jH3HfTwxjzog7E3lq8CKpUWGIRY9FSXbyL6CpUmv6D0,36
+tol_sdk-1.8.1.dist-info/top_level.txt,sha256=PwKMQLphyZNvagBoriVbl8uwHXQl8IC1niawVG0iXMM,10
+tol_sdk-1.8.1.dist-info/RECORD,,
File without changes
File without changes
File without changes
File without changes