tol-sdk 1.7.4__py3-none-any.whl → 1.7.5b2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tol/api_base/__init__.py +1 -0
- tol/api_base/blueprint.py +19 -8
- tol/{s3/data_upload/blueprint.py → api_base/data_upload.py} +21 -6
- tol/api_base/pipeline_steps.py +4 -4
- tol/api_client/api_datasource.py +8 -8
- tol/api_client/converter.py +38 -52
- tol/api_client/factory.py +21 -19
- tol/api_client/parser.py +138 -98
- tol/api_client/view.py +118 -43
- tol/core/__init__.py +2 -1
- tol/core/data_object.py +27 -9
- tol/core/data_object_converter.py +37 -2
- tol/core/factory.py +51 -62
- tol/core/validate.py +1 -0
- tol/ena/client.py +61 -10
- tol/ena/ena_datasource.py +16 -10
- tol/ena/ena_methods.py +33 -32
- tol/ena/parser.py +15 -2
- tol/flows/converters/__init__.py +2 -0
- tol/flows/converters/incoming_sample_to_ena_sample_converter.py +130 -0
- tol/flows/converters/incoming_sample_to_incoming_sample_with_lists_converter.py +46 -0
- tol/s3/__init__.py +0 -1
- tol/sql/model.py +1 -1
- tol/sql/pipeline_step/factory.py +1 -1
- tol/sql/sql_converter.py +7 -1
- tol/validators/__init__.py +12 -1
- tol/validators/allowed_keys.py +17 -12
- tol/validators/allowed_values.py +21 -63
- tol/validators/allowed_values_from_datasource.py +89 -0
- tol/validators/assert_on_condition.py +56 -0
- tol/validators/ena_checklist.py +73 -0
- tol/validators/ena_submittable.py +61 -0
- tol/validators/interfaces/__init__.py +5 -0
- tol/validators/interfaces/condition_evaluator.py +102 -0
- tol/validators/min_one_valid_value.py +55 -0
- tol/validators/mutually_exclusive.py +111 -0
- tol/validators/regex.py +30 -23
- tol/validators/regex_by_value.py +33 -33
- tol/validators/specimens_have_same_taxon.py +60 -0
- tol/validators/sts_fields.py +88 -0
- tol/validators/tolid.py +110 -0
- tol/validators/unique_values.py +25 -17
- tol/validators/unique_whole_organisms.py +109 -0
- {tol_sdk-1.7.4.dist-info → tol_sdk-1.7.5b2.dist-info}/METADATA +1 -1
- {tol_sdk-1.7.4.dist-info → tol_sdk-1.7.5b2.dist-info}/RECORD +49 -36
- tol/s3/data_upload/__init__.py +0 -3
- {tol_sdk-1.7.4.dist-info → tol_sdk-1.7.5b2.dist-info}/WHEEL +0 -0
- {tol_sdk-1.7.4.dist-info → tol_sdk-1.7.5b2.dist-info}/entry_points.txt +0 -0
- {tol_sdk-1.7.4.dist-info → tol_sdk-1.7.5b2.dist-info}/licenses/LICENSE +0 -0
- {tol_sdk-1.7.4.dist-info → tol_sdk-1.7.5b2.dist-info}/top_level.txt +0 -0
|
@@ -90,5 +90,40 @@ class DefaultDataObjectToDataObjectConverter(DataObjectToDataObjectOrUpdateConve
|
|
|
90
90
|
if k != self.__id_field
|
|
91
91
|
}
|
|
92
92
|
)
|
|
93
|
-
|
|
94
|
-
|
|
93
|
+
yield ret
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
class SanitisingConverter(DataObjectToDataObjectOrUpdateConverter):
|
|
97
|
+
|
|
98
|
+
def __init__(
|
|
99
|
+
self,
|
|
100
|
+
data_object_factory: DataObjectFactory
|
|
101
|
+
):
|
|
102
|
+
super().__init__(data_object_factory)
|
|
103
|
+
|
|
104
|
+
def convert(
|
|
105
|
+
self,
|
|
106
|
+
data_object: DataObject
|
|
107
|
+
) -> Iterable[DataObject]:
|
|
108
|
+
"""
|
|
109
|
+
A converter that removes leading and trailing whitespace from
|
|
110
|
+
string attributes.
|
|
111
|
+
"""
|
|
112
|
+
if data_object is not None and data_object.id is not None:
|
|
113
|
+
ret = self._data_object_factory(
|
|
114
|
+
id_=data_object.id,
|
|
115
|
+
type_=data_object.type,
|
|
116
|
+
attributes={
|
|
117
|
+
k: self.__sanitise(v)
|
|
118
|
+
for k, v in data_object.attributes.items()
|
|
119
|
+
}
|
|
120
|
+
)
|
|
121
|
+
yield ret
|
|
122
|
+
|
|
123
|
+
def __sanitise(
|
|
124
|
+
self,
|
|
125
|
+
value: object
|
|
126
|
+
) -> object:
|
|
127
|
+
if isinstance(value, str):
|
|
128
|
+
return value.strip()
|
|
129
|
+
return value
|
tol/core/factory.py
CHANGED
|
@@ -4,19 +4,10 @@
|
|
|
4
4
|
|
|
5
5
|
from __future__ import annotations
|
|
6
6
|
|
|
7
|
-
import logging
|
|
8
7
|
import typing
|
|
9
8
|
from abc import ABC
|
|
10
|
-
from
|
|
11
|
-
|
|
12
|
-
Callable,
|
|
13
|
-
Dict,
|
|
14
|
-
Iterable,
|
|
15
|
-
Optional,
|
|
16
|
-
Protocol,
|
|
17
|
-
Type,
|
|
18
|
-
Union
|
|
19
|
-
)
|
|
9
|
+
from collections.abc import Callable, Iterable
|
|
10
|
+
from typing import Any, Protocol
|
|
20
11
|
|
|
21
12
|
from .data_object import DataDict, DataObject
|
|
22
13
|
from .data_source_dict import DataSourceDict
|
|
@@ -28,7 +19,7 @@ if typing.TYPE_CHECKING:
|
|
|
28
19
|
from .datasource import DataSource
|
|
29
20
|
|
|
30
21
|
|
|
31
|
-
ToOne = dict[str,
|
|
22
|
+
ToOne = dict[str, DataObject | None]
|
|
32
23
|
ToMany = dict[str, Iterable[DataObject]]
|
|
33
24
|
|
|
34
25
|
|
|
@@ -42,8 +33,8 @@ class DataObjectFactory(Protocol):
|
|
|
42
33
|
self,
|
|
43
34
|
type_: str,
|
|
44
35
|
|
|
45
|
-
id_:
|
|
46
|
-
attributes:
|
|
36
|
+
id_: str | None = None,
|
|
37
|
+
attributes: dict[str, Any] | None = None,
|
|
47
38
|
to_one: ToOne | None = None,
|
|
48
39
|
to_many: ToMany | None = None
|
|
49
40
|
) -> DataObject:
|
|
@@ -66,17 +57,17 @@ class DataSourceDictFactory(Protocol):
|
|
|
66
57
|
|
|
67
58
|
OneDictFactory = Callable[
|
|
68
59
|
[DataObject],
|
|
69
|
-
|
|
60
|
+
dict[str, DataObject | None]
|
|
70
61
|
]
|
|
71
62
|
"""
|
|
72
63
|
Takes a `DataObject` instance, returns a `dict` mapping
|
|
73
|
-
to-one relationship names to its `
|
|
64
|
+
to-one relationship names to its optional `DataObject`
|
|
74
65
|
"""
|
|
75
66
|
|
|
76
67
|
|
|
77
68
|
ManyDictFactory = Callable[
|
|
78
69
|
[DataObject],
|
|
79
|
-
|
|
70
|
+
dict[str, Iterable[DataObject]]
|
|
80
71
|
]
|
|
81
72
|
"""
|
|
82
73
|
Takes a `DataObject` instance, returns a `dict` mapping
|
|
@@ -105,7 +96,7 @@ def core_data_object(
|
|
|
105
96
|
one_dict_factory: OneDictFactory = lambda o: ToOneDict(o),
|
|
106
97
|
many_dict_factory: ManyDictFactory = lambda o: ToManyDict(o),
|
|
107
98
|
data_source_dict_factory: DataSourceDictFactory = lambda *d: DataSourceDict(*d)
|
|
108
|
-
) ->
|
|
99
|
+
) -> type[DataObject]:
|
|
109
100
|
"""
|
|
110
101
|
Takes a tuple of DataSource instances, and creates a CoreDataObject
|
|
111
102
|
implementation that refers to all of them.
|
|
@@ -132,17 +123,18 @@ def core_data_object(
|
|
|
132
123
|
def __init__(
|
|
133
124
|
self,
|
|
134
125
|
type_: str,
|
|
135
|
-
id_:
|
|
126
|
+
id_: str | None = None,
|
|
136
127
|
attributes: DataDict | None = None,
|
|
137
128
|
to_one: ToOne | None = None,
|
|
138
129
|
to_many: ToMany | None = None,
|
|
139
130
|
stub: bool = False,
|
|
140
|
-
stub_types:
|
|
131
|
+
stub_types: Iterable[str] | None = None
|
|
141
132
|
):
|
|
142
133
|
self.__id = id_
|
|
143
134
|
self.__type = type_
|
|
144
|
-
self.__attributes =
|
|
145
|
-
self.__to_one_objects =
|
|
135
|
+
self.__attributes = {} if attributes is None else attributes
|
|
136
|
+
self.__to_one_objects = {} if to_one is None else to_one
|
|
137
|
+
self.__to_many_objects = {} if to_many is None else to_many
|
|
146
138
|
if stub and id_ is None:
|
|
147
139
|
raise DataSourceError('ID must be set if stub is True')
|
|
148
140
|
self.__stub_value = stub
|
|
@@ -151,11 +143,6 @@ def core_data_object(
|
|
|
151
143
|
self.__to_one_relations = one_dict_factory(self)
|
|
152
144
|
self.__to_many_relations = many_dict_factory(self)
|
|
153
145
|
|
|
154
|
-
if to_many:
|
|
155
|
-
logging.warning(
|
|
156
|
-
'Setting of to_many relations is unsupported'
|
|
157
|
-
)
|
|
158
|
-
|
|
159
146
|
def __str__(self) -> str:
|
|
160
147
|
dump = f'type="{self.type}"'
|
|
161
148
|
|
|
@@ -164,36 +151,34 @@ def core_data_object(
|
|
|
164
151
|
|
|
165
152
|
return f'CoreDataObject({dump})'
|
|
166
153
|
|
|
167
|
-
def __getattribute__(self,
|
|
168
|
-
if _local_name(
|
|
169
|
-
return object.__getattribute__(self,
|
|
154
|
+
def __getattribute__(self, name: str, /) -> Any:
|
|
155
|
+
if _local_name(name):
|
|
156
|
+
return object.__getattribute__(self, name)
|
|
170
157
|
|
|
171
158
|
if self.__stub_value:
|
|
172
159
|
self.__unstub()
|
|
173
160
|
|
|
174
|
-
if
|
|
175
|
-
if
|
|
176
|
-
return self._to_one_objects[
|
|
177
|
-
return self.to_one_relationships.get(
|
|
178
|
-
|
|
179
|
-
if
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
self
|
|
189
|
-
elif
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
status_code=400
|
|
194
|
-
)
|
|
161
|
+
if name in self.__to_one_names:
|
|
162
|
+
if name in self._to_one_objects:
|
|
163
|
+
return self._to_one_objects[name]
|
|
164
|
+
return self.to_one_relationships.get(name)
|
|
165
|
+
|
|
166
|
+
if name in self.__to_many_names:
|
|
167
|
+
if name in self._to_many_objects:
|
|
168
|
+
return self._to_many_objects[name]
|
|
169
|
+
return self.to_many_relationships.get(name, [])
|
|
170
|
+
|
|
171
|
+
return self.__attributes.get(name)
|
|
172
|
+
|
|
173
|
+
def __setattr__(self, name: str, value: Any, /) -> None:
|
|
174
|
+
if _local_name(name):
|
|
175
|
+
object.__setattr__(self, name, value)
|
|
176
|
+
elif name in self.__to_one_names:
|
|
177
|
+
self._to_one_objects[name] = value
|
|
178
|
+
elif name in self.__to_many_names:
|
|
179
|
+
self._to_many_objects[name] = value
|
|
195
180
|
else:
|
|
196
|
-
self.__attributes[
|
|
181
|
+
self.__attributes[name] = value
|
|
197
182
|
|
|
198
183
|
def __unstub(self) -> None:
|
|
199
184
|
self.__stub_value = False
|
|
@@ -214,7 +199,7 @@ def core_data_object(
|
|
|
214
199
|
return self.__type
|
|
215
200
|
|
|
216
201
|
@property
|
|
217
|
-
def id(self) ->
|
|
202
|
+
def id(self) -> str | None: # noqa
|
|
218
203
|
return self.__id
|
|
219
204
|
|
|
220
205
|
@id.setter
|
|
@@ -222,27 +207,31 @@ def core_data_object(
|
|
|
222
207
|
self.__id = new_id
|
|
223
208
|
|
|
224
209
|
@property
|
|
225
|
-
def attributes(self) ->
|
|
210
|
+
def attributes(self) -> dict[str, Any]:
|
|
226
211
|
if self.__stub_value:
|
|
227
212
|
self.__unstub()
|
|
228
213
|
return self.__attributes
|
|
229
214
|
|
|
230
215
|
@property
|
|
231
|
-
def to_one_relationships(self) ->
|
|
216
|
+
def to_one_relationships(self) -> dict[str, DataObject | None]:
|
|
232
217
|
if not self.__relational:
|
|
233
218
|
raise NotRelationalError(self)
|
|
234
219
|
return self.__to_one_relations
|
|
235
220
|
|
|
236
221
|
@property
|
|
237
|
-
def to_many_relationships(self) ->
|
|
222
|
+
def to_many_relationships(self) -> dict[str, list[DataObject]]:
|
|
238
223
|
if not self.__relational:
|
|
239
224
|
raise NotRelationalError(self)
|
|
240
225
|
return self.__to_many_relations
|
|
241
226
|
|
|
242
227
|
@property
|
|
243
|
-
def _to_one_objects(self) ->
|
|
228
|
+
def _to_one_objects(self) -> dict[str, DataObject | None]:
|
|
244
229
|
return self.__to_one_objects
|
|
245
230
|
|
|
231
|
+
@property
|
|
232
|
+
def _to_many_objects(self) -> dict[str, list[DataObject]]:
|
|
233
|
+
return self.__to_many_objects
|
|
234
|
+
|
|
246
235
|
@property
|
|
247
236
|
def __relational(self) -> bool:
|
|
248
237
|
"""Whether the hosting DataSource is relational or not"""
|
|
@@ -250,7 +239,7 @@ def core_data_object(
|
|
|
250
239
|
return isinstance(self._host, Relational)
|
|
251
240
|
|
|
252
241
|
@property
|
|
253
|
-
def __relationship_config(self) ->
|
|
242
|
+
def __relationship_config(self) -> RelationshipConfig | None:
|
|
254
243
|
return self._host.relationship_config.get(self.type)
|
|
255
244
|
|
|
256
245
|
@property
|
|
@@ -274,17 +263,17 @@ def core_data_object(
|
|
|
274
263
|
)
|
|
275
264
|
|
|
276
265
|
@property
|
|
277
|
-
def _host(self) ->
|
|
266
|
+
def _host(self) -> DataSource | Relational:
|
|
278
267
|
return data_source_dict[self.type]
|
|
279
268
|
|
|
280
269
|
def core_data_object_factory(
|
|
281
270
|
type_: str,
|
|
282
|
-
id_:
|
|
283
|
-
attributes:
|
|
271
|
+
id_: str | None = None,
|
|
272
|
+
attributes: dict[str, Any] | None = None,
|
|
284
273
|
to_one: ToOne | None = None,
|
|
285
274
|
to_many: ToMany | None = None,
|
|
286
275
|
stub: bool = False, # Set stub if only type and id are given on creation
|
|
287
|
-
stub_types:
|
|
276
|
+
stub_types: Iterable[str] | None = None
|
|
288
277
|
) -> DataObject:
|
|
289
278
|
|
|
290
279
|
return CoreDataObject(
|
tol/core/validate.py
CHANGED
tol/ena/client.py
CHANGED
|
@@ -7,9 +7,10 @@ from typing import Dict, Optional, Tuple
|
|
|
7
7
|
import requests
|
|
8
8
|
|
|
9
9
|
from .converter import EnaApiTransfer
|
|
10
|
+
from ..core import HttpClient
|
|
10
11
|
|
|
11
12
|
|
|
12
|
-
class EnaApiClient:
|
|
13
|
+
class EnaApiClient(HttpClient):
|
|
13
14
|
"""
|
|
14
15
|
Takes ENA API transfers and connects to a remote ENA API.
|
|
15
16
|
"""
|
|
@@ -22,6 +23,9 @@ class EnaApiClient:
|
|
|
22
23
|
ena_contact_name: str,
|
|
23
24
|
ena_contact_email: str,
|
|
24
25
|
) -> None:
|
|
26
|
+
super().__init__(
|
|
27
|
+
retries=5
|
|
28
|
+
)
|
|
25
29
|
self.__ena_url = ena_url
|
|
26
30
|
self.__ena_user = ena_user
|
|
27
31
|
self.__ena_password = ena_password
|
|
@@ -46,7 +50,7 @@ class EnaApiClient:
|
|
|
46
50
|
`object_type` and `object_id` or returns None if not found.
|
|
47
51
|
"""
|
|
48
52
|
url, params = self.__detail_url(object_type, object_ids, filter_string)
|
|
49
|
-
return self.__fetch_detail(url, params)
|
|
53
|
+
return self.__fetch_detail(url, params, text=(object_type == 'checklist'))
|
|
50
54
|
|
|
51
55
|
def get_list(
|
|
52
56
|
self,
|
|
@@ -59,17 +63,27 @@ class EnaApiClient:
|
|
|
59
63
|
def __fetch_detail(
|
|
60
64
|
self,
|
|
61
65
|
url: str,
|
|
62
|
-
params: Dict = {}
|
|
66
|
+
params: Dict = {},
|
|
67
|
+
text: bool = False
|
|
63
68
|
) -> Optional[EnaApiTransfer]:
|
|
64
69
|
"""
|
|
65
70
|
Fetches data from the ENA API.
|
|
66
71
|
"""
|
|
72
|
+
session = self._get_session_with_retries()
|
|
67
73
|
headers = {'Content-Type': 'application/json'}
|
|
68
|
-
r =
|
|
74
|
+
r = session.get(url, params=params, headers=headers)
|
|
69
75
|
if r.status_code == 404:
|
|
70
76
|
return []
|
|
71
77
|
r.raise_for_status()
|
|
72
|
-
|
|
78
|
+
if text:
|
|
79
|
+
return r.text
|
|
80
|
+
try:
|
|
81
|
+
data = r.json()
|
|
82
|
+
if isinstance(data, list):
|
|
83
|
+
return data
|
|
84
|
+
return [] if not data else [data]
|
|
85
|
+
except requests.exceptions.JSONDecodeError:
|
|
86
|
+
return []
|
|
73
87
|
|
|
74
88
|
def __fetch_list(
|
|
75
89
|
self,
|
|
@@ -79,13 +93,18 @@ class EnaApiClient:
|
|
|
79
93
|
"""
|
|
80
94
|
Fetches data from the ENA API.
|
|
81
95
|
"""
|
|
96
|
+
session = self._get_session_with_retries()
|
|
82
97
|
headers = {'Content-Type': 'application/json'}
|
|
83
|
-
r =
|
|
98
|
+
r = session.get(url, params=params, headers=headers)
|
|
84
99
|
|
|
85
100
|
if r.status_code == 404:
|
|
86
101
|
return []
|
|
87
102
|
r.raise_for_status()
|
|
88
|
-
|
|
103
|
+
try:
|
|
104
|
+
data = r.json()
|
|
105
|
+
return data if data else []
|
|
106
|
+
except requests.exceptions.JSONDecodeError:
|
|
107
|
+
return []
|
|
89
108
|
|
|
90
109
|
def __detail_url(
|
|
91
110
|
self,
|
|
@@ -96,6 +115,18 @@ class EnaApiClient:
|
|
|
96
115
|
"""
|
|
97
116
|
Returns the URL and parameters for a detail query.
|
|
98
117
|
"""
|
|
118
|
+
if object_type == 'checklist':
|
|
119
|
+
ids = ','.join(str(id_) for id_ in object_ids)
|
|
120
|
+
url = f'{self.__ena_url}/ena/browser/api/xml/{ids}'
|
|
121
|
+
params = {}
|
|
122
|
+
return url, params
|
|
123
|
+
if object_type == 'submittable_taxon':
|
|
124
|
+
# This is actually called separately for each taxon id
|
|
125
|
+
ids = ','.join(str(id_) for id_ in object_ids)
|
|
126
|
+
url = f'{self.__ena_url}/ena/taxonomy/rest/tax-id/{ids}'
|
|
127
|
+
params = {}
|
|
128
|
+
return url, params
|
|
129
|
+
|
|
99
130
|
url = f'{self.__ena_url}/ena/portal/api/search'
|
|
100
131
|
|
|
101
132
|
if object_type == 'assembly':
|
|
@@ -150,14 +181,34 @@ class EnaApiClient:
|
|
|
150
181
|
"""
|
|
151
182
|
Returns the fields for a given object type from the ENA portal API.
|
|
152
183
|
"""
|
|
153
|
-
|
|
154
|
-
|
|
184
|
+
if object_type == 'checklist':
|
|
185
|
+
return {'checklist': 'dict[str, Any]'}
|
|
186
|
+
if object_type == 'submittable_taxon':
|
|
187
|
+
return {
|
|
188
|
+
'scientific_name': 'str',
|
|
189
|
+
'formal_name': 'str',
|
|
190
|
+
'rank': 'str',
|
|
191
|
+
'division': 'str',
|
|
192
|
+
'lineage': 'str',
|
|
193
|
+
'genetic_code': 'str',
|
|
194
|
+
'mitochondrial_genetic_code': 'str',
|
|
195
|
+
'submittable': 'boolean',
|
|
196
|
+
'binomial': 'boolean',
|
|
197
|
+
'merged': 'str',
|
|
198
|
+
'authority': 'str',
|
|
199
|
+
'other_names': 'list[str]',
|
|
200
|
+
'metagenome': 'str'
|
|
201
|
+
}
|
|
202
|
+
session = self._get_session_with_retries()
|
|
203
|
+
r = session.get(
|
|
204
|
+
self.__ena_url + '/ena/portal/api/returnFields',
|
|
205
|
+
params={'result': object_type, 'format': 'json'},
|
|
155
206
|
headers={
|
|
156
207
|
'Content-Type': 'application/json'
|
|
157
208
|
}
|
|
158
209
|
)
|
|
159
210
|
fields = {}
|
|
160
|
-
for field in
|
|
211
|
+
for field in r.json():
|
|
161
212
|
|
|
162
213
|
type_ = field['type'] if 'type' in field else 'string'
|
|
163
214
|
ena_type = self.__type_mappings[type_] if type_ in self.__type_mappings else 'str'
|
tol/ena/ena_datasource.py
CHANGED
|
@@ -100,7 +100,10 @@ class EnaDataSource(
|
|
|
100
100
|
@property
|
|
101
101
|
@cache
|
|
102
102
|
def supported_types(self) -> list[str]:
|
|
103
|
-
return [
|
|
103
|
+
return [
|
|
104
|
+
'assembly', 'read_run', 'sample', 'study', 'taxon', 'checklist',
|
|
105
|
+
'submittable_taxon'
|
|
106
|
+
]
|
|
104
107
|
|
|
105
108
|
def get_by_id(
|
|
106
109
|
self,
|
|
@@ -111,7 +114,18 @@ class EnaDataSource(
|
|
|
111
114
|
self.__validate_object_type(object_type)
|
|
112
115
|
|
|
113
116
|
client = self.__client_factory()
|
|
114
|
-
|
|
117
|
+
# For a submittable_taxon we need to make multiple calls
|
|
118
|
+
if object_type == 'submittable_taxon':
|
|
119
|
+
ena_response = []
|
|
120
|
+
for object_id in object_ids:
|
|
121
|
+
response = client.get_detail(object_type, [object_id])
|
|
122
|
+
if response and isinstance(response, list):
|
|
123
|
+
ena_response.extend(response)
|
|
124
|
+
else:
|
|
125
|
+
ena_response = client.get_detail(object_type, object_ids)
|
|
126
|
+
# For a checklist we need to convert into a list of dicts
|
|
127
|
+
if object_type == 'checklist':
|
|
128
|
+
ena_response = convert_checklist_xml_to_dict(ena_response)
|
|
115
129
|
ena_converter = self.__ec_factory()
|
|
116
130
|
|
|
117
131
|
converted_objects, _ = ena_converter.convert_list(object_type, ena_response) \
|
|
@@ -198,7 +212,6 @@ class EnaDataSource(
|
|
|
198
212
|
return response
|
|
199
213
|
|
|
200
214
|
def get_request(self, command: str, headers=None, params=None) -> requests.Response:
|
|
201
|
-
|
|
202
215
|
response = requests.get(self.uri + command,
|
|
203
216
|
params=params, headers=headers,
|
|
204
217
|
auth=HTTPBasicAuth(self.user, self.password))
|
|
@@ -209,13 +222,6 @@ class EnaDataSource(
|
|
|
209
222
|
|
|
210
223
|
return response
|
|
211
224
|
|
|
212
|
-
def get_xml_checklist(self, checklist_id: str) -> Dict[str, Tuple[str, str, object]]:
|
|
213
|
-
output = self.get_request(f'/ena/browser/api/xml/{checklist_id}')
|
|
214
|
-
|
|
215
|
-
checklist_dict = convert_checklist_xml_to_dict(output.text)
|
|
216
|
-
|
|
217
|
-
return checklist_dict
|
|
218
|
-
|
|
219
225
|
def get_biosample_data_biosampleid(self, biosample_id: str):
|
|
220
226
|
output = self.get_request(f'/ena/browser/api/xml/{biosample_id}')
|
|
221
227
|
|
tol/ena/ena_methods.py
CHANGED
|
@@ -30,51 +30,52 @@ submission_xml_template = """<?xml version="1.0" encoding="UTF-8"?>
|
|
|
30
30
|
|
|
31
31
|
def convert_checklist_xml_to_dict(checklist_xml: str) -> Dict[str, Tuple[str, str, object]]:
|
|
32
32
|
# key label, val [mandatory_status, ]
|
|
33
|
-
|
|
34
|
-
fields = {}
|
|
35
|
-
|
|
33
|
+
checklists = []
|
|
36
34
|
root = ElementTree.fromstring(checklist_xml)
|
|
37
|
-
for
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
35
|
+
for checklist_node in root.findall('./CHECKLIST'):
|
|
36
|
+
checklist_id = checklist_node.get('accession')
|
|
37
|
+
fields = {}
|
|
38
|
+
for field_group_node in checklist_node.findall('./DESCRIPTOR/FIELD_GROUP'):
|
|
39
|
+
for field_node in field_group_node.findall('./FIELD'):
|
|
41
40
|
|
|
42
|
-
|
|
41
|
+
label, mandatory_status = None, None
|
|
43
42
|
|
|
44
|
-
|
|
45
|
-
label = label_node.text
|
|
43
|
+
label_node = field_node.find('./LABEL')
|
|
46
44
|
|
|
47
|
-
|
|
45
|
+
if label_node is not None:
|
|
46
|
+
label = label_node.text
|
|
48
47
|
|
|
49
|
-
|
|
50
|
-
mandatory_status = mandatory_node.text
|
|
48
|
+
mandatory_node = field_node.find('./MANDATORY')
|
|
51
49
|
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
regex_str = regex_node.text
|
|
55
|
-
fields[label] = [mandatory_status, 'restricted text', regex_str]
|
|
56
|
-
continue
|
|
50
|
+
if mandatory_node is not None:
|
|
51
|
+
mandatory_status = mandatory_node.text
|
|
57
52
|
|
|
58
|
-
|
|
53
|
+
regex_node = field_node.find('./FIELD_TYPE/TEXT_FIELD/REGEX_VALUE')
|
|
54
|
+
if regex_node is not None:
|
|
55
|
+
regex_str = regex_node.text
|
|
56
|
+
fields[label] = [mandatory_status, 'restricted text', regex_str]
|
|
57
|
+
continue
|
|
59
58
|
|
|
60
|
-
|
|
61
|
-
text_options = []
|
|
62
|
-
for text_option_node in text_choice_node.findall('./TEXT_VALUE/VALUE'):
|
|
63
|
-
text_options.append(text_option_node.text)
|
|
59
|
+
text_choice_node = field_node.find('./FIELD_TYPE/TEXT_CHOICE_FIELD')
|
|
64
60
|
|
|
65
|
-
|
|
66
|
-
|
|
61
|
+
if text_choice_node is not None:
|
|
62
|
+
text_options = []
|
|
63
|
+
for text_option_node in text_choice_node.findall('./TEXT_VALUE/VALUE'):
|
|
64
|
+
text_options.append(text_option_node.text)
|
|
67
65
|
|
|
68
|
-
|
|
66
|
+
fields[label] = [mandatory_status, 'text choice', text_options]
|
|
67
|
+
continue
|
|
69
68
|
|
|
70
|
-
|
|
71
|
-
regex_str = regex_node.text
|
|
72
|
-
fields[label] = [mandatory_status, 'valid taxonomy', '']
|
|
73
|
-
continue
|
|
69
|
+
taxon_node = field_node.find('./FIELD_TYPE/TEXT_FIELD/TAXON_FIELD')
|
|
74
70
|
|
|
75
|
-
|
|
71
|
+
if taxon_node is not None:
|
|
72
|
+
regex_str = regex_node.text
|
|
73
|
+
fields[label] = [mandatory_status, 'valid taxonomy', '']
|
|
74
|
+
continue
|
|
76
75
|
|
|
77
|
-
|
|
76
|
+
fields[label] = [mandatory_status, 'free text', '']
|
|
77
|
+
checklists.append({'checklist_id': checklist_id, 'checklist': fields})
|
|
78
|
+
return checklists
|
|
78
79
|
|
|
79
80
|
|
|
80
81
|
def convert_xml_to_list_of_sample_dict(response_xml: str) -> List[Dict[str, List[str]]]:
|
tol/ena/parser.py
CHANGED
|
@@ -8,6 +8,7 @@ import typing
|
|
|
8
8
|
from abc import ABC, abstractmethod
|
|
9
9
|
from typing import Any, Iterable, Optional
|
|
10
10
|
|
|
11
|
+
from caseconverter import snakecase
|
|
11
12
|
|
|
12
13
|
from ..core import DataObject
|
|
13
14
|
|
|
@@ -88,6 +89,10 @@ class DefaultParser(Parser):
|
|
|
88
89
|
return transfer['study_accession']
|
|
89
90
|
elif type_ == 'taxon':
|
|
90
91
|
return transfer['tax_id']
|
|
92
|
+
elif type_ == 'checklist':
|
|
93
|
+
return transfer['checklist_id']
|
|
94
|
+
elif type_ == 'submittable_taxon':
|
|
95
|
+
return transfer['taxId']
|
|
91
96
|
|
|
92
97
|
def __convert_attributes(
|
|
93
98
|
self,
|
|
@@ -99,6 +104,14 @@ class DefaultParser(Parser):
|
|
|
99
104
|
return ret
|
|
100
105
|
|
|
101
106
|
for k, v in attributes.items():
|
|
102
|
-
if k not in ['key'] and k in self.__dict[type_].attribute_types[type_]:
|
|
103
|
-
ret[k] = v
|
|
107
|
+
if k not in ['key'] and snakecase(k) in self.__dict[type_].attribute_types[type_]:
|
|
108
|
+
ret[snakecase(k)] = self.__convert_value(type_, snakecase(k), v)
|
|
104
109
|
return ret
|
|
110
|
+
|
|
111
|
+
def __convert_value(self, type_: str, attribute_name: str, value: Any) -> Any:
|
|
112
|
+
attribute_type = self.__dict[type_].attribute_types[type_][attribute_name]
|
|
113
|
+
if attribute_type == 'boolean':
|
|
114
|
+
if isinstance(value, str):
|
|
115
|
+
return value.lower() == 'true'
|
|
116
|
+
return bool(value)
|
|
117
|
+
return value
|
tol/flows/converters/__init__.py
CHANGED
|
@@ -34,6 +34,8 @@ from .gap_assembly_to_elastic_assembly_analysis_converter import GapAssemblyToEl
|
|
|
34
34
|
from .genome_notes_genome_note_to_elastic_genome_note_converter import GenomeNotesGenomeNoteToElasticGenomeNoteConverter # noqa F401
|
|
35
35
|
from .goat_taxon_to_elastic_species_converter import GoatTaxonToElasticSpeciesConverter # noqa F401
|
|
36
36
|
from .grit_issue_to_elastic_curation_converter import GritIssueToElasticCurationConverter # noqa F401
|
|
37
|
+
from .incoming_sample_to_ena_sample_converter import IncomingSampleToEnaSampleConverter # noqa
|
|
38
|
+
from .incoming_sample_to_incoming_sample_with_lists_converter import IncomingSampleToIncomingSampleWithListsConverter # noqa F401
|
|
37
39
|
from .informatics_tolid_to_elastic_tolid_converter import InformaticsTolidToElasticTolidConverter # noqa F401
|
|
38
40
|
from .labwhere_location_to_elastic_sample_update_converter import LabwhereLocationToElasticSampleUpdateConverter # noqa F401
|
|
39
41
|
from .labwhere_location_to_sts_tray_converter import LabwhereLocationToStsTrayConverter # noqa F401
|