tol-sdk 1.7.4__py3-none-any.whl → 1.7.5b2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tol/api_base/__init__.py +1 -0
- tol/api_base/blueprint.py +19 -8
- tol/{s3/data_upload/blueprint.py → api_base/data_upload.py} +21 -6
- tol/api_base/pipeline_steps.py +4 -4
- tol/api_client/api_datasource.py +8 -8
- tol/api_client/converter.py +38 -52
- tol/api_client/factory.py +21 -19
- tol/api_client/parser.py +138 -98
- tol/api_client/view.py +118 -43
- tol/core/__init__.py +2 -1
- tol/core/data_object.py +27 -9
- tol/core/data_object_converter.py +37 -2
- tol/core/factory.py +51 -62
- tol/core/validate.py +1 -0
- tol/ena/client.py +61 -10
- tol/ena/ena_datasource.py +16 -10
- tol/ena/ena_methods.py +33 -32
- tol/ena/parser.py +15 -2
- tol/flows/converters/__init__.py +2 -0
- tol/flows/converters/incoming_sample_to_ena_sample_converter.py +130 -0
- tol/flows/converters/incoming_sample_to_incoming_sample_with_lists_converter.py +46 -0
- tol/s3/__init__.py +0 -1
- tol/sql/model.py +1 -1
- tol/sql/pipeline_step/factory.py +1 -1
- tol/sql/sql_converter.py +7 -1
- tol/validators/__init__.py +12 -1
- tol/validators/allowed_keys.py +17 -12
- tol/validators/allowed_values.py +21 -63
- tol/validators/allowed_values_from_datasource.py +89 -0
- tol/validators/assert_on_condition.py +56 -0
- tol/validators/ena_checklist.py +73 -0
- tol/validators/ena_submittable.py +61 -0
- tol/validators/interfaces/__init__.py +5 -0
- tol/validators/interfaces/condition_evaluator.py +102 -0
- tol/validators/min_one_valid_value.py +55 -0
- tol/validators/mutually_exclusive.py +111 -0
- tol/validators/regex.py +30 -23
- tol/validators/regex_by_value.py +33 -33
- tol/validators/specimens_have_same_taxon.py +60 -0
- tol/validators/sts_fields.py +88 -0
- tol/validators/tolid.py +110 -0
- tol/validators/unique_values.py +25 -17
- tol/validators/unique_whole_organisms.py +109 -0
- {tol_sdk-1.7.4.dist-info → tol_sdk-1.7.5b2.dist-info}/METADATA +1 -1
- {tol_sdk-1.7.4.dist-info → tol_sdk-1.7.5b2.dist-info}/RECORD +49 -36
- tol/s3/data_upload/__init__.py +0 -3
- {tol_sdk-1.7.4.dist-info → tol_sdk-1.7.5b2.dist-info}/WHEEL +0 -0
- {tol_sdk-1.7.4.dist-info → tol_sdk-1.7.5b2.dist-info}/entry_points.txt +0 -0
- {tol_sdk-1.7.4.dist-info → tol_sdk-1.7.5b2.dist-info}/licenses/LICENSE +0 -0
- {tol_sdk-1.7.4.dist-info → tol_sdk-1.7.5b2.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,130 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: 2025 Genome Research Ltd.
|
|
2
|
+
# SPDX-License-Identifier: MIT
|
|
3
|
+
|
|
4
|
+
import re
|
|
5
|
+
from dataclasses import dataclass
|
|
6
|
+
from typing import Iterable
|
|
7
|
+
|
|
8
|
+
from tol.core import DataObject, DataObjectToDataObjectOrUpdateConverter
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class IncomingSampleToEnaSampleConverter(DataObjectToDataObjectOrUpdateConverter):
    """
    Converts an incoming sample `DataObject` into a `sample`
    `DataObject` whose attributes are keyed by ENA field names.
    """

    @dataclass(slots=True, frozen=True, kw_only=True)
    class Config:
        # Written verbatim to the 'ENA-CHECKLIST' attribute
        ena_checklist_id: str
        # Written verbatim to the 'project name' attribute
        project_name: str

    __slots__ = ['__config']
    __config: Config

    def __init__(self, data_object_factory, config: Config) -> None:
        super().__init__(data_object_factory)
        self.__config = config
        self._data_object_factory = data_object_factory

    def convert(self, data_object: DataObject) -> Iterable[DataObject]:
        """
        Yields one `sample` `DataObject`, with the same id as the
        given `data_object`, whose attributes are the incoming
        sample's attributes renamed to their ENA equivalents.

        Underscores in string values are replaced by spaces. Depth,
        elevation, original collection date and original geographic
        location are only included when they are non-empty; the GAL,
        voucher, specimen, GAL sample and culture/strain identifiers
        only when they are not `None`.
        """
        source = data_object.attributes
        tidy = self.__replace_underscores
        lifestage = source.get('LIFESTAGE')

        attributes = {
            'ENA-CHECKLIST': self.__config.ena_checklist_id,
            'organism part': tidy(source.get('ORGANISM_PART')),
            'lifestage': (
                'spore-bearing structure'
                if lifestage == 'SPORE_BEARING_STRUCTURE'
                else tidy(lifestage)
            ),
            'project name': self.__config.project_name,
            'collected by': tidy(source.get('COLLECTED_BY')),
            # The date and coordinates may be missing or numeric, so they
            # are lower-cased only when they are actually strings
            'collection date':
                self.__lower(tidy(source.get('DATE_OF_COLLECTION'))),
            'geographic location (country and/or sea)':
                self.__collection_country(data_object).replace('_', ' '),
            'geographic location (latitude)':
                self.__lower(tidy(source.get('DECIMAL_LATITUDE'))),
            'geographic location (latitude) units': 'DD',
            'geographic location (longitude)':
                self.__lower(tidy(source.get('DECIMAL_LONGITUDE'))),
            'geographic location (longitude) units': 'DD',
            'geographic location (region and locality)':
                self.__collection_region(data_object).replace('_', ' '),
            'identified_by': tidy(source.get('IDENTIFIED_BY')),
            'habitat': tidy(source.get('HABITAT')),
            'identifier_affiliation': tidy(source.get('IDENTIFIER_AFFILIATION')),
            'sex': tidy(source.get('SEX')),
            'relationship': tidy(source.get('RELATIONSHIP')),
            'SYMBIONT': 'Y' if source.get('SYMBIONT') == 'SYMBIONT' else 'N',
            'collecting institution': tidy(source.get('COLLECTOR_AFFILIATION')),
        }

        # Measurements are only sent, together with their unit, when given
        depth = source.get('DEPTH')
        if self.__sanitise(depth) != '':
            attributes['geographic location (depth)'] = depth
            attributes['geographic location (depth) units'] = 'm'

        elevation = source.get('ELEVATION')
        if self.__sanitise(elevation) != '':
            attributes['geographic location (elevation)'] = elevation
            attributes['geographic location (elevation) units'] = 'm'

        original_date = source.get('ORIGINAL_COLLECTION_DATE')
        if self.__sanitise(original_date) != '':
            attributes['original collection date'] = original_date

        original_location = source.get('ORIGINAL_GEOGRAPHIC_LOCATION')
        if self.__sanitise(original_location) != '':
            attributes['original geographic location'] = tidy(original_location)

        # Identifiers are copied unchanged whenever they are not None
        for source_key, ena_key in (
            ('GAL', 'GAL'),
            ('VOUCHER_ID', 'specimen_voucher'),
            ('SPECIMEN_ID', 'specimen_id'),
            ('GAL_SAMPLE_ID', 'GAL_sample_id'),
            ('CULTURE_OR_STRAIN_ID', 'culture_or_strain_id'),
        ):
            identifier = source.get(source_key)
            if identifier is not None:
                attributes[ena_key] = identifier

        yield self._data_object_factory(
            'sample',
            data_object.id,
            attributes=attributes,
        )

    def __location_parts(self, data_object: DataObject) -> list:
        """
        Splits COLLECTION_LOCATION on `|` (ignoring the whitespace
        around it). A missing location is treated as an empty string
        instead of being passed to `re.split`, which would raise.
        """
        location = self.__sanitise(
            data_object.attributes.get('COLLECTION_LOCATION'))
        return re.split(r'\s*\|\s*', location)

    def __collection_country(self, data_object: DataObject):
        """Returns the first `|`-separated part of COLLECTION_LOCATION."""
        return self.__location_parts(data_object)[0]

    def __collection_region(self, data_object: DataObject):
        """
        Returns every part of COLLECTION_LOCATION after the first,
        re-joined with ' | '.
        """
        return ' | '.join(self.__location_parts(data_object)[1:])

    def __replace_underscores(self, value):
        """
        Replaces underscores with spaces in string values. Any other
        value (including `None`) is returned unchanged.
        """
        if not isinstance(value, str):
            return value
        return value.replace('_', ' ')

    def __lower(self, value):
        """Lower-cases string values; any other value is returned unchanged."""
        return value.lower() if isinstance(value, str) else value

    def __sanitise(self, value, default_value=''):
        """Returns `default_value` in place of `None`, else `value` itself."""
        if value is None:
            return default_value
        return value
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: 2025 Genome Research Ltd.
|
|
2
|
+
# SPDX-License-Identifier: MIT
|
|
3
|
+
|
|
4
|
+
from dataclasses import dataclass
|
|
5
|
+
from typing import Iterable
|
|
6
|
+
|
|
7
|
+
from tol.core import DataObject, DataObjectToDataObjectOrUpdateConverter
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class IncomingSampleToIncomingSampleWithListsConverter(DataObjectToDataObjectOrUpdateConverter):
    """
    Copies a `DataObject`, replacing the configured separator-delimited
    string fields with lists of stripped strings.
    """

    @dataclass(slots=True, frozen=True, kw_only=True)
    class Config:
        # Names of the fields to split. This must be a collection of
        # names: a plain `str` would be matched by substring and
        # iterated character by character.
        fields_to_convert: list[str]
        # Delimiter between the items of a field's value
        separator: str = '|'

    __slots__ = ['__config']
    __config: Config

    def __init__(self, data_object_factory, config: Config) -> None:
        super().__init__(data_object_factory)
        self.__config = config
        self._data_object_factory = data_object_factory

    def convert(self, data_object: DataObject) -> Iterable[DataObject]:
        """
        Yields one `DataObject` of the same type and id as the given
        `data_object`. Attributes not named in `fields_to_convert` are
        copied as they are; every field that is named there is set to
        the list obtained by splitting its value on the separator.
        """
        fields_to_convert = self.__config.fields_to_convert

        untouched = {
            name: value
            for name, value in data_object.attributes.items()
            if name not in fields_to_convert
        }
        as_lists = {
            name: self.__convert_to_list(data_object.get_field_by_name(name))
            for name in fields_to_convert
        }

        yield self._data_object_factory(
            data_object.type,
            data_object.id,
            # Converted fields win on the (unexpected) event of a clash
            attributes=untouched | as_lists,
        )

    def __convert_to_list(self, value: str | None) -> list[str]:
        """
        Splits `value` on the configured separator and strips each
        item. A `None` or empty value gives an empty list.
        """
        if not value:
            return []
        return [item.strip() for item in value.split(self.__config.separator)]
|
tol/s3/__init__.py
CHANGED
tol/sql/model.py
CHANGED
|
@@ -255,7 +255,7 @@ def model_base() -> Type[DefaultModel]:
|
|
|
255
255
|
@property
|
|
256
256
|
def instance_to_many_relations(self) -> dict[str, Iterable[Model]]:
|
|
257
257
|
config = self.get_to_many_relationship_config()
|
|
258
|
-
return self.__get_attributes_map(config
|
|
258
|
+
return self.__get_attributes_map(config)
|
|
259
259
|
|
|
260
260
|
@property
|
|
261
261
|
def instance_id(self) -> Optional[str]:
|
tol/sql/pipeline_step/factory.py
CHANGED
|
@@ -140,7 +140,7 @@ def create_pipeline_step_models(
|
|
|
140
140
|
autoincrement=True
|
|
141
141
|
)
|
|
142
142
|
|
|
143
|
-
|
|
143
|
+
s3_bucket: Mapped[str] = mapped_column(nullable=False,)
|
|
144
144
|
s3_filename: Mapped[str] = mapped_column(nullable=False)
|
|
145
145
|
spreadsheet_config: Mapped[str] = mapped_column(nullable=True)
|
|
146
146
|
|
tol/sql/sql_converter.py
CHANGED
|
@@ -83,7 +83,13 @@ class DefaultModelConverter(ModelConverter):
|
|
|
83
83
|
return to_ones if to_ones else None
|
|
84
84
|
|
|
85
85
|
def __convert_to_many_requested(self, model, tree):
|
|
86
|
-
|
|
86
|
+
to_manys = {}
|
|
87
|
+
for rel_name in model.get_to_many_relationship_config():
|
|
88
|
+
if sub_tree := tree.get_sub_tree(rel_name):
|
|
89
|
+
to_manys[rel_name] = [
|
|
90
|
+
self.__convert_requested(x, sub_tree) for x in getattr(model, rel_name)
|
|
91
|
+
]
|
|
92
|
+
return to_manys if to_manys else None
|
|
87
93
|
|
|
88
94
|
|
|
89
95
|
class DataObjectConverter(Converter[DataObject, Model], ABC):
|
tol/validators/__init__.py
CHANGED
|
@@ -2,8 +2,19 @@
|
|
|
2
2
|
#
|
|
3
3
|
# SPDX-License-Identifier: MIT
|
|
4
4
|
|
|
5
|
-
from .allowed_values import AllowedValues, AllowedValuesValidator # noqa
|
|
6
5
|
from .allowed_keys import AllowedKeysValidator # noqa
|
|
6
|
+
from .allowed_values import AllowedValuesValidator # noqa
|
|
7
|
+
from .allowed_values_from_datasource import AllowedValuesFromDataSourceValidator # noqa
|
|
8
|
+
from .assert_on_condition import AssertOnConditionValidator # noqa
|
|
9
|
+
from .ena_checklist import EnaChecklistValidator # noqa
|
|
10
|
+
from .mutually_exclusive import MutuallyExclusiveValidator # noqa
|
|
11
|
+
from .ena_submittable import EnaSubmittableValidator # noqa
|
|
7
12
|
from .regex import Regex, RegexValidator # noqa
|
|
8
13
|
from .regex_by_value import RegexByValueValidator # noqa
|
|
14
|
+
from .specimens_have_same_taxon import SpecimensHaveSameTaxonValidator # noqa
|
|
15
|
+
from .sts_fields import StsFieldsValidator # noqa
|
|
16
|
+
from .tolid import TolidValidator # noqa
|
|
9
17
|
from .unique_values import UniqueValuesValidator # noqa
|
|
18
|
+
from .unique_whole_organisms import UniqueWholeOrganismsValidator # noqa
|
|
19
|
+
from .interfaces import Condition # noqa
|
|
20
|
+
from .min_one_valid_value import MinOneValidValueValidator # noqa
|
tol/validators/allowed_keys.py
CHANGED
|
@@ -2,6 +2,9 @@
|
|
|
2
2
|
#
|
|
3
3
|
# SPDX-License-Identifier: MIT
|
|
4
4
|
|
|
5
|
+
from dataclasses import dataclass
|
|
6
|
+
from typing import List
|
|
7
|
+
|
|
5
8
|
from tol.core import DataObject
|
|
6
9
|
from tol.core.validate import Validator
|
|
7
10
|
|
|
@@ -12,20 +15,22 @@ class AllowedKeysValidator(Validator):
|
|
|
12
15
|
ensuring that they only have attributes of the given
|
|
13
16
|
allowed keys.
|
|
14
17
|
"""
|
|
18
|
+
@dataclass(slots=True, frozen=True, kw_only=True)
|
|
19
|
+
class Config:
|
|
20
|
+
allowed_keys: List[str]
|
|
21
|
+
is_error: bool = True
|
|
22
|
+
detail: str = 'Key is not allowed'
|
|
23
|
+
|
|
24
|
+
__slots__ = ['__config']
|
|
25
|
+
__config: Config
|
|
15
26
|
|
|
16
27
|
def __init__(
|
|
17
28
|
self,
|
|
18
|
-
|
|
19
|
-
*,
|
|
20
|
-
is_error: bool = True,
|
|
21
|
-
detail: str = 'Key is not allowed'
|
|
29
|
+
config: Config
|
|
22
30
|
) -> None:
|
|
23
31
|
|
|
24
32
|
super().__init__()
|
|
25
|
-
|
|
26
|
-
self.__keys = allowed_keys
|
|
27
|
-
self.__is_error = is_error
|
|
28
|
-
self.__detail = detail
|
|
33
|
+
self.__config = config
|
|
29
34
|
|
|
30
35
|
def _validate_data_object(
|
|
31
36
|
self,
|
|
@@ -33,7 +38,7 @@ class AllowedKeysValidator(Validator):
|
|
|
33
38
|
) -> None:
|
|
34
39
|
|
|
35
40
|
for key in obj.attributes:
|
|
36
|
-
if key not in self.
|
|
41
|
+
if key not in self.__config.allowed_keys:
|
|
37
42
|
self.__add_result(
|
|
38
43
|
obj,
|
|
39
44
|
key,
|
|
@@ -45,15 +50,15 @@ class AllowedKeysValidator(Validator):
|
|
|
45
50
|
key: str,
|
|
46
51
|
) -> None:
|
|
47
52
|
|
|
48
|
-
if self.
|
|
53
|
+
if self.__config.is_error:
|
|
49
54
|
self.add_error(
|
|
50
55
|
object_id=obj.id,
|
|
51
|
-
detail=self.
|
|
56
|
+
detail=self.__config.detail,
|
|
52
57
|
field=key,
|
|
53
58
|
)
|
|
54
59
|
else:
|
|
55
60
|
self.add_warning(
|
|
56
61
|
object_id=obj.id,
|
|
57
|
-
detail=self.
|
|
62
|
+
detail=self.__config.detail,
|
|
58
63
|
field=key,
|
|
59
64
|
)
|
tol/validators/allowed_values.py
CHANGED
|
@@ -3,103 +3,61 @@
|
|
|
3
3
|
# SPDX-License-Identifier: MIT
|
|
4
4
|
|
|
5
5
|
from dataclasses import dataclass
|
|
6
|
-
from typing import Any
|
|
6
|
+
from typing import Any, List
|
|
7
7
|
|
|
8
8
|
from tol.core import DataObject
|
|
9
9
|
from tol.core.validate import Validator
|
|
10
10
|
|
|
11
11
|
|
|
12
|
-
@dataclass(frozen=True, kw_only=True)
|
|
13
|
-
class AllowedValues:
|
|
14
|
-
key: str
|
|
15
|
-
values: list[Any]
|
|
16
|
-
|
|
17
|
-
is_error: bool = True
|
|
18
|
-
detail: str = 'Value is not allowed for given key'
|
|
19
|
-
|
|
20
|
-
def is_allowed(self, __v: Any) -> bool:
|
|
21
|
-
return __v in self.values
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
AllowedValuesDict = dict[
|
|
25
|
-
str,
|
|
26
|
-
str | bool | list[Any],
|
|
27
|
-
]
|
|
28
|
-
"""Can also specify `AllowedValues` as a `dict`"""
|
|
29
|
-
|
|
30
|
-
|
|
31
12
|
class AllowedValuesValidator(Validator):
|
|
32
13
|
"""
|
|
33
14
|
Validates an incoming stream of `DataObject` instances
|
|
34
15
|
according to the specified allowed values for a given
|
|
35
16
|
key.
|
|
36
17
|
"""
|
|
18
|
+
@dataclass(slots=True, frozen=True, kw_only=True)
|
|
19
|
+
class Config:
|
|
20
|
+
field: str
|
|
21
|
+
allowed_values: List[Any]
|
|
22
|
+
is_error: bool = True
|
|
23
|
+
detail: str = 'Value is not allowed for the given key'
|
|
24
|
+
|
|
25
|
+
__slots__ = ['__config']
|
|
26
|
+
__config: Config
|
|
37
27
|
|
|
38
28
|
def __init__(
|
|
39
29
|
self,
|
|
40
|
-
config:
|
|
30
|
+
config: Config
|
|
41
31
|
) -> None:
|
|
42
32
|
|
|
43
33
|
super().__init__()
|
|
44
34
|
|
|
45
|
-
self.__config =
|
|
35
|
+
self.__config = config
|
|
46
36
|
|
|
47
37
|
def _validate_data_object(
|
|
48
38
|
self,
|
|
49
39
|
obj: DataObject
|
|
50
40
|
) -> None:
|
|
51
41
|
|
|
52
|
-
for
|
|
53
|
-
self.
|
|
54
|
-
|
|
55
|
-
def __get_config(
|
|
56
|
-
self,
|
|
57
|
-
config: list[AllowedValues | AllowedValuesDict],
|
|
58
|
-
) -> list[AllowedValues]:
|
|
59
|
-
|
|
60
|
-
return [
|
|
61
|
-
c if isinstance(c, AllowedValues) else AllowedValues(**c)
|
|
62
|
-
for c in config
|
|
63
|
-
]
|
|
64
|
-
|
|
65
|
-
def __validate_attribute(
|
|
66
|
-
self,
|
|
67
|
-
obj: DataObject,
|
|
68
|
-
key: str,
|
|
69
|
-
value: Any,
|
|
70
|
-
) -> None:
|
|
71
|
-
|
|
72
|
-
config = self.__filter_config(key)
|
|
73
|
-
|
|
74
|
-
for c in config:
|
|
75
|
-
if not c.is_allowed(value):
|
|
76
|
-
self.__add_result(obj, c)
|
|
77
|
-
|
|
78
|
-
def __filter_config(
|
|
79
|
-
self,
|
|
80
|
-
key: str,
|
|
81
|
-
) -> list[AllowedValues]:
|
|
82
|
-
|
|
83
|
-
return [
|
|
84
|
-
a for a in self.__config
|
|
85
|
-
if a.key == key
|
|
86
|
-
]
|
|
42
|
+
for key, value in obj.attributes.items():
|
|
43
|
+
if key == self.__config.field and value not in self.__config.allowed_values:
|
|
44
|
+
self.__add_result(obj, key)
|
|
87
45
|
|
|
88
46
|
def __add_result(
|
|
89
47
|
self,
|
|
90
48
|
obj: DataObject,
|
|
91
|
-
|
|
49
|
+
key: str,
|
|
92
50
|
) -> None:
|
|
93
51
|
|
|
94
|
-
if
|
|
52
|
+
if self.__config.is_error:
|
|
95
53
|
self.add_error(
|
|
96
54
|
object_id=obj.id,
|
|
97
|
-
detail=
|
|
98
|
-
field=
|
|
55
|
+
detail=self.__config.detail,
|
|
56
|
+
field=key
|
|
99
57
|
)
|
|
100
58
|
else:
|
|
101
59
|
self.add_warning(
|
|
102
60
|
object_id=obj.id,
|
|
103
|
-
detail=
|
|
104
|
-
field=
|
|
61
|
+
detail=self.__config.detail,
|
|
62
|
+
field=key,
|
|
105
63
|
)
|
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: 2025 Genome Research Ltd.
|
|
2
|
+
#
|
|
3
|
+
# SPDX-License-Identifier: MIT
|
|
4
|
+
|
|
5
|
+
from dataclasses import dataclass
|
|
6
|
+
from typing import List
|
|
7
|
+
|
|
8
|
+
from tol.core import DataObject, DataSourceUtils
|
|
9
|
+
from tol.core.validate import Validator
|
|
10
|
+
from tol.sources.portaldb import portaldb
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class AllowedValuesFromDataSourceValidator(Validator):
    """
    Validates that a configured field of each incoming `DataObject`
    only holds values from a list of allowed values. That list is
    read from a data source, unless it is supplied explicitly.
    """
    @dataclass(slots=True, frozen=True, kw_only=True)
    class Config:
        # Id of the `data_source_instance` looked up in portaldb
        datasource_instance_id: int
        # Object type listed from that data source
        datasource_object_type: str
        # Field of each listed object that holds an allowed value
        datasource_field_name: str
        # Field of the validated `DataObject` that is checked
        field_name: str

    __slots__ = ['__config', '__cached_list']
    __config: Config
    __cached_list: List[str | int | float]

    def __init__(
        self,
        config: Config,
        allowed_values: List[str | int | float] | None = None  # For testing
    ) -> None:

        super().__init__()

        self.__config = config
        # Compare against None rather than relying on truthiness, so that
        # an explicitly supplied empty list does not trigger a datasource
        # lookup
        self.__cached_list = (
            allowed_values
            if allowed_values is not None
            else self.__initialize_list_from_datasource()
        )

    def __initialize_list_from_datasource(self) -> List[str | int | float]:
        """
        Loads the allowed values: one per object of the configured
        type in the configured data source instance.
        """
        dsi = portaldb().get_one('data_source_instance', self.__config.datasource_instance_id)
        ds = DataSourceUtils.get_data_source_by_data_source_instance(dsi)
        return [
            obj.get_field_by_name(self.__config.datasource_field_name)
            for obj in ds.get_list(
                object_type=self.__config.datasource_object_type
            )
        ]

    def _validate_data_object(
        self,
        obj: DataObject
    ) -> None:
        """
        Adds an error to `obj` if its configured field holds a value
        (or, for a list, any item) that is not allowed. A falsy field
        value is not checked.
        """
        field_name = self.__config.field_name
        field_value = obj.get_field_by_name(field_name)
        if not field_value:
            return
        if not isinstance(field_value, list):
            field_value = [field_value]
        if all(value in self.__cached_list for value in field_value):
            return

        self.add_error(
            object_id=obj.id,
            detail=self.__error_detail(field_value),
            field=field_name,
        )

    def __error_detail(self, found_values: list) -> str:
        """
        Builds the error message, listing the allowed values as
        `a`, `a or b`, or `a, b or c`.
        """
        allowed = self.__cached_list
        field_name = self.__config.field_name

        if not allowed:
            # Nothing to enumerate (indexing the first item would raise)
            return (
                f'The field {field_name} has no allowed values '
                f'(found value {found_values})'
            )

        if len(allowed) == 1:
            expectation = f'must be {allowed[0]}'
        else:
            leading = ', '.join(str(value) for value in allowed[:-1])
            expectation = f'must be one of {leading} or {allowed[-1]}'

        return (
            f'The value of the field {field_name} '
            f'{expectation} '
            f'(found value {found_values})'
        )
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: 2025 Genome Research Ltd.
|
|
2
|
+
#
|
|
3
|
+
# SPDX-License-Identifier: MIT
|
|
4
|
+
|
|
5
|
+
from dataclasses import dataclass
|
|
6
|
+
from typing import List
|
|
7
|
+
|
|
8
|
+
from tol.core import DataObject, Validator
|
|
9
|
+
|
|
10
|
+
from .interfaces import Condition, ConditionDict, ConditionEvaluator
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class AssertOnConditionValidator(Validator, ConditionEvaluator):
    """
    Validates an incoming stream of `DataObject` instances,
    using a condition to check a specific attribute. Only when
    that condition passes are the assertions evaluated, and each
    assertion that fails is reported as an error or a warning.
    """
    @dataclass(slots=True, frozen=True, kw_only=True)
    class Config:
        # Gate: the assertions are only evaluated when this passes
        condition: ConditionDict
        # Conditions that are each expected to hold once the gate passes
        assertions: List[ConditionDict]

    __slots__ = ['__config']
    __config: Config

    def __init__(self, config: Config) -> None:
        super().__init__()

        self.__config = config

    def _validate_data_object(self, obj: DataObject) -> None:
        """
        Evaluates the configured condition against `obj` and, if it
        passes, performs every configured assertion in order.
        """
        gate = Condition.from_dict(self.__config.condition)
        if not self._does_condition_pass(gate, obj):
            # Condition not met, so none of the assertions apply
            return

        for assertion_dict in self.__config.assertions:
            self.__perform_assertion(obj, Condition.from_dict(assertion_dict))

    def __perform_assertion(self, obj: DataObject, assertion: Condition) -> None:
        """
        Reports `assertion` against `obj` if it does not hold: as an
        error when `assertion.is_error` is set, otherwise as a warning.
        """
        passed, found_value = self._evaluate_condition(assertion, obj)
        if passed:
            return

        # Errors and warnings carry exactly the same payload
        report = self.add_error if assertion.is_error else self.add_warning
        report(
            object_id=obj.id,
            detail=f'Expected {assertion} (found value {found_value})',
            field=assertion.field,
        )
|
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: 2025 Genome Research Ltd.
|
|
2
|
+
#
|
|
3
|
+
# SPDX-License-Identifier: MIT
|
|
4
|
+
|
|
5
|
+
import re
|
|
6
|
+
from dataclasses import dataclass
|
|
7
|
+
|
|
8
|
+
from tol.core import DataSource
|
|
9
|
+
from tol.core.data_object import DataObject
|
|
10
|
+
from tol.core.validate import Validator
|
|
11
|
+
from tol.sources.ena import ena
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class EnaChecklistValidator(Validator):
    """
    Validates each incoming sample `DataObject` against the rules
    of the configured ENA checklist.
    """

    @dataclass(slots=True, frozen=True, kw_only=True)
    class Config:
        # Id of the `checklist` object fetched from the datasource
        ena_checklist_id: str

    __slots__ = ['__config']
    __config: Config

    def __init__(self, config: Config, datasource: DataSource | None = None) -> None:
        """
        `datasource` is where the checklist is fetched from. When it
        is omitted, a new `ena()` datasource is created.
        """
        super().__init__()
        self.__config = config
        # Created here rather than as the parameter's default value: a
        # default is evaluated once, when the function is defined, which
        # would build the datasource at import time and share it between
        # all instances
        self._datasource = ena() if datasource is None else datasource

    def _validate_data_object(self, obj: DataObject) -> None:
        """
        Adds an error to `obj` for every checklist rule it breaks:
        a mandatory field that is missing, `None` or empty, a
        restricted-text field that does not match its pattern, or a
        text-choice field whose value is not an allowed one
        (compared case-insensitively).
        """
        ena_checklist = self._datasource.get_one('checklist', self.__config.ena_checklist_id)
        validations = ena_checklist.attributes['checklist']
        attributes = obj.attributes

        for key in validations:
            # NOTE(review): each rule set is used with `in`, indexed by
            # 'field' and iterated over; its exact shape comes from the ENA
            # datasource and is not visible here - confirm
            rules = validations[key]
            field_name = rules['field'] if 'field' in rules else key

            if 'mandatory' in rules:
                if attributes.get(key) is None:
                    # Covers both a missing key and an explicit None
                    self.__add_field_error(obj, 'Must be given', field_name)
                    continue
                if attributes[key] == '':
                    self.__add_field_error(obj, 'Must not be empty', field_name)

            value = attributes.get(key)
            if value is None:
                # An optional field that is absent (or None) has no content
                # to check; pattern matching or lower-casing it would raise
                continue

            if 'restricted text' in rules:
                for condition in rules:
                    if isinstance(condition, str) and '(' in condition:
                        # A string containing '(' is treated as the pattern
                        if not re.compile(condition).search(value):
                            self.__add_field_error(
                                obj, 'Must match specific pattern', field_name
                            )

            # Check against allowed values
            if 'text choice' in rules:
                for condition in rules:
                    if isinstance(condition, list):
                        allowed_values = [x.lower() for x in condition]
                        if value.lower() not in allowed_values:
                            self.__add_field_error(
                                obj, 'Must be in allowed values', field_name
                            )

    def __add_field_error(self, obj: DataObject, detail: str, field_name) -> None:
        """Adds an error with `detail` to `obj` for the single named field."""
        self.add_error(object_id=obj.id, detail=detail, field=[field_name])
|