tol-sdk 1.7.5b0__py3-none-any.whl → 1.7.5b2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tol/ena/client.py +3 -2
- tol/flows/converters/__init__.py +2 -0
- tol/flows/converters/incoming_sample_to_ena_sample_converter.py +130 -0
- tol/flows/converters/incoming_sample_to_incoming_sample_with_lists_converter.py +46 -0
- tol/validators/__init__.py +1 -1
- tol/validators/allowed_values_from_datasource.py +7 -9
- tol/validators/assert_on_condition.py +5 -5
- tol/validators/ena_checklist.py +73 -0
- tol/validators/interfaces/__init__.py +1 -1
- tol/validators/interfaces/condition_evaluator.py +42 -1
- tol/validators/mutually_exclusive.py +11 -7
- tol/validators/regex.py +23 -2
- tol/validators/regex_by_value.py +22 -3
- tol/validators/tolid.py +1 -1
- {tol_sdk-1.7.5b0.dist-info → tol_sdk-1.7.5b2.dist-info}/METADATA +1 -1
- {tol_sdk-1.7.5b0.dist-info → tol_sdk-1.7.5b2.dist-info}/RECORD +20 -17
- {tol_sdk-1.7.5b0.dist-info → tol_sdk-1.7.5b2.dist-info}/WHEEL +0 -0
- {tol_sdk-1.7.5b0.dist-info → tol_sdk-1.7.5b2.dist-info}/entry_points.txt +0 -0
- {tol_sdk-1.7.5b0.dist-info → tol_sdk-1.7.5b2.dist-info}/licenses/LICENSE +0 -0
- {tol_sdk-1.7.5b0.dist-info → tol_sdk-1.7.5b2.dist-info}/top_level.txt +0 -0
tol/ena/client.py
CHANGED
|
@@ -116,12 +116,13 @@ class EnaApiClient(HttpClient):
|
|
|
116
116
|
Returns the URL and parameters for a detail query.
|
|
117
117
|
"""
|
|
118
118
|
if object_type == 'checklist':
|
|
119
|
-
ids = ','.join(object_ids)
|
|
119
|
+
ids = ','.join(str(id_) for id_ in object_ids)
|
|
120
120
|
url = f'{self.__ena_url}/ena/browser/api/xml/{ids}'
|
|
121
121
|
params = {}
|
|
122
122
|
return url, params
|
|
123
123
|
if object_type == 'submittable_taxon':
|
|
124
|
-
|
|
124
|
+
# This is actually called separately for each taxon id
|
|
125
|
+
ids = ','.join(str(id_) for id_ in object_ids)
|
|
125
126
|
url = f'{self.__ena_url}/ena/taxonomy/rest/tax-id/{ids}'
|
|
126
127
|
params = {}
|
|
127
128
|
return url, params
|
tol/flows/converters/__init__.py
CHANGED
|
@@ -34,6 +34,8 @@ from .gap_assembly_to_elastic_assembly_analysis_converter import GapAssemblyToEl
|
|
|
34
34
|
from .genome_notes_genome_note_to_elastic_genome_note_converter import GenomeNotesGenomeNoteToElasticGenomeNoteConverter # noqa F401
|
|
35
35
|
from .goat_taxon_to_elastic_species_converter import GoatTaxonToElasticSpeciesConverter # noqa F401
|
|
36
36
|
from .grit_issue_to_elastic_curation_converter import GritIssueToElasticCurationConverter # noqa F401
|
|
37
|
+
from .incoming_sample_to_ena_sample_converter import IncomingSampleToEnaSampleConverter # noqa
|
|
38
|
+
from .incoming_sample_to_incoming_sample_with_lists_converter import IncomingSampleToIncomingSampleWithListsConverter # noqa F401
|
|
37
39
|
from .informatics_tolid_to_elastic_tolid_converter import InformaticsTolidToElasticTolidConverter # noqa F401
|
|
38
40
|
from .labwhere_location_to_elastic_sample_update_converter import LabwhereLocationToElasticSampleUpdateConverter # noqa F401
|
|
39
41
|
from .labwhere_location_to_sts_tray_converter import LabwhereLocationToStsTrayConverter # noqa F401
|
|
@@ -0,0 +1,130 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: 2025 Genome Research Ltd.
|
|
2
|
+
# SPDX-License-Identifier: MIT
|
|
3
|
+
|
|
4
|
+
import re
|
|
5
|
+
from dataclasses import dataclass
|
|
6
|
+
from typing import Iterable
|
|
7
|
+
|
|
8
|
+
from tol.core import DataObject, DataObjectToDataObjectOrUpdateConverter
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class IncomingSampleToEnaSampleConverter(DataObjectToDataObjectOrUpdateConverter):
    """
    Converts an incoming sample DataObject into a 'sample' DataObject
    whose attribute names follow the ENA checklist field names.
    """

    @dataclass(slots=True, frozen=True, kw_only=True)
    class Config:
        # Copied verbatim into the 'ENA-CHECKLIST' attribute
        ena_checklist_id: str
        # Copied verbatim into the 'project name' attribute
        project_name: str

    __slots__ = ['__config']
    __config: Config

    def __init__(self, data_object_factory, config: Config) -> None:
        super().__init__(data_object_factory)
        self.__config = config
        self._data_object_factory = data_object_factory

    def convert(self, data_object: DataObject) -> Iterable[DataObject]:
        """
        Yields exactly one 'sample' DataObject, with the same id as
        the incoming object, holding the ENA-formatted attributes.

        Missing (None) and non-string source values are passed through
        unchanged rather than raising.
        """
        source = data_object.attributes

        def text(name):
            # Source value with underscores replaced by spaces
            return self.__replace_underscores(source.get(name))

        attributes = {
            'ENA-CHECKLIST': self.__config.ena_checklist_id,
            'organism part': text('ORGANISM_PART'),
            'lifestage': (
                # This one value is hyphenated, not space-separated
                'spore-bearing structure'
                if source.get('LIFESTAGE') == 'SPORE_BEARING_STRUCTURE'
                else text('LIFESTAGE')
            ),
            'project name': self.__config.project_name,
            'collected by': text('COLLECTED_BY'),
            'collection date': self.__lower(text('DATE_OF_COLLECTION')),
            'geographic location (country and/or sea)':
                self.__collection_country(data_object).replace('_', ' '),
            'geographic location (latitude)':
                self.__lower(text('DECIMAL_LATITUDE')),
            'geographic location (latitude) units': 'DD',
            'geographic location (longitude)':
                self.__lower(text('DECIMAL_LONGITUDE')),
            'geographic location (longitude) units': 'DD',
            'geographic location (region and locality)':
                self.__collection_region(data_object).replace('_', ' '),
            'identified_by': text('IDENTIFIED_BY'),
            'habitat': text('HABITAT'),
            'identifier_affiliation': text('IDENTIFIER_AFFILIATION'),
            'sex': text('SEX'),
            'relationship': text('RELATIONSHIP'),
            'SYMBIONT': 'Y' if source.get('SYMBIONT') == 'SYMBIONT' else 'N',
            'collecting institution': text('COLLECTOR_AFFILIATION'),
        }

        # Depth and elevation are only emitted (with their unit) when given
        if self.__sanitise(source.get('DEPTH')) != '':
            attributes['geographic location (depth)'] = source.get('DEPTH')
            attributes['geographic location (depth) units'] = 'm'
        if self.__sanitise(source.get('ELEVATION')) != '':
            attributes['geographic location (elevation)'] = source.get('ELEVATION')
            attributes['geographic location (elevation) units'] = 'm'

        if self.__sanitise(source.get('ORIGINAL_COLLECTION_DATE')) != '':
            attributes['original collection date'] = source.get('ORIGINAL_COLLECTION_DATE')
        if self.__sanitise(source.get('ORIGINAL_GEOGRAPHIC_LOCATION')) != '':
            attributes['original geographic location'] = text('ORIGINAL_GEOGRAPHIC_LOCATION')

        # Optional values copied across unchanged when they are not None
        for source_name, ena_name in (
            ('GAL', 'GAL'),
            ('VOUCHER_ID', 'specimen_voucher'),
            ('SPECIMEN_ID', 'specimen_id'),
            ('GAL_SAMPLE_ID', 'GAL_sample_id'),
            ('CULTURE_OR_STRAIN_ID', 'culture_or_strain_id'),
        ):
            value = source.get(source_name)
            if value is not None:
                attributes[ena_name] = value

        yield self._data_object_factory(
            'sample',
            data_object.id,
            attributes=attributes,
        )

    def __location_parts(self, data_object: DataObject):
        # COLLECTION_LOCATION is a '|'-separated string; a missing value
        # is treated as the empty string so that splitting cannot raise
        location = self.__sanitise(
            data_object.attributes.get('COLLECTION_LOCATION'))
        return re.split(r'\s*\|\s*', location)

    def __collection_country(self, data_object: DataObject):
        # First '|'-separated part of COLLECTION_LOCATION
        return self.__location_parts(data_object)[0]

    def __collection_region(self, data_object: DataObject):
        # Everything after the first part, re-joined with ' | '
        return ' | '.join(self.__location_parts(data_object)[1:])

    def __replace_underscores(self, value):
        # Non-strings (including None) are returned unchanged
        if not isinstance(value, str):
            return value
        return value.replace('_', ' ')

    def __lower(self, value):
        # Lower-case strings only, so None and numbers do not raise
        if isinstance(value, str):
            return value.lower()
        return value

    def __sanitise(self, value, default_value=''):
        # Substitute default_value for None
        if value is None:
            return default_value
        return value
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: 2025 Genome Research Ltd.
|
|
2
|
+
# SPDX-License-Identifier: MIT
|
|
3
|
+
|
|
4
|
+
from dataclasses import dataclass
|
|
5
|
+
from typing import Iterable
|
|
6
|
+
|
|
7
|
+
from tol.core import DataObject, DataObjectToDataObjectOrUpdateConverter
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class IncomingSampleToIncomingSampleWithListsConverter(DataObjectToDataObjectOrUpdateConverter):
    """
    Copies a DataObject, replacing the configured separator-delimited
    string fields with lists of their stripped items.
    """

    @dataclass(slots=True, frozen=True, kw_only=True)
    class Config:
        # Names of the fields whose value is split into a list
        fields_to_convert: list[str]
        # Delimiter between items in the incoming string value
        separator: str = '|'

    __slots__ = ['__config']
    __config: Config

    def __init__(self, data_object_factory, config: Config) -> None:
        super().__init__(data_object_factory)
        self.__config = config
        self._data_object_factory = data_object_factory

    def convert(self, data_object: DataObject) -> Iterable[DataObject]:
        """
        Yields one DataObject of the same type and id. Attributes not
        named in fields_to_convert are copied as they are; each named
        field is set to the list form of its value (an empty list
        when the value is missing or empty).
        """
        fields = self.__config.fields_to_convert
        if isinstance(fields, str):
            # A bare string would otherwise be treated as a sequence of
            # single-character field names
            fields = [fields]

        untouched = {
            k: v for k, v in data_object.attributes.items()
            if k not in fields
        }
        converted = {
            field: self.__convert_to_list(data_object.get_field_by_name(field))
            for field in fields
        }
        yield self._data_object_factory(
            data_object.type,
            data_object.id,
            attributes=untouched | converted,
        )

    def __convert_to_list(self, value: str | list[str] | None) -> list[str]:
        # None and '' both become an empty list
        if not value:
            return []
        # Already a list (e.g. converted earlier): return a copy
        if isinstance(value, list):
            return list(value)
        return [item.strip() for item in value.split(self.__config.separator)]
|
tol/validators/__init__.py
CHANGED
|
@@ -6,6 +6,7 @@ from .allowed_keys import AllowedKeysValidator # noqa
|
|
|
6
6
|
from .allowed_values import AllowedValuesValidator # noqa
|
|
7
7
|
from .allowed_values_from_datasource import AllowedValuesFromDataSourceValidator # noqa
|
|
8
8
|
from .assert_on_condition import AssertOnConditionValidator # noqa
|
|
9
|
+
from .ena_checklist import EnaChecklistValidator # noqa
|
|
9
10
|
from .mutually_exclusive import MutuallyExclusiveValidator # noqa
|
|
10
11
|
from .ena_submittable import EnaSubmittableValidator # noqa
|
|
11
12
|
from .regex import Regex, RegexValidator # noqa
|
|
@@ -15,6 +16,5 @@ from .sts_fields import StsFieldsValidator # noqa
|
|
|
15
16
|
from .tolid import TolidValidator # noqa
|
|
16
17
|
from .unique_values import UniqueValuesValidator # noqa
|
|
17
18
|
from .unique_whole_organisms import UniqueWholeOrganismsValidator # noqa
|
|
18
|
-
# Conditions are used where validators are defined, not just within validators
|
|
19
19
|
from .interfaces import Condition # noqa
|
|
20
20
|
from .min_one_valid_value import MinOneValidValueValidator # noqa
|
|
@@ -17,7 +17,6 @@ class AllowedValuesFromDataSourceValidator(Validator):
|
|
|
17
17
|
"""
|
|
18
18
|
@dataclass(slots=True, frozen=True, kw_only=True)
|
|
19
19
|
class Config:
|
|
20
|
-
allowed_values: List[str | int | float] | None = None
|
|
21
20
|
datasource_instance_id: int
|
|
22
21
|
datasource_object_type: str
|
|
23
22
|
datasource_field_name: str
|
|
@@ -30,19 +29,15 @@ class AllowedValuesFromDataSourceValidator(Validator):
|
|
|
30
29
|
def __init__(
|
|
31
30
|
self,
|
|
32
31
|
config: Config,
|
|
32
|
+
allowed_values: List[str | int | float] | None = None # For testing
|
|
33
33
|
) -> None:
|
|
34
34
|
|
|
35
35
|
super().__init__()
|
|
36
36
|
|
|
37
37
|
self.__config = config
|
|
38
|
-
self.__cached_list =
|
|
38
|
+
self.__cached_list = allowed_values \
|
|
39
39
|
or self.__initialize_list_from_datasource()
|
|
40
40
|
|
|
41
|
-
if self.__config.allowed_values is None:
|
|
42
|
-
self.__cached_list = self.__initialize_list_from_datasource()
|
|
43
|
-
else:
|
|
44
|
-
self.__cached_list = self.__config.allowed_values
|
|
45
|
-
|
|
46
41
|
def __initialize_list_from_datasource(self) -> List[str | int | float]:
|
|
47
42
|
dsi = portaldb().get_one('data_source_instance', self.__config.datasource_instance_id)
|
|
48
43
|
ds = DataSourceUtils.get_data_source_by_data_source_instance(dsi)
|
|
@@ -59,8 +54,11 @@ class AllowedValuesFromDataSourceValidator(Validator):
|
|
|
59
54
|
obj: DataObject
|
|
60
55
|
) -> None:
|
|
61
56
|
field_value = obj.get_field_by_name(self.__config.field_name)
|
|
62
|
-
if
|
|
63
|
-
|
|
57
|
+
if not field_value:
|
|
58
|
+
return
|
|
59
|
+
if not isinstance(field_value, list):
|
|
60
|
+
field_value = [field_value]
|
|
61
|
+
if any(value not in self.__cached_list for value in field_value):
|
|
64
62
|
multiple_cached_values = len(self.__cached_list) > 1
|
|
65
63
|
|
|
66
64
|
cached_list_str = ''
|
|
@@ -7,7 +7,7 @@ from typing import List
|
|
|
7
7
|
|
|
8
8
|
from tol.core import DataObject, Validator
|
|
9
9
|
|
|
10
|
-
from .interfaces import Condition, ConditionEvaluator
|
|
10
|
+
from .interfaces import Condition, ConditionDict, ConditionEvaluator
|
|
11
11
|
|
|
12
12
|
|
|
13
13
|
class AssertOnConditionValidator(Validator, ConditionEvaluator):
|
|
@@ -19,8 +19,8 @@ class AssertOnConditionValidator(Validator, ConditionEvaluator):
|
|
|
19
19
|
"""
|
|
20
20
|
@dataclass(slots=True, frozen=True, kw_only=True)
|
|
21
21
|
class Config:
|
|
22
|
-
condition:
|
|
23
|
-
assertions: List[
|
|
22
|
+
condition: ConditionDict
|
|
23
|
+
assertions: List[ConditionDict]
|
|
24
24
|
|
|
25
25
|
__slots__ = ['__config']
|
|
26
26
|
__config: Config
|
|
@@ -33,10 +33,10 @@ class AssertOnConditionValidator(Validator, ConditionEvaluator):
|
|
|
33
33
|
def _validate_data_object(self, obj: DataObject) -> None:
|
|
34
34
|
# Check condition atribute
|
|
35
35
|
# (only perform the assertions if the condition passes)
|
|
36
|
-
if self._does_condition_pass(self.__config.condition, obj):
|
|
36
|
+
if self._does_condition_pass(Condition.from_dict(self.__config.condition), obj):
|
|
37
37
|
# Perform each assertion
|
|
38
38
|
for assertion in self.__config.assertions:
|
|
39
|
-
self.__perform_assertion(obj, assertion)
|
|
39
|
+
self.__perform_assertion(obj, Condition.from_dict(assertion))
|
|
40
40
|
|
|
41
41
|
def __perform_assertion(self, obj: DataObject, assertion: Condition) -> None:
|
|
42
42
|
# There's only an error or warning if the assertion condition fails
|
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: 2025 Genome Research Ltd.
|
|
2
|
+
#
|
|
3
|
+
# SPDX-License-Identifier: MIT
|
|
4
|
+
|
|
5
|
+
import re
|
|
6
|
+
from dataclasses import dataclass
|
|
7
|
+
|
|
8
|
+
from tol.core import DataSource
|
|
9
|
+
from tol.core.data_object import DataObject
|
|
10
|
+
from tol.core.validate import Validator
|
|
11
|
+
from tol.sources.ena import ena
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class EnaChecklistValidator(Validator):
    """
    Validates the attributes of each sample against the rules of
    the configured ENA checklist.
    """

    @dataclass(slots=True, frozen=True, kw_only=True)
    class Config:
        # Id passed to the datasource to fetch the 'checklist' object
        ena_checklist_id: str

    __slots__ = ['__config']
    __config: Config

    def __init__(self, config: Config, datasource: DataSource | None = None) -> None:
        """
        datasource defaults to a new ena() datasource. It is created
        here, per instance, rather than as a default argument value
        (which would be evaluated once, at import time, and shared).
        """
        super().__init__()
        self.__config = config
        self._datasource = datasource if datasource is not None else ena()

    def _validate_data_object(self, obj: DataObject) -> None:
        """
        Adds an error to this validator for every checklist rule that
        the attributes of obj break:

        - a 'mandatory' field that is absent, None or ''
        - a 'restricted text' field not matching a pattern
        - a 'text choice' field not in the (case-insensitive) choices
        """
        # NOTE(review): the checklist is fetched for every object -
        # confirm that the datasource caches this lookup
        ena_checklist = self._datasource.get_one(
            'checklist', self.__config.ena_checklist_id)

        validations = ena_checklist.attributes['checklist']
        for key in validations:
            rules = validations[key]
            # NOTE(review): rules is both indexed by 'field' and iterated
            # for str / list entries below - confirm its exact shape
            field_name = rules['field'] if 'field' in rules else key

            mandatory = 'mandatory' in rules
            present = key in obj.attributes
            value = obj.attributes.get(key)

            if mandatory and (not present or value is None):
                self.__add_field_error(obj, 'Must be given', field_name)
                continue
            if mandatory and value == '':
                self.__add_field_error(obj, 'Must not be empty', field_name)

            # An optional field that was not given has nothing to check
            if not present or value is None:
                continue

            # Pattern and choice checks need text; non-string values are
            # checked by their str() form instead of raising
            text = value if isinstance(value, str) else str(value)

            if 'restricted text' in rules:
                for condition in rules:
                    # A str entry containing '(' is treated as a regex
                    if isinstance(condition, str) and '(' in condition:
                        if not re.compile(condition).search(text):
                            self.__add_field_error(
                                obj, 'Must match specific pattern', field_name)

            # Check against allowed values
            if 'text choice' in rules:
                for condition in rules:
                    if isinstance(condition, list):
                        if text.lower() not in [x.lower() for x in condition]:
                            self.__add_field_error(
                                obj, 'Must be in allowed values', field_name)

    def __add_field_error(self, obj: DataObject, detail: str, field_name: str) -> None:
        # Single place that records an error against one field of obj
        self.add_error(object_id=obj.id, detail=detail, field=[field_name])
|
|
@@ -4,11 +4,18 @@
|
|
|
4
4
|
|
|
5
5
|
from abc import ABC
|
|
6
6
|
from dataclasses import dataclass
|
|
7
|
-
from typing import Any, Tuple
|
|
7
|
+
from typing import Any, Dict, Tuple, cast
|
|
8
8
|
|
|
9
9
|
from tol.core import DataObject
|
|
10
10
|
|
|
11
11
|
|
|
12
|
+
ConditionDict = Dict[str, str | Any | bool]
|
|
13
|
+
"""
|
|
14
|
+
The dict representation of a Condition. Conditions can be constructed
|
|
15
|
+
from such a dict through Condition.from_dict(condition_dict)
|
|
16
|
+
"""
|
|
17
|
+
|
|
18
|
+
|
|
12
19
|
@dataclass(slots=True)
|
|
13
20
|
class Condition:
|
|
14
21
|
field: str
|
|
@@ -21,6 +28,40 @@ class Condition:
|
|
|
21
28
|
def __repr__(self) -> str:
|
|
22
29
|
return f'{self.field} {self.operator} {self.value}'
|
|
23
30
|
|
|
31
|
+
@staticmethod
def from_dict(condition_dict: ConditionDict) -> 'Condition':
    """
    A means of instantiating a Condition from a dictionary.
    This is a separate method (rather than constructing with kwargs
    like `Condition(**condition_dict)`) to allow for both presence
    and type checking for each field.

    The keys 'field', 'operator' and 'value' are required, whereas
    'is_error' is optional and defaults to True.

    Raises an Exception if a required key is missing, or if either
    'field' or 'operator' is not a str.
    """
    try:
        # Extract fields
        field = condition_dict['field']
        operator = condition_dict['operator']
        value = condition_dict['value']
    except KeyError as e:
        # A missing dict key raises KeyError (not IndexError)
        raise Exception(
            f'Dictionary {condition_dict} not in valid format '
            f'to convert to Condition (key "{e.args[0]}" not found)'
        ) from e
    is_error = condition_dict.get('is_error', True)

    # Ensure fields are the correct type (either being wrong is an error)
    if not isinstance(field, str) or not isinstance(operator, str):
        raise Exception(
            f'Dictionary {condition_dict} not in valid format '
            f'to convert to Condition (type of condition dict incorrect)'
        )

    return Condition(
        field,
        operator,
        value,
        cast(bool, is_error),
    )
|
|
64
|
+
|
|
24
65
|
|
|
25
66
|
class ConditionEvaluator(ABC):
|
|
26
67
|
"""
|
|
@@ -7,7 +7,7 @@ from typing import Any, List
|
|
|
7
7
|
|
|
8
8
|
from tol.core import DataObject, Validator
|
|
9
9
|
|
|
10
|
-
from .interfaces import Condition, ConditionEvaluator
|
|
10
|
+
from .interfaces import Condition, ConditionDict, ConditionEvaluator
|
|
11
11
|
|
|
12
12
|
|
|
13
13
|
class MutuallyExclusiveValidator(Validator, ConditionEvaluator):
|
|
@@ -19,12 +19,16 @@ class MutuallyExclusiveValidator(Validator, ConditionEvaluator):
|
|
|
19
19
|
"""
|
|
20
20
|
@dataclass(slots=True, frozen=True, kw_only=True)
|
|
21
21
|
class Config:
|
|
22
|
-
first_field_where:
|
|
23
|
-
second_field_where:
|
|
22
|
+
first_field_where: ConditionDict
|
|
23
|
+
second_field_where: ConditionDict
|
|
24
24
|
target_fields: List[str]
|
|
25
25
|
detail: str | None = None
|
|
26
26
|
|
|
27
27
|
def _get_error_message(self) -> str:
|
|
28
|
+
# Extract conditions
|
|
29
|
+
first_condition = Condition.from_dict(self.first_field_where)
|
|
30
|
+
second_condition = Condition.from_dict(self.second_field_where)
|
|
31
|
+
|
|
28
32
|
# Use a pre-defined, hard-coded detail message if one was not provided
|
|
29
33
|
if self.detail is None:
|
|
30
34
|
multiple_target_fields = len(self.target_fields) > 1
|
|
@@ -47,8 +51,8 @@ class MutuallyExclusiveValidator(Validator, ConditionEvaluator):
|
|
|
47
51
|
|
|
48
52
|
return (
|
|
49
53
|
f'The field{possible_plural} {target_fields_str} cannot have the same '
|
|
50
|
-
f'value{possible_plural} both when {
|
|
51
|
-
f'{
|
|
54
|
+
f'value{possible_plural} both when {first_condition} and when '
|
|
55
|
+
f'{second_condition}'
|
|
52
56
|
)
|
|
53
57
|
else:
|
|
54
58
|
return self.detail
|
|
@@ -67,7 +71,7 @@ class MutuallyExclusiveValidator(Validator, ConditionEvaluator):
|
|
|
67
71
|
|
|
68
72
|
def _validate_data_object(self, obj: DataObject) -> None:
|
|
69
73
|
# Check first field
|
|
70
|
-
if self._does_condition_pass(self.__config.first_field_where, obj):
|
|
74
|
+
if self._does_condition_pass(Condition.from_dict(self.__config.first_field_where), obj):
|
|
71
75
|
# Check whether the values of the target fields were found in the second list
|
|
72
76
|
if [
|
|
73
77
|
obj.get_field_by_name(target_field)
|
|
@@ -86,7 +90,7 @@ class MutuallyExclusiveValidator(Validator, ConditionEvaluator):
|
|
|
86
90
|
]
|
|
87
91
|
)
|
|
88
92
|
# Check second field (same as the first condition, but for the second!)
|
|
89
|
-
elif self._does_condition_pass(self.__config.second_field_where, obj):
|
|
93
|
+
elif self._does_condition_pass(Condition.from_dict(self.__config.second_field_where), obj):
|
|
90
94
|
# Check whether the values of the target fields were found in the first list
|
|
91
95
|
if [
|
|
92
96
|
obj.get_field_by_name(target_field)
|
tol/validators/regex.py
CHANGED
|
@@ -10,6 +10,12 @@ from tol.core import DataObject
|
|
|
10
10
|
from tol.core.validate import Validator
|
|
11
11
|
|
|
12
12
|
|
|
13
|
+
RegexDict = dict[
|
|
14
|
+
str,
|
|
15
|
+
str | bool | list[Any],
|
|
16
|
+
]
|
|
17
|
+
|
|
18
|
+
|
|
13
19
|
@dataclass(frozen=True, kw_only=True)
|
|
14
20
|
class Regex:
|
|
15
21
|
key: str
|
|
@@ -31,7 +37,7 @@ class RegexValidator(Validator):
|
|
|
31
37
|
"""
|
|
32
38
|
@dataclass(slots=True, frozen=True, kw_only=True)
|
|
33
39
|
class Config:
|
|
34
|
-
regexes: List[Regex]
|
|
40
|
+
regexes: List[Regex | RegexDict]
|
|
35
41
|
|
|
36
42
|
__slots__ = ['__config']
|
|
37
43
|
__config: Config
|
|
@@ -43,7 +49,7 @@ class RegexValidator(Validator):
|
|
|
43
49
|
|
|
44
50
|
super().__init__()
|
|
45
51
|
|
|
46
|
-
self.__config = config
|
|
52
|
+
self.__config = self.__get_config(config)
|
|
47
53
|
|
|
48
54
|
def _validate_data_object(
|
|
49
55
|
self,
|
|
@@ -93,3 +99,18 @@ class RegexValidator(Validator):
|
|
|
93
99
|
detail=c.detail,
|
|
94
100
|
field=c.key,
|
|
95
101
|
)
|
|
102
|
+
|
|
103
|
+
def __get_config(
    self,
    config: Config,
) -> Config:
    """
    Returns a new Config in which every entry of regexes is a Regex.
    Entries that are already a Regex are kept, and any other entry
    is used as the keyword arguments to construct a Regex.
    """
    normalised = []
    for entry in config.regexes:
        if not isinstance(entry, Regex):
            entry = Regex(**entry)
        normalised.append(entry)

    return self.Config(regexes=normalised)
|
tol/validators/regex_by_value.py
CHANGED
|
@@ -8,7 +8,7 @@ from typing import Dict, List
|
|
|
8
8
|
from tol.core import DataObject
|
|
9
9
|
from tol.core.validate import Validator
|
|
10
10
|
|
|
11
|
-
from .regex import Regex
|
|
11
|
+
from .regex import Regex, RegexDict
|
|
12
12
|
|
|
13
13
|
|
|
14
14
|
class RegexByValueValidator(Validator):
|
|
@@ -20,7 +20,7 @@ class RegexByValueValidator(Validator):
|
|
|
20
20
|
@dataclass(slots=True, frozen=True, kw_only=True)
|
|
21
21
|
class Config:
|
|
22
22
|
key_column: str
|
|
23
|
-
regexes: Dict[str, List[Regex]]
|
|
23
|
+
regexes: Dict[str, List[Regex | RegexDict]]
|
|
24
24
|
|
|
25
25
|
__slots__ = ['__config']
|
|
26
26
|
config: Config
|
|
@@ -32,7 +32,7 @@ class RegexByValueValidator(Validator):
|
|
|
32
32
|
|
|
33
33
|
super().__init__()
|
|
34
34
|
|
|
35
|
-
self.__config = config
|
|
35
|
+
self.__config = self.__get_config(config)
|
|
36
36
|
|
|
37
37
|
def _validate_data_object(
|
|
38
38
|
self,
|
|
@@ -78,3 +78,22 @@ class RegexByValueValidator(Validator):
|
|
|
78
78
|
detail=c.detail,
|
|
79
79
|
field=c.key,
|
|
80
80
|
)
|
|
81
|
+
|
|
82
|
+
def __get_config(
    self,
    config: Config,
) -> Config:
    """
    Returns a new Config with the same key_column, in which every
    entry of each list in regexes is a Regex. Entries that are
    already a Regex are kept, and any other entry is used as the
    keyword arguments to construct a Regex.
    """
    normalised = {}
    for value, entries in config.regexes.items():
        converted = []
        for entry in entries:
            if not isinstance(entry, Regex):
                entry = Regex(**entry)
            converted.append(entry)
        normalised[value] = converted

    return self.Config(
        key_column=config.key_column,
        regexes=normalised,
    )
|
tol/validators/tolid.py
CHANGED
|
@@ -90,7 +90,7 @@ class TolidValidator(Validator):
|
|
|
90
90
|
f.and_ = {'specimen_id': {'eq': {'value': specimen_id}}}
|
|
91
91
|
self.__cached_tolids[specimen_id] = list(self.__datasource.get_list(
|
|
92
92
|
object_type='specimen',
|
|
93
|
-
|
|
93
|
+
object_filters=f
|
|
94
94
|
))
|
|
95
95
|
|
|
96
96
|
if (len(self.__cached_tolids[specimen_id]) == 0):
|
|
@@ -133,7 +133,7 @@ tol/eln/entities.py,sha256=3-Z4ibZR11zepz5Nw74PnkpxA_HA31FsRUx61FSPYUo,1209
|
|
|
133
133
|
tol/eln/generators.py,sha256=ghRUiohLakiAcMzGVPJrPxYmdWijs63YxfRTqmKu3Zc,1227
|
|
134
134
|
tol/eln/sanitise.py,sha256=fMj-VrQTnw4zn2X0wnjWQAI8gWAa8RYqNuv23LXQssI,406
|
|
135
135
|
tol/ena/__init__.py,sha256=T3TCqaHpgi2Uk2PjPGu60GaG2V8cTrHJlVLtZfLFhTQ,174
|
|
136
|
-
tol/ena/client.py,sha256=
|
|
136
|
+
tol/ena/client.py,sha256=ldmm7Z9_auQf1zVWjsFLXYgbKvGtSHTsr88YO3zfv2Y,6731
|
|
137
137
|
tol/ena/converter.py,sha256=nxbo4IFzzOvKNMq3Aeiw5iDqVWvY33nTngLppjHAoGY,1144
|
|
138
138
|
tol/ena/ena_datasource.py,sha256=jEvyUaH4pfFxmdtn6O_PwOdjPz6u80uAT3SLlR2f5nM,8968
|
|
139
139
|
tol/ena/ena_methods.py,sha256=jgpLssZq-F-vgkO-fYu4jrXenmNkdFpFKAY3VKp5HHE,9209
|
|
@@ -148,7 +148,7 @@ tol/flows/__init__.py,sha256=M7iSvnBJs6fJ8M38cW0bYQa9WW0TN8FHAMjIHPDNAJ4,166
|
|
|
148
148
|
tol/flows/logger.py,sha256=rWXbaknGcPEZRFvC1CiB1qkhFRZsQk435w7VyJ3cpyw,170
|
|
149
149
|
tol/flows/secrets.py,sha256=1mlbsxaahzYRfVAx3XdztHOmUCtDMSJDzHysdbaCtj0,352
|
|
150
150
|
tol/flows/sequencing_submissions.py,sha256=ukz_y5be-BCBN2y3JPQ2EK6b3jwOCh-187j-jnw3EUY,11027
|
|
151
|
-
tol/flows/converters/__init__.py,sha256=
|
|
151
|
+
tol/flows/converters/__init__.py,sha256=Ee1yMFWwsRoG71ZFCsqtNwF45AR8farmV9h3ERrK994,6154
|
|
152
152
|
tol/flows/converters/benchling_entity_to_benchling_worklist_item_converter_factory.py,sha256=PN27fcvN4JLBnLrtPPAot1cWjAwPQHVcIDoMfPDeKzU,1210
|
|
153
153
|
tol/flows/converters/benchling_extraction_to_elastic_extraction_converter.py,sha256=S8pbmIeKlcXrLPRJHYBUGP0-Q7jTOV2QQk2TeA2naWo,1966
|
|
154
154
|
tol/flows/converters/benchling_extraction_to_elastic_sequencing_request_converter.py,sha256=2RiyRvGRSWzpUwEI4p-s0afshJpFUUxPqv2z-nyDSVg,1992
|
|
@@ -180,6 +180,8 @@ tol/flows/converters/gap_assembly_to_elastic_assembly_converter.py,sha256=XK-es-
|
|
|
180
180
|
tol/flows/converters/genome_notes_genome_note_to_elastic_genome_note_converter.py,sha256=AaUWbVTaWU-NXnUQPaPwI41TE7a-nC4zlg-jrWpPT2s,1166
|
|
181
181
|
tol/flows/converters/goat_taxon_to_elastic_species_converter.py,sha256=1NGs9427OdXGsBaMB467nOF7aTlJsUKYCuoSoABw9L4,1074
|
|
182
182
|
tol/flows/converters/grit_issue_to_elastic_curation_converter.py,sha256=XpRpoRn589MxTqEk6zPWGn6tamJiqY9Ctxk8v0q-dvA,3953
|
|
183
|
+
tol/flows/converters/incoming_sample_to_ena_sample_converter.py,sha256=HmGsg-VCE4W9Dl3lAlcNhWfkVYp1d22DZlFoTaFzeqA,5560
|
|
184
|
+
tol/flows/converters/incoming_sample_to_incoming_sample_with_lists_converter.py,sha256=5Fp1_ojsYqvRcKTgXJbyWqetPisi_vtWFcWr6RtGZoA,1504
|
|
183
185
|
tol/flows/converters/informatics_tolid_to_elastic_tolid_converter.py,sha256=VrvtsDTPlc5Xa3K4rcAMHwV4n71zOH7q5EfALLLQ1tI,587
|
|
184
186
|
tol/flows/converters/labwhere_location_to_elastic_sample_update_converter.py,sha256=NJNmG9sCc2WXc-2J5XfCKXhb2sDH82nZUBekd16PHcw,656
|
|
185
187
|
tol/flows/converters/labwhere_location_to_sts_tray_converter.py,sha256=dSBP5HfdvyIGvNQD6bhi0RKvkwhN8jq69g-0hOXUqEg,672
|
|
@@ -318,26 +320,27 @@ tol/treeval/treeval_datasource.py,sha256=GzY6JwH67b5QdV-UVdCFJfgGAIuZ96J2nl53YxZ
|
|
|
318
320
|
tol/utils/__init__.py,sha256=764-Na1OaNGUDWpMIu51ZtXG7n_nB5MccUFK6LmkWRI,138
|
|
319
321
|
tol/utils/csv.py,sha256=mihww25fSn72c4h-RFeqD_pFIG6KHZP4v1_C0rx81ws,421
|
|
320
322
|
tol/utils/s3.py,sha256=aoYCwJ-qcMqFrpxmViFqPa0O1jgp0phtztO3-0CSNjw,491
|
|
321
|
-
tol/validators/__init__.py,sha256=
|
|
323
|
+
tol/validators/__init__.py,sha256=CcamQvWpywu5aXWc1D9CoZAZcbioRBlNUuTX-XOKqQE,1060
|
|
322
324
|
tol/validators/allowed_keys.py,sha256=fX4KqpxOJNmmVNqQq215MtX27v4dbKSjCM1laXuXfZE,1488
|
|
323
325
|
tol/validators/allowed_values.py,sha256=Sxsb6EhiR8Ey90sfPoyGqkehkJN0hHSNXlLYSL_k5f4,1518
|
|
324
|
-
tol/validators/allowed_values_from_datasource.py,sha256=
|
|
325
|
-
tol/validators/assert_on_condition.py,sha256=
|
|
326
|
+
tol/validators/allowed_values_from_datasource.py,sha256=EqHfaTk8VAMQAra7SfkpZYjFUJm9bee_i73vFwwGG_Y,3169
|
|
327
|
+
tol/validators/assert_on_condition.py,sha256=FCzmN2Crhku0bdEpXuKEbDaoU6RbAZCUDUxQ8OHK92M,2017
|
|
328
|
+
tol/validators/ena_checklist.py,sha256=nxoiJWHKDk2HzeJMKgME_KGNYUBSaCx5jbp4yXQMf00,2875
|
|
326
329
|
tol/validators/ena_submittable.py,sha256=ruryfVcQtRJ5JsAFBcVZ_3hzTDwRWGXpdPUAfdrYmWg,1948
|
|
327
330
|
tol/validators/min_one_valid_value.py,sha256=3Fv-b-fojvPLpiWwUqrFfO0YMGtV40KEp15zTmdOlyI,1425
|
|
328
|
-
tol/validators/mutually_exclusive.py,sha256=
|
|
329
|
-
tol/validators/regex.py,sha256=
|
|
330
|
-
tol/validators/regex_by_value.py,sha256=
|
|
331
|
+
tol/validators/mutually_exclusive.py,sha256=76AqX4G_syKAbFyDgydbcMHvNAroENazrcWbR5vAN30,4482
|
|
332
|
+
tol/validators/regex.py,sha256=L8ap5oxn_54DoE7Tnqo1VjQ-dr5-fJAe7cCHYg8cBzw,2523
|
|
333
|
+
tol/validators/regex_by_value.py,sha256=bSPloXfeOUMuiSQH-_GkvQ6IiD3Q1fojT0iydlkLanI,2564
|
|
331
334
|
tol/validators/specimens_have_same_taxon.py,sha256=BrkYXhr1KVnJcDe-PrKXf0AF9740BbFV2XVMrtu98CQ,2167
|
|
332
335
|
tol/validators/sts_fields.py,sha256=piHqdzuGepIEv-ZSSwdkYE6ybQpqc4hBN8hN8UrN4Vg,3442
|
|
333
|
-
tol/validators/tolid.py,sha256=
|
|
336
|
+
tol/validators/tolid.py,sha256=gHiaNQGWHos4I6Rz7aKCOATbv0ff8qL4LdObnZ_Lgt8,3876
|
|
334
337
|
tol/validators/unique_values.py,sha256=CZvIdg7GC2VmVAGuPToxJ_r-2YCwy2xkQKp2qwVwbzU,3096
|
|
335
338
|
tol/validators/unique_whole_organisms.py,sha256=Z-PUjlCcn-WHDKMYDyt_WhalXzQFfqLWItM6qywVt_Y,5725
|
|
336
|
-
tol/validators/interfaces/__init__.py,sha256=
|
|
337
|
-
tol/validators/interfaces/condition_evaluator.py,sha256=
|
|
338
|
-
tol_sdk-1.7.
|
|
339
|
-
tol_sdk-1.7.
|
|
340
|
-
tol_sdk-1.7.
|
|
341
|
-
tol_sdk-1.7.
|
|
342
|
-
tol_sdk-1.7.
|
|
343
|
-
tol_sdk-1.7.
|
|
339
|
+
tol/validators/interfaces/__init__.py,sha256=jtOxnwnwqV_29xjmmMcS_kvlt-pQiWwQYJn2YRP07_w,172
|
|
340
|
+
tol/validators/interfaces/condition_evaluator.py,sha256=nj8Cb8hi47OBy6OVNfeLhF-Pjwtr8MiOSymYL6hfVes,3766
|
|
341
|
+
tol_sdk-1.7.5b2.dist-info/licenses/LICENSE,sha256=RF9Jacy-9BpUAQQ20INhTgtaNBkmdTolYCHtrrkM2-8,1077
|
|
342
|
+
tol_sdk-1.7.5b2.dist-info/METADATA,sha256=FxtTjGw1IxkHprv89dW071kyQ68kiR9sxu-QdfLaF1g,3081
|
|
343
|
+
tol_sdk-1.7.5b2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
344
|
+
tol_sdk-1.7.5b2.dist-info/entry_points.txt,sha256=jH3HfTwxjzog7E3lq8CKpUWGIRY9FSXbyL6CpUmv6D0,36
|
|
345
|
+
tol_sdk-1.7.5b2.dist-info/top_level.txt,sha256=PwKMQLphyZNvagBoriVbl8uwHXQl8IC1niawVG0iXMM,10
|
|
346
|
+
tol_sdk-1.7.5b2.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|