tol-sdk 1.7.4__py3-none-any.whl → 1.7.5b2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50) hide show
  1. tol/api_base/__init__.py +1 -0
  2. tol/api_base/blueprint.py +19 -8
  3. tol/{s3/data_upload/blueprint.py → api_base/data_upload.py} +21 -6
  4. tol/api_base/pipeline_steps.py +4 -4
  5. tol/api_client/api_datasource.py +8 -8
  6. tol/api_client/converter.py +38 -52
  7. tol/api_client/factory.py +21 -19
  8. tol/api_client/parser.py +138 -98
  9. tol/api_client/view.py +118 -43
  10. tol/core/__init__.py +2 -1
  11. tol/core/data_object.py +27 -9
  12. tol/core/data_object_converter.py +37 -2
  13. tol/core/factory.py +51 -62
  14. tol/core/validate.py +1 -0
  15. tol/ena/client.py +61 -10
  16. tol/ena/ena_datasource.py +16 -10
  17. tol/ena/ena_methods.py +33 -32
  18. tol/ena/parser.py +15 -2
  19. tol/flows/converters/__init__.py +2 -0
  20. tol/flows/converters/incoming_sample_to_ena_sample_converter.py +130 -0
  21. tol/flows/converters/incoming_sample_to_incoming_sample_with_lists_converter.py +46 -0
  22. tol/s3/__init__.py +0 -1
  23. tol/sql/model.py +1 -1
  24. tol/sql/pipeline_step/factory.py +1 -1
  25. tol/sql/sql_converter.py +7 -1
  26. tol/validators/__init__.py +12 -1
  27. tol/validators/allowed_keys.py +17 -12
  28. tol/validators/allowed_values.py +21 -63
  29. tol/validators/allowed_values_from_datasource.py +89 -0
  30. tol/validators/assert_on_condition.py +56 -0
  31. tol/validators/ena_checklist.py +73 -0
  32. tol/validators/ena_submittable.py +61 -0
  33. tol/validators/interfaces/__init__.py +5 -0
  34. tol/validators/interfaces/condition_evaluator.py +102 -0
  35. tol/validators/min_one_valid_value.py +55 -0
  36. tol/validators/mutually_exclusive.py +111 -0
  37. tol/validators/regex.py +30 -23
  38. tol/validators/regex_by_value.py +33 -33
  39. tol/validators/specimens_have_same_taxon.py +60 -0
  40. tol/validators/sts_fields.py +88 -0
  41. tol/validators/tolid.py +110 -0
  42. tol/validators/unique_values.py +25 -17
  43. tol/validators/unique_whole_organisms.py +109 -0
  44. {tol_sdk-1.7.4.dist-info → tol_sdk-1.7.5b2.dist-info}/METADATA +1 -1
  45. {tol_sdk-1.7.4.dist-info → tol_sdk-1.7.5b2.dist-info}/RECORD +49 -36
  46. tol/s3/data_upload/__init__.py +0 -3
  47. {tol_sdk-1.7.4.dist-info → tol_sdk-1.7.5b2.dist-info}/WHEEL +0 -0
  48. {tol_sdk-1.7.4.dist-info → tol_sdk-1.7.5b2.dist-info}/entry_points.txt +0 -0
  49. {tol_sdk-1.7.4.dist-info → tol_sdk-1.7.5b2.dist-info}/licenses/LICENSE +0 -0
  50. {tol_sdk-1.7.4.dist-info → tol_sdk-1.7.5b2.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,130 @@
1
+ # SPDX-FileCopyrightText: 2025 Genome Research Ltd.
2
+ # SPDX-License-Identifier: MIT
3
+
4
+ import re
5
+ from dataclasses import dataclass
6
+ from typing import Iterable
7
+
8
+ from tol.core import DataObject, DataObjectToDataObjectOrUpdateConverter
9
+
10
+
11
class IncomingSampleToEnaSampleConverter(DataObjectToDataObjectOrUpdateConverter):
    """
    Converts an incoming sample `DataObject` (UPPER_SNAKE attribute
    keys, underscore-separated values) into the attribute layout
    expected for an ENA sample submission.
    """

    @dataclass(slots=True, frozen=True, kw_only=True)
    class Config:
        # ID written to the converted sample's 'ENA-CHECKLIST' attribute
        ena_checklist_id: str
        # Value written to the converted sample's 'project name' attribute
        project_name: str

    __slots__ = ['__config']
    __config: Config

    def __init__(self, data_object_factory, config: Config) -> None:
        super().__init__(data_object_factory)
        self.__config = config
        self._data_object_factory = data_object_factory

    def convert(self, data_object: DataObject) -> Iterable[DataObject]:
        """
        Convert the sample `DataObject` into ENA format, yielding a
        single converted 'sample' `DataObject` with the same id.
        """
        s = data_object
        attributes = {
            'ENA-CHECKLIST': self.__config.ena_checklist_id,
            'organism part': self.__replace_underscores(
                s.attributes.get('ORGANISM_PART')),
            # ENA spells this one value differently from the incoming enum
            'lifestage': (
                'spore-bearing structure'
                if s.attributes.get('LIFESTAGE') == 'SPORE_BEARING_STRUCTURE'
                else self.__replace_underscores(
                    s.attributes.get('LIFESTAGE'))
            ),
            'project name':
                self.__config.project_name,
            'collected by':
                self.__replace_underscores(
                    s.attributes.get('COLLECTED_BY')),
            # NOTE(review): the .lower() calls below assume the attribute is
            # a str or absent — a present non-str (e.g. numeric latitude)
            # passes through __replace_underscores unchanged and would raise
            # AttributeError here; confirm upstream guarantees str values
            'collection date':
                self.__replace_underscores(
                    s.attributes.get('DATE_OF_COLLECTION')).lower(),
            'geographic location (country and/or sea)':
                self.__collection_country(s).replace('_', ' '),
            'geographic location (latitude)':
                self.__replace_underscores(
                    s.attributes.get('DECIMAL_LATITUDE')).lower(),
            'geographic location (latitude) units':
                'DD',
            'geographic location (longitude)':
                self.__replace_underscores(
                    s.attributes.get('DECIMAL_LONGITUDE')).lower(),
            'geographic location (longitude) units':
                'DD',
            'geographic location (region and locality)':
                self.__collection_region(s).replace('_', ' '),
            'identified_by':
                self.__replace_underscores(
                    s.attributes.get('IDENTIFIED_BY')),
            'habitat':
                self.__replace_underscores(
                    s.attributes.get('HABITAT')),
            'identifier_affiliation':
                self.__replace_underscores(
                    s.attributes.get('IDENTIFIER_AFFILIATION')),
            'sex':
                self.__replace_underscores(
                    s.attributes.get('SEX')),
            'relationship':
                self.__replace_underscores(
                    s.attributes.get('RELATIONSHIP')),
            'SYMBIONT':
                'Y' if s.attributes.get('SYMBIONT') == 'SYMBIONT' else 'N',
            'collecting institution':
                self.__replace_underscores(
                    s.attributes.get('COLLECTOR_AFFILIATION'))
        }
        # Optional attributes: only emitted when present and non-empty
        if self.__sanitise(s.attributes.get('DEPTH')) != '':
            attributes['geographic location (depth)'] = s.attributes.get('DEPTH')
            attributes['geographic location (depth) units'] = 'm'
        if self.__sanitise(s.attributes.get('ELEVATION')) != '':
            attributes['geographic location (elevation)'] = s.attributes.get('ELEVATION')
            attributes['geographic location (elevation) units'] = 'm'
        if self.__sanitise(s.attributes.get('ORIGINAL_COLLECTION_DATE')) != '':
            attributes['original collection date'] = s.attributes.get('ORIGINAL_COLLECTION_DATE')
        if self.__sanitise(s.attributes.get('ORIGINAL_GEOGRAPHIC_LOCATION')) != '':
            attributes['original geographic location'] = self.__replace_underscores(s.attributes.get('ORIGINAL_GEOGRAPHIC_LOCATION'))  # noqa
        if s.attributes.get('GAL') is not None:
            attributes['GAL'] = s.attributes.get('GAL')
        if s.attributes.get('VOUCHER_ID') is not None:
            attributes['specimen_voucher'] = s.attributes.get('VOUCHER_ID')
        if s.attributes.get('SPECIMEN_ID') is not None:
            attributes['specimen_id'] = s.attributes.get('SPECIMEN_ID')
        if s.attributes.get('GAL_SAMPLE_ID') is not None:
            attributes['GAL_sample_id'] = s.attributes.get('GAL_SAMPLE_ID')
        if s.attributes.get('CULTURE_OR_STRAIN_ID') is not None:
            attributes['culture_or_strain_id'] = s.attributes.get('CULTURE_OR_STRAIN_ID')

        ret = self._data_object_factory(
            'sample',
            s.id,
            attributes=attributes,
        )
        yield ret

    def __collection_country(self, data_object: DataObject):
        # First '|'-separated element of COLLECTION_LOCATION
        return re.split(
            r'\s*\|\s*',
            data_object.attributes.get('COLLECTION_LOCATION'))[0]

    def __collection_region(self, data_object: DataObject):
        # All elements after the first, re-joined with ' | '
        return ' | '.join(re.split(
            r'\s*\|\s*',
            data_object.attributes.get('COLLECTION_LOCATION'))[1:])

    def __replace_underscores(self, value):
        # Non-strings (including None) pass through unchanged;
        # isinstance replaces the original `type(value) != str` (E721)
        if not isinstance(value, str):
            return value
        return self.__sanitise(value).replace('_', ' ')

    def __sanitise(self, value, default_value=''):
        # Substitute `default_value` for None; other values pass through
        if value is None:
            return default_value
        return value
@@ -0,0 +1,46 @@
1
+ # SPDX-FileCopyrightText: 2025 Genome Research Ltd.
2
+ # SPDX-License-Identifier: MIT
3
+
4
+ from dataclasses import dataclass
5
+ from typing import Iterable
6
+
7
+ from tol.core import DataObject, DataObjectToDataObjectOrUpdateConverter
8
+
9
+
10
class IncomingSampleToIncomingSampleWithListsConverter(DataObjectToDataObjectOrUpdateConverter):
    """
    Converts a `DataObject` into a copy of itself in which the
    configured fields are split (on `separator`) into lists of
    whitespace-stripped strings; all other attributes are kept as-is.
    """

    @dataclass(slots=True, frozen=True, kw_only=True)
    class Config:
        # Names of the fields whose values are split into lists.
        # (Previously annotated `str`, but it is used as a collection
        # of field names — `in` membership and iteration below.)
        fields_to_convert: list[str]
        separator: str = '|'

    __slots__ = ['__config']
    __config: Config

    def __init__(self, data_object_factory, config: Config) -> None:
        super().__init__(data_object_factory)
        self.__config = config
        self._data_object_factory = data_object_factory

    def convert(self, data_object: DataObject) -> Iterable[DataObject]:
        """
        Yield a copy of `data_object` (same type and id) with the
        configured fields converted to lists.
        """
        ret = self._data_object_factory(
            data_object.type,
            data_object.id,
            attributes={
                k: v for k, v in data_object.attributes.items()
                if k not in self.__config.fields_to_convert
            } | {
                field: self.__convert_to_list(data_object.get_field_by_name(field))
                for field in self.__config.fields_to_convert
            }
        )
        yield ret

    def __convert_to_list(self, value: str | None) -> list[str]:
        """Split `value` on the configured separator, stripping each item."""
        if not value:
            return []
        return [item.strip() for item in value.split(self.__config.separator)]
tol/s3/__init__.py CHANGED
@@ -6,4 +6,3 @@ from .parser import Parser # noqa F401
6
6
  from .converter import S3Converter # noqa F401
7
7
  from .factory import create_s3_datasource # noqa F401
8
8
  from .s3_datasource import S3DataSource # noqa F401
9
- from .data_upload.blueprint import data_upload_blueprint # noqa F401
tol/sql/model.py CHANGED
@@ -255,7 +255,7 @@ def model_base() -> Type[DefaultModel]:
255
255
  @property
256
256
  def instance_to_many_relations(self) -> dict[str, Iterable[Model]]:
257
257
  config = self.get_to_many_relationship_config()
258
- return self.__get_attributes_map(config.keys())
258
+ return self.__get_attributes_map(config)
259
259
 
260
260
  @property
261
261
  def instance_id(self) -> Optional[str]:
@@ -140,7 +140,7 @@ def create_pipeline_step_models(
140
140
  autoincrement=True
141
141
  )
142
142
 
143
- s3_url: Mapped[str] = mapped_column(nullable=False,)
143
+ s3_bucket: Mapped[str] = mapped_column(nullable=False,)
144
144
  s3_filename: Mapped[str] = mapped_column(nullable=False)
145
145
  spreadsheet_config: Mapped[str] = mapped_column(nullable=True)
146
146
 
tol/sql/sql_converter.py CHANGED
@@ -83,7 +83,13 @@ class DefaultModelConverter(ModelConverter):
83
83
  return to_ones if to_ones else None
84
84
 
85
85
  def __convert_to_many_requested(self, model, tree):
86
- return None
86
+ to_manys = {}
87
+ for rel_name in model.get_to_many_relationship_config():
88
+ if sub_tree := tree.get_sub_tree(rel_name):
89
+ to_manys[rel_name] = [
90
+ self.__convert_requested(x, sub_tree) for x in getattr(model, rel_name)
91
+ ]
92
+ return to_manys if to_manys else None
87
93
 
88
94
 
89
95
  class DataObjectConverter(Converter[DataObject, Model], ABC):
@@ -2,8 +2,19 @@
2
2
  #
3
3
  # SPDX-License-Identifier: MIT
4
4
 
5
- from .allowed_values import AllowedValues, AllowedValuesValidator # noqa
6
5
  from .allowed_keys import AllowedKeysValidator # noqa
6
+ from .allowed_values import AllowedValuesValidator # noqa
7
+ from .allowed_values_from_datasource import AllowedValuesFromDataSourceValidator # noqa
8
+ from .assert_on_condition import AssertOnConditionValidator # noqa
9
+ from .ena_checklist import EnaChecklistValidator # noqa
10
+ from .mutually_exclusive import MutuallyExclusiveValidator # noqa
11
+ from .ena_submittable import EnaSubmittableValidator # noqa
7
12
  from .regex import Regex, RegexValidator # noqa
8
13
  from .regex_by_value import RegexByValueValidator # noqa
14
+ from .specimens_have_same_taxon import SpecimensHaveSameTaxonValidator # noqa
15
+ from .sts_fields import StsFieldsValidator # noqa
16
+ from .tolid import TolidValidator # noqa
9
17
  from .unique_values import UniqueValuesValidator # noqa
18
+ from .unique_whole_organisms import UniqueWholeOrganismsValidator # noqa
19
+ from .interfaces import Condition # noqa
20
+ from .min_one_valid_value import MinOneValidValueValidator # noqa
@@ -2,6 +2,9 @@
2
2
  #
3
3
  # SPDX-License-Identifier: MIT
4
4
 
5
+ from dataclasses import dataclass
6
+ from typing import List
7
+
5
8
  from tol.core import DataObject
6
9
  from tol.core.validate import Validator
7
10
 
@@ -12,20 +15,22 @@ class AllowedKeysValidator(Validator):
12
15
  ensuring that they only have attributes of the given
13
16
  allowed keys.
14
17
  """
18
+ @dataclass(slots=True, frozen=True, kw_only=True)
19
+ class Config:
20
+ allowed_keys: List[str]
21
+ is_error: bool = True
22
+ detail: str = 'Key is not allowed'
23
+
24
+ __slots__ = ['__config']
25
+ __config: Config
15
26
 
16
27
  def __init__(
17
28
  self,
18
- allowed_keys: list[str],
19
- *,
20
- is_error: bool = True,
21
- detail: str = 'Key is not allowed'
29
+ config: Config
22
30
  ) -> None:
23
31
 
24
32
  super().__init__()
25
-
26
- self.__keys = allowed_keys
27
- self.__is_error = is_error
28
- self.__detail = detail
33
+ self.__config = config
29
34
 
30
35
  def _validate_data_object(
31
36
  self,
@@ -33,7 +38,7 @@ class AllowedKeysValidator(Validator):
33
38
  ) -> None:
34
39
 
35
40
  for key in obj.attributes:
36
- if key not in self.__keys:
41
+ if key not in self.__config.allowed_keys:
37
42
  self.__add_result(
38
43
  obj,
39
44
  key,
@@ -45,15 +50,15 @@ class AllowedKeysValidator(Validator):
45
50
  key: str,
46
51
  ) -> None:
47
52
 
48
- if self.__is_error:
53
+ if self.__config.is_error:
49
54
  self.add_error(
50
55
  object_id=obj.id,
51
- detail=self.__detail,
56
+ detail=self.__config.detail,
52
57
  field=key,
53
58
  )
54
59
  else:
55
60
  self.add_warning(
56
61
  object_id=obj.id,
57
- detail=self.__detail,
62
+ detail=self.__config.detail,
58
63
  field=key,
59
64
  )
@@ -3,103 +3,61 @@
3
3
  # SPDX-License-Identifier: MIT
4
4
 
5
5
  from dataclasses import dataclass
6
- from typing import Any
6
+ from typing import Any, List
7
7
 
8
8
  from tol.core import DataObject
9
9
  from tol.core.validate import Validator
10
10
 
11
11
 
12
- @dataclass(frozen=True, kw_only=True)
13
- class AllowedValues:
14
- key: str
15
- values: list[Any]
16
-
17
- is_error: bool = True
18
- detail: str = 'Value is not allowed for given key'
19
-
20
- def is_allowed(self, __v: Any) -> bool:
21
- return __v in self.values
22
-
23
-
24
- AllowedValuesDict = dict[
25
- str,
26
- str | bool | list[Any],
27
- ]
28
- """Can also specify `AllowedValues` as a `dict`"""
29
-
30
-
31
12
  class AllowedValuesValidator(Validator):
32
13
  """
33
14
  Validates an incoming stream of `DataObject` instances
34
15
  according to the specified allowed values for a given
35
16
  key.
36
17
  """
18
+ @dataclass(slots=True, frozen=True, kw_only=True)
19
+ class Config:
20
+ field: str
21
+ allowed_values: List[Any]
22
+ is_error: bool = True
23
+ detail: str = 'Value is not allowed for the given key'
24
+
25
+ __slots__ = ['__config']
26
+ __config: Config
37
27
 
38
28
  def __init__(
39
29
  self,
40
- config: list[AllowedValues | AllowedValuesDict]
30
+ config: Config
41
31
  ) -> None:
42
32
 
43
33
  super().__init__()
44
34
 
45
- self.__config = self.__get_config(config)
35
+ self.__config = config
46
36
 
47
37
  def _validate_data_object(
48
38
  self,
49
39
  obj: DataObject
50
40
  ) -> None:
51
41
 
52
- for k, v in obj.attributes.items():
53
- self.__validate_attribute(obj, k, v)
54
-
55
- def __get_config(
56
- self,
57
- config: list[AllowedValues | AllowedValuesDict],
58
- ) -> list[AllowedValues]:
59
-
60
- return [
61
- c if isinstance(c, AllowedValues) else AllowedValues(**c)
62
- for c in config
63
- ]
64
-
65
- def __validate_attribute(
66
- self,
67
- obj: DataObject,
68
- key: str,
69
- value: Any,
70
- ) -> None:
71
-
72
- config = self.__filter_config(key)
73
-
74
- for c in config:
75
- if not c.is_allowed(value):
76
- self.__add_result(obj, c)
77
-
78
- def __filter_config(
79
- self,
80
- key: str,
81
- ) -> list[AllowedValues]:
82
-
83
- return [
84
- a for a in self.__config
85
- if a.key == key
86
- ]
42
+ for key, value in obj.attributes.items():
43
+ if key == self.__config.field and value not in self.__config.allowed_values:
44
+ self.__add_result(obj, key)
87
45
 
88
46
  def __add_result(
89
47
  self,
90
48
  obj: DataObject,
91
- c: AllowedValues,
49
+ key: str,
92
50
  ) -> None:
93
51
 
94
- if c.is_error:
52
+ if self.__config.is_error:
95
53
  self.add_error(
96
54
  object_id=obj.id,
97
- detail=c.detail,
98
- field=c.key
55
+ detail=self.__config.detail,
56
+ field=key
99
57
  )
100
58
  else:
101
59
  self.add_warning(
102
60
  object_id=obj.id,
103
- detail=c.detail,
104
- field=c.key,
61
+ detail=self.__config.detail,
62
+ field=key,
105
63
  )
@@ -0,0 +1,89 @@
1
+ # SPDX-FileCopyrightText: 2025 Genome Research Ltd.
2
+ #
3
+ # SPDX-License-Identifier: MIT
4
+
5
+ from dataclasses import dataclass
6
+ from typing import List
7
+
8
+ from tol.core import DataObject, DataSourceUtils
9
+ from tol.core.validate import Validator
10
+ from tol.sources.portaldb import portaldb
11
+
12
+
13
class AllowedValuesFromDataSourceValidator(Validator):
    """
    Validates that each `DataObject`'s configured field takes a value
    (or, for list-valued fields, only values) from a list fetched once
    from a configured datasource instance.
    """
    @dataclass(slots=True, frozen=True, kw_only=True)
    class Config:
        datasource_instance_id: int
        datasource_object_type: str
        datasource_field_name: str
        field_name: str

    __slots__ = ['__config', '__cached_list']
    __config: Config
    __cached_list: List[str | int | float]

    def __init__(
        self,
        config: Config,
        allowed_values: List[str | int | float] | None = None  # For testing
    ) -> None:

        super().__init__()

        self.__config = config
        # `is not None` (not truthiness, as before): an explicitly injected
        # empty list must not trigger a datasource round-trip
        self.__cached_list = (
            allowed_values
            if allowed_values is not None
            else self.__initialize_list_from_datasource()
        )

    def __initialize_list_from_datasource(self) -> List[str | int | float]:
        """Fetch the allowed values once, at construction time."""
        dsi = portaldb().get_one('data_source_instance', self.__config.datasource_instance_id)
        ds = DataSourceUtils.get_data_source_by_data_source_instance(dsi)
        return [
            obj.get_field_by_name(
                self.__config.datasource_field_name
            ) for obj in ds.get_list(
                object_type=self.__config.datasource_object_type
            )
        ]

    def _validate_data_object(
        self,
        obj: DataObject
    ) -> None:
        field_value = obj.get_field_by_name(self.__config.field_name)
        if not field_value:
            return
        # Scalar values are validated as a one-element list
        if not isinstance(field_value, list):
            field_value = [field_value]
        if any(value not in self.__cached_list for value in field_value):
            multiple_cached_values = len(self.__cached_list) > 1
            # Extracted rather than evaluated in the f-string to keep
            # the linter happy
            pluralisation = ' one of' if multiple_cached_values else ''
            self.add_error(
                object_id=obj.id,
                detail=f'The value of the field {self.__config.field_name} '
                       f'must be{pluralisation} {self.__allowed_values_str()} '
                       f'(found value {field_value})',
                field=self.__config.field_name,
            )

    def __allowed_values_str(self):
        """Render the allowed values as 'a, b or c' (or the lone value)."""
        if len(self.__cached_list) > 1:
            *head, last = self.__cached_list
            return ', '.join(f'{v}' for v in head) + f' or {last}'
        # NOTE(review): raises IndexError if the datasource returned an
        # empty list — confirm that case cannot occur
        return self.__cached_list[0]
@@ -0,0 +1,56 @@
1
+ # SPDX-FileCopyrightText: 2025 Genome Research Ltd.
2
+ #
3
+ # SPDX-License-Identifier: MIT
4
+
5
+ from dataclasses import dataclass
6
+ from typing import List
7
+
8
+ from tol.core import DataObject, Validator
9
+
10
+ from .interfaces import Condition, ConditionDict, ConditionEvaluator
11
+
12
+
13
class AssertOnConditionValidator(Validator, ConditionEvaluator):
    """
    Validates an incoming stream of `DataObject` instances,
    using a condition to check a specific attribute. If this
    condition passes, then the assertions will be run, which must
    all pass.
    """
    @dataclass(slots=True, frozen=True, kw_only=True)
    class Config:
        condition: ConditionDict
        assertions: List[ConditionDict]

    __slots__ = ['__config']
    __config: Config

    def __init__(self, config: Config) -> None:
        super().__init__()

        self.__config = config

    def _validate_data_object(self, obj: DataObject) -> None:
        # Only perform the assertions if the gating condition passes
        if self._does_condition_pass(Condition.from_dict(self.__config.condition), obj):
            for assertion in self.__config.assertions:
                self.__perform_assertion(obj, Condition.from_dict(assertion))

    def __perform_assertion(self, obj: DataObject, assertion: Condition) -> None:
        # There's only an error or warning if the assertion condition fails
        condition_passed, found_value = self._evaluate_condition(assertion, obj)
        if condition_passed:
            return
        # The payload is identical either way; only the severity differs
        report = self.add_error if assertion.is_error else self.add_warning
        report(
            object_id=obj.id,
            detail=f'Expected {assertion} (found value {found_value})',
            field=assertion.field,
        )
@@ -0,0 +1,73 @@
1
+ # SPDX-FileCopyrightText: 2025 Genome Research Ltd.
2
+ #
3
+ # SPDX-License-Identifier: MIT
4
+
5
+ import re
6
+ from dataclasses import dataclass
7
+
8
+ from tol.core import DataSource
9
+ from tol.core.data_object import DataObject
10
+ from tol.core.validate import Validator
11
+ from tol.sources.ena import ena
12
+
13
+
14
class EnaChecklistValidator(Validator):
    """
    Validates each sample against the fields of the configured ENA
    checklist: mandatory presence, restricted-text (regex) rules,
    and case-insensitive text-choice (allowed value) rules.
    """

    @dataclass(slots=True, frozen=True, kw_only=True)
    class Config:
        ena_checklist_id: str

    __slots__ = ['__config']
    __config: Config

    def __init__(self, config: Config, datasource: DataSource | None = None) -> None:
        """
        `datasource` defaults to a fresh ENA datasource, created lazily
        here. (The previous `datasource: DataSource = ena()` default was
        evaluated once at class-definition time — an import-time side
        effect shared by every instance.)
        """
        super().__init__()
        self.__config = config
        self._datasource = datasource if datasource is not None else ena()

    def _validate_data_object(self, obj: DataObject) -> None:
        ena_datasource = self._datasource
        ena_checklist = ena_datasource.get_one('checklist', self.__config.ena_checklist_id)

        validations = ena_checklist.attributes['checklist']
        for key in validations:
            # Error reports use the checklist's display field name, if any
            field_name = key
            if 'field' in validations[key]:
                field_name = validations[key]['field']
            # Mandatory: key must be present, non-None and non-empty
            if 'mandatory' in validations[key] and key not in obj.attributes:
                self.add_error(object_id=obj.id, detail='Must be given', field=[field_name])
                continue
            if 'mandatory' in validations[key] and obj.attributes[key] is None:
                self.add_error(object_id=obj.id, detail='Must be given', field=[field_name])
                continue
            if 'mandatory' in validations[key] and obj.attributes.get(key) == '':
                self.add_error(
                    object_id=obj.id,
                    detail='Must not be empty', field=[field_name]
                )

            # Restricted text: any string entry containing '(' is treated
            # as a regex the value must match
            if 'restricted text' in validations[key] and key in obj.attributes:
                for condition in validations[key]:
                    # isinstance replaces the original `type(...) == str` (E721)
                    if isinstance(condition, str) and '(' in condition:
                        compiled_re = re.compile(condition)
                        # NOTE(review): a present-but-None value would raise
                        # TypeError here — confirm upstream guarantees str
                        if not compiled_re.search(obj.attributes.get(key)):
                            self.add_error(
                                object_id=obj.id,
                                detail='Must match specific pattern', field=[field_name]
                            )

            # Check against allowed values (case-insensitive)
            if 'text choice' in validations[key] and key in obj.attributes:
                for condition in validations[key]:
                    if isinstance(condition, list):
                        allowed_values = condition
                        if obj.attributes.get(key).lower() not in \
                                [x.lower() for x in allowed_values]:
                            self.add_error(
                                object_id=obj.id,
                                detail='Must be in allowed values', field=[field_name]
                            )