tol-sdk 1.7.4__py3-none-any.whl → 1.7.5b2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50)
  1. tol/api_base/__init__.py +1 -0
  2. tol/api_base/blueprint.py +19 -8
  3. tol/{s3/data_upload/blueprint.py → api_base/data_upload.py} +21 -6
  4. tol/api_base/pipeline_steps.py +4 -4
  5. tol/api_client/api_datasource.py +8 -8
  6. tol/api_client/converter.py +38 -52
  7. tol/api_client/factory.py +21 -19
  8. tol/api_client/parser.py +138 -98
  9. tol/api_client/view.py +118 -43
  10. tol/core/__init__.py +2 -1
  11. tol/core/data_object.py +27 -9
  12. tol/core/data_object_converter.py +37 -2
  13. tol/core/factory.py +51 -62
  14. tol/core/validate.py +1 -0
  15. tol/ena/client.py +61 -10
  16. tol/ena/ena_datasource.py +16 -10
  17. tol/ena/ena_methods.py +33 -32
  18. tol/ena/parser.py +15 -2
  19. tol/flows/converters/__init__.py +2 -0
  20. tol/flows/converters/incoming_sample_to_ena_sample_converter.py +130 -0
  21. tol/flows/converters/incoming_sample_to_incoming_sample_with_lists_converter.py +46 -0
  22. tol/s3/__init__.py +0 -1
  23. tol/sql/model.py +1 -1
  24. tol/sql/pipeline_step/factory.py +1 -1
  25. tol/sql/sql_converter.py +7 -1
  26. tol/validators/__init__.py +12 -1
  27. tol/validators/allowed_keys.py +17 -12
  28. tol/validators/allowed_values.py +21 -63
  29. tol/validators/allowed_values_from_datasource.py +89 -0
  30. tol/validators/assert_on_condition.py +56 -0
  31. tol/validators/ena_checklist.py +73 -0
  32. tol/validators/ena_submittable.py +61 -0
  33. tol/validators/interfaces/__init__.py +5 -0
  34. tol/validators/interfaces/condition_evaluator.py +102 -0
  35. tol/validators/min_one_valid_value.py +55 -0
  36. tol/validators/mutually_exclusive.py +111 -0
  37. tol/validators/regex.py +30 -23
  38. tol/validators/regex_by_value.py +33 -33
  39. tol/validators/specimens_have_same_taxon.py +60 -0
  40. tol/validators/sts_fields.py +88 -0
  41. tol/validators/tolid.py +110 -0
  42. tol/validators/unique_values.py +25 -17
  43. tol/validators/unique_whole_organisms.py +109 -0
  44. {tol_sdk-1.7.4.dist-info → tol_sdk-1.7.5b2.dist-info}/METADATA +1 -1
  45. {tol_sdk-1.7.4.dist-info → tol_sdk-1.7.5b2.dist-info}/RECORD +49 -36
  46. tol/s3/data_upload/__init__.py +0 -3
  47. {tol_sdk-1.7.4.dist-info → tol_sdk-1.7.5b2.dist-info}/WHEEL +0 -0
  48. {tol_sdk-1.7.4.dist-info → tol_sdk-1.7.5b2.dist-info}/entry_points.txt +0 -0
  49. {tol_sdk-1.7.4.dist-info → tol_sdk-1.7.5b2.dist-info}/licenses/LICENSE +0 -0
  50. {tol_sdk-1.7.4.dist-info → tol_sdk-1.7.5b2.dist-info}/top_level.txt +0 -0
tol/validators/ena_submittable.py ADDED
@@ -0,0 +1,61 @@
+ # SPDX-FileCopyrightText: 2025 Genome Research Ltd.
+ #
+ # SPDX-License-Identifier: MIT
+
+ from dataclasses import dataclass
+ from typing import Any, Dict
+
+ from tol.core import DataObject, DataSource
+ from tol.core.validate import Validator
+ from tol.sources.ena import ena
+
+
+ class EnaSubmittableValidator(Validator):
+     """
+     Validates that a stream of `DataObject` instances
+     contains field that is part of a list.
+     """
+     @dataclass(slots=True, frozen=True, kw_only=True)
+     class Config:
+         field_name: str
+
+     __slots__ = ['__config', '__ena_datasource', '__cached_species']
+     __config: Config
+     __ena_datasource: DataSource
+     __cached_species: Dict[str, Any]
+
+     def __init__(
+         self,
+         config: Config,
+         ena_datasource: DataSource | None = ena(),  # For testing
+     ) -> None:
+
+         super().__init__()
+
+         self.__config = config
+         self.__ena_datasource = ena_datasource
+         self.__cached_species = {}
+
+     def _validate_data_object(
+         self,
+         obj: DataObject
+     ) -> None:
+         taxon_id = obj.get_field_by_name(self.__config.field_name)
+         if taxon_id not in self.__cached_species:
+             ena_taxon = self.__ena_datasource.get_one('submittable_taxon', taxon_id)
+             if ena_taxon:
+                 self.__cached_species[taxon_id] = ena_taxon
+         if taxon_id not in self.__cached_species:
+             self.add_error(
+                 object_id=obj.id,
+                 detail=f'Field {self.__config.field_name} value '
+                        f'"{taxon_id}" not found in ENA',
+                 field=self.__config.field_name,
+             )
+         elif not self.__cached_species[taxon_id].submittable:
+             self.add_error(
+                 object_id=obj.id,
+                 detail=f'Field {self.__config.field_name} value '
+                        f'"{taxon_id}" is not submittable in ENA',
+                 field=self.__config.field_name,
+             )
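A minimal usage sketch for the new validator, based only on the calls visible in this hunk (`get_one('submittable_taxon', …)` returning an object with a `submittable` attribute). The field name, the taxon ID, and the stub class are illustrative; the stub stands in for the default `ena()` datasource, which is otherwise constructed when the module is imported, presumably why the parameter is marked "For testing". This assumes the package imports cleanly in your environment:

```python
from types import SimpleNamespace

from tol.validators.ena_submittable import EnaSubmittableValidator


class StubEnaDatasource:
    """Duck-typed stand-in for the ENA DataSource (illustrative only)."""

    def get_one(self, object_type, object_id):
        # Pretend every taxon is known, but only taxon '9606' is submittable.
        return SimpleNamespace(submittable=(object_id == '9606'))


config = EnaSubmittableValidator.Config(field_name='taxon_id')
validator = EnaSubmittableValidator(config, ena_datasource=StubEnaDatasource())
```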
tol/validators/interfaces/__init__.py ADDED
@@ -0,0 +1,5 @@
+ # SPDX-FileCopyrightText: 2025 Genome Research Ltd.
+ #
+ # SPDX-License-Identifier: MIT
+
+ from .condition_evaluator import Condition, ConditionDict, ConditionEvaluator  # noqa
tol/validators/interfaces/condition_evaluator.py ADDED
@@ -0,0 +1,102 @@
+ # SPDX-FileCopyrightText: 2025 Genome Research Ltd.
+ #
+ # SPDX-License-Identifier: MIT
+
+ from abc import ABC
+ from dataclasses import dataclass
+ from typing import Any, Dict, Tuple, cast
+
+ from tol.core import DataObject
+
+
+ ConditionDict = Dict[str, str | Any | bool]
+ """
+ The dict representation of a Condition. Conditions can be constructed
+ from such a dict through Condition.from_dict(condition_dict)
+ """
+
+
+ @dataclass(slots=True)
+ class Condition:
+     field: str
+     operator: str
+     value: Any
+     # If this condition fails, should it be an error or a warning?
+     # The reporting of this error or warning is done in the calling validator
+     is_error: bool = True
+
+     def __repr__(self) -> str:
+         return f'{self.field} {self.operator} {self.value}'
+
+     @staticmethod
+     def from_dict(condition_dict: ConditionDict) -> 'Condition':
+         """
+         A means of instantiating a Condition from a dictionary.
+         This is a separate method (rather than constructing with kwargs
+         like `Condition(**condition_dict))`) to allow for both precense
+         and type checking for each field.
+         """
+         try:
+             # Extract fields
+             field = condition_dict['field']
+             operator = condition_dict['operator']
+             value = condition_dict['value']
+             is_error = condition_dict.get('is_error', True)
+
+             # Ensure fields are the correct type
+             if not isinstance(field, str) and not isinstance(operator, str):
+                 raise Exception(
+                     f'Dictionary {condition_dict} not in valid format '
+                     f'to convert to Condition (type of condition dict incorrect)'
+                 )
+
+             return Condition(
+                 cast(str, field),
+                 cast(str, operator),
+                 value,
+                 cast(bool, is_error),
+             )
+         except IndexError as e:
+             raise Exception(
+                 f'Dictionary {condition_dict} not in valid format '
+                 f'to convert to Condition (key "{e.args[0]}" not found)'
+             )
+
+
+ class ConditionEvaluator(ABC):
+     """
+     Interface to be inherited by validators. Evaluates the provided condition given its
+     operator and operands
+     """
+     def _evaluate_condition(self, condition: Condition, obj: DataObject) -> Tuple[bool, Any]:
+         """
+         Evaluates the provided condition given its operator and operands.
+         If `operator` is not one of the supported operators, an exception is thrown.
+         """
+         value_to_test = obj.get_field_by_name(condition.field)
+
+         match condition.operator:
+             case '==':
+                 return (value_to_test == condition.value, value_to_test)
+             case '!=':
+                 return (value_to_test != condition.value, value_to_test)
+             case '<':
+                 return (value_to_test < condition.value, value_to_test)
+             case '<=':
+                 return (value_to_test <= condition.value, value_to_test)
+             case '>':
+                 return (value_to_test > condition.value, value_to_test)
+             case '>=':
+                 return (value_to_test >= condition.value, value_to_test)
+             case 'in':
+                 return (value_to_test in condition.value, value_to_test)
+             case _:
+                 raise Exception(f'VALIDATOR SETUP ERROR: `{condition.operator}` is not '
+                                 f'a supported operator for {type(self).__name__}')
+
+     def _does_condition_pass(self, condition: Condition, obj: DataObject) -> bool:
+         """
+         Helper function for when you only want to know whether the condition passes,
+         and don't need the actual value
+         """
+         return self._evaluate_condition(condition, obj)[0]
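A short sketch of the `Condition`/`ConditionEvaluator` pair as defined above. Everything used here is visible in this hunk; the duck-typed `FakeDataObject` (standing in for a real `DataObject`, whose constructor is not shown in this diff) and the field name are illustrative:

```python
from tol.validators.interfaces import Condition, ConditionEvaluator


class DemoEvaluator(ConditionEvaluator):
    """Trivial subclass, used only to reach the protected helpers."""


class FakeDataObject:
    """Duck-typed stand-in exposing the get_field_by_name() call used above."""

    def __init__(self, fields):
        self._fields = fields

    def get_field_by_name(self, name):
        return self._fields.get(name)


condition = Condition.from_dict(
    {'field': 'lifestage', 'operator': 'in', 'value': ['ADULT', 'JUVENILE']}
)
obj = FakeDataObject({'lifestage': 'ADULT'})

passed, value = DemoEvaluator()._evaluate_condition(condition, obj)
print(passed, value)  # True ADULT
```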
tol/validators/min_one_valid_value.py ADDED
@@ -0,0 +1,55 @@
+ # SPDX-FileCopyrightText: 2025 Genome Research Ltd.
+ #
+ # SPDX-License-Identifier: MIT
+
+ from dataclasses import dataclass
+
+ from tol.core import DataObject
+ from tol.core.validate import Validator
+
+
+ class MinOneValidValueValidator(Validator):
+     """
+     Validates that a stream of `DataObject` instances
+     have at least one valid value present in a list of specified keys.
+     """
+     @dataclass(slots=True, frozen=True, kw_only=True)
+     class Config:
+         non_valid_values: list[str]
+         keys: list[str]
+
+     __slots__ = ['__config']
+     __config: Config
+
+     def __init__(
+         self,
+         config: Config,
+     ) -> None:
+
+         super().__init__()
+         self.__config = config
+
+     def _validate_data_object(
+         self,
+         obj: DataObject
+     ) -> None:
+
+         found_valid_value = False
+
+         for key in self.__config.keys:
+             value = obj.attributes[key]
+
+             if value is not None and value not in self.__config.non_valid_values:
+                 found_valid_value = True
+                 break
+
+         if not found_valid_value:
+             self.add_error(
+                 object_id=obj.id,
+                 detail=(
+                     f'At least one of: {self.__config.keys} '
+                     'must not be: ' + ', '.join(self.__config.non_valid_values)
+                     + ' or empty.'
+                 ),
+                 field=', '.join(self.__config.keys),
+             )
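A hedged configuration sketch for the new validator; the keys and placeholder values below are illustrative and only need to match the attribute names on the incoming objects:

```python
from tol.validators.min_one_valid_value import MinOneValidValueValidator

# An object passes if at least one of the listed keys holds a value that is
# neither None nor in non_valid_values. Keys and values are examples only.
config = MinOneValidValueValidator.Config(
    non_valid_values=['NOT_COLLECTED', 'NOT_PROVIDED'],
    keys=['collected_by', 'collector_orcid'],
)
validator = MinOneValidValueValidator(config)
```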
tol/validators/mutually_exclusive.py ADDED
@@ -0,0 +1,111 @@
+ # SPDX-FileCopyrightText: 2025 Genome Research Ltd.
+ #
+ # SPDX-License-Identifier: MIT
+
+ from dataclasses import dataclass
+ from typing import Any, List
+
+ from tol.core import DataObject, Validator
+
+ from .interfaces import Condition, ConditionDict, ConditionEvaluator
+
+
+ class MutuallyExclusiveValidator(Validator, ConditionEvaluator):
+     """
+     Validates an incoming stream of `DataObject` instances,
+     where the resultant field from field_one_condition must not
+     have the same values for target_fields as the resultant
+     field from field_two_condition
+     """
+     @dataclass(slots=True, frozen=True, kw_only=True)
+     class Config:
+         first_field_where: ConditionDict
+         second_field_where: ConditionDict
+         target_fields: List[str]
+         detail: str | None = None
+
+         def _get_error_message(self) -> str:
+             # Extract conditions
+             first_condition = Condition.from_dict(self.first_field_where)
+             second_condition = Condition.from_dict(self.second_field_where)
+
+             # Use a pre-defined, hard-coded detail message if one was not provided
+             if self.detail is None:
+                 multiple_target_fields = len(self.target_fields) > 1
+                 possible_plural = 's' if multiple_target_fields else ''
+
+                 target_fields_str = ''
+                 if multiple_target_fields:
+                     for index, field in enumerate(self.target_fields):
+                         if index == 0:
+                             # First field in the list
+                             target_fields_str += f'{field}'
+                         elif index == len(self.target_fields) - 1:
+                             # Last field in the list
+                             target_fields_str += f' and {field}'
+                         else:
+                             # Middle fields
+                             target_fields_str += f', {field}'
+                 else:  # Only one field
+                     target_fields_str = self.target_fields[0]
+
+                 return (
+                     f'The field{possible_plural} {target_fields_str} cannot have the same '
+                     f'value{possible_plural} both when {first_condition} and when '
+                     f'{second_condition}'
+                 )
+             else:
+                 return self.detail
+
+     __slots__ = ['__config', '__first_list', '__second_list']
+     __config: Config
+     __first_list: List[Any]
+     __second_list: List[Any]
+
+     def __init__(self, config: Config) -> None:
+         super().__init__()
+
+         self.__config = config
+         self.__first_list = []
+         self.__second_list = []
+
+     def _validate_data_object(self, obj: DataObject) -> None:
+         # Check first field
+         if self._does_condition_pass(Condition.from_dict(self.__config.first_field_where), obj):
+             # Check whether the values of the target fields were found in the second list
+             if [
+                 obj.get_field_by_name(target_field)
+                 for target_field in self.__config.target_fields
+             ] in self.__second_list:
+                 self.add_error(
+                     object_id=obj.id,
+                     detail=self.__config._get_error_message()
+                 )
+
+             # Add the values of the target fields to the first list
+             self.__first_list.append(
+                 [
+                     obj.get_field_by_name(target_field)
+                     for target_field in self.__config.target_fields
+                 ]
+             )
+         # Check second field (same as the first condition, but for the second!)
+         elif self._does_condition_pass(Condition.from_dict(self.__config.second_field_where), obj):
+             # Check whether the values of the target fields were found in the first list
+             if [
+                 obj.get_field_by_name(target_field)
+                 for target_field in self.__config.target_fields
+             ] in self.__first_list:
+                 self.add_error(
+                     object_id=obj.id,
+                     detail=self.__config._get_error_message()
+                 )
+
+             # Add the values of the target fields to the second list
+             self.__second_list.append(
+                 [
+                     obj.get_field_by_name(target_field)
+                     for target_field in self.__config.target_fields
+                 ]
+             )
+         # If neither condition passes, the data object must be valid (for this validator anyway!)
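A hedged configuration sketch using the `ConditionDict` shape defined in `condition_evaluator.py`; the rule, field names, and values below are illustrative only:

```python
from tol.validators.mutually_exclusive import MutuallyExclusiveValidator

# Hypothetical rule: rows matching the first condition and rows matching the
# second condition must not share the same (specimen_id, taxon_id) pair.
config = MutuallyExclusiveValidator.Config(
    first_field_where={'field': 'symbiont', 'operator': '==', 'value': 'TARGET'},
    second_field_where={'field': 'symbiont', 'operator': '==', 'value': 'SYMBIONT'},
    target_fields=['specimen_id', 'taxon_id'],
)
validator = MutuallyExclusiveValidator(config)
```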
tol/validators/regex.py CHANGED
@@ -4,12 +4,18 @@

  import re
  from dataclasses import dataclass
- from typing import Any
+ from typing import Any, List

  from tol.core import DataObject
  from tol.core.validate import Validator


+ RegexDict = dict[
+     str,
+     str | bool | list[Any],
+ ]
+
+
  @dataclass(frozen=True, kw_only=True)
  class Regex:
      key: str
@@ -23,23 +29,22 @@ class Regex:
          return re.search(self.regex, str(__v or ''))


- RegexDict = dict[
-     str,
-     str | bool | list[Any],
- ]
- """Can also specify `Regex` as a `dict`"""
-
-
  class RegexValidator(Validator):
      """
      Validates an incoming stream of `DataObject` instances
      according to the specified allowed values for a given
      key.
      """
+     @dataclass(slots=True, frozen=True, kw_only=True)
+     class Config:
+         regexes: List[Regex | RegexDict]
+
+     __slots__ = ['__config']
+     __config: Config

      def __init__(
          self,
-         config: list[Regex | RegexDict]
+         config: Config
      ) -> None:

          super().__init__()
@@ -54,19 +59,6 @@ class RegexValidator(Validator):
          for k, v in obj.attributes.items():
              self.__validate_attribute(obj, k, v)

-     def __get_config(
-         self,
-         config: list[Regex | RegexDict],
-     ) -> list[Regex]:
-
-         # Ensure config is in Regex format
-         # (as you can either pass in a list of Regex or a RegexDict,
-         # which can be used to initialize a Regex)
-         return [
-             c if isinstance(c, Regex) else Regex(**c)
-             for c in config
-         ]
-
      def __validate_attribute(
          self,
          obj: DataObject,
@@ -85,7 +77,7 @@ class RegexValidator(Validator):
          key: str,
      ) -> list[Regex]:
          return [
-             a for a in self.__config
+             a for a in self.__config.regexes
              if a.key == key
          ]

@@ -107,3 +99,18 @@
              detail=c.detail,
              field=c.key,
          )
+
+     def __get_config(
+         self,
+         config: Config,
+     ) -> Config:
+
+         # Ensure config is in Regex format
+         # (as you can either pass in a list of Regex or a RegexDict,
+         # which can be used to initialize a Regex)
+         return self.Config(
+             regexes=[
+                 c if isinstance(c, Regex) else Regex(**c)
+                 for c in config.regexes
+             ]
+         )
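A hedged sketch of the new configuration shape: `RegexValidator` previously took a bare `list[Regex | RegexDict]` and now wraps it in a nested `Config` dataclass, with dict entries still normalised to `Regex` at construction time. The key, pattern, and detail below are illustrative, and any `Regex` fields beyond those referenced in this hunk are not visible here:

```python
from tol.validators.regex import RegexValidator

# Illustrative RegexDict entry using only the fields referenced in this diff
# (key, regex, detail); other Regex fields, if any, are not shown here.
config = RegexValidator.Config(
    regexes=[
        {
            'key': 'specimen_id',
            'regex': r'^[A-Za-z0-9]+$',
            'detail': 'Specimen ID contains unexpected characters',
        },
    ]
)
# RegexValidator(config) would then normalise each dict to a Regex instance
# via __get_config, as shown above.
```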
tol/validators/regex_by_value.py CHANGED
@@ -2,20 +2,13 @@
  #
  # SPDX-License-Identifier: MIT

- from typing import Any
+ from dataclasses import dataclass
+ from typing import Dict, List

  from tol.core import DataObject
  from tol.core.validate import Validator

- from .regex import Regex
-
- RegexDict = dict[
-     str,
-     str | bool | list[Any],
- ]
- Config = dict[str, str | dict[str, list[Regex | RegexDict]]]
-
- """Can also specify `Regex` as a `dict`"""
+ from .regex import Regex, RegexDict


  class RegexByValueValidator(Validator):
@@ -24,46 +17,34 @@ class RegexByValueValidator(Validator):
      according to the specified allowed values for a given
      key.
      """
+     @dataclass(slots=True, frozen=True, kw_only=True)
+     class Config:
+         key_column: str
+         regexes: Dict[str, List[Regex | RegexDict]]
+
+     __slots__ = ['__config']
+     config: Config

      def __init__(
          self,
-         config: dict[str, str | list[str]]
+         config: Config
      ) -> None:

          super().__init__()

          self.__config = self.__get_config(config)

-     def __get_config(
-         self,
-         config: Config,
-     ) -> Config:
-
-         return {
-             'key_column': config['key_column'],
-             'regexes': {
-                 k: [
-                     # Ensure they're all in Regex format
-                     # (as you can either pass in a list of Regex or a RegexDict,
-                     # which can be used to initialize a Regex)
-                     c if isinstance(c, Regex) else Regex(**c)
-                     for c in v
-                 ]
-                 for k, v in config['regexes'].items()
-             }
-         }
-
      def _validate_data_object(
          self,
          obj: DataObject
      ) -> None:
          # Pull out value of the 'key_column' attribute
-         key_column_value = obj.attributes.get(self.__config['key_column'])
+         key_column_value = obj.attributes.get(self.__config.key_column)
          if not key_column_value:
              return

          # Pull out relevant regex list based on this value: {[{'name': 'regex'}]}
-         regex_list = self.__config['regexes'].get(key_column_value)
+         regex_list = self.__config.regexes.get(key_column_value)
          if not regex_list:
              return
          self.__validate_attribute(obj, regex_list)
@@ -71,7 +52,7 @@ class RegexByValueValidator(Validator):
      def __validate_attribute(
          self,
          obj: DataObject,
-         regexes: list[Regex],
+         regexes: List[Regex],
      ) -> None:
          for r in regexes:
              attribute_name = r.key
@@ -97,3 +78,22 @@ class RegexByValueValidator(Validator):
              detail=c.detail,
              field=c.key,
          )
+
+     def __get_config(
+         self,
+         config: Config,
+     ) -> Config:
+
+         # Ensure config is in Regex format
+         # (as you can either pass in a list of Regex or a RegexDict,
+         # which can be used to initialize a Regex)
+         return self.Config(
+             key_column=config.key_column,
+             regexes={
+                 k: [
+                     c if isinstance(c, Regex) else Regex(**c)
+                     for c in v
+                 ]
+                 for k, v in config.regexes.items()
+             }
+         )
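A hedged sketch of the replacement for the old dict-based config: regexes are now grouped under a `Config` dataclass, keyed by the value found in `key_column`. The column name, key value, field, and pattern below are all illustrative, and the same caveat about unseen `Regex` fields applies:

```python
from tol.validators.regex_by_value import RegexByValueValidator

# Illustrative only: the 'GAL' column selects which regex list applies.
config = RegexByValueValidator.Config(
    key_column='GAL',
    regexes={
        'SANGER INSTITUTE': [
            {
                'key': 'rack_or_plate_id',
                'regex': r'^[A-Z]{2}\d{8}$',
                'detail': 'Rack/plate ID is not in the expected format',
            },
        ],
    },
)
# RegexByValueValidator(config) would normalise the dict entries to Regex
# instances via __get_config, exactly as in RegexValidator.
```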
tol/validators/specimens_have_same_taxon.py ADDED
@@ -0,0 +1,60 @@
+ # SPDX-FileCopyrightText: 2025 Genome Research Ltd.
+ #
+ # SPDX-License-Identifier: MIT
+
+ from dataclasses import dataclass
+ from typing import Dict
+
+ from tol.core import Validator
+ from tol.core.data_object import DataObject
+
+
+ class SpecimensHaveSameTaxonValidator(Validator):
+     """
+     Validates an incoming stream of `DataObject` instances.
+     For each data object (sample) not a SYMBIONT, it checks that
+     there are no samples with SPECIMEN_ID which has different TAXON_ID
+     """
+     @dataclass(slots=True, frozen=True, kw_only=True)
+     class Config:
+         taxon_id_field: str
+         symbiont_field: str
+         specimen_id_field: str
+
+     __slots__ = ['__config', '__seen']
+     __config: Config
+     __seen: Dict[str, str]
+
+     def __init__(self, config: Config) -> None:
+         super().__init__()
+         self.__seen = {}
+         self.__config = config
+
+     def _validate_data_object(self, obj: DataObject) -> None:
+         # Explaining the code concept using a standard example
+         # seen{}
+         # 1st Pass=> element['specimen_id'] = A
+         #            element['taxon_id'] = AA
+         #            seen{ A:AA }
+         # 2nd pass=> element['specimen_id'] = A
+         #            element['taxon_id'] = AB
+         #            AB != AA
+         #            Flag error
+         # From Nithin :)
+
+         # Ensure the data object is not a SYMBIONT
+         if obj.attributes.get(self.__config.symbiont_field) != 'SYMBIONT':
+             specimen_id = obj.attributes.get(self.__config.specimen_id_field)
+             if specimen_id is None:
+                 return
+             taxon_id = obj.attributes.get(self.__config.taxon_id_field)
+             if taxon_id is None:
+                 return
+             if specimen_id in self.__seen and taxon_id != self.__seen[specimen_id]:
+                 self.add_error(
+                     object_id=obj.id,
+                     detail='A non-symbiont must have a matching Specimen ID and Taxon ID',
+                     field=self.__config.specimen_id_field,
+                 )
+             if specimen_id not in self.__seen:
+                 self.__seen[specimen_id] = taxon_id
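A hedged configuration sketch; the field names are illustrative and only need to match the attribute names carried by the incoming `DataObject` stream:

```python
from tol.validators.specimens_have_same_taxon import SpecimensHaveSameTaxonValidator

# Illustrative field names mapping onto the sample attributes being checked.
config = SpecimensHaveSameTaxonValidator.Config(
    taxon_id_field='taxon_id',
    symbiont_field='symbiont',
    specimen_id_field='specimen_id',
)
validator = SpecimensHaveSameTaxonValidator(config)
```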
tol/validators/sts_fields.py ADDED
@@ -0,0 +1,88 @@
+ # SPDX-FileCopyrightText: 2025 Genome Research Ltd.
+ #
+ # SPDX-License-Identifier: MIT
+
+ from dataclasses import dataclass
+ from typing import List
+
+ from tol.core import DataObject, DataSource
+ from tol.core.validate import Validator
+ from tol.sources.sts import sts
+
+
+ class StsFieldsValidator(Validator):
+     """
+     Validates that a stream of `DataObject` instances
+     contains fields that observe the validations in STS
+     """
+     @dataclass(slots=True, frozen=True, kw_only=True)
+     class Config:
+         project_code: str
+
+     __slots__ = ['__config', '__datasource', '__fields']
+     __config: Config
+     __datasource: DataSource
+     __fields: List[str | int | float]
+
+     def __init__(
+         self,
+         config: Config,
+         datasource: DataSource = sts(),  # For testing
+     ) -> None:
+
+         super().__init__()
+
+         self.__config = config
+         self.__datasource = datasource
+         self.__fields = self.__initialize_fields_from_datasource()
+
+     def __initialize_fields_from_datasource(self) -> List[str | int | float]:
+         return {
+             field.get('data_input_key'): field
+             for field in self.__datasource.get_one(
+                 'project', self.__config.project_code
+             ).template.get('data_fields', [])
+             if field.get('in_manifest')
+         }
+
+     def _validate_data_object(
+         self,
+         obj: DataObject
+     ) -> None:
+         for field in self.__fields.values():
+             # Get the value from the data object
+             field_value = obj.get_field_by_name(field.get('data_input_key'))
+             if field.get('mandatory_input') and (field_value is None or field_value == ''):
+                 self.add_error(
+                     object_id=obj.id,
+                     detail=f'Field {field.get("data_input_key")} is required '
+                            f'for project {self.__config.project_code}',
+                     field=field.get('data_input_key'),
+                 )
+             elif field.get('allowed_values') and field_value not in field.get('allowed_values'):
+                 self.add_error(
+                     object_id=obj.id,
+                     detail=f'Field {field.get("data_input_key")} value '
+                            f'"{field_value}" not found in allowed values '
+                            f'{field.get("allowed_values")} for project '
+                            f'{self.__config.project_code}',
+                     field=field.get('data_input_key'),
+                 )
+             elif field.get('min') and field_value < field.get('min'):
+                 self.add_error(
+                     object_id=obj.id,
+                     detail=f'Field {field.get("data_input_key")} value '
+                            f'"{field_value}" is less than minimum value '
+                            f'"{field.get("min")}" for project '
+                            f'{self.__config.project_code}',
+                     field=field.get('data_input_key'),
+                 )
+             elif field.get('max') and field_value > field.get('max'):
+                 self.add_error(
+                     object_id=obj.id,
+                     detail=f'Field {field.get("data_input_key")} value '
+                            f'"{field_value}" is greater than maximum value '
+                            f'"{field.get("max")}" for project '
+                            f'{self.__config.project_code}',
+                     field=field.get('data_input_key'),
+                 )
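A minimal test-style sketch, mirroring only the calls made above (`get_one('project', …).template.get('data_fields', [])` and the per-field keys referenced in `_validate_data_object`). The project code, field definition, and stub class are illustrative; the stub replaces the default `sts()` datasource, which is otherwise constructed when the module is imported, presumably why the parameter is marked "For testing":

```python
from types import SimpleNamespace

from tol.validators.sts_fields import StsFieldsValidator


class StubStsDatasource:
    """Duck-typed stand-in mirroring only the calls made above (illustrative)."""

    def get_one(self, object_type, object_id):
        # One manifest field with an allowed-values rule, as a placeholder.
        template = {
            'data_fields': [
                {
                    'data_input_key': 'organism_part',
                    'in_manifest': True,
                    'mandatory_input': True,
                    'allowed_values': ['WHOLE_ORGANISM', 'LEG', 'BLOOD'],
                },
            ]
        }
        return SimpleNamespace(template=template)


config = StsFieldsValidator.Config(project_code='EXAMPLE_PROJECT')
validator = StsFieldsValidator(config, datasource=StubStsDatasource())
```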