tol-sdk 1.8.8__py3-none-any.whl → 1.8.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
tol/ena/ena_datasource.py CHANGED
@@ -11,6 +11,7 @@ from cachetools.func import ttl_cache
11
11
 
12
12
  import requests
13
13
  from requests.auth import HTTPBasicAuth
14
+ from requests.exceptions import HTTPError
14
15
 
15
16
 
16
17
  from .client import EnaApiClient
@@ -118,9 +119,13 @@ class EnaDataSource(
118
119
  if object_type == 'submittable_taxon':
119
120
  ena_response = []
120
121
  for object_id in object_ids:
121
- response = client.get_detail(object_type, [object_id])
122
- if response and isinstance(response, list):
123
- ena_response.extend(response)
122
+ try:
123
+ response = client.get_detail(object_type, [object_id])
124
+ if response and isinstance(response, list):
125
+ ena_response.extend(response)
126
+ except HTTPError as http_error:
127
+ if http_error.response.status_code != 400:
128
+ raise
124
129
  else:
125
130
  ena_response = client.get_detail(object_type, object_ids)
126
131
  # For a checklist we need to convert into a list of dicts
@@ -19,8 +19,9 @@ from .types import TypesValidator # noqa
19
19
  from .unique_values import UniqueValuesValidator # noqa
20
20
  from .unique_whole_organisms import UniqueWholeOrganismsValidator # noqa
21
21
  from .interfaces import Condition # noqa
22
- from .min_one_valid_value import MinOneValidValueValidator # noqa
22
+ from .min_one_valid_value import MinOneValidValueValidator # noqa
23
23
  from .value_check import ValueCheckValidator # noqa
24
24
  from .branching import BranchingValidator # noqa
25
25
  from .unique_value_check import UniqueValueCheckValidator # noqa
26
- from .date_sorting import DateSortingValidator # noqa
26
+ from .date_sorting import DateSortingValidator # noqa
27
+ from .taxon_matches_goat import TaxonMatchesGoatValidator # noqa
@@ -30,6 +30,7 @@ class AllowedKeysValidator(Validator):
30
30
  **kwargs
31
31
  ) -> None:
32
32
 
33
+ del kwargs
33
34
  super().__init__()
34
35
  self.__config = config
35
36
 
@@ -0,0 +1,86 @@
1
+ # SPDX-FileCopyrightText: 2026 Genome Research Ltd.
2
+ #
3
+ # SPDX-License-Identifier: MIT
4
+
5
+ from dataclasses import dataclass
6
+
7
+ from tol.core import DataObject, Validator
8
+ from tol.sources.goat import GoatDataSource, goat
9
+
10
+
11
class TaxonMatchesGoatValidator(Validator):
    """
    Validates a stream of `DataObject` instances, checking whether each
    object's taxonomy information matches that in GoaT.

    For every object the taxon is looked up by the object's `TAXON_ID`
    field. If no taxon is returned an error is recorded and the object is
    not checked further. Otherwise each configured rank field on the object
    is compared with the taxon's `<rank>.scientific_name` field, and every
    mismatch is recorded as a warning.
    """
    @dataclass(slots=True, frozen=True, kw_only=True)
    class Config:
        # Each attribute holds the name of the field, on the object being
        # validated, that stores the value for that taxonomic rank.
        # `None` (the default) means that rank is not checked.
        species_field: str | None = None
        genus_field: str | None = None
        family_field: str | None = None
        superfamily_field: str | None = None
        phylum_field: str | None = None
        kingdom_field: str | None = None
        superkingdom_field: str | None = None
        domain_field: str | None = None

    __slots__ = ['__config', '__goat_datasource', '_cached_taxa']
    __config: Config
    __goat_datasource: GoatDataSource
    _cached_taxa: dict[str, DataObject]

    # Name of the field, on the object being validated, holding the taxon ID.
    # Used both to read the value and to report errors about it, so that the
    # reported field is always the one that was actually read.
    _TAXON_ID_FIELD = 'TAXON_ID'

    # Ranks that can be compared. Each one maps to `<rank>_field` on `Config`
    # and to `<rank>.scientific_name` on the taxon fetched from GoaT.
    _TAXONOMIC_RANKS = ('species', 'genus', 'family', 'superfamily',
                        'phylum', 'kingdom', 'superkingdom', 'domain')

    def __init__(self, config: Config) -> None:
        """
        Create the validator.

        :param config: names of the rank fields to compare; ranks whose
            field name is `None` are skipped.
        """
        super().__init__()
        self.__config = config
        self.__goat_datasource = goat()
        # Taxa already fetched, keyed by taxon ID, so that each ID is only
        # requested once while it keeps resolving to a taxon.
        self._cached_taxa = {}

    def _validate_data_object(self, obj: DataObject) -> None:
        """
        Compare the taxonomy fields of a single object with GoaT.

        :param obj: the object to validate. Its `TAXON_ID` field selects the
            taxon to compare against.
        """
        taxon_id = obj.get_field_by_name(self._TAXON_ID_FIELD)

        # Only found taxa are ever stored in the cache, so a `None` result
        # here always means "not fetched yet" and never "known to be missing".
        taxon: DataObject | None = self._cached_taxa.get(taxon_id)
        if taxon is None:
            taxon = self.__goat_datasource.get_one('taxon', taxon_id)

            if taxon is None:
                # Report against the field the ID was read from, matching how
                # the rank warnings below report against their own field name.
                self.add_error(
                    object_id=obj.id,
                    detail=f'Invalid Taxon ID: {taxon_id}',
                    field=self._TAXON_ID_FIELD
                )

                # We can't validate against a taxon that doesn't exist, so
                # after this error move on to the next DataObject
                return

            self._cached_taxa[taxon_id] = taxon

        # Check that each configured taxonomy rank matches the one in GoaT
        for rank in self._TAXONOMIC_RANKS:
            # Look up which field of the object holds this rank. `None`
            # means this rank isn't being checked.
            field_name: str | None = getattr(self.__config, f'{rank}_field')
            if field_name is None:
                continue

            value_in_data_object = obj.get_field_by_name(field_name)
            value_in_goat = taxon.get_field_by_name(f'{rank}.scientific_name')

            # A mismatch is only a warning, not an error
            if value_in_data_object != value_in_goat:
                self.add_warning(
                    object_id=obj.id,
                    detail=(f'Value for {field_name} ({value_in_data_object}) '
                            f'does not match the value in GoaT ({value_in_goat})'),
                    field=field_name,
                )
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: tol-sdk
3
- Version: 1.8.8
3
+ Version: 1.8.9
4
4
  Summary: SDK for interaction with ToL, Sanger and external services
5
5
  Author-email: ToL Platforms Team <tol-platforms@sanger.ac.uk>
6
6
  License: MIT
@@ -142,7 +142,7 @@ tol/eln/sanitise.py,sha256=fMj-VrQTnw4zn2X0wnjWQAI8gWAa8RYqNuv23LXQssI,406
142
142
  tol/ena/__init__.py,sha256=T3TCqaHpgi2Uk2PjPGu60GaG2V8cTrHJlVLtZfLFhTQ,174
143
143
  tol/ena/client.py,sha256=ldmm7Z9_auQf1zVWjsFLXYgbKvGtSHTsr88YO3zfv2Y,6731
144
144
  tol/ena/converter.py,sha256=nxbo4IFzzOvKNMq3Aeiw5iDqVWvY33nTngLppjHAoGY,1144
145
- tol/ena/ena_datasource.py,sha256=jEvyUaH4pfFxmdtn6O_PwOdjPz6u80uAT3SLlR2f5nM,8968
145
+ tol/ena/ena_datasource.py,sha256=QRj4pXFeAaC0qPdmtPCrCD3DKVsXhu1jYZ97P3AHlnA,9184
146
146
  tol/ena/ena_methods.py,sha256=jgpLssZq-F-vgkO-fYu4jrXenmNkdFpFKAY3VKp5HHE,9209
147
147
  tol/ena/factory.py,sha256=3IJCmGLo59PWbGmNqmKho5WYG330OjL8SoZYstIwHt4,3423
148
148
  tol/ena/filter.py,sha256=UzOx5ivXvA0TY2QuNzFmS-zDPVNnaAx07DMVkAwVsAE,3370
@@ -335,8 +335,8 @@ tol/treeval/treeval_datasource.py,sha256=GzY6JwH67b5QdV-UVdCFJfgGAIuZ96J2nl53YxZ
335
335
  tol/utils/__init__.py,sha256=764-Na1OaNGUDWpMIu51ZtXG7n_nB5MccUFK6LmkWRI,138
336
336
  tol/utils/csv.py,sha256=mihww25fSn72c4h-RFeqD_pFIG6KHZP4v1_C0rx81ws,421
337
337
  tol/utils/s3.py,sha256=aoYCwJ-qcMqFrpxmViFqPa0O1jgp0phtztO3-0CSNjw,491
338
- tol/validators/__init__.py,sha256=_ETv6oGQ2bTH_6-foYFy9T5wP5OG3cl96zEjvrIS7zk,1399
339
- tol/validators/allowed_keys.py,sha256=RJcHBiguL84B8hjSRaXLNES21yZqaKFwJNp2Tz9zvh0,1506
338
+ tol/validators/__init__.py,sha256=sF9i4rxi-NsJkqLvvxj2k_I_QPRgkhQZc9ErSqT7quk,1465
339
+ tol/validators/allowed_keys.py,sha256=eLZnr6DVqK6ru6b-T-1hvzSdAxlWZkTNZuohrbO0vS8,1525
340
340
  tol/validators/allowed_values.py,sha256=-Yy3Sqo1WYacGKlot_dn3M2o7Oj5MXOioJrJmrWCCxs,1536
341
341
  tol/validators/allowed_values_from_datasource.py,sha256=9cVwllBbzfCls8UsojazfCInt9_AakA0_H9pBO1wSL4,3173
342
342
  tol/validators/assert_on_condition.py,sha256=eBGgSVfIQ6e45SheM-ZDg7daXJjyZxRVS5L8AWvbXag,2027
@@ -351,6 +351,7 @@ tol/validators/regex.py,sha256=dLAi_vQt9_DsT6wQZmbYC7X5-Wp15l0leUE6XkPaItg,2602
351
351
  tol/validators/regex_by_value.py,sha256=XM5EnT4vgD17rfpR3bUE9I56IemSw26BI9MZtMakd4E,2582
352
352
  tol/validators/specimens_have_same_taxon.py,sha256=BaJcZ38ZprPcuGTIorSxxC9uGN0_lj6HS6B54EObcuY,2183
353
353
  tol/validators/sts_fields.py,sha256=aYbzy15btEg4-ocDT1qrspe7-atoWRrOJ_KmuPU6J14,8936
354
+ tol/validators/taxon_matches_goat.py,sha256=-5UJus3WZOc_ji5Kmat43nI6Auhr924BVi-igEnFP6Q,3482
354
355
  tol/validators/tolid.py,sha256=VOb6lNFz11H_0KaWX8_nvsw8xJEa6KrjB0p-5lkcqog,3885
355
356
  tol/validators/types.py,sha256=jMVpckRp8RS93f7usf58YH_K-5rKWgZIYs7bO9dHhQc,2914
356
357
  tol/validators/unique_value_check.py,sha256=sFvDooYkKeORvULGEOTsgIcxlbe0AXDWxY3Gbr3j0KI,1282
@@ -359,9 +360,9 @@ tol/validators/unique_whole_organisms.py,sha256=RdqA1GzIf3LTdrmNGGdxv0aW2udDY2P9
359
360
  tol/validators/value_check.py,sha256=DdNx_B1gns01zgBg5N6Bwia46Aukw6MAteM-M37Kv1k,1122
360
361
  tol/validators/interfaces/__init__.py,sha256=jtOxnwnwqV_29xjmmMcS_kvlt-pQiWwQYJn2YRP07_w,172
361
362
  tol/validators/interfaces/condition_evaluator.py,sha256=nj8Cb8hi47OBy6OVNfeLhF-Pjwtr8MiOSymYL6hfVes,3766
362
- tol_sdk-1.8.8.dist-info/licenses/LICENSE,sha256=RF9Jacy-9BpUAQQ20INhTgtaNBkmdTolYCHtrrkM2-8,1077
363
- tol_sdk-1.8.8.dist-info/METADATA,sha256=iyFf12GxWPRsRVPQgR5mrGzLDhAH-OrJ58-4T6CROLA,3142
364
- tol_sdk-1.8.8.dist-info/WHEEL,sha256=qELbo2s1Yzl39ZmrAibXA2jjPLUYfnVhUNTlyF1rq0Y,92
365
- tol_sdk-1.8.8.dist-info/entry_points.txt,sha256=jH3HfTwxjzog7E3lq8CKpUWGIRY9FSXbyL6CpUmv6D0,36
366
- tol_sdk-1.8.8.dist-info/top_level.txt,sha256=PwKMQLphyZNvagBoriVbl8uwHXQl8IC1niawVG0iXMM,10
367
- tol_sdk-1.8.8.dist-info/RECORD,,
363
+ tol_sdk-1.8.9.dist-info/licenses/LICENSE,sha256=RF9Jacy-9BpUAQQ20INhTgtaNBkmdTolYCHtrrkM2-8,1077
364
+ tol_sdk-1.8.9.dist-info/METADATA,sha256=MgDxVlsVbpKw9s2vFEdeeTUYpv9hdL48ETxeGTbGF-Y,3142
365
+ tol_sdk-1.8.9.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
366
+ tol_sdk-1.8.9.dist-info/entry_points.txt,sha256=jH3HfTwxjzog7E3lq8CKpUWGIRY9FSXbyL6CpUmv6D0,36
367
+ tol_sdk-1.8.9.dist-info/top_level.txt,sha256=PwKMQLphyZNvagBoriVbl8uwHXQl8IC1niawVG0iXMM,10
368
+ tol_sdk-1.8.9.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (80.10.1)
2
+ Generator: setuptools (80.10.2)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5