tol-sdk 1.8.8__py3-none-any.whl → 1.8.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tol/ena/ena_datasource.py +8 -3
- tol/validators/__init__.py +3 -2
- tol/validators/allowed_keys.py +1 -0
- tol/validators/taxon_matches_goat.py +86 -0
- {tol_sdk-1.8.8.dist-info → tol_sdk-1.8.9.dist-info}/METADATA +1 -1
- {tol_sdk-1.8.8.dist-info → tol_sdk-1.8.9.dist-info}/RECORD +10 -9
- {tol_sdk-1.8.8.dist-info → tol_sdk-1.8.9.dist-info}/WHEEL +1 -1
- {tol_sdk-1.8.8.dist-info → tol_sdk-1.8.9.dist-info}/entry_points.txt +0 -0
- {tol_sdk-1.8.8.dist-info → tol_sdk-1.8.9.dist-info}/licenses/LICENSE +0 -0
- {tol_sdk-1.8.8.dist-info → tol_sdk-1.8.9.dist-info}/top_level.txt +0 -0
tol/ena/ena_datasource.py
CHANGED
|
@@ -11,6 +11,7 @@ from cachetools.func import ttl_cache
|
|
|
11
11
|
|
|
12
12
|
import requests
|
|
13
13
|
from requests.auth import HTTPBasicAuth
|
|
14
|
+
from requests.exceptions import HTTPError
|
|
14
15
|
|
|
15
16
|
|
|
16
17
|
from .client import EnaApiClient
|
|
@@ -118,9 +119,13 @@ class EnaDataSource(
|
|
|
118
119
|
if object_type == 'submittable_taxon':
|
|
119
120
|
ena_response = []
|
|
120
121
|
for object_id in object_ids:
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
122
|
+
try:
|
|
123
|
+
response = client.get_detail(object_type, [object_id])
|
|
124
|
+
if response and isinstance(response, list):
|
|
125
|
+
ena_response.extend(response)
|
|
126
|
+
except HTTPError as http_error:
|
|
127
|
+
if http_error.response.status_code != 400:
|
|
128
|
+
raise
|
|
124
129
|
else:
|
|
125
130
|
ena_response = client.get_detail(object_type, object_ids)
|
|
126
131
|
# For a checklist we need to convert into a list of dicts
|
tol/validators/__init__.py
CHANGED
|
@@ -19,8 +19,9 @@ from .types import TypesValidator # noqa
|
|
|
19
19
|
from .unique_values import UniqueValuesValidator # noqa
|
|
20
20
|
from .unique_whole_organisms import UniqueWholeOrganismsValidator # noqa
|
|
21
21
|
from .interfaces import Condition # noqa
|
|
22
|
-
from .min_one_valid_value import MinOneValidValueValidator
|
|
22
|
+
from .min_one_valid_value import MinOneValidValueValidator # noqa
|
|
23
23
|
from .value_check import ValueCheckValidator # noqa
|
|
24
24
|
from .branching import BranchingValidator # noqa
|
|
25
25
|
from .unique_value_check import UniqueValueCheckValidator # noqa
|
|
26
|
-
from .date_sorting import DateSortingValidator
|
|
26
|
+
from .date_sorting import DateSortingValidator # noqa
|
|
27
|
+
from .taxon_matches_goat import TaxonMatchesGoatValidator # noqa
|
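tol/validators/allowed_keys.py
CHANGED
|
(the +1 -0 hunk for this file was not captured in this extract)
|
tol/validators/taxon_matches_goat.py
ADDED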
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: 2026 Genome Research Ltd.
|
|
2
|
+
#
|
|
3
|
+
# SPDX-License-Identifier: MIT
|
|
4
|
+
|
|
5
|
+
from dataclasses import dataclass
|
|
6
|
+
|
|
7
|
+
from tol.core import DataObject, Validator
|
|
8
|
+
from tol.sources.goat import GoatDataSource, goat
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class TaxonMatchesGoatValidator(Validator):
|
|
12
|
+
"""
|
|
13
|
+
Validates a stream of `DataObject` instances, checking whether its Taxonomy information
|
|
14
|
+
matches that in GoaT
|
|
15
|
+
"""
|
|
16
|
+
@dataclass(slots=True, frozen=True, kw_only=True)
|
|
17
|
+
class Config:
|
|
18
|
+
species_field: str | None = None
|
|
19
|
+
genus_field: str | None = None
|
|
20
|
+
family_field: str | None = None
|
|
21
|
+
superfamily_field: str | None = None
|
|
22
|
+
phylum_field: str | None = None
|
|
23
|
+
kingdom_field: str | None = None
|
|
24
|
+
superkingdom_field: str | None = None
|
|
25
|
+
domain_field: str | None = None
|
|
26
|
+
|
|
27
|
+
__slots__ = ['__config', '__goat_datasource', '_cached_taxa']
|
|
28
|
+
__config: Config
|
|
29
|
+
__goat_datasource: GoatDataSource
|
|
30
|
+
_cached_taxa: dict[str, DataObject]
|
|
31
|
+
|
|
32
|
+
def __init__(self, config: Config) -> None:
|
|
33
|
+
super().__init__()
|
|
34
|
+
self.__config = config
|
|
35
|
+
self.__goat_datasource = goat()
|
|
36
|
+
self._cached_taxa = {}
|
|
37
|
+
|
|
38
|
+
def _validate_data_object(self, obj: DataObject) -> None:
|
|
39
|
+
taxon_id = obj.get_field_by_name('TAXON_ID')
|
|
40
|
+
|
|
41
|
+
# Check whether we already have the information for this id in the cache.
|
|
42
|
+
# If we don't, fetch it from GoaT and add it to the cache
|
|
43
|
+
taxon: DataObject | None
|
|
44
|
+
if taxon_id in self._cached_taxa:
|
|
45
|
+
taxon = self._cached_taxa[taxon_id]
|
|
46
|
+
else:
|
|
47
|
+
taxon = self.__goat_datasource.get_one('taxon', taxon_id)
|
|
48
|
+
|
|
49
|
+
# Add this taxon to cached taxa.
|
|
50
|
+
# Error if GoaT has no taxon with this id
|
|
51
|
+
if taxon is not None:
|
|
52
|
+
self._cached_taxa[taxon_id] = taxon
|
|
53
|
+
else:
|
|
54
|
+
self.add_error(
|
|
55
|
+
object_id=obj.id,
|
|
56
|
+
detail=f'Invalid Taxon ID: {taxon_id}',
|
|
57
|
+
field='taxon_id'
|
|
58
|
+
)
|
|
59
|
+
|
|
60
|
+
# We can't validate a taxon that doesn't exist, so after this error move on
|
|
61
|
+
# to the next DataObject
|
|
62
|
+
return
|
|
63
|
+
|
|
64
|
+
# Check that each associated taxonomy rank for this taxon matches those in GoaT
|
|
65
|
+
taxonomic_ranks = ('species', 'genus', 'family', 'superfamily',
|
|
66
|
+
'phylum', 'kingdom', 'superkingdom', 'domain')
|
|
67
|
+
for rank in taxonomic_ranks:
|
|
68
|
+
# From the rank to check, get the name of its field in the data object we're validating
|
|
69
|
+
# from the validator config. If this field name is `None`, then this taxonomic rank
|
|
70
|
+
# isn't being checked (likely because the data object does not have this field)
|
|
71
|
+
field_name: str | None = getattr(self.__config, f'{rank}_field')
|
|
72
|
+
if field_name is None:
|
|
73
|
+
continue
|
|
74
|
+
|
|
75
|
+
# Fetch the values of these taxonomic ranks
|
|
76
|
+
value_in_data_object = obj.get_field_by_name(field_name)
|
|
77
|
+
value_in_goat = taxon.get_field_by_name(f'{rank}.scientific_name')
|
|
78
|
+
|
|
79
|
+
# Ensure the value in the data object matches the one in GoaT
|
|
80
|
+
if value_in_data_object != value_in_goat:
|
|
81
|
+
self.add_warning(
|
|
82
|
+
object_id=obj.id,
|
|
83
|
+
detail=(f'Value for {field_name} ({value_in_data_object}) '
|
|
84
|
+
f'does not match the value in GoaT ({value_in_goat})'),
|
|
85
|
+
field=field_name,
|
|
86
|
+
)
|
|
{tol_sdk-1.8.8.dist-info → tol_sdk-1.8.9.dist-info}/RECORD
CHANGED
|
@@ -142,7 +142,7 @@ tol/eln/sanitise.py,sha256=fMj-VrQTnw4zn2X0wnjWQAI8gWAa8RYqNuv23LXQssI,406
|
|
|
142
142
|
tol/ena/__init__.py,sha256=T3TCqaHpgi2Uk2PjPGu60GaG2V8cTrHJlVLtZfLFhTQ,174
|
|
143
143
|
tol/ena/client.py,sha256=ldmm7Z9_auQf1zVWjsFLXYgbKvGtSHTsr88YO3zfv2Y,6731
|
|
144
144
|
tol/ena/converter.py,sha256=nxbo4IFzzOvKNMq3Aeiw5iDqVWvY33nTngLppjHAoGY,1144
|
|
145
|
-
tol/ena/ena_datasource.py,sha256=
|
|
145
|
+
tol/ena/ena_datasource.py,sha256=QRj4pXFeAaC0qPdmtPCrCD3DKVsXhu1jYZ97P3AHlnA,9184
|
|
146
146
|
tol/ena/ena_methods.py,sha256=jgpLssZq-F-vgkO-fYu4jrXenmNkdFpFKAY3VKp5HHE,9209
|
|
147
147
|
tol/ena/factory.py,sha256=3IJCmGLo59PWbGmNqmKho5WYG330OjL8SoZYstIwHt4,3423
|
|
148
148
|
tol/ena/filter.py,sha256=UzOx5ivXvA0TY2QuNzFmS-zDPVNnaAx07DMVkAwVsAE,3370
|
|
@@ -335,8 +335,8 @@ tol/treeval/treeval_datasource.py,sha256=GzY6JwH67b5QdV-UVdCFJfgGAIuZ96J2nl53YxZ
|
|
|
335
335
|
tol/utils/__init__.py,sha256=764-Na1OaNGUDWpMIu51ZtXG7n_nB5MccUFK6LmkWRI,138
|
|
336
336
|
tol/utils/csv.py,sha256=mihww25fSn72c4h-RFeqD_pFIG6KHZP4v1_C0rx81ws,421
|
|
337
337
|
tol/utils/s3.py,sha256=aoYCwJ-qcMqFrpxmViFqPa0O1jgp0phtztO3-0CSNjw,491
|
|
338
|
-
tol/validators/__init__.py,sha256=
|
|
339
|
-
tol/validators/allowed_keys.py,sha256=
|
|
338
|
+
tol/validators/__init__.py,sha256=sF9i4rxi-NsJkqLvvxj2k_I_QPRgkhQZc9ErSqT7quk,1465
|
|
339
|
+
tol/validators/allowed_keys.py,sha256=eLZnr6DVqK6ru6b-T-1hvzSdAxlWZkTNZuohrbO0vS8,1525
|
|
340
340
|
tol/validators/allowed_values.py,sha256=-Yy3Sqo1WYacGKlot_dn3M2o7Oj5MXOioJrJmrWCCxs,1536
|
|
341
341
|
tol/validators/allowed_values_from_datasource.py,sha256=9cVwllBbzfCls8UsojazfCInt9_AakA0_H9pBO1wSL4,3173
|
|
342
342
|
tol/validators/assert_on_condition.py,sha256=eBGgSVfIQ6e45SheM-ZDg7daXJjyZxRVS5L8AWvbXag,2027
|
|
@@ -351,6 +351,7 @@ tol/validators/regex.py,sha256=dLAi_vQt9_DsT6wQZmbYC7X5-Wp15l0leUE6XkPaItg,2602
|
|
|
351
351
|
tol/validators/regex_by_value.py,sha256=XM5EnT4vgD17rfpR3bUE9I56IemSw26BI9MZtMakd4E,2582
|
|
352
352
|
tol/validators/specimens_have_same_taxon.py,sha256=BaJcZ38ZprPcuGTIorSxxC9uGN0_lj6HS6B54EObcuY,2183
|
|
353
353
|
tol/validators/sts_fields.py,sha256=aYbzy15btEg4-ocDT1qrspe7-atoWRrOJ_KmuPU6J14,8936
|
|
354
|
+
tol/validators/taxon_matches_goat.py,sha256=-5UJus3WZOc_ji5Kmat43nI6Auhr924BVi-igEnFP6Q,3482
|
|
354
355
|
tol/validators/tolid.py,sha256=VOb6lNFz11H_0KaWX8_nvsw8xJEa6KrjB0p-5lkcqog,3885
|
|
355
356
|
tol/validators/types.py,sha256=jMVpckRp8RS93f7usf58YH_K-5rKWgZIYs7bO9dHhQc,2914
|
|
356
357
|
tol/validators/unique_value_check.py,sha256=sFvDooYkKeORvULGEOTsgIcxlbe0AXDWxY3Gbr3j0KI,1282
|
|
@@ -359,9 +360,9 @@ tol/validators/unique_whole_organisms.py,sha256=RdqA1GzIf3LTdrmNGGdxv0aW2udDY2P9
|
|
|
359
360
|
tol/validators/value_check.py,sha256=DdNx_B1gns01zgBg5N6Bwia46Aukw6MAteM-M37Kv1k,1122
|
|
360
361
|
tol/validators/interfaces/__init__.py,sha256=jtOxnwnwqV_29xjmmMcS_kvlt-pQiWwQYJn2YRP07_w,172
|
|
361
362
|
tol/validators/interfaces/condition_evaluator.py,sha256=nj8Cb8hi47OBy6OVNfeLhF-Pjwtr8MiOSymYL6hfVes,3766
|
|
362
|
-
tol_sdk-1.8.
|
|
363
|
-
tol_sdk-1.8.
|
|
364
|
-
tol_sdk-1.8.
|
|
365
|
-
tol_sdk-1.8.
|
|
366
|
-
tol_sdk-1.8.
|
|
367
|
-
tol_sdk-1.8.
|
|
363
|
+
tol_sdk-1.8.9.dist-info/licenses/LICENSE,sha256=RF9Jacy-9BpUAQQ20INhTgtaNBkmdTolYCHtrrkM2-8,1077
|
|
364
|
+
tol_sdk-1.8.9.dist-info/METADATA,sha256=MgDxVlsVbpKw9s2vFEdeeTUYpv9hdL48ETxeGTbGF-Y,3142
|
|
365
|
+
tol_sdk-1.8.9.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
|
|
366
|
+
tol_sdk-1.8.9.dist-info/entry_points.txt,sha256=jH3HfTwxjzog7E3lq8CKpUWGIRY9FSXbyL6CpUmv6D0,36
|
|
367
|
+
tol_sdk-1.8.9.dist-info/top_level.txt,sha256=PwKMQLphyZNvagBoriVbl8uwHXQl8IC1niawVG0iXMM,10
|
|
368
|
+
tol_sdk-1.8.9.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|