valuesets 0.3.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of valuesets might be problematic. Click here for more details.

Files changed (248) hide show
  1. valuesets/__init__.py +7 -0
  2. valuesets/_version.py +8 -0
  3. valuesets/datamodel/valuesets.py +13796 -0
  4. valuesets/datamodel/valuesets_dataclass.py +24503 -0
  5. valuesets/datamodel/valuesets_pydantic.py +13796 -0
  6. valuesets/enums/__init__.py +590 -0
  7. valuesets/enums/academic/__init__.py +1 -0
  8. valuesets/enums/academic/research.py +559 -0
  9. valuesets/enums/analytical_chemistry/__init__.py +1 -0
  10. valuesets/enums/analytical_chemistry/mass_spectrometry.py +198 -0
  11. valuesets/enums/bio/__init__.py +1 -0
  12. valuesets/enums/bio/biological_colors.py +238 -0
  13. valuesets/enums/bio/cell_cycle.py +180 -0
  14. valuesets/enums/bio/currency_chemicals.py +52 -0
  15. valuesets/enums/bio/developmental_stages.py +103 -0
  16. valuesets/enums/bio/genome_features.py +182 -0
  17. valuesets/enums/bio/genomics.py +91 -0
  18. valuesets/enums/bio/go_aspect.py +32 -0
  19. valuesets/enums/bio/go_causality.py +58 -0
  20. valuesets/enums/bio/go_evidence.py +129 -0
  21. valuesets/enums/bio/human_developmental_stages.py +62 -0
  22. valuesets/enums/bio/insdc_geographic_locations.py +591 -0
  23. valuesets/enums/bio/insdc_missing_values.py +49 -0
  24. valuesets/enums/bio/lipid_categories.py +67 -0
  25. valuesets/enums/bio/mouse_developmental_stages.py +62 -0
  26. valuesets/enums/bio/plant_biology.py +86 -0
  27. valuesets/enums/bio/plant_developmental_stages.py +54 -0
  28. valuesets/enums/bio/plant_sex.py +81 -0
  29. valuesets/enums/bio/protein_evidence.py +61 -0
  30. valuesets/enums/bio/proteomics_standards.py +123 -0
  31. valuesets/enums/bio/psi_mi.py +306 -0
  32. valuesets/enums/bio/relationship_to_oxygen.py +37 -0
  33. valuesets/enums/bio/sequence_alphabets.py +449 -0
  34. valuesets/enums/bio/sequence_chemistry.py +357 -0
  35. valuesets/enums/bio/sequencing_platforms.py +302 -0
  36. valuesets/enums/bio/structural_biology.py +320 -0
  37. valuesets/enums/bio/taxonomy.py +238 -0
  38. valuesets/enums/bio/trophic_levels.py +85 -0
  39. valuesets/enums/bio/uniprot_species.py +344 -0
  40. valuesets/enums/bio/viral_genome_types.py +47 -0
  41. valuesets/enums/bioprocessing/__init__.py +1 -0
  42. valuesets/enums/bioprocessing/scale_up.py +249 -0
  43. valuesets/enums/business/__init__.py +1 -0
  44. valuesets/enums/business/human_resources.py +275 -0
  45. valuesets/enums/business/industry_classifications.py +181 -0
  46. valuesets/enums/business/management_operations.py +228 -0
  47. valuesets/enums/business/organizational_structures.py +236 -0
  48. valuesets/enums/business/quality_management.py +181 -0
  49. valuesets/enums/business/supply_chain.py +232 -0
  50. valuesets/enums/chemistry/__init__.py +1 -0
  51. valuesets/enums/chemistry/chemical_entities.py +315 -0
  52. valuesets/enums/chemistry/reaction_directionality.py +65 -0
  53. valuesets/enums/chemistry/reactions.py +256 -0
  54. valuesets/enums/clinical/__init__.py +1 -0
  55. valuesets/enums/clinical/nih_demographics.py +177 -0
  56. valuesets/enums/clinical/phenopackets.py +254 -0
  57. valuesets/enums/common_value_sets.py +8791 -0
  58. valuesets/enums/computing/__init__.py +1 -0
  59. valuesets/enums/computing/file_formats.py +294 -0
  60. valuesets/enums/computing/maturity_levels.py +196 -0
  61. valuesets/enums/computing/mime_types.py +227 -0
  62. valuesets/enums/confidence_levels.py +168 -0
  63. valuesets/enums/contributor.py +30 -0
  64. valuesets/enums/core.py +42 -0
  65. valuesets/enums/data/__init__.py +1 -0
  66. valuesets/enums/data/data_absent_reason.py +53 -0
  67. valuesets/enums/data_science/__init__.py +1 -0
  68. valuesets/enums/data_science/binary_classification.py +87 -0
  69. valuesets/enums/data_science/emotion_classification.py +66 -0
  70. valuesets/enums/data_science/priority_severity.py +73 -0
  71. valuesets/enums/data_science/quality_control.py +46 -0
  72. valuesets/enums/data_science/sentiment_analysis.py +50 -0
  73. valuesets/enums/data_science/text_classification.py +97 -0
  74. valuesets/enums/demographics.py +206 -0
  75. valuesets/enums/ecological_interactions.py +151 -0
  76. valuesets/enums/energy/__init__.py +1 -0
  77. valuesets/enums/energy/energy.py +343 -0
  78. valuesets/enums/energy/fossil_fuels.py +29 -0
  79. valuesets/enums/energy/nuclear/__init__.py +1 -0
  80. valuesets/enums/energy/nuclear/nuclear_facilities.py +195 -0
  81. valuesets/enums/energy/nuclear/nuclear_fuel_cycle.py +96 -0
  82. valuesets/enums/energy/nuclear/nuclear_fuels.py +175 -0
  83. valuesets/enums/energy/nuclear/nuclear_operations.py +191 -0
  84. valuesets/enums/energy/nuclear/nuclear_regulatory.py +188 -0
  85. valuesets/enums/energy/nuclear/nuclear_safety.py +164 -0
  86. valuesets/enums/energy/nuclear/nuclear_waste.py +158 -0
  87. valuesets/enums/energy/nuclear/reactor_types.py +163 -0
  88. valuesets/enums/environmental_health/__init__.py +1 -0
  89. valuesets/enums/environmental_health/exposures.py +265 -0
  90. valuesets/enums/geography/__init__.py +1 -0
  91. valuesets/enums/geography/geographic_codes.py +741 -0
  92. valuesets/enums/health/__init__.py +12 -0
  93. valuesets/enums/health/vaccination.py +98 -0
  94. valuesets/enums/health.py +36 -0
  95. valuesets/enums/health_base.py +36 -0
  96. valuesets/enums/healthcare.py +45 -0
  97. valuesets/enums/industry/__init__.py +1 -0
  98. valuesets/enums/industry/extractive_industry.py +94 -0
  99. valuesets/enums/industry/mining.py +388 -0
  100. valuesets/enums/industry/safety_colors.py +201 -0
  101. valuesets/enums/investigation.py +27 -0
  102. valuesets/enums/materials_science/__init__.py +1 -0
  103. valuesets/enums/materials_science/characterization_methods.py +112 -0
  104. valuesets/enums/materials_science/crystal_structures.py +76 -0
  105. valuesets/enums/materials_science/material_properties.py +119 -0
  106. valuesets/enums/materials_science/material_types.py +104 -0
  107. valuesets/enums/materials_science/pigments_dyes.py +198 -0
  108. valuesets/enums/materials_science/synthesis_methods.py +109 -0
  109. valuesets/enums/medical/__init__.py +1 -0
  110. valuesets/enums/medical/clinical.py +277 -0
  111. valuesets/enums/medical/neuroimaging.py +119 -0
  112. valuesets/enums/mining_processing.py +302 -0
  113. valuesets/enums/physics/__init__.py +1 -0
  114. valuesets/enums/physics/states_of_matter.py +46 -0
  115. valuesets/enums/social/__init__.py +1 -0
  116. valuesets/enums/social/person_status.py +29 -0
  117. valuesets/enums/spatial/__init__.py +1 -0
  118. valuesets/enums/spatial/spatial_qualifiers.py +246 -0
  119. valuesets/enums/statistics/__init__.py +5 -0
  120. valuesets/enums/statistics/prediction_outcomes.py +31 -0
  121. valuesets/enums/statistics.py +31 -0
  122. valuesets/enums/time/__init__.py +1 -0
  123. valuesets/enums/time/temporal.py +254 -0
  124. valuesets/enums/units/__init__.py +1 -0
  125. valuesets/enums/units/measurements.py +310 -0
  126. valuesets/enums/visual/__init__.py +1 -0
  127. valuesets/enums/visual/colors.py +376 -0
  128. valuesets/generators/__init__.py +19 -0
  129. valuesets/generators/auto_slot_injector.py +280 -0
  130. valuesets/generators/enhanced_pydantic_generator.py +100 -0
  131. valuesets/generators/enum_slot_generator.py +201 -0
  132. valuesets/generators/modular_rich_generator.py +353 -0
  133. valuesets/generators/prefix_standardizer.py +198 -0
  134. valuesets/generators/rich_enum.py +127 -0
  135. valuesets/generators/rich_pydantic_generator.py +310 -0
  136. valuesets/generators/smart_slot_syncer.py +428 -0
  137. valuesets/generators/sssom_generator.py +394 -0
  138. valuesets/merged/merged_hierarchy.yaml +21649 -0
  139. valuesets/schema/README.md +3 -0
  140. valuesets/schema/academic/research.yaml +911 -0
  141. valuesets/schema/analytical_chemistry/mass_spectrometry.yaml +206 -0
  142. valuesets/schema/bio/bio_entities.yaml +364 -0
  143. valuesets/schema/bio/biological_colors.yaml +434 -0
  144. valuesets/schema/bio/cell_cycle.yaml +309 -0
  145. valuesets/schema/bio/currency_chemicals.yaml +70 -0
  146. valuesets/schema/bio/developmental_stages.yaml +226 -0
  147. valuesets/schema/bio/genome_features.yaml +342 -0
  148. valuesets/schema/bio/genomics.yaml +101 -0
  149. valuesets/schema/bio/go_aspect.yaml +39 -0
  150. valuesets/schema/bio/go_causality.yaml +119 -0
  151. valuesets/schema/bio/go_evidence.yaml +215 -0
  152. valuesets/schema/bio/insdc_geographic_locations.yaml +911 -0
  153. valuesets/schema/bio/insdc_missing_values.yaml +85 -0
  154. valuesets/schema/bio/lipid_categories.yaml +72 -0
  155. valuesets/schema/bio/plant_biology.yaml +125 -0
  156. valuesets/schema/bio/plant_developmental_stages.yaml +77 -0
  157. valuesets/schema/bio/plant_sex.yaml +108 -0
  158. valuesets/schema/bio/protein_evidence.yaml +63 -0
  159. valuesets/schema/bio/proteomics_standards.yaml +116 -0
  160. valuesets/schema/bio/psi_mi.yaml +400 -0
  161. valuesets/schema/bio/relationship_to_oxygen.yaml +46 -0
  162. valuesets/schema/bio/sequence_alphabets.yaml +1168 -0
  163. valuesets/schema/bio/sequence_chemistry.yaml +477 -0
  164. valuesets/schema/bio/sequencing_platforms.yaml +515 -0
  165. valuesets/schema/bio/structural_biology.yaml +428 -0
  166. valuesets/schema/bio/taxonomy.yaml +453 -0
  167. valuesets/schema/bio/trophic_levels.yaml +118 -0
  168. valuesets/schema/bio/uniprot_species.yaml +1209 -0
  169. valuesets/schema/bio/viral_genome_types.yaml +99 -0
  170. valuesets/schema/bioprocessing/scale_up.yaml +458 -0
  171. valuesets/schema/business/human_resources.yaml +752 -0
  172. valuesets/schema/business/industry_classifications.yaml +448 -0
  173. valuesets/schema/business/management_operations.yaml +602 -0
  174. valuesets/schema/business/organizational_structures.yaml +645 -0
  175. valuesets/schema/business/quality_management.yaml +502 -0
  176. valuesets/schema/business/supply_chain.yaml +688 -0
  177. valuesets/schema/chemistry/chemical_entities.yaml +639 -0
  178. valuesets/schema/chemistry/reaction_directionality.yaml +60 -0
  179. valuesets/schema/chemistry/reactions.yaml +442 -0
  180. valuesets/schema/clinical/nih_demographics.yaml +285 -0
  181. valuesets/schema/clinical/phenopackets.yaml +429 -0
  182. valuesets/schema/computing/file_formats.yaml +631 -0
  183. valuesets/schema/computing/maturity_levels.yaml +229 -0
  184. valuesets/schema/computing/mime_types.yaml +266 -0
  185. valuesets/schema/confidence_levels.yaml +206 -0
  186. valuesets/schema/contributor.yaml +30 -0
  187. valuesets/schema/core.yaml +55 -0
  188. valuesets/schema/data/data_absent_reason.yaml +82 -0
  189. valuesets/schema/data_science/binary_classification.yaml +125 -0
  190. valuesets/schema/data_science/emotion_classification.yaml +109 -0
  191. valuesets/schema/data_science/priority_severity.yaml +122 -0
  192. valuesets/schema/data_science/quality_control.yaml +68 -0
  193. valuesets/schema/data_science/sentiment_analysis.yaml +81 -0
  194. valuesets/schema/data_science/text_classification.yaml +135 -0
  195. valuesets/schema/demographics.yaml +238 -0
  196. valuesets/schema/ecological_interactions.yaml +298 -0
  197. valuesets/schema/energy/energy.yaml +595 -0
  198. valuesets/schema/energy/fossil_fuels.yaml +28 -0
  199. valuesets/schema/energy/nuclear/nuclear_facilities.yaml +463 -0
  200. valuesets/schema/energy/nuclear/nuclear_fuel_cycle.yaml +82 -0
  201. valuesets/schema/energy/nuclear/nuclear_fuels.yaml +421 -0
  202. valuesets/schema/energy/nuclear/nuclear_operations.yaml +480 -0
  203. valuesets/schema/energy/nuclear/nuclear_regulatory.yaml +200 -0
  204. valuesets/schema/energy/nuclear/nuclear_safety.yaml +352 -0
  205. valuesets/schema/energy/nuclear/nuclear_waste.yaml +332 -0
  206. valuesets/schema/energy/nuclear/reactor_types.yaml +394 -0
  207. valuesets/schema/environmental_health/exposures.yaml +355 -0
  208. valuesets/schema/generated_slots.yaml +1828 -0
  209. valuesets/schema/geography/geographic_codes.yaml +1018 -0
  210. valuesets/schema/health/vaccination.yaml +102 -0
  211. valuesets/schema/health.yaml +38 -0
  212. valuesets/schema/healthcare.yaml +53 -0
  213. valuesets/schema/industry/extractive_industry.yaml +89 -0
  214. valuesets/schema/industry/mining.yaml +888 -0
  215. valuesets/schema/industry/safety_colors.yaml +375 -0
  216. valuesets/schema/investigation.yaml +64 -0
  217. valuesets/schema/materials_science/characterization_methods.yaml +193 -0
  218. valuesets/schema/materials_science/crystal_structures.yaml +138 -0
  219. valuesets/schema/materials_science/material_properties.yaml +135 -0
  220. valuesets/schema/materials_science/material_types.yaml +151 -0
  221. valuesets/schema/materials_science/pigments_dyes.yaml +465 -0
  222. valuesets/schema/materials_science/synthesis_methods.yaml +186 -0
  223. valuesets/schema/medical/clinical.yaml +610 -0
  224. valuesets/schema/medical/neuroimaging.yaml +325 -0
  225. valuesets/schema/mining_processing.yaml +295 -0
  226. valuesets/schema/physics/states_of_matter.yaml +46 -0
  227. valuesets/schema/slot_mixins.yaml +143 -0
  228. valuesets/schema/social/person_status.yaml +28 -0
  229. valuesets/schema/spatial/spatial_qualifiers.yaml +466 -0
  230. valuesets/schema/statistics/prediction_outcomes.yaml +26 -0
  231. valuesets/schema/statistics.yaml +34 -0
  232. valuesets/schema/time/temporal.yaml +435 -0
  233. valuesets/schema/types.yaml +15 -0
  234. valuesets/schema/units/measurements.yaml +675 -0
  235. valuesets/schema/valuesets.yaml +100 -0
  236. valuesets/schema/visual/colors.yaml +778 -0
  237. valuesets/utils/__init__.py +6 -0
  238. valuesets/utils/comparison.py +102 -0
  239. valuesets/utils/expand_dynamic_enums.py +414 -0
  240. valuesets/utils/mapping_utils.py +236 -0
  241. valuesets/validators/__init__.py +11 -0
  242. valuesets/validators/enum_evaluator.py +669 -0
  243. valuesets/validators/oak_config.yaml +70 -0
  244. valuesets/validators/validate_with_ols.py +241 -0
  245. valuesets-0.3.1.dist-info/METADATA +395 -0
  246. valuesets-0.3.1.dist-info/RECORD +248 -0
  247. valuesets-0.3.1.dist-info/WHEEL +4 -0
  248. valuesets-0.3.1.dist-info/licenses/LICENSE +201 -0
@@ -0,0 +1,394 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ SSSOM TSV generator for LinkML enum mappings.
4
+
5
+ Generates Simple Standard for Sharing Ontological Mappings (SSSOM) TSV files
6
+ from LinkML enum definitions with ontology mappings.
7
+ """
8
+
9
+ import csv
10
+ import logging
11
+ from pathlib import Path
12
+ from typing import List, Dict, Optional, Any
13
+ from datetime import datetime
14
+
15
+ from linkml_runtime.utils.schemaview import SchemaView
16
+ from linkml_runtime.linkml_model import EnumDefinition, PermissibleValue
17
+
18
+ # Import shared mapping utilities
19
+ try:
20
+ from ..utils.mapping_utils import extract_all_mappings, deduplicate_mappings
21
+ except ImportError:
22
+ # Fallback for running as script
23
+ import sys
24
+ from pathlib import Path
25
+ sys.path.insert(0, str(Path(__file__).parent.parent.parent))
26
+ from valuesets.utils.mapping_utils import extract_all_mappings, deduplicate_mappings
27
+
28
+ try:
29
+ from oaklib import get_adapter
30
+ HAS_OAK = True
31
+ except ImportError:
32
+ HAS_OAK = False
33
+
34
# Configure root logging at INFO level and create this module's logger.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# CURIE prefix -> namespace IRI pairs emitted in the "#curie_map:" header
# of every SSSOM TSV file written by this module.
SSSOM_PREFIXES = dict(
    rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#",
    rdfs="http://www.w3.org/2000/01/rdf-schema#",
    owl="http://www.w3.org/2002/07/owl#",
    skos="http://www.w3.org/2004/02/skos/core#",
    sssom="https://w3id.org/sssom/",
    dcterms="http://purl.org/dc/terms/",
    semapv="https://w3id.org/semapv/",
)
47
+
48
+
49
class SSSOMGenerator:
    """Generator for SSSOM TSV files from LinkML schemas.

    For every permissible value of every enum in a schema, the mappings
    returned by ``extract_all_mappings`` are turned into SSSOM rows and can
    be written to a TSV file with a commented metadata header.

    Ontology labels are looked up through OAK when it is installed. Passing
    ``oak_adapter_string=None`` disables label lookups entirely.
    """

    # Adapter string meaning "pick an OAK adapter per CURIE prefix on demand".
    _DYNAMIC_ADAPTER = "sqlite:obo:"

    # CURIE prefix -> ontology source name; unknown prefixes are lower-cased.
    _ONTOLOGY_SOURCES = {
        "NCIT": "ncit",
        "CHEBI": "chebi",
        "GO": "go",
        "UBERON": "uberon",
        "HP": "hp",
        "MONDO": "mondo",
        "ENVO": "envo",
        "OBI": "obi",
        "SNOMED": "snomed",
        "LOINC": "loinc",
        "MSIO": "msio",
        "mesh": "mesh",
        "IAO": "iao",
        "FABIO": "fabio",
        "PATO": "pato",
        "GENO": "geno",
        "GSSO": "gsso",
        "MS": "ms",
        "CRediT": "credit",
        "TIME": "time",
        "greg": "gregorian",
    }

    # Confidence assigned per mapping predicate; any other predicate gets 1.0.
    _PREDICATE_CONFIDENCE = {
        "skos:closeMatch": 0.9,
        "skos:narrowMatch": 0.8,
        "skos:broadMatch": 0.8,
        "skos:relatedMatch": 0.7,
    }

    # Column order of the generated TSV table.
    _FIELDNAMES = [
        "subject_id",
        "subject_label",
        "predicate_id",
        "object_id",
        "object_label",
        "mapping_justification",
        "subject_source",
        "object_source",
        "mapping_tool",
        "confidence",
        "subject_type",
        "object_type",
        "comment",
    ]

    def __init__(self, oak_adapter_string: Optional[str] = "sqlite:obo:", cache_labels: bool = True):
        """
        Initialize the SSSOM generator.

        Args:
            oak_adapter_string: OAK adapter configuration. The default
                ``"sqlite:obo:"`` creates one adapter per CURIE prefix on
                demand; ``None`` disables label lookups.
            cache_labels: Whether to cache ontology labels
        """
        self.oak_adapter_string = oak_adapter_string
        self._label_cache = {} if cache_labels else None
        self._per_prefix_adapters = {}
        self._initialize_oak()

    def _initialize_oak(self) -> None:
        """Initialize OAK for label lookups."""
        if self.oak_adapter_string is None:
            # Explicit opt-out: do not hand None to get_adapter().
            logger.info("Ontology label lookups disabled")
            return

        if not HAS_OAK:
            logger.warning("OAK not installed - labels will not be retrieved")
            return

        if self.oak_adapter_string == self._DYNAMIC_ADAPTER:
            # Adapters are created lazily, one per CURIE prefix.
            logger.info("Using dynamic OAK adapter selection")
            return

        try:
            self._per_prefix_adapters['_default'] = get_adapter(self.oak_adapter_string)
            logger.info(f"Initialized OAK adapter: {self.oak_adapter_string}")
        except Exception as e:
            # Best effort: without an adapter, labels are simply left empty.
            logger.warning(f"Could not initialize OAK: {e}")

    def _create_prefix_adapter(self, prefix: str):
        """Create an adapter for *prefix*, falling back to the merged database.

        Returns None when neither adapter can be created.
        """
        for adapter_string in (f"sqlite:obo:{prefix}", "sqlite:obo:merged"):
            try:
                adapter = get_adapter(adapter_string)
            except Exception as e:
                # Deliberately not a bare except, so KeyboardInterrupt and
                # SystemExit still propagate.
                logger.debug(f"Could not create adapter {adapter_string}: {e}")
                continue
            logger.debug(f"Created adapter {adapter_string} for {prefix}")
            return adapter
        return None

    def _adapter_for(self, curie: str):
        """Return the adapter to use for *curie*, or None if there is none."""
        prefix = curie.split(":")[0].lower() if ":" in curie else None

        if self.oak_adapter_string != self._DYNAMIC_ADAPTER or not prefix:
            return self._per_prefix_adapters.get('_default')

        if prefix not in self._per_prefix_adapters:
            # A failed creation is stored as None so it is not retried.
            self._per_prefix_adapters[prefix] = self._create_prefix_adapter(prefix)
        return self._per_prefix_adapters[prefix]

    def get_ontology_label(self, curie: str) -> Optional[str]:
        """Get label for an ontology term.

        Args:
            curie: CURIE of the term to look up

        Returns:
            The label reported by the adapter, or None when lookups are
            disabled, OAK is missing, no adapter is available or the lookup
            fails. Results (including None) are cached when caching is on.
        """
        if self.oak_adapter_string is None or not HAS_OAK:
            return None

        if self._label_cache is not None and curie in self._label_cache:
            return self._label_cache[curie]

        label = None
        adapter = self._adapter_for(curie)
        if adapter:
            try:
                label = adapter.label(curie)
            except Exception as e:
                logger.debug(f"Could not get label for {curie}: {e}")

        if self._label_cache is not None:
            self._label_cache[curie] = label

        return label

    def generate_mappings(self, schema_path: Path) -> List[Dict[str, Any]]:
        """
        Generate SSSOM mappings from a LinkML schema.

        Any exception raised while processing is logged and swallowed; the
        mappings collected up to that point are returned.

        Args:
            schema_path: Path to LinkML schema file

        Returns:
            List of mapping dictionaries
        """
        mappings = []

        try:
            sv = SchemaView(str(schema_path))
            schema_id = sv.schema.id or str(schema_path)
            default_prefix = sv.schema.default_prefix

            for enum_name, enum_def in sv.all_enums().items():
                if not enum_def.permissible_values:
                    continue

                # Enum identifier: CURIE if the schema has a default prefix,
                # otherwise a fragment on the schema id.
                if default_prefix:
                    enum_uri = f"{default_prefix}:{enum_name}"
                else:
                    enum_uri = f"{schema_id}#{enum_name}"

                for value_name, pv in enum_def.permissible_values.items():
                    pv_mappings = extract_all_mappings(pv, include_meaning=True, include_annotations=True)
                    if not pv_mappings:
                        continue

                    pv_mappings = deduplicate_mappings(pv_mappings)
                    subject_id = f"{enum_uri}.{value_name}"

                    for object_id, predicate, mapping_comment in pv_mappings:
                        # Value description first, then the mapping's own note.
                        comment = "; ".join(
                            part for part in (pv.description, mapping_comment) if part
                        )

                        mappings.append({
                            "subject_id": subject_id,
                            "subject_label": pv.title or value_name,
                            "predicate_id": predicate,
                            "object_id": object_id,
                            "object_label": self.get_ontology_label(object_id) or "",
                            "mapping_justification": "semapv:ManualMappingCuration",
                            "subject_source": schema_id,
                            "object_source": self._extract_ontology_source(object_id),
                            "mapping_tool": "linkml-valuesets",
                            "confidence": self._PREDICATE_CONFIDENCE.get(predicate, 1.0),
                            "subject_type": "enum_value",
                            "object_type": "ontology_class",
                            "comment": comment,
                        })

        except Exception as e:
            logger.error(f"Error processing schema {schema_path}: {e}")

        return mappings

    def _extract_ontology_source(self, curie: str) -> str:
        """Extract ontology source from CURIE.

        Returns the mapped ontology name for known prefixes, the lower-cased
        prefix for unknown ones, and "" when *curie* contains no colon.
        """
        if ":" not in curie:
            return ""
        prefix = curie.split(":")[0]
        return self._ONTOLOGY_SOURCES.get(prefix, prefix.lower())

    def write_sssom_tsv(self, mappings: List[Dict[str, Any]], output_path: Path,
                        metadata: Optional[Dict[str, str]] = None) -> None:
        """
        Write mappings to SSSOM TSV file.

        Nothing is written when *mappings* is empty. The caller's *metadata*
        dict is not modified; defaults are applied to a copy.

        Args:
            mappings: List of mapping dictionaries
            output_path: Output file path
            metadata: Optional metadata for SSSOM header
        """
        if not mappings:
            logger.warning("No mappings to write")
            return

        # Copy so that setdefault() never leaks defaults into the caller's dict.
        meta = dict(metadata or {})
        meta.setdefault("mapping_set_id", "https://w3id.org/linkml/valuesets/mappings")
        meta.setdefault("mapping_set_version", datetime.now().strftime("%Y-%m-%d"))
        meta.setdefault("license", "https://creativecommons.org/publicdomain/zero/1.0/")
        meta.setdefault("creator_id", "https://github.com/linkml/linkml-valuesets")

        # Explicit UTF-8 keeps output independent of the platform locale.
        with open(output_path, 'w', newline='', encoding='utf-8') as f:
            # Commented metadata header: prefix map first, then key/value pairs.
            f.write("#curie_map:\n")
            for prefix, uri in SSSOM_PREFIXES.items():
                f.write(f"# {prefix}: \"{uri}\"\n")
            f.write("#\n")

            for key, value in meta.items():
                f.write(f"#{key}: {value}\n")
            f.write("#\n")

            # lineterminator matches the "\n" used for the header lines, so
            # the file does not end up with mixed line endings.
            writer = csv.DictWriter(f, fieldnames=self._FIELDNAMES, delimiter='\t',
                                    extrasaction='ignore', lineterminator='\n')
            writer.writeheader()
            writer.writerows(mappings)

        logger.info(f"Wrote {len(mappings)} mappings to {output_path}")

    def generate_from_directory(self, schema_dir: Path, output_path: Path,
                                metadata: Optional[Dict[str, str]] = None) -> List[Dict[str, Any]]:
        """
        Generate SSSOM TSV from all schemas in a directory.

        The directory is searched recursively for ``*.yaml`` files in sorted
        order; paths containing "linkml_model" are skipped.

        Args:
            schema_dir: Directory containing LinkML schemas
            output_path: Output TSV file path
            metadata: Optional SSSOM metadata

        Returns:
            All mappings collected across the processed schemas
        """
        all_mappings = []

        for schema_file in sorted(schema_dir.rglob("*.yaml")):
            # Skip linkml model files
            if "linkml_model" in str(schema_file):
                continue

            logger.info(f"Processing {schema_file.name}")
            all_mappings.extend(self.generate_mappings(schema_file))

        self.write_sssom_tsv(all_mappings, output_path, metadata)
        return all_mappings
324
+
325
+
326
def main():
    """CLI entry point.

    Parses the command line, then generates an SSSOM TSV from a single
    LinkML schema file or from every schema found in a directory.

    Returns:
        0 on success, 1 when the input path is neither a file nor a
        directory.
    """
    import argparse
    import sys

    parser = argparse.ArgumentParser(
        description="Generate SSSOM TSV from LinkML enum mappings"
    )
    parser.add_argument(
        "input",
        type=Path,
        help="Input LinkML schema file or directory"
    )
    parser.add_argument(
        "-o", "--output",
        type=Path,
        default=Path("mappings.sssom.tsv"),
        help="Output SSSOM TSV file (default: mappings.sssom.tsv)"
    )
    parser.add_argument(
        "--adapter",
        default="sqlite:obo:",
        help="OAK adapter string for label lookups"
    )
    parser.add_argument(
        "--no-labels",
        action="store_true",
        help="Skip ontology label lookups"
    )
    parser.add_argument(
        "--mapping-set-id",
        help="Mapping set ID for SSSOM metadata"
    )
    parser.add_argument(
        "--license",
        help="License URL for SSSOM metadata"
    )

    args = parser.parse_args()

    # Validate the input before building the generator, so an invalid path
    # fails fast without initializing any OAK adapter.
    input_is_file = args.input.is_file()
    if not input_is_file and not args.input.is_dir():
        print(f"Error: {args.input} is not a file or directory", file=sys.stderr)
        return 1

    # --no-labels passes None as the adapter string.
    adapter_string = None if args.no_labels else args.adapter
    generator = SSSOMGenerator(oak_adapter_string=adapter_string)

    # Only options given on the command line go into the metadata.
    metadata = {}
    if args.mapping_set_id:
        metadata["mapping_set_id"] = args.mapping_set_id
    if args.license:
        metadata["license"] = args.license

    if input_is_file:
        mappings = generator.generate_mappings(args.input)
        generator.write_sssom_tsv(mappings, args.output, metadata)
    else:
        generator.generate_from_directory(args.input, args.output, metadata)

    print(f"Generated SSSOM TSV: {args.output}")
    return 0
390
+
391
+
392
if __name__ == "__main__":
    # Script entry: use main()'s return value as the process exit status.
    import sys

    exit_status = main()
    sys.exit(exit_status)