sutradb 0.3.2__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
sutradb/__init__.py ADDED
@@ -0,0 +1,5 @@
1
"""SutraDB Python SDK public API."""

from ._version import __version__
from .client import SutraClient
from .owl import OWLValidator, OWLViolation

# NOTE: __version__ is sourced from _version.py (the release process's
# single source of truth) instead of being hard-coded here; the previous
# hard-coded "0.1.0" disagreed with the released 0.3.2 wheel metadata.
__all__ = ["SutraClient", "OWLValidator", "OWLViolation"]
sutradb/_version.py ADDED
@@ -0,0 +1 @@
1
+ __version__ = "0.3.2"
sutradb/client.py ADDED
@@ -0,0 +1,285 @@
1
+ """SutraDB Python client."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import Any
6
+
7
+ import requests
8
+
9
+
10
class SutraError(Exception):
    """Error raised by the SutraDB client when a request fails.

    Attributes:
        status_code: HTTP status code of the failed response, or ``None``
            when the failure happened before any response arrived
            (e.g. a connection error).
    """

    def __init__(self, message: str, status_code: int | None = None) -> None:
        self.status_code = status_code
        super().__init__(message)
16
+
17
+
18
class SutraClient:
    """Client for interacting with a SutraDB server.

    Args:
        endpoint: Base URL of the SutraDB HTTP server.
            Defaults to ``http://localhost:3030``.
        owl_validation: Enable client-side OWL constraint validation.
            When True (default), inserts are checked against OWL axioms
            stored in the database before being sent. Raises OWLViolation
            on constraint violations. The database itself always accepts
            all triples regardless of this setting.
    """

    def __init__(
        self,
        endpoint: str = "http://localhost:3030",
        owl_validation: bool = True,
    ) -> None:
        self.endpoint = endpoint.rstrip("/")
        self._session = requests.Session()
        # Advertise the actual released version (previously a stale
        # hard-coded "0.1.0"; see _version.py).
        self._session.headers.update({"User-Agent": "sutradb-python/0.3.2"})
        self._owl_validation = owl_validation
        # Lazily-created OWLValidator; stays None until the first
        # validated insert (see _ensure_owl_loaded).
        self._owl_validator = None

    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------

    def _url(self, path: str) -> str:
        """Join *path* onto the configured (slash-stripped) endpoint."""
        return f"{self.endpoint}{path}"

    def _request(
        self,
        method: str,
        path: str,
        *,
        params: dict[str, Any] | None = None,
        json: Any | None = None,
        data: str | None = None,
        headers: dict[str, str] | None = None,
    ) -> requests.Response:
        """Send an HTTP request and raise :class:`SutraError` on failure.

        Transport-level errors (DNS, refused connection, timeout) are
        wrapped in a :class:`SutraError` without a status code; non-2xx
        responses become a :class:`SutraError` carrying the status code.
        """
        try:
            resp = self._session.request(
                method,
                self._url(path),
                params=params,
                json=json,
                data=data,
                headers=headers,
            )
        except requests.RequestException as exc:
            raise SutraError(f"Connection error: {exc}") from exc

        if not resp.ok:
            raise SutraError(
                f"HTTP {resp.status_code}: {resp.text}",
                status_code=resp.status_code,
            )
        return resp

    # ------------------------------------------------------------------
    # OWL validation
    # ------------------------------------------------------------------

    def _ensure_owl_loaded(self) -> None:
        """Lazy-load OWL ontology from the database on first validation."""
        if self._owl_validator is not None:
            return
        try:
            from .owl import OWLValidator

            self._owl_validator = OWLValidator()
            self._owl_validator.load_from_client(self)
        except Exception:
            # Best-effort: if the ontology cannot be loaded, skip
            # client-side validation silently rather than block inserts.
            self._owl_validator = None

    def reload_owl(self) -> None:
        """Force reload of OWL ontology from the database."""
        self._owl_validator = None
        self._ensure_owl_loaded()

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------

    def health(self) -> bool:
        """Check whether the server is reachable.

        Returns:
            ``True`` if the server responds to ``GET /health`` with a 2xx
            status code, ``False`` otherwise.
        """
        try:
            self._request("GET", "/health")
            return True
        except SutraError:
            return False

    def sparql(self, query: str) -> dict:
        """Execute a SPARQL query and return the parsed JSON result.

        Args:
            query: A SPARQL 1.1 query string.

        Returns:
            The JSON response body as a Python dict (SPARQL JSON Results
            format for SELECT/ASK, or a status dict for UPDATE).

        Raises:
            SutraError: If the server returns a non-2xx status code.
        """
        resp = self._request(
            "GET",
            "/sparql",
            params={"query": query},
            headers={"Accept": "application/sparql-results+json"},
        )
        return resp.json()

    def insert_triples(
        self, ntriples: str, batch_size: int = 5000
    ) -> dict[str, Any]:
        """Insert triples in N-Triples format, optionally in batches.

        Args:
            ntriples: One or more triples in N-Triples syntax (one per line).
            batch_size: Maximum number of triples to send per HTTP request.

        Returns:
            A dict ``{"inserted": int, "errors": list[str]}`` summarising the
            outcome across all batches.

        Raises:
            OWLViolation: If client-side OWL validation is enabled and a
                triple violates a loaded constraint (only the first
                violation is raised).
        """
        # OWL validation (client-side, before sending to database).
        # The validator itself produces OWLViolation instances, so no
        # extra import is needed here.
        if self._owl_validation:
            self._ensure_owl_loaded()
            if self._owl_validator and self._owl_validator.has_constraints():
                violations = self._owl_validator.validate_ntriples(ntriples)
                if violations:
                    raise violations[0]  # Raise first violation

        lines = [
            line for line in ntriples.splitlines() if line.strip()
        ]

        total_inserted = 0
        errors: list[str] = []

        for start in range(0, len(lines), batch_size):
            batch = "\n".join(lines[start : start + batch_size])
            try:
                resp = self._request(
                    "POST",
                    "/triples",
                    data=batch,
                    headers={"Content-Type": "application/n-triples"},
                )
                body = resp.json()
                total_inserted += body.get("inserted", 0)
                batch_errors = body.get("errors", [])
                if batch_errors:
                    errors.extend(batch_errors)
            except SutraError as exc:
                # A failed batch is recorded but does not abort the rest.
                errors.append(str(exc))

        return {"inserted": total_inserted, "errors": errors}

    def declare_vector(
        self,
        predicate: str,
        dimensions: int,
        m: int = 16,
        ef_construction: int = 200,
        metric: str = "cosine",
    ) -> dict:
        """Declare an HNSW-indexed vector predicate.

        Args:
            predicate: The IRI of the vector predicate (e.g.
                ``"http://example.org/hasEmbedding"``).
            dimensions: The fixed dimensionality of vectors for this predicate.
            m: HNSW ``M`` parameter (max connections per node per layer).
            ef_construction: HNSW ``ef_construction`` beam width.
            metric: Distance metric (``"cosine"``, ``"euclidean"``, or
                ``"dot"``).

        Returns:
            The server response as a dict, typically containing ``status`` and
            ``predicate_id`` keys.

        Raises:
            SutraError: If the server rejects the declaration.
        """
        resp = self._request(
            "POST",
            "/vectors/declare",
            json={
                "predicate": predicate,
                "dimensions": dimensions,
                "m": m,
                "ef_construction": ef_construction,
                "metric": metric,
            },
        )
        return resp.json()

    def insert_vector(
        self, predicate: str, subject: str, vector: list[float]
    ) -> dict:
        """Insert a single vector embedding.

        Args:
            predicate: The IRI of the vector predicate.
            subject: The IRI of the subject node.
            vector: The embedding as a list of floats.

        Returns:
            The server response as a dict, typically containing ``status`` and
            ``triple_id`` keys.

        Raises:
            SutraError: If the server rejects the insert.
        """
        resp = self._request(
            "POST",
            "/vectors",
            json={
                "predicate": predicate,
                "subject": subject,
                "vector": vector,
            },
        )
        return resp.json()

    def insert_vectors_batch(
        self,
        predicate: str,
        entries: list[tuple[str, list[float]]],
        batch_size: int = 100,
    ) -> dict[str, Any]:
        """Insert multiple vectors in batches.

        Args:
            predicate: The IRI of the vector predicate.
            entries: A list of ``(subject_iri, vector)`` tuples.
            batch_size: Maximum number of vectors to send per HTTP request.
                NOTE(review): vectors are currently sent one request per
                entry; ``batch_size`` only chunks the iteration.

        Returns:
            A dict ``{"inserted": int, "errors": list[str]}`` summarising
            the outcome across all batches.
        """
        total_inserted = 0
        errors: list[str] = []

        for start in range(0, len(entries), batch_size):
            batch = entries[start : start + batch_size]
            for subject, vector in batch:
                try:
                    result = self.insert_vector(predicate, subject, vector)
                    if result.get("status") == "ok":
                        total_inserted += 1
                except SutraError as exc:
                    errors.append(f"{subject}: {exc}")

        return {"inserted": total_inserted, "errors": errors}
sutradb/jupyter.py ADDED
@@ -0,0 +1,124 @@
1
+ """Jupyter integration for SutraDB.
2
+
3
+ Provides %%sparql cell magic for executing SPARQL queries inline in
4
+ Jupyter notebooks with tabular result display.
5
+
6
+ Usage in a Jupyter notebook:
7
+
8
+ # First, load the extension
9
+ %load_ext sutradb.jupyter
10
+
11
+ # Then use the magic
12
+ %%sparql
13
+ SELECT ?s ?p ?o WHERE { ?s ?p ?o } LIMIT 10
14
+
15
+ # Or with a custom endpoint
16
+ %%sparql http://localhost:8080
17
+ SELECT ?s ?p ?o WHERE { ?s ?p ?o } LIMIT 10
18
+ """
19
+
20
+ from __future__ import annotations
21
+
22
+ from IPython.core.magic import register_cell_magic, needs_local_scope
23
+ import requests
24
+
25
+
26
+ _default_endpoint = "http://localhost:3030"
27
+
28
+
29
+ def _shorten(iri: str) -> str:
30
+ """Shorten an IRI for display."""
31
+ prefixes = {
32
+ "http://www.w3.org/1999/02/22-rdf-syntax-ns#": "rdf:",
33
+ "http://www.w3.org/2000/01/rdf-schema#": "rdfs:",
34
+ "http://www.w3.org/2002/07/owl#": "owl:",
35
+ "http://www.w3.org/2001/XMLSchema#": "xsd:",
36
+ "http://www.wikidata.org/entity/": "wd:",
37
+ "http://www.wikidata.org/prop/direct/": "wdt:",
38
+ "http://sutra.dev/": "sutra:",
39
+ "http://schema.org/": "schema:",
40
+ }
41
+ for full, short in prefixes.items():
42
+ if iri.startswith(full):
43
+ return short + iri[len(full):]
44
+ return iri
45
+
46
+
47
@register_cell_magic
def sparql(line, cell):
    """Execute a SPARQL query against SutraDB.

    The cell body is POSTed verbatim to ``<endpoint>/sparql``.  Results
    are rendered as a pandas DataFrame when pandas is installed, and as
    a plain-text table otherwise.  All errors are printed rather than
    raised so a failing query does not interrupt the notebook.

    Usage:
        %%sparql [endpoint]
        SELECT ?s ?p ?o WHERE { ?s ?p ?o } LIMIT 10
    """
    # An endpoint on the magic line overrides the module default.
    endpoint = line.strip() if line.strip() else _default_endpoint

    try:
        resp = requests.post(
            f"{endpoint}/sparql",
            data=cell,
            headers={"Accept": "application/sparql-results+json"},
            timeout=30,
        )
        if resp.status_code != 200:
            print(f"Error: HTTP {resp.status_code}")
            print(resp.text)
            return

        # SPARQL JSON Results format: head.vars + results.bindings.
        data = resp.json()
        columns = data.get("head", {}).get("vars", [])
        bindings = data.get("results", {}).get("bindings", [])

        if not bindings:
            print("No results.")
            return

        # Try to use pandas for nice display
        try:
            import pandas as pd

            rows = []
            for b in bindings:
                row = {}
                for col in columns:
                    # Unbound variables render as empty strings.
                    val = b.get(col, {}).get("value", "")
                    row[col] = _shorten(val)
                rows.append(row)
            df = pd.DataFrame(rows, columns=columns)
            from IPython.display import display

            display(df)
        except ImportError:
            # Fallback: plain text table
            # Column widths are sampled from the first 20 rows only
            # (values are capped at 50 chars); later rows are truncated
            # to these widths when printed.
            widths = {c: max(len(c), 10) for c in columns}
            for b in bindings[:20]:
                for c in columns:
                    v = _shorten(b.get(c, {}).get("value", ""))
                    widths[c] = max(widths[c], min(len(v), 50))

            header = " | ".join(c.ljust(widths[c]) for c in columns)
            separator = "-+-".join("-" * widths[c] for c in columns)
            print(header)
            print(separator)
            for b in bindings:
                row = " | ".join(
                    _shorten(b.get(c, {}).get("value", ""))[:widths[c]].ljust(
                        widths[c]
                    )
                    for c in columns
                )
                print(row)
            print(f"\n{len(bindings)} rows")

    except requests.ConnectionError:
        print(f"Error: Could not connect to SutraDB at {endpoint}")
    except Exception as e:
        # Notebook boundary: report anything unexpected instead of raising.
        print(f"Error: {e}")
119
+
120
+
121
def load_ipython_extension(ipython):
    """Entry point invoked by ``%load_ext sutradb.jupyter``.

    Intentionally a no-op: the ``%%sparql`` magic is already registered
    at import time by the ``@register_cell_magic`` decorator.
    """
sutradb/langchain.py ADDED
@@ -0,0 +1,150 @@
1
+ """LangChain integration for SutraDB.
2
+
3
+ Provides SutraDB as both a VectorStore and a knowledge graph for
4
+ Retrieval-Augmented Generation (RAG) pipelines.
5
+
6
+ Usage:
7
+ from sutradb.langchain import SutraVectorStore
8
+
9
+ vectorstore = SutraVectorStore(
10
+ endpoint="http://localhost:3030",
11
+ predicate="http://sutra.dev/hasEmbedding",
12
+ )
13
+
14
+ # Use with LangChain
15
+ retriever = vectorstore.as_retriever()
16
+ docs = retriever.get_relevant_documents("What is a transformer?")
17
+
18
+ Requires: pip install langchain-core
19
+ """
20
+
21
+ from __future__ import annotations
22
+
23
+ from typing import Any, Iterable, Optional
24
+
25
+ try:
26
+ from langchain_core.documents import Document
27
+ from langchain_core.vectorstores import VectorStore
28
+ from langchain_core.embeddings import Embeddings
29
+ except ImportError:
30
+ raise ImportError(
31
+ "langchain-core is required for LangChain integration. "
32
+ "Install it with: pip install langchain-core"
33
+ )
34
+
35
+ from .client import SutraClient
36
+
37
+
38
class SutraVectorStore(VectorStore):
    """LangChain VectorStore backed by SutraDB.

    Uses SutraDB's HNSW vector index for similarity search and
    the RDF triple store for metadata/knowledge graph queries.
    """

    def __init__(
        self,
        endpoint: str = "http://localhost:3030",
        predicate: str = "http://sutra.dev/hasEmbedding",
        embedding: Optional[Embeddings] = None,
        dimensions: int = 1024,
        **kwargs: Any,
    ):
        # Client-side OWL validation is disabled: document/vector inserts
        # are bulk writes, not ontology-constrained data.
        self._client = SutraClient(endpoint, owl_validation=False)
        self._predicate = predicate
        self._embedding = embedding
        self._dimensions = dimensions

        # Ensure vector predicate is declared
        try:
            self._client.declare_vector(predicate, dimensions)
        except Exception:
            pass  # May already exist

    @property
    def embeddings(self) -> Optional[Embeddings]:
        """The embeddings model used to vectorize texts (may be None)."""
        return self._embedding

    def add_texts(
        self,
        texts: Iterable[str],
        metadatas: Optional[list[dict]] = None,
        **kwargs: Any,
    ) -> list[str]:
        """Add texts with embeddings to SutraDB.

        Each text is stored as a vector under ``self._predicate`` plus an
        ``http://sutra.dev/text`` literal triple; metadata entries become
        ``http://sutra.dev/meta/<key>`` literal triples.

        Returns:
            The generated subject IRIs, one per input text.

        Raises:
            ValueError: If no embeddings model was configured.
        """
        import hashlib  # hoisted: previously re-imported on every loop pass

        if self._embedding is None:
            raise ValueError("Embeddings model required for add_texts")

        texts_list = list(texts)
        vectors = self._embedding.embed_documents(texts_list)
        ids = []

        for i, (text, vector) in enumerate(zip(texts_list, vectors)):
            # Subject IRI derived from a content hash, so re-adding the
            # same text yields the same subject (md5 used for identity
            # only, not security).
            text_hash = hashlib.md5(text.encode()).hexdigest()[:12]
            subject = f"http://sutra.dev/doc/{text_hash}"

            # Insert vector
            self._client.insert_vector(self._predicate, subject, vector)

            # Insert text as a triple (escape quotes/newlines for N-Triples)
            escaped = text.replace('"', '\\"').replace('\n', '\\n')
            ntriples = f'<{subject}> <http://sutra.dev/text> "{escaped}" .'

            # Insert metadata
            if metadatas and i < len(metadatas):
                for key, value in metadatas[i].items():
                    escaped_val = str(value).replace('"', '\\"')
                    ntriples += f'\n<{subject}> <http://sutra.dev/meta/{key}> "{escaped_val}" .'

            self._client.insert_triples(ntriples)
            ids.append(subject)

        return ids

    def similarity_search(
        self,
        query: str,
        k: int = 4,
        **kwargs: Any,
    ) -> list[Document]:
        """Search for the *k* documents most similar to *query*.

        Raises:
            ValueError: If no embeddings model was configured.
        """
        if self._embedding is None:
            raise ValueError("Embeddings model required for similarity_search")

        query_vector = self._embedding.embed_query(query)
        vec_str = " ".join(f"{v:.6f}" for v in query_vector)

        # SutraDB's VECTOR_SIMILAR extension to SPARQL; text is OPTIONAL
        # so vectors inserted without a text triple still match.
        sparql = (
            f'SELECT ?doc ?text WHERE {{\n'
            f'  VECTOR_SIMILAR(?doc <{self._predicate}> '
            f'"{vec_str}"^^<http://sutra.dev/f32vec>, 0.5, k:={k})\n'
            f'  OPTIONAL {{ ?doc <http://sutra.dev/text> ?text }}\n'
            f'}}'
        )

        result = self._client.sparql(sparql)
        docs = []
        for row in result.get("results", {}).get("bindings", []):
            doc_uri = row.get("doc", {}).get("value", "")
            text = row.get("text", {}).get("value", "")
            docs.append(Document(
                page_content=text or doc_uri,
                metadata={"source": doc_uri},
            ))

        return docs

    @classmethod
    def from_texts(
        cls,
        texts: list[str],
        embedding: Embeddings,
        metadatas: Optional[list[dict]] = None,
        **kwargs: Any,
    ) -> "SutraVectorStore":
        """Create a SutraVectorStore and immediately index *texts*."""
        store = cls(embedding=embedding, **kwargs)
        store.add_texts(texts, metadatas=metadatas)
        return store
sutradb/owl.py ADDED
@@ -0,0 +1,304 @@
1
+ """Client-side OWL validation for SutraDB Python SDK.
2
+
3
+ The database accepts all triples unconditionally. OWL validation
4
+ happens here in the SDK before sending data to the server. This
5
+ follows the "lean store, smart client" principle.
6
+
7
+ OWL validation is ENABLED by default. Disable it with:
8
+ client = SutraClient(owl_validation=False)
9
+
10
+ The validator loads OWL ontology triples from the database on first use,
11
+ caches them locally, and checks inserts against:
12
+ - rdfs:domain (property domain constraints)
13
+ - rdfs:range (property range constraints)
14
+ - rdfs:subClassOf (type hierarchy)
15
+ - owl:FunctionalProperty (max one value)
16
+ - owl:disjointWith (classes that can't overlap)
17
+ """
18
+
19
+ from __future__ import annotations
20
+
21
+ from typing import Optional
22
+
23
+ RDF_TYPE = "http://www.w3.org/1999/02/22-rdf-syntax-ns#type"
24
+ RDFS_DOMAIN = "http://www.w3.org/2000/01/rdf-schema#domain"
25
+ RDFS_RANGE = "http://www.w3.org/2000/01/rdf-schema#range"
26
+ RDFS_SUBCLASS_OF = "http://www.w3.org/2000/01/rdf-schema#subClassOf"
27
+ OWL_FUNCTIONAL = "http://www.w3.org/2002/07/owl#FunctionalProperty"
28
+ OWL_DISJOINT = "http://www.w3.org/2002/07/owl#disjointWith"
29
+ OWL_EQUIVALENT_CLASS = "http://www.w3.org/2002/07/owl#equivalentClass"
30
+ OWL_SAME_AS = "http://www.w3.org/2002/07/owl#sameAs"
31
+ OWL_INVERSE_OF = "http://www.w3.org/2002/07/owl#inverseOf"
32
+ OWL_SOME_VALUES_FROM = "http://www.w3.org/2002/07/owl#someValuesFrom"
33
+ OWL_ALL_VALUES_FROM = "http://www.w3.org/2002/07/owl#allValuesFrom"
34
+ OWL_ON_PROPERTY = "http://www.w3.org/2002/07/owl#onProperty"
35
+ RDFS_SUB_PROPERTY_OF = "http://www.w3.org/2000/01/rdf-schema#subPropertyOf"
36
+
37
+
38
class OWLViolation(Exception):
    """Signals that a triple breaks an OWL constraint.

    Attributes:
        constraint_type: Short identifier of the violated constraint
            (e.g. ``"domain"``, ``"range"``, ``"disjoint"``).
        triple: The offending ``(subject, predicate, object)`` tuple.
    """

    def __init__(self, message: str, constraint_type: str, triple: tuple):
        self.constraint_type = constraint_type
        self.triple = triple
        super().__init__(message)
45
+
46
+
47
class OWLValidator:
    """Client-side OWL constraint validator.

    Loads ontology axioms from SutraDB and validates triples before insert.
    """

    def __init__(self):
        self.domains: dict[str, str] = {}  # property -> domain class
        self.ranges: dict[str, str] = {}  # property -> range class
        self.subclass_of: dict[str, set[str]] = {}  # class -> set of parent classes
        self.sub_property_of: dict[str, set[str]] = {}  # property -> parent properties
        self.functional: set[str] = set()  # functional properties
        self.disjoint: dict[str, set[str]] = {}  # class -> disjoint classes
        self.equivalent_classes: dict[str, set[str]] = {}  # class -> equivalent classes
        self.same_as: dict[str, set[str]] = {}  # entity -> same-as entities
        self.inverse_of: dict[str, str] = {}  # property -> inverse property
        # NOTE(review): restrictions is never populated or consulted yet.
        self.restrictions: list[dict] = []  # OWL restrictions (someValues, allValues)
        self.entity_types: dict[str, set[str]] = {}  # entity -> set of types
        self._loaded = False

    @staticmethod
    def _select(client, query: str, *variables: str):
        """Run a SELECT query via *client* and yield tuples of bound values.

        Rows where any requested variable is unbound or empty are skipped.
        """
        result = client.sparql(query)
        for row in result.get("results", {}).get("bindings", []):
            values = tuple(
                row.get(var, {}).get("value", "") for var in variables
            )
            if all(values):
                yield values

    def load_from_client(self, client) -> None:
        """Load OWL ontology triples from a SutraDB client.

        Populates the axiom tables (domains, ranges, hierarchies,
        functional/disjoint sets, equivalences) and a bounded snapshot of
        entity types used for domain/range checking.
        """
        # rdfs:domain constraints
        for p, d in self._select(
            client, f'SELECT ?p ?d WHERE {{ ?p <{RDFS_DOMAIN}> ?d }}', "p", "d"
        ):
            self.domains[p] = d

        # rdfs:range constraints
        for p, r in self._select(
            client, f'SELECT ?p ?r WHERE {{ ?p <{RDFS_RANGE}> ?r }}', "p", "r"
        ):
            self.ranges[p] = r

        # rdfs:subClassOf hierarchy
        for c, parent in self._select(
            client,
            f'SELECT ?c ?parent WHERE {{ ?c <{RDFS_SUBCLASS_OF}> ?parent }}',
            "c",
            "parent",
        ):
            self.subclass_of.setdefault(c, set()).add(parent)

        # owl:FunctionalProperty declarations
        for (p,) in self._select(
            client,
            f'SELECT ?p WHERE {{ ?p <{RDF_TYPE}> <{OWL_FUNCTIONAL}> }}',
            "p",
        ):
            self.functional.add(p)

        # rdfs:subPropertyOf hierarchy
        for p, parent in self._select(
            client,
            f'SELECT ?p ?parent WHERE {{ ?p <{RDFS_SUB_PROPERTY_OF}> ?parent }}',
            "p",
            "parent",
        ):
            self.sub_property_of.setdefault(p, set()).add(parent)

        # owl:equivalentClass (stored symmetrically)
        for a, b in self._select(
            client, f'SELECT ?a ?b WHERE {{ ?a <{OWL_EQUIVALENT_CLASS}> ?b }}', "a", "b"
        ):
            self.equivalent_classes.setdefault(a, set()).add(b)
            self.equivalent_classes.setdefault(b, set()).add(a)

        # owl:sameAs (stored symmetrically)
        for a, b in self._select(
            client, f'SELECT ?a ?b WHERE {{ ?a <{OWL_SAME_AS}> ?b }}', "a", "b"
        ):
            self.same_as.setdefault(a, set()).add(b)
            self.same_as.setdefault(b, set()).add(a)

        # owl:inverseOf (stored in both directions)
        for p, inv in self._select(
            client, f'SELECT ?p ?inv WHERE {{ ?p <{OWL_INVERSE_OF}> ?inv }}', "p", "inv"
        ):
            self.inverse_of[p] = inv
            self.inverse_of[inv] = p

        # owl:disjointWith (stored symmetrically).
        # FIX: this axiom was previously never loaded, so the disjoint
        # checks in validate_triple/generate_verification_queries could
        # never fire.
        for a, b in self._select(
            client, f'SELECT ?a ?b WHERE {{ ?a <{OWL_DISJOINT}> ?b }}', "a", "b"
        ):
            self.disjoint.setdefault(a, set()).add(b)
            self.disjoint.setdefault(b, set()).add(a)

        # Entity type snapshot used during validation (bounded to avoid
        # pulling an entire large database into the client).
        for e, t in self._select(
            client,
            f'SELECT ?e ?t WHERE {{ ?e <{RDF_TYPE}> ?t }} LIMIT 10000',
            "e",
            "t",
        ):
            self.entity_types.setdefault(e, set()).add(t)

        self._loaded = True

    def is_loaded(self) -> bool:
        """Whether the ontology has been loaded."""
        return self._loaded

    def has_constraints(self) -> bool:
        """Whether any OWL constraints exist in the database."""
        return bool(
            self.domains or self.ranges or self.functional or self.disjoint
        )

    def get_all_types(self, class_iri: str) -> set[str]:
        """Get a class and all its ancestors via rdfs:subClassOf."""
        result = {class_iri}
        queue = [class_iri]
        while queue:
            current = queue.pop()
            for parent in self.subclass_of.get(current, set()):
                if parent not in result:
                    result.add(parent)
                    queue.append(parent)
        return result

    def validate_triple(
        self, subject: str, predicate: str, obj: str
    ) -> Optional[OWLViolation]:
        """Validate a single triple against OWL constraints.

        Returns None if valid, or an OWLViolation if invalid.  Checks are
        best-effort: an entity with no known types is not flagged.
        """
        triple = (subject, predicate, obj)

        # Domain check: the subject's type closure must include the
        # declared domain class.
        if predicate in self.domains:
            expected_domain = self.domains[predicate]
            subject_types = self.entity_types.get(subject, set())
            if subject_types:
                all_types = set()
                for t in subject_types:
                    all_types |= self.get_all_types(t)
                if expected_domain not in all_types:
                    return OWLViolation(
                        f"Domain violation: {predicate} requires subject of type "
                        f"{expected_domain}, but {subject} has types {subject_types}",
                        "domain",
                        triple,
                    )

        # Range check (skipped for literals, which start with a quote).
        if predicate in self.ranges and not obj.startswith('"'):
            expected_range = self.ranges[predicate]
            object_types = self.entity_types.get(obj, set())
            if object_types:
                all_types = set()
                for t in object_types:
                    all_types |= self.get_all_types(t)
                if expected_range not in all_types:
                    return OWLViolation(
                        f"Range violation: {predicate} requires object of type "
                        f"{expected_range}, but {obj} has types {object_types}",
                        "range",
                        triple,
                    )

        # Disjoint class check (when assigning a type)
        if predicate == RDF_TYPE:
            existing_types = self.entity_types.get(subject, set())
            for existing_type in existing_types:
                disjoint = self.disjoint.get(existing_type, set())
                if obj in disjoint:
                    return OWLViolation(
                        f"Disjoint violation: {subject} is already type "
                        f"{existing_type}, which is disjoint with {obj}",
                        "disjoint",
                        triple,
                    )

        return None  # Valid

    def generate_verification_queries(self) -> list[tuple[str, str]]:
        """Generate SPARQL queries that check for OWL constraint violations.

        Returns a list of (description, sparql_query) tuples.
        Each query returns rows that represent violations.
        """
        queries = []

        # Domain violations
        for prop, domain_class in self.domains.items():
            queries.append((
                f"Domain violation: {prop} requires subject of type {domain_class}",
                f"SELECT ?s WHERE {{ ?s <{prop}> ?o . "
                f"FILTER NOT EXISTS {{ ?s <{RDF_TYPE}> <{domain_class}> }} }}"
            ))

        # Range violations
        for prop, range_class in self.ranges.items():
            queries.append((
                f"Range violation: {prop} requires object of type {range_class}",
                f"SELECT ?o WHERE {{ ?s <{prop}> ?o . "
                f"FILTER NOT EXISTS {{ ?o <{RDF_TYPE}> <{range_class}> }} }}"
            ))

        # Functional property violations (more than one value)
        for prop in self.functional:
            queries.append((
                f"Functional violation: {prop} should have at most one value per subject",
                f"SELECT ?s WHERE {{ ?s <{prop}> ?o1 . ?s <{prop}> ?o2 . "
                f"FILTER(?o1 != ?o2) }}"
            ))

        # Disjoint class violations
        for cls, disjoint_set in self.disjoint.items():
            for other in disjoint_set:
                queries.append((
                    f"Disjoint violation: {cls} and {other} cannot overlap",
                    f"SELECT ?x WHERE {{ ?x <{RDF_TYPE}> <{cls}> . "
                    f"?x <{RDF_TYPE}> <{other}> }}"
                ))

        return queries

    def validate_ntriples(self, ntriples: str) -> list[OWLViolation]:
        """Validate a block of N-Triples. Returns list of violations.

        Uses a simplified parser: ``subject predicate object .`` split on
        whitespace; blank lines and ``#`` comments are skipped.  Literals
        with embedded whitespace are kept intact because only the first
        two fields are split off.
        """
        violations = []
        for line in ntriples.splitlines():
            line = line.strip()
            if not line or line.startswith("#"):
                continue
            # Simple N-Triples parsing (subject predicate object .)
            parts = line.split(None, 2)
            if len(parts) < 3:
                continue
            s = parts[0].strip("<>")
            p = parts[1].strip("<>")
            o_raw = parts[2].rstrip(" .")
            o = o_raw.strip("<>") if o_raw.startswith("<") else o_raw

            violation = self.validate_triple(s, p, o)
            if violation:
                violations.append(violation)

        return violations
@@ -0,0 +1,7 @@
1
+ Metadata-Version: 2.4
2
+ Name: sutradb
3
+ Version: 0.3.2
4
+ Project-URL: Homepage, https://sutradb.org/
5
+ Project-URL: Repository, https://github.com/EmmaLeonhart/SutraDB
6
+ Provides-Extra: dev
7
+ Requires-Dist: pytest>=7.0; extra == 'dev'
@@ -0,0 +1,9 @@
1
+ sutradb/__init__.py,sha256=L5l3MVIjH2JVgYMQU70_YrrAJyIIEp-sAyS3XsHapSg,157
2
+ sutradb/_version.py,sha256=vNiWJ14r_cw5t_7UDqDQIVZvladKFGyHH2avsLpN7Vg,22
3
+ sutradb/client.py,sha256=wziV1VyT9zRJs9NbE9u0Gu-DzGYiYWRx6jAzvjGgass,9420
4
+ sutradb/jupyter.py,sha256=3mkpibI3pONYJmLHmn4VLpMFFPcvvCGGkLxrRjyufYU,3746
5
+ sutradb/langchain.py,sha256=qlKnXkk1U54KrSO84s48LxgzuEl7SWInDiRdOGNQgdQ,4770
6
+ sutradb/owl.py,sha256=alOZu6f57EGGVVTaO2q7r2H2kOsqYu1XJ9n5Q_s8tK4,12382
7
+ sutradb-0.3.2.dist-info/METADATA,sha256=V1b3Z_KXtELm6ksDTnwYm01MWQFMBq2xQhTk-V-lLps,223
8
+ sutradb-0.3.2.dist-info/WHEEL,sha256=e22IIVjxDyt0lABi4WpktFIGsmO_ebSDXLnPUbPK0E0,105
9
+ sutradb-0.3.2.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: hatchling 1.29.0
3
+ Root-Is-Purelib: true
4
+ Tag: py2-none-any
5
+ Tag: py3-none-any