PyPI - tablassert - Versions diffs - 7.0.2__tar.gz → 7.1.0__tar.gz - Mend

tablassert 7.0.2.tar.gz → 7.1.0.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (36)

{tablassert-7.0.2 → tablassert-7.1.0}/CHANGELOG.md RENAMED Viewed

@@ -2,6 +2,12 @@
 All notable changes to this project are documented in this file.
+## Unreleased
+### Changes
+- Updated `fullmap` ranking to prioritize case-insensitive exact matches between normalized terms and preferred names.
+- Updated `fullmap` term de-duplication to keep first occurrences, improving deterministic output ordering.
 ## 7.0.2 - 2026-03-23
 ### Changes

{tablassert-7.0.2 → tablassert-7.1.0}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: tablassert
-Version: 7.0.2
+Version: 7.1.0
 Summary: Tablassert is a highly performant declarative knowledge graph backend designed to extract knowledge assertions from tabular data while exporting NCATS Translator-compliant Knowledge Graph Exchange (KGX) NDJSON.
 Project-URL: Homepage, https://github.com/SkyeAv/Tablassert
 Project-URL: Source, https://github.com/SkyeAv/Tablassert
@@ -14,6 +14,7 @@ Requires-Python: >=3.13
 Requires-Dist: diskcache>=5.6.3
 Requires-Dist: duckdb>=1.5.0
 Requires-Dist: fastexcel>=0.19.0
+Requires-Dist: lazy-loader>=0.5
 Requires-Dist: loguru>=0.7.3
 Requires-Dist: mkdocs>=1.6.1
 Requires-Dist: onnxruntime>=1.24.3

{tablassert-7.0.2 → tablassert-7.1.0}/docs/api/fullmap.md RENAMED Viewed

@@ -103,10 +103,11 @@ Returns a Polars LazyFrame with these columns added:
 The function executes a SQL query that:
-1. **Builds an in-memory term table** by collecting distinct terms from both NLP levels and registering them in DuckDB as `PARQUET` via `conn.register("PARQUET", df.to_arrow())`.
+1. **Builds an in-memory term table** by collecting terms from both NLP levels, deduplicating by keeping first occurrences for deterministic ordering, and registering them in DuckDB as `PARQUET` via `conn.register("PARQUET", df.to_arrow())`.
 2. **Ranks matches** by:
    - Category priority (if `prioritize` specified)
+   - Preferred-name exactness (case-insensitive exact match of normalized term to preferred name)
    - NLP level (exact case match preferred over normalized)
    - Category frequency (if `column_context=True`)

{tablassert-7.0.2 → tablassert-7.1.0}/pyproject.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [project]
 name = "tablassert"
-version = "7.0.2"
+version = "7.1.0"
 description = "Tablassert is a highly performant declarative knowledge graph backend designed to extract knowledge assertions from tabular data while exporting NCATS Translator-compliant Knowledge Graph Exchange (KGX) NDJSON."
 authors = [
     { name = "Skye Lane Goetz", email = "sgoetz@isbscience.org" }
@@ -25,6 +25,7 @@ dependencies = [
     "diskcache>=5.6.3",
     "duckdb>=1.5.0",
     "fastexcel>=0.19.0",
+    "lazy-loader>=0.5",
     "loguru>=0.7.3",
     "mkdocs>=1.6.1",
     "onnxruntime>=1.24.3",

{tablassert-7.0.2 → tablassert-7.1.0}/src/tablassert/downloader.py RENAMED Viewed

@@ -1,10 +1,17 @@
+from __future__ import annotations
 from pathlib import Path
 from time import sleep
-from typing import Optional
+from typing import TYPE_CHECKING, Optional
-import pyexcel
+import lazy_loader as Lazy
 from playwright.sync_api import sync_playwright
+if TYPE_CHECKING:
+    import pyexcel
+else:
+    pyexcel = Lazy.load("pyexcel")
 def modernize_xls(p: Path) -> Path:
     xlsx: Path = p.with_suffix(".xlsx")

{tablassert-7.0.2 → tablassert-7.1.0}/src/tablassert/enums.py RENAMED Viewed

@@ -1,3 +1,5 @@
+from __future__ import annotations
 from enum import Enum

{tablassert-7.0.2 → tablassert-7.1.0}/src/tablassert/fullmap.py RENAMED Viewed

@@ -1,11 +1,18 @@
+from __future__ import annotations
 from operator import add
-from typing import Optional
+from typing import TYPE_CHECKING, Optional
-import polars as pl
+import lazy_loader as Lazy
 from tablassert.enums import Categories
 from tablassert.log import logger
+if TYPE_CHECKING:
+    import polars as pl
+else:
+    pl = Lazy.load("polars")
 def distinct(lf: pl.LazyFrame, l0: str, l1: str) -> pl.LazyFrame:
     # ? Extract Unique Terms From Two Text Normalization Columns As LazyFrame
@@ -15,7 +22,7 @@ def distinct(lf: pl.LazyFrame, l0: str, l1: str) -> pl.LazyFrame:
     t1: pl.LazyFrame = lf.select(pl.col(l1).alias("term")).unique()
     t1 = t1.with_columns(pl.lit(1).alias("nlp level"))
-    terms: pl.LazyFrame = pl.concat([t0, t1]).unique(subset=["term"])
+    terms: pl.LazyFrame = pl.concat([t0, t1]).unique(subset=["term"], keep="first")
     bad: str = r"^\d+$|^(none|nan|na|null|unknown)$|^$"
     return terms.filter(~pl.col("term").str.contains(bad))
@@ -38,6 +45,9 @@ def query_builder(
         CASE
             {priority_case}
             ELSE 50
+        END * CASE
+            WHEN LOWER(CU.PREFERRED_NAME) = PA.term THEN 1
+            ELSE 10
         END AS PR
     FROM SYNONYMS SY
     JOIN SOURCES SO ON SY.SOURCE_ID = SO.SOURCE_ID
@@ -76,13 +86,16 @@ def query_distinct(
     results: pl.DataFrame = conn.execute(query).pl()  # pyright: ignore
     sort_by: list[str] = ["term", "PR", "NLP_LEVEL"]
+    descending: list[bool] = [False, False, False]
     if column_context:
         frequency: pl.DataFrame = results.group_by("CATEGORY_NAME").agg(pl.len().alias("FREQUENCY"))
         results = results.join(frequency, on="CATEGORY_NAME", how="left")
         sort_by += ["FREQUENCY"]
+        descending += [True]
-    results = results.sort(sort_by, descending=[False, False, False, True])
+    results = results.sort(sort_by, descending=descending)
     results = results.unique(subset=["term"], keep="first")
     return results

{tablassert-7.0.2 → tablassert-7.1.0}/src/tablassert/ingests.py RENAMED Viewed

@@ -1,10 +1,17 @@
+from __future__ import annotations
 from copy import deepcopy
 from pathlib import Path
-from typing import Any, Union
+from typing import TYPE_CHECKING, Any, Union
-import yaml
+import lazy_loader as Lazy
 from yaml import CLoader
+if TYPE_CHECKING:
+    import yaml
+else:
+    yaml = Lazy.load("yaml")
 def fastmerge(a: Union[list[Any], dict[str, Any]], b: Union[list[Any], dict[str, Any]]) -> Any:
     # ? Streamlined (Fast) Implementation Of Deepmerge Config

{tablassert-7.0.2 → tablassert-7.1.0}/src/tablassert/lib.py RENAMED Viewed

@@ -1,3 +1,5 @@
+from __future__ import annotations
 import math
 import operator
 from functools import reduce
@@ -6,13 +8,9 @@ from multiprocessing import Pool
 from operator import add, eq, le
 from os.path import basename
 from pathlib import Path
-from typing import Any, Callable, Literal, Optional, Self, Union
+from typing import TYPE_CHECKING, Any, Callable, Literal, Optional, Self, Union
-import duckdb
-import orjson
-import polars as pl
-import typer
-import xxhash
+import lazy_loader as Lazy
 from pydantic import Field, NonNegativeInt, PositiveInt
 from rich.progress import BarColumn, Progress, SpinnerColumn, TaskProgressColumn, TextColumn, TimeElapsedColumn
 from sqlite_utils import Database
@@ -26,6 +24,19 @@ from tablassert.models import Encoding, Graph, NodeEncoding, Section
 from tablassert.qc import fullmap_audit
 from tablassert.utils import STORE, mkhash, namespace_uuid
+if TYPE_CHECKING:
+    import duckdb
+    import orjson
+    import polars as pl
+    import typer
+    import xxhash
+else:
+    duckdb = Lazy.load("duckdb")
+    orjson = Lazy.load("orjson")
+    pl = Lazy.load("polars")
+    typer = Lazy.load("typer")
+    xxhash = Lazy.load("xxhash")
 # ? Newline To Make Progress Bar More Readable
 print("\n")

{tablassert-7.0.2 → tablassert-7.1.0}/src/tablassert/log.py RENAMED Viewed

@@ -1,3 +1,5 @@
+from __future__ import annotations
 from pathlib import Path
 from loguru import logger

{tablassert-7.0.2 → tablassert-7.1.0}/src/tablassert/models.py RENAMED Viewed

@@ -1,3 +1,5 @@
+from __future__ import annotations
 from pathlib import Path
 from typing import Literal, Optional, Union

{tablassert-7.0.2 → tablassert-7.1.0}/src/tablassert/qc.py RENAMED Viewed

@@ -1,13 +1,22 @@
+from __future__ import annotations
 from operator import add, eq, ge
 from pathlib import Path
-from typing import Literal, Optional
+from typing import TYPE_CHECKING, Literal, Optional
-import onnxruntime as ort
-import polars as pl
+import lazy_loader as Lazy
 from rapidfuzz import fuzz
-from sentence_transformers import SentenceTransformer
 from sklearn.metrics.pairwise import cosine_similarity
+if TYPE_CHECKING:
+    import onnxruntime as ort
+    import polars as pl
+    import sentence_transformers
+else:
+    ort = Lazy.load("onnxruntime")
+    sentence_transformers = Lazy.load("sentence_transformers")
+    pl = Lazy.load("polars")
 from tablassert.log import logger
 from tablassert.utils import DISKCACHE
@@ -28,9 +37,11 @@ def get_biobert() -> object:
     if BIOBERT:
         return BIOBERT
     elif not BIOBERT and MODEL.exists():
-        BIOBERT = SentenceTransformer(str(MODEL), backend=MODEL_BACKEND, model_kwargs=MODEL_KWARGS)  # pyright: ignore
+        BIOBERT = sentence_transformers.SentenceTransformer(
+            str(MODEL), backend=MODEL_BACKEND, model_kwargs=MODEL_KWARGS
+        )  # pyright: ignore
     else:
-        BIOBERT = SentenceTransformer(
+        BIOBERT = sentence_transformers.SentenceTransformer(
             "pritamdeka/BioBERT-mnli-snli-scinli-scitail-mednli-stsb", backend=MODEL_BACKEND, model_kwargs=MODEL_KWARGS
         )  # pyright: ignore
         MODEL.mkdir(parents=True, exist_ok=True)

{tablassert-7.0.2 → tablassert-7.1.0}/src/tablassert/utils.py RENAMED Viewed

@@ -1,12 +1,20 @@
+from __future__ import annotations
 from functools import cache
 from pathlib import Path
-from typing import Any
+from typing import TYPE_CHECKING, Any
 from uuid import UUID, uuid3
-import polars as pl
-import xxhash
+import lazy_loader as Lazy
 from diskcache import Cache
+if TYPE_CHECKING:
+    import polars as pl
+    import xxhash
+else:
+    pl = Lazy.load("polars")
+    xxhash = Lazy.load("xxhash")
 STORE: Path = Path("./.storassert")
 STORE.mkdir(parents=True, exist_ok=True)

{tablassert-7.0.2 → tablassert-7.1.0}/uv.lock RENAMED Viewed

@@ -363,6 +363,18 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/7b/91/984aca2ec129e2757d1e4e3c81c3fcda9d0f85b74670a094cc443d9ee949/joblib-1.5.3-py3-none-any.whl", hash = "sha256:5fc3c5039fc5ca8c0276333a188bbd59d6b7ab37fe6632daa76bc7f9ec18e713", size = 309071, upload-time = "2025-12-15T08:41:44.973Z" },
 ]
+[[package]]
+name = "lazy-loader"
+version = "0.5"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "packaging" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/49/ac/21a1f8aa3777f5658576777ea76bfb124b702c520bbe90edf4ae9915eafa/lazy_loader-0.5.tar.gz", hash = "sha256:717f9179a0dbed357012ddad50a5ad3d5e4d9a0b8712680d4e687f5e6e6ed9b3", size = 15294, upload-time = "2026-03-06T15:45:09.054Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/8a/a1/8d812e53a5da1687abb10445275d41a8b13adb781bbf7196ddbcf8d88505/lazy_loader-0.5-py3-none-any.whl", hash = "sha256:ab0ea149e9c554d4ffeeb21105ac60bed7f3b4fd69b1d2360a4add51b170b005", size = 8044, upload-time = "2026-03-06T15:45:07.668Z" },
+]
 [[package]]
 name = "lml"
 version = "0.2.0"
@@ -1649,12 +1661,13 @@ wheels = [
 [[package]]
 name = "tablassert"
-version = "7.0.2"
+version = "7.1.0"
 source = { editable = "." }
 dependencies = [
     { name = "diskcache" },
     { name = "duckdb" },
     { name = "fastexcel" },
+    { name = "lazy-loader" },
     { name = "loguru" },
     { name = "mkdocs" },
     { name = "onnxruntime" },
@@ -1693,6 +1706,7 @@ requires-dist = [
     { name = "diskcache", specifier = ">=5.6.3" },
     { name = "duckdb", specifier = ">=1.5.0" },
     { name = "fastexcel", specifier = ">=0.19.0" },
+    { name = "lazy-loader", specifier = ">=0.5" },
     { name = "loguru", specifier = ">=0.7.3" },
     { name = "mkdocs", specifier = ">=1.6.1" },
     { name = "onnxruntime", specifier = ">=1.24.3" },