splink-4.0.0.dev8.tar.gz → splink-4.0.0.dev9.tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {splink-4.0.0.dev8 → splink-4.0.0.dev9}/PKG-INFO +1 -1
- {splink-4.0.0.dev8 → splink-4.0.0.dev9}/pyproject.toml +1 -1
- {splink-4.0.0.dev8 → splink-4.0.0.dev9}/splink/__init__.py +4 -11
- splink-4.0.0.dev9/splink/backends/duckdb.py +3 -0
- splink-4.0.0.dev9/splink/backends/postgres.py +3 -0
- splink-4.0.0.dev9/splink/backends/spark.py +4 -0
- {splink-4.0.0.dev8 → splink-4.0.0.dev9}/splink/internals/accuracy.py +5 -5
- {splink-4.0.0.dev8 → splink-4.0.0.dev9}/splink/internals/comparison_creator.py +39 -13
- {splink-4.0.0.dev8 → splink-4.0.0.dev9}/splink/internals/comparison_level_composition.py +0 -1
- {splink-4.0.0.dev8 → splink-4.0.0.dev9}/splink/internals/comparison_level_creator.py +56 -22
- {splink-4.0.0.dev8 → splink-4.0.0.dev9}/splink/internals/comparison_level_library.py +5 -2
- {splink-4.0.0.dev8 → splink-4.0.0.dev9}/splink/internals/comparison_library.py +0 -37
- {splink-4.0.0.dev8 → splink-4.0.0.dev9}/splink/internals/database_api.py +3 -10
- {splink-4.0.0.dev8 → splink-4.0.0.dev9}/splink/internals/dialects.py +0 -1
- {splink-4.0.0.dev8 → splink-4.0.0.dev9}/splink/internals/em_training_session.py +17 -30
- {splink-4.0.0.dev8 → splink-4.0.0.dev9}/splink/internals/linker.py +9 -9
- {splink-4.0.0.dev8 → splink-4.0.0.dev9}/splink/internals/linker_components/evaluation.py +57 -32
- {splink-4.0.0.dev8 → splink-4.0.0.dev9}/splink/internals/linker_components/inference.py +4 -4
- {splink-4.0.0.dev8 → splink-4.0.0.dev9}/splink/internals/linker_components/misc.py +3 -1
- {splink-4.0.0.dev8 → splink-4.0.0.dev9}/splink/internals/linker_components/table_management.py +1 -2
- {splink-4.0.0.dev8 → splink-4.0.0.dev9}/splink/internals/linker_components/training.py +3 -66
- {splink-4.0.0.dev8 → splink-4.0.0.dev9}/splink/internals/spark/database_api.py +12 -28
- {splink-4.0.0.dev8 → splink-4.0.0.dev9}/splink/internals/splink_dataframe.py +2 -2
- splink-4.0.0.dev8/splink/backends/spark.py +0 -3
- {splink-4.0.0.dev8 → splink-4.0.0.dev9}/LICENSE +0 -0
- {splink-4.0.0.dev8 → splink-4.0.0.dev9}/README.md +0 -0
- {splink-4.0.0.dev8 → splink-4.0.0.dev9}/splink/backends/athena.py +0 -0
- {splink-4.0.0.dev8 → splink-4.0.0.dev9}/splink/backends/sqlite.py +0 -0
- {splink-4.0.0.dev8 → splink-4.0.0.dev9}/splink/blocking_analysis.py +0 -0
- {splink-4.0.0.dev8 → splink-4.0.0.dev9}/splink/comparison_level_library.py +0 -0
- {splink-4.0.0.dev8 → splink-4.0.0.dev9}/splink/comparison_library.py +0 -0
- {splink-4.0.0.dev8 → splink-4.0.0.dev9}/splink/datasets.py +0 -0
- {splink-4.0.0.dev8 → splink-4.0.0.dev9}/splink/exploratory.py +0 -0
- {splink-4.0.0.dev8 → splink-4.0.0.dev9}/splink/internals/__init__.py +0 -0
- {splink-4.0.0.dev8 → splink-4.0.0.dev9}/splink/internals/athena/__init__.py +0 -0
- {splink-4.0.0.dev8 → splink-4.0.0.dev9}/splink/internals/athena/athena_helpers/__init__.py +0 -0
- {splink-4.0.0.dev8 → splink-4.0.0.dev9}/splink/internals/athena/athena_helpers/athena_transforms.py +0 -0
- {splink-4.0.0.dev8 → splink-4.0.0.dev9}/splink/internals/athena/athena_helpers/athena_utils.py +0 -0
- {splink-4.0.0.dev8 → splink-4.0.0.dev9}/splink/internals/athena/database_api.py +0 -0
- {splink-4.0.0.dev8 → splink-4.0.0.dev9}/splink/internals/athena/dataframe.py +0 -0
- {splink-4.0.0.dev8 → splink-4.0.0.dev9}/splink/internals/block_from_labels.py +0 -0
- {splink-4.0.0.dev8 → splink-4.0.0.dev9}/splink/internals/blocking.py +0 -0
- {splink-4.0.0.dev8 → splink-4.0.0.dev9}/splink/internals/blocking_analysis.py +0 -0
- {splink-4.0.0.dev8 → splink-4.0.0.dev9}/splink/internals/blocking_rule_creator.py +0 -0
- {splink-4.0.0.dev8 → splink-4.0.0.dev9}/splink/internals/blocking_rule_creator_utils.py +0 -0
- {splink-4.0.0.dev8 → splink-4.0.0.dev9}/splink/internals/blocking_rule_library.py +0 -0
- {splink-4.0.0.dev8 → splink-4.0.0.dev9}/splink/internals/cache_dict_with_logging.py +0 -0
- {splink-4.0.0.dev8 → splink-4.0.0.dev9}/splink/internals/charts.py +0 -0
- {splink-4.0.0.dev8 → splink-4.0.0.dev9}/splink/internals/cluster_studio.py +0 -0
- {splink-4.0.0.dev8 → splink-4.0.0.dev9}/splink/internals/column_expression.py +0 -0
- {splink-4.0.0.dev8 → splink-4.0.0.dev9}/splink/internals/comparison.py +0 -0
- {splink-4.0.0.dev8 → splink-4.0.0.dev9}/splink/internals/comparison_level.py +0 -0
- {splink-4.0.0.dev8 → splink-4.0.0.dev9}/splink/internals/comparison_level_sql.py +0 -0
- {splink-4.0.0.dev8 → splink-4.0.0.dev9}/splink/internals/comparison_vector_distribution.py +0 -0
- {splink-4.0.0.dev8 → splink-4.0.0.dev9}/splink/internals/comparison_vector_values.py +0 -0
- {splink-4.0.0.dev8 → splink-4.0.0.dev9}/splink/internals/completeness.py +0 -0
- {splink-4.0.0.dev8 → splink-4.0.0.dev9}/splink/internals/connected_components.py +0 -0
- {splink-4.0.0.dev8 → splink-4.0.0.dev9}/splink/internals/constants.py +0 -0
- {splink-4.0.0.dev8 → splink-4.0.0.dev9}/splink/internals/cost_of_blocking_rules.py +0 -0
- {splink-4.0.0.dev8 → splink-4.0.0.dev9}/splink/internals/databricks/__init__.py +0 -0
- {splink-4.0.0.dev8 → splink-4.0.0.dev9}/splink/internals/databricks/enable_splink.py +0 -0
- {splink-4.0.0.dev8 → splink-4.0.0.dev9}/splink/internals/datasets/__init__.py +0 -0
- {splink-4.0.0.dev8 → splink-4.0.0.dev9}/splink/internals/default_from_jsonschema.py +0 -0
- {splink-4.0.0.dev8 → splink-4.0.0.dev9}/splink/internals/duckdb/__init__.py +0 -0
- {splink-4.0.0.dev8 → splink-4.0.0.dev9}/splink/internals/duckdb/database_api.py +0 -0
- {splink-4.0.0.dev8 → splink-4.0.0.dev9}/splink/internals/duckdb/dataframe.py +0 -0
- {splink-4.0.0.dev8 → splink-4.0.0.dev9}/splink/internals/duckdb/duckdb_helpers/__init__.py +0 -0
- {splink-4.0.0.dev8 → splink-4.0.0.dev9}/splink/internals/duckdb/duckdb_helpers/duckdb_helpers.py +0 -0
- {splink-4.0.0.dev8 → splink-4.0.0.dev9}/splink/internals/edge_metrics.py +0 -0
- {splink-4.0.0.dev8 → splink-4.0.0.dev9}/splink/internals/estimate_u.py +0 -0
- {splink-4.0.0.dev8 → splink-4.0.0.dev9}/splink/internals/exceptions.py +0 -0
- {splink-4.0.0.dev8 → splink-4.0.0.dev9}/splink/internals/expectation_maximisation.py +0 -0
- {splink-4.0.0.dev8 → splink-4.0.0.dev9}/splink/internals/files/DEPENDENCY_LICENSES.txt +0 -0
- {splink-4.0.0.dev8 → splink-4.0.0.dev9}/splink/internals/files/chart_defs/accuracy_chart.json +0 -0
- {splink-4.0.0.dev8 → splink-4.0.0.dev9}/splink/internals/files/chart_defs/blocking_rule_generated_comparisons.json +0 -0
- {splink-4.0.0.dev8 → splink-4.0.0.dev9}/splink/internals/files/chart_defs/comparator_score_chart.json +0 -0
- {splink-4.0.0.dev8 → splink-4.0.0.dev9}/splink/internals/files/chart_defs/comparator_score_threshold_chart.json +0 -0
- {splink-4.0.0.dev8 → splink-4.0.0.dev9}/splink/internals/files/chart_defs/completeness.json +0 -0
- {splink-4.0.0.dev8 → splink-4.0.0.dev9}/splink/internals/files/chart_defs/m_u_parameters_interactive_history.json +0 -0
- {splink-4.0.0.dev8 → splink-4.0.0.dev9}/splink/internals/files/chart_defs/match_weight_histogram.json +0 -0
- {splink-4.0.0.dev8 → splink-4.0.0.dev9}/splink/internals/files/chart_defs/match_weights_interactive_history.json +0 -0
- {splink-4.0.0.dev8 → splink-4.0.0.dev9}/splink/internals/files/chart_defs/match_weights_waterfall.json +0 -0
- {splink-4.0.0.dev8 → splink-4.0.0.dev9}/splink/internals/files/chart_defs/missingness.json +0 -0
- {splink-4.0.0.dev8 → splink-4.0.0.dev9}/splink/internals/files/chart_defs/parameter_estimate_comparisons.json +0 -0
- {splink-4.0.0.dev8 → splink-4.0.0.dev9}/splink/internals/files/chart_defs/phonetic_match_chart.json +0 -0
- {splink-4.0.0.dev8 → splink-4.0.0.dev9}/splink/internals/files/chart_defs/precision_recall.json +0 -0
- {splink-4.0.0.dev8 → splink-4.0.0.dev9}/splink/internals/files/chart_defs/probability_two_random_records_match_iteration.json +0 -0
- {splink-4.0.0.dev8 → splink-4.0.0.dev9}/splink/internals/files/chart_defs/profile_data.json +0 -0
- {splink-4.0.0.dev8 → splink-4.0.0.dev9}/splink/internals/files/chart_defs/roc.json +0 -0
- {splink-4.0.0.dev8 → splink-4.0.0.dev9}/splink/internals/files/chart_defs/tf_adjustment_chart.json +0 -0
- {splink-4.0.0.dev8 → splink-4.0.0.dev9}/splink/internals/files/chart_defs/threshold_selection_tool.json +0 -0
- {splink-4.0.0.dev8 → splink-4.0.0.dev9}/splink/internals/files/chart_defs/unlinkables_chart_def.json +0 -0
- {splink-4.0.0.dev8 → splink-4.0.0.dev9}/splink/internals/files/external_js/d3@7.8.5 +0 -0
- {splink-4.0.0.dev8 → splink-4.0.0.dev9}/splink/internals/files/external_js/stdlib.js@5.8.3 +0 -0
- {splink-4.0.0.dev8 → splink-4.0.0.dev9}/splink/internals/files/external_js/vega-embed@6.20.2 +0 -0
- {splink-4.0.0.dev8 → splink-4.0.0.dev9}/splink/internals/files/external_js/vega-lite@5.2.0 +0 -0
- {splink-4.0.0.dev8 → splink-4.0.0.dev9}/splink/internals/files/external_js/vega@5.21.0 +0 -0
- {splink-4.0.0.dev8 → splink-4.0.0.dev9}/splink/internals/files/labelling_tool/slt.js +0 -0
- {splink-4.0.0.dev8 → splink-4.0.0.dev9}/splink/internals/files/labelling_tool/template.j2 +0 -0
- {splink-4.0.0.dev8 → splink-4.0.0.dev9}/splink/internals/files/settings_jsonschema.json +0 -0
- {splink-4.0.0.dev8 → splink-4.0.0.dev9}/splink/internals/files/spark_jars/scala-udf-similarity-0.1.0_classic.jar +0 -0
- {splink-4.0.0.dev8 → splink-4.0.0.dev9}/splink/internals/files/spark_jars/scala-udf-similarity-0.1.0_spark3.3.jar +0 -0
- {splink-4.0.0.dev8 → splink-4.0.0.dev9}/splink/internals/files/spark_jars/scala-udf-similarity-0.1.1_spark3.x.jar +0 -0
- {splink-4.0.0.dev8 → splink-4.0.0.dev9}/splink/internals/files/splink_cluster_studio/cluster_template.j2 +0 -0
- {splink-4.0.0.dev8 → splink-4.0.0.dev9}/splink/internals/files/splink_cluster_studio/custom.css +0 -0
- {splink-4.0.0.dev8 → splink-4.0.0.dev9}/splink/internals/files/splink_comparison_viewer/custom.css +0 -0
- {splink-4.0.0.dev8 → splink-4.0.0.dev9}/splink/internals/files/splink_comparison_viewer/template.j2 +0 -0
- {splink-4.0.0.dev8 → splink-4.0.0.dev9}/splink/internals/files/splink_vis_utils/splink_vis_utils.js +0 -0
- {splink-4.0.0.dev8 → splink-4.0.0.dev9}/splink/internals/files/templates/single_chart_template.html +0 -0
- {splink-4.0.0.dev8 → splink-4.0.0.dev9}/splink/internals/find_brs_with_comparison_counts_below_threshold.py +0 -0
- {splink-4.0.0.dev8 → splink-4.0.0.dev9}/splink/internals/find_matches_to_new_records.py +0 -0
- {splink-4.0.0.dev8 → splink-4.0.0.dev9}/splink/internals/graph_metrics.py +0 -0
- {splink-4.0.0.dev8 → splink-4.0.0.dev9}/splink/internals/input_column.py +0 -0
- {splink-4.0.0.dev8 → splink-4.0.0.dev9}/splink/internals/labelling_tool.py +0 -0
- {splink-4.0.0.dev8 → splink-4.0.0.dev9}/splink/internals/linker_components/__init__.py +0 -0
- {splink-4.0.0.dev8 → splink-4.0.0.dev9}/splink/internals/linker_components/clustering.py +0 -0
- {splink-4.0.0.dev8 → splink-4.0.0.dev9}/splink/internals/linker_components/visualisations.py +0 -0
- {splink-4.0.0.dev8 → splink-4.0.0.dev9}/splink/internals/logging_messages.py +0 -0
- {splink-4.0.0.dev8 → splink-4.0.0.dev9}/splink/internals/lower_id_on_lhs.py +0 -0
- {splink-4.0.0.dev8 → splink-4.0.0.dev9}/splink/internals/m_from_labels.py +0 -0
- {splink-4.0.0.dev8 → splink-4.0.0.dev9}/splink/internals/m_training.py +0 -0
- {splink-4.0.0.dev8 → splink-4.0.0.dev9}/splink/internals/m_u_records_to_parameters.py +0 -0
- {splink-4.0.0.dev8 → splink-4.0.0.dev9}/splink/internals/match_key_analysis.py +0 -0
- {splink-4.0.0.dev8 → splink-4.0.0.dev9}/splink/internals/match_weights_histogram.py +0 -0
- {splink-4.0.0.dev8 → splink-4.0.0.dev9}/splink/internals/misc.py +0 -0
- {splink-4.0.0.dev8 → splink-4.0.0.dev9}/splink/internals/optimise_cost_of_brs.py +0 -0
- {splink-4.0.0.dev8 → splink-4.0.0.dev9}/splink/internals/parse_sql.py +0 -0
- {splink-4.0.0.dev8 → splink-4.0.0.dev9}/splink/internals/pipeline.py +0 -0
- {splink-4.0.0.dev8 → splink-4.0.0.dev9}/splink/internals/postgres/__init__.py +0 -0
- {splink-4.0.0.dev8 → splink-4.0.0.dev9}/splink/internals/postgres/database_api.py +0 -0
- {splink-4.0.0.dev8 → splink-4.0.0.dev9}/splink/internals/postgres/dataframe.py +0 -0
- {splink-4.0.0.dev8 → splink-4.0.0.dev9}/splink/internals/predict.py +0 -0
- {splink-4.0.0.dev8 → splink-4.0.0.dev9}/splink/internals/profile_data.py +0 -0
- {splink-4.0.0.dev8 → splink-4.0.0.dev9}/splink/internals/settings.py +0 -0
- {splink-4.0.0.dev8 → splink-4.0.0.dev9}/splink/internals/settings_creator.py +0 -0
- {splink-4.0.0.dev8 → splink-4.0.0.dev9}/splink/internals/settings_validation/__init__.py +0 -0
- {splink-4.0.0.dev8 → splink-4.0.0.dev9}/splink/internals/settings_validation/log_invalid_columns.py +0 -0
- {splink-4.0.0.dev8 → splink-4.0.0.dev9}/splink/internals/settings_validation/settings_column_cleaner.py +0 -0
- {splink-4.0.0.dev8 → splink-4.0.0.dev9}/splink/internals/settings_validation/settings_validation_log_strings.py +0 -0
- {splink-4.0.0.dev8 → splink-4.0.0.dev9}/splink/internals/settings_validation/valid_types.py +0 -0
- {splink-4.0.0.dev8 → splink-4.0.0.dev9}/splink/internals/similarity_analysis.py +0 -0
- {splink-4.0.0.dev8 → splink-4.0.0.dev9}/splink/internals/spark/__init__.py +0 -0
- {splink-4.0.0.dev8 → splink-4.0.0.dev9}/splink/internals/spark/dataframe.py +0 -0
- {splink-4.0.0.dev8 → splink-4.0.0.dev9}/splink/internals/spark/jar_location.py +0 -0
- {splink-4.0.0.dev8 → splink-4.0.0.dev9}/splink/internals/spark/spark_helpers/__init__.py +0 -0
- {splink-4.0.0.dev8 → splink-4.0.0.dev9}/splink/internals/spark/spark_helpers/custom_spark_dialect.py +0 -0
- {splink-4.0.0.dev8 → splink-4.0.0.dev9}/splink/internals/splink_comparison_viewer.py +0 -0
- {splink-4.0.0.dev8 → splink-4.0.0.dev9}/splink/internals/sql_transform.py +0 -0
- {splink-4.0.0.dev8 → splink-4.0.0.dev9}/splink/internals/sqlite/__init__.py +0 -0
- {splink-4.0.0.dev8 → splink-4.0.0.dev9}/splink/internals/sqlite/database_api.py +0 -0
- {splink-4.0.0.dev8 → splink-4.0.0.dev9}/splink/internals/sqlite/dataframe.py +0 -0
- {splink-4.0.0.dev8 → splink-4.0.0.dev9}/splink/internals/term_frequencies.py +0 -0
- {splink-4.0.0.dev8 → splink-4.0.0.dev9}/splink/internals/unique_id_concat.py +0 -0
- {splink-4.0.0.dev8 → splink-4.0.0.dev9}/splink/internals/unlinkables.py +0 -0
- {splink-4.0.0.dev8 → splink-4.0.0.dev9}/splink/internals/validate_jsonschema.py +0 -0
- {splink-4.0.0.dev8 → splink-4.0.0.dev9}/splink/internals/vertically_concatenate.py +0 -0
- {splink-4.0.0.dev8 → splink-4.0.0.dev9}/splink/internals/waterfall_chart.py +0 -0
```diff
--- splink-4.0.0.dev8/pyproject.toml
+++ splink-4.0.0.dev9/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "splink"
-version = "4.0.0.dev8"
+version = "4.0.0.dev9"
 description = "Fast probabilistic data linkage at scale"
 authors = ["Robin Linacre <robinlinacre@hotmail.com>", "Sam Lindsay", "Theodore Manassis", "Tom Hepworth", "Andy Bond", "Ross Kennedy"]
 license = "MIT"
```
```diff
--- splink-4.0.0.dev8/splink/__init__.py
+++ splink-4.0.0.dev9/splink/__init__.py
@@ -6,8 +6,8 @@ from splink.internals.datasets import splink_datasets
 from splink.internals.linker import Linker
 from splink.internals.settings_creator import SettingsCreator
 
-# The following is a workaround for the fact that dependencies of
-#
+# The following is a workaround for the fact that dependencies of particular backends
+# may not be installed, but we don't want this to prevent import
 # of the other backends.
 
 # This enables auto-complete to be used to import the various DBAPIs
@@ -15,7 +15,6 @@ from splink.internals.settings_creator import SettingsCreator
 # without importing them at runtime
 if TYPE_CHECKING:
     from splink.internals.duckdb.database_api import DuckDBAPI
-    from splink.internals.postgres.database_api import PostgresAPI
     from splink.internals.spark.database_api import SparkAPI
 
 
@@ -30,12 +29,8 @@ def __getattr__(name):
             from splink.internals.duckdb.database_api import DuckDBAPI
 
             return DuckDBAPI
-        elif name == "PostgresAPI":
-            from splink.internals.postgres.database_api import PostgresAPI
-
-            return PostgresAPI
     except ImportError as err:
-        if name in ["SparkAPI", "DuckDBAPI"
+        if name in ["SparkAPI", "DuckDBAPI"]:
             raise ImportError(
                 f"{name} cannot be imported because its dependencies are not "
                 "installed. Please `pip install` the required package(s) as "
@@ -44,7 +39,7 @@ def __getattr__(name):
     raise AttributeError(f"module 'splink' has no attribute '{name}'") from None
 
 
-__version__ = "4.0.0.dev8"
+__version__ = "4.0.0.dev9"
 
 
 __all__ = [
@@ -52,9 +47,7 @@ __all__ = [
     "ColumnExpression",
     "DuckDBAPI",
     "Linker",
-    "PostgresAPI",
     "SettingsCreator",
     "SparkAPI",
     "splink_datasets",
-    "SQLiteAPI",
 ]
```
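The `PostgresAPI` and `SQLiteAPI` entries removed from `splink/__init__.py` above correspond to the new thin modules under `splink/backends/` listed at the top of this diff. A hedged sketch of what imports could look like under the dev9 layout; the `splink.backends.postgres` and `splink.backends.sqlite` import paths are assumptions based on those new files, not something shown in the hunks:

```python
# Illustrative sketch only - the Postgres/SQLite import paths are assumed
# from the new splink/backends/*.py modules listed in this diff.
from splink import DuckDBAPI, SparkAPI              # still lazily exported at top level
from splink.backends.postgres import PostgresAPI    # assumed new location
from splink.backends.sqlite import SQLiteAPI        # assumed new location

db_api = DuckDBAPI()  # in-process DuckDB backend; no server required
```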
```diff
--- splink-4.0.0.dev8/splink/internals/accuracy.py
+++ splink-4.0.0.dev9/splink/internals/accuracy.py
@@ -446,7 +446,7 @@ def prediction_errors_from_labels_table(
     labels_tablename: str,
     include_false_positives: bool = True,
     include_false_negatives: bool = True,
-
+    threshold_match_probability: float = 0.5,
 ) -> SplinkDataFrame:
     pipeline = CTEPipeline()
     nodes_with_tf = compute_df_concat_with_tf(linker, pipeline)
@@ -457,13 +457,13 @@ def prediction_errors_from_labels_table(
     pipeline.enqueue_list_of_sqls(sqls)
 
     false_positives = f"""
-    (clerical_match_score < {
-    match_probability > {
+    (clerical_match_score < {threshold_match_probability} and
+    match_probability > {threshold_match_probability})
     """
 
     false_negatives = f"""
-    (clerical_match_score > {
-    match_probability < {
+    (clerical_match_score > {threshold_match_probability} and
+    match_probability < {threshold_match_probability})
     """
 
     where_conditions = []
```
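The new `threshold_match_probability` parameter makes the cut-off used to classify prediction errors explicit. The SQL fragments above encode the following logic, shown here as a plain-Python illustration (not library code):

```python
def is_false_positive(clerical_match_score: float,
                      match_probability: float,
                      threshold_match_probability: float = 0.5) -> bool:
    # Labelled (clerically) as a non-match, but predicted as a match
    return (clerical_match_score < threshold_match_probability
            and match_probability > threshold_match_probability)


def is_false_negative(clerical_match_score: float,
                      match_probability: float,
                      threshold_match_probability: float = 0.5) -> bool:
    # Labelled (clerically) as a match, but predicted as a non-match
    return (clerical_match_score > threshold_match_probability
            and match_probability < threshold_match_probability)
```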
````diff
--- splink-4.0.0.dev8/splink/internals/comparison_creator.py
+++ splink-4.0.0.dev9/splink/internals/comparison_creator.py
@@ -7,7 +7,11 @@ from splink.internals.column_expression import ColumnExpression
 from splink.internals.exceptions import SplinkException
 
 from .comparison import Comparison
-from .comparison_level_creator import
+from .comparison_level_creator import (
+    ComparisonLevelCreator,
+    UnsuppliedNoneOr,
+    unsupplied_option,
+)
 
 
 class ComparisonCreator(ABC):
@@ -65,7 +69,6 @@ class ComparisonCreator(ABC):
         # create levels - let them raise errors if there are issues
         self.create_comparison_levels()
 
-    # TODO: property?
     @abstractmethod
     def create_comparison_levels(self) -> List[ComparisonLevelCreator]:
         pass
@@ -77,9 +80,11 @@ class ComparisonCreator(ABC):
 
         if self.term_frequency_adjustments:
             for cl in comparison_levels:
-
-
-
+                if (
+                    hasattr(cl, "col_expression")
+                    and cl.col_expression.is_pure_column_or_column_reference
+                    and cl.is_exact_match_level
+                ):
                     cl.term_frequency_adjustments = True
 
         if self.m_probabilities:
@@ -145,23 +150,37 @@ class ComparisonCreator(ABC):
     def configure(
         self,
         *,
-        term_frequency_adjustments: bool =
-        m_probabilities: List[float] =
-        u_probabilities: List[float] =
+        term_frequency_adjustments: UnsuppliedNoneOr[bool] = unsupplied_option,
+        m_probabilities: UnsuppliedNoneOr[List[float]] = unsupplied_option,
+        u_probabilities: UnsuppliedNoneOr[List[float]] = unsupplied_option,
     ) -> "ComparisonCreator":
         """
-        Configure the comparison creator with
+        Configure the comparison creator with options that are common to all
+        comparisons.
+
+        For m and u probabilities, the first
         element in the list corresponds to the first comparison level, usually
         an exact match level. Subsequent elements correspond comparison to
         levels in sequential order, through to the last element which is usually
         the 'ELSE' level.
 
+        All options have default options set initially. Any call to `.configure()`
+        will set any options that are supplied. Any subsequent calls to `.configure()`
+        will not override these values with defaults; to override values you must
+        explicitly provide a value corresponding to the default.
+
+        Generally speaking only a single call (at most) to `.configure()` should
+        be required.
+
         Args:
             term_frequency_adjustments (bool, optional): Whether term frequency
                 adjustments are switched on for this comparison. Only applied
-                to exact match levels.
+                to exact match levels.
+                Default corresponds to False.
             m_probabilities (list, optional): List of m probabilities
+                Default corresponds to None.
             u_probabilities (list, optional): List of u probabilities
+                Default corresponds to None.
 
         Example:
             ```py
@@ -175,9 +194,16 @@ class ComparisonCreator(ABC):
             ```
 
         """
-
-
-
+        configurables = {
+            "term_frequency_adjustments": term_frequency_adjustments,
+            "m_probabilities": m_probabilities,
+            "u_probabilities": u_probabilities,
+        }
+
+        for attribute_name, attribute_value in configurables.items():
+            if attribute_value is not unsupplied_option:
+                setattr(self, attribute_name, attribute_value)
+
         return self
 
     @property
````
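With sentinel defaults, `ComparisonCreator.configure()` now only touches the options that are explicitly supplied, so a second call no longer silently resets earlier ones. A hedged usage sketch; `ExactMatch` from `splink.comparison_library` is assumed here and is not part of this diff:

```python
import splink.comparison_library as cl

comparison = cl.ExactMatch("first_name")

# First call: switch on term frequency adjustments only.
comparison.configure(term_frequency_adjustments=True)

# Second call: supply m probabilities. term_frequency_adjustments remains True
# because unsupplied options are left untouched rather than reset to defaults.
comparison.configure(m_probabilities=[0.9, 0.1])

# To revert an option you must pass its default value explicitly.
comparison.configure(m_probabilities=None)
```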
```diff
--- splink-4.0.0.dev8/splink/internals/comparison_level_creator.py
+++ splink-4.0.0.dev9/splink/internals/comparison_level_creator.py
@@ -2,7 +2,7 @@ from __future__ import annotations
 
 from abc import ABC, abstractmethod
 from inspect import signature
-from typing import Any, final
+from typing import Any, TypeVar, Union, final
 
 from splink.internals.column_expression import ColumnExpression
 from splink.internals.dialects import SplinkDialect
@@ -10,6 +10,22 @@ from splink.internals.dialects import SplinkDialect
 from .comparison_level import ComparisonLevel
 
 
+class _UnsuppliedOption:
+    _instance: "_UnsuppliedOption" | None = None
+
+    def __new__(cls):
+        if cls._instance is None:
+            cls._instance = super(_UnsuppliedOption, cls).__new__(cls)
+        return cls._instance
+
+
+unsupplied_option = _UnsuppliedOption()
+
+T = TypeVar("T")
+# type alias - either the specified type, _UnsuppliedOption, or None
+UnsuppliedNoneOr = Union[T, _UnsuppliedOption, None]
+
+
 class ComparisonLevelCreator(ABC):
     # off by default - only a small subset should have tf adjustments
     term_frequency_adjustments = False
@@ -56,14 +72,14 @@ class ComparisonLevelCreator(ABC):
     def configure(
         self,
         *,
-        m_probability: float =
-        u_probability: float =
-        tf_adjustment_column: str =
-        tf_adjustment_weight: float =
-        tf_minimum_u_value: float =
-        is_null_level: bool =
-        label_for_charts: str =
-        disable_tf_exact_match_detection: bool =
+        m_probability: UnsuppliedNoneOr[float] = unsupplied_option,
+        u_probability: UnsuppliedNoneOr[float] = unsupplied_option,
+        tf_adjustment_column: UnsuppliedNoneOr[str] = unsupplied_option,
+        tf_adjustment_weight: UnsuppliedNoneOr[float] = unsupplied_option,
+        tf_minimum_u_value: UnsuppliedNoneOr[float] = unsupplied_option,
+        is_null_level: UnsuppliedNoneOr[bool] = unsupplied_option,
+        label_for_charts: UnsuppliedNoneOr[str] = unsupplied_option,
+        disable_tf_exact_match_detection: UnsuppliedNoneOr[bool] = unsupplied_option,
     ) -> "ComparisonLevelCreator":
         """
         Configure the comparison level with options which are common to all
@@ -71,29 +87,47 @@ class ComparisonLevelCreator(ABC):
         specification of a comparison level. These options are usually not
         needed, but are available for advanced users.
 
+        All options have default options set initially. Any call to `.configure()`
+        will set any options that are supplied. Any subsequent calls to `.configure()`
+        will not override these values with defaults; to override values you must must
+        explicitly provide a value corresponding to the default.
+
+        Generally speaking only a single call (at most) to `.configure()` should
+        be required.
 
         Args:
             m_probability (float, optional): The m probability for this
-                comparison level.
+                comparison level.
+                Default is equivalent to None, in which case a default initial value
+                will be provided for this level.
             u_probability (float, optional): The u probability for this
-                comparison level.
+                comparison level.
+                Default is equivalent to None, in which case a default initial value
+                will be provided for this level.
             tf_adjustment_column (str, optional): Make term frequency adjustments for
-                this comparison level using this input column.
-
+                this comparison level using this input column.
+                Default is equivalent to None, meaning that term-frequency adjustments
+                will not be applied for this level.
             tf_adjustment_weight (float, optional): Make term frequency adjustments
-                for this comparison level using this weight.
-                meaning term-frequency adjustments are
+                for this comparison level using this weight.
+                Default is equivalent to None, meaning term-frequency adjustments are
+                fully-weighted if turned on.
             tf_minimum_u_value (float, optional): When term frequency adjustments are
                 turned on, where the term frequency adjustment implies a u value below
-                this value, use this minimum value instead.
-                no minimum value.
+                this value, use this minimum value instead.
+                Defaults is equivalent to None, meaning no minimum value.
             is_null_level (bool, optional): If true, m and u values will not be
                 estimated and instead the match weight will be zero for this column.
-
+                Default is equivalent to False.
             label_for_charts (str, optional): If provided, a custom label that will
-                be used for this level in any charts.
-                a default label will be
-
+                be used for this level in any charts.
+                Default is equivalent to None, in which case a default label will be
+                provided for this level.
+            disable_tf_exact_match_detection (bool, optional): If true, if term
+                frequency adjustments are set, the corresponding adjustment will be
+                made using the u-value for _this_ level, rather than the usual case
+                where it is the u-value of the exact match level in the same comparison.
+                Default is equivalent to False.
         Returns:
             ComparisonLevelCreator: The instance of the ComparisonLevelCreator class
                 with the updated configuration.
@@ -101,7 +135,7 @@ class ComparisonLevelCreator(ABC):
         args = locals()
         del args["self"]
         for k, v in args.items():
-            if v is not
+            if v is not unsupplied_option:
                 setattr(self, k, v)
 
         return self
```
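The `_UnsuppliedOption` singleton exists because `None` is itself a meaningful value for several of these options (for example, `ExactMatchLevel` below now passes `tf_adjustment_column=None` explicitly), so a separate sentinel is needed to mean "the caller passed nothing". A stand-alone sketch of the same pattern, not Splink's own code:

```python
from typing import Optional, TypeVar, Union

T = TypeVar("T")


class _Unsupplied:
    """Singleton sentinel meaning 'argument was not passed at all'."""

    _instance: Optional["_Unsupplied"] = None

    def __new__(cls) -> "_Unsupplied":
        if cls._instance is None:
            cls._instance = super().__new__(cls)
        return cls._instance


UNSUPPLIED = _Unsupplied()
MaybeUnsupplied = Union[T, _Unsupplied, None]


class Level:
    m_probability: Optional[float] = None

    def configure(self, *, m_probability: MaybeUnsupplied[float] = UNSUPPLIED) -> "Level":
        # Only overwrite when the caller actually supplied something;
        # an explicit None is still a real value and is applied.
        if m_probability is not UNSUPPLIED:
            self.m_probability = m_probability
        return self


level = Level().configure(m_probability=None)  # explicitly sets the value to None
level.configure()                              # leaves the existing value untouched
```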
```diff
--- splink-4.0.0.dev8/splink/internals/comparison_level_library.py
+++ splink-4.0.0.dev9/splink/internals/comparison_level_library.py
@@ -256,8 +256,11 @@ class ExactMatchLevel(ComparisonLevelCreator):
                 tf_adjustment_column=self.col_expression.raw_sql_expression,
                 tf_adjustment_weight=1.0,
             )
-
-
+        else:
+            self.configure(
+                tf_adjustment_column=None,
+                tf_adjustment_weight=None,
+            )
 
     def create_sql(self, sql_dialect: SplinkDialect) -> str:
         self.col_expression.sql_dialect = sql_dialect
```
```diff
--- splink-4.0.0.dev8/splink/internals/comparison_library.py
+++ splink-4.0.0.dev9/splink/internals/comparison_library.py
@@ -615,7 +615,6 @@ class DateOfBirthComparison(ComparisonCreator):
             "year",
         ],
         datetime_format: str = None,
-        separate_1st_january: bool = False,
         invalid_dates_as_null: bool = True,
     ):
         """
@@ -644,10 +643,6 @@ class DateOfBirthComparison(ComparisonCreator):
                 Metrics for date differences. Defaults to ["month", "year", "year"].
             datetime_format (str, optional): The datetime format used to cast strings
                 to dates. Only used if input is a string.
-            separate_1st_january (bool, optional): Used for when date of birth is
-                sometimes recorded as 1st of Jan when only the year is known / If True,
-                a level is included for for a match on the year where at least one
-                side of the match is a date on the the 1st of January.
             invalid_dates_as_null (bool, optional): If True, treat invalid dates as null
                 as opposed to allowing e.g. an exact or levenshtein match where one side
                 or both are an invalid date. Only used if input is a string. Defaults
@@ -672,8 +667,6 @@ class DateOfBirthComparison(ComparisonCreator):
 
         self.datetime_format = datetime_format
 
-        self.separate_1st_january = separate_1st_january
-
         self.input_is_string = input_is_string
         self.invalid_dates_as_null = invalid_dates_as_null
 
@@ -693,36 +686,6 @@ class DateOfBirthComparison(ComparisonCreator):
             cll.NullLevel(null_col),
         ]
 
-        if self.input_is_string:
-            date_as_iso_string = self.datetime_parse_function(
-                self.datetime_format
-            ).cast_to_string()
-        else:
-            date_as_iso_string = self.col_expression.cast_to_string()
-
-        if self.separate_1st_january:
-            level = cll.And(
-                cll.Or(
-                    cll.LiteralMatchLevel(
-                        date_as_iso_string.substr(6, 5),
-                        literal_value="01-01",
-                        literal_datatype="string",
-                        side_of_comparison="left",
-                    ),
-                    cll.LiteralMatchLevel(
-                        date_as_iso_string.substr(6, 5),
-                        literal_value="01-01",
-                        literal_datatype="string",
-                        side_of_comparison="right",
-                    ),
-                ),
-                cll.ExactMatchLevel(date_as_iso_string.substr(0, 4)),
-            )
-
-            level.configure(label_for_charts="Exact match on year, 1st Jan only")
-
-            levels.append(level)
-
         levels.append(
             cll.ExactMatchLevel(self.col_expression).configure(
                 label_for_charts="Exact match on date of birth"
```
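With `separate_1st_january` removed, passing that keyword to `DateOfBirthComparison` would now raise a `TypeError`. A hedged sketch of constructing the comparison under the dev9 signature; argument values are illustrative, and only the parameter names visible in this diff are taken from the source:

```python
import splink.comparison_library as cl

# dev8 code may have passed separate_1st_january=True here; that keyword no
# longer exists in dev9.
dob_comparison = cl.DateOfBirthComparison(
    "date_of_birth",
    input_is_string=True,          # stored as self.input_is_string in the diff
    datetime_format="%Y-%m-%d",    # only used when the input is a string
    invalid_dates_as_null=True,    # invalid date strings are treated as null
)
```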
```diff
--- splink-4.0.0.dev8/splink/internals/database_api.py
+++ splink-4.0.0.dev9/splink/internals/database_api.py
@@ -2,7 +2,6 @@ from __future__ import annotations
 
 import hashlib
 import logging
-import random
 import time
 from abc import ABC, abstractmethod
 from collections.abc import Sequence
@@ -47,8 +46,7 @@ class DatabaseAPI(ABC, Generic[TablishType]):
 
     def __init__(self) -> None:
         self._intermediate_table_cache: CacheDictWithLogging = CacheDictWithLogging()
-
-        self._cache_uid: str = str(random.choice(range(10000)))
+        self._cache_uid: str = ascii_uid(8)
 
     @final
     def _log_and_run_sql_execution(
@@ -80,7 +78,6 @@ class DatabaseAPI(ABC, Generic[TablishType]):
                 f"\n\nError was: {e}"
             ) from e
 
-    # TODO: rename this?
     @final
     def _sql_to_splink_dataframe(
         self, sql: str, templated_name: str, physical_name: str
@@ -140,9 +137,8 @@ class DatabaseAPI(ABC, Generic[TablishType]):
         use_cache: bool = True,
     ) -> SplinkDataFrame:
         # differences from _sql_to_splink_dataframe:
-        # this _calculates_ physical name,
-        #
-        # TODO: also maybe caching? but maybe that is even lower down
+        # this _calculates_ physical name, handles debug_mode,
+        # and checks cache before querying
         to_hash = (sql + self._cache_uid).encode("utf-8")
         hash = hashlib.sha256(to_hash).hexdigest()[:9]
         # Ensure hash is valid sql table name
@@ -342,9 +338,6 @@ class DatabaseAPI(ABC, Generic[TablishType]):
         input_tables = ensure_is_list(input_tables)
         return input_tables
 
-    # should probably also be responsible for cache
-    # TODO: stick this in a cache-api that lives on this
-
     def remove_splinkdataframe_from_cache(
         self, splink_dataframe: SplinkDataFrame
     ) -> None:
```
```diff
--- splink-4.0.0.dev8/splink/internals/dialects.py
+++ splink-4.0.0.dev9/splink/internals/dialects.py
@@ -214,7 +214,6 @@ class DuckDBDialect(SplinkDialect):
     ) -> str:
         return f"regexp_extract({name}, '{pattern}', {capture_group})"
 
-    # TODO: roll out to other dialects, at least for now
     @property
     def infinity_expression(self):
        return "cast('infinity' as float8)"
```
```diff
--- splink-4.0.0.dev8/splink/internals/em_training_session.py
+++ splink-4.0.0.dev9/splink/internals/em_training_session.py
@@ -11,7 +11,6 @@ from splink.internals.charts import (
     probability_two_random_records_match_iteration_chart,
 )
 from splink.internals.comparison import Comparison
-from splink.internals.comparison_level import ComparisonLevel
 from splink.internals.comparison_vector_values import (
     compute_comparison_vector_values_from_id_pairs_sqls,
 )
@@ -57,8 +56,6 @@ class EMTrainingSession:
         fix_u_probabilities: bool = False,
         fix_m_probabilities: bool = False,
         fix_probability_two_random_records_match: bool = False,
-        comparisons_to_deactivate: list[Comparison] = None,
-        comparison_levels_to_reverse_blocking_rule: list[ComparisonLevel] = None,
         estimate_without_term_frequencies: bool = False,
     ):
         logger.info("\n----- Starting EM training session -----\n")
@@ -77,20 +74,13 @@ class EMTrainingSession:
         self._blocking_rule_for_training = blocking_rule_for_training
         self.estimate_without_term_frequencies = estimate_without_term_frequencies
 
-
-
-
-
-
-
-
-        self._comparison_levels_to_reverse_blocking_rule: list[
-            ComparisonAndLevelDict
-        ] = Settings._get_comparison_levels_corresponding_to_training_blocking_rule(  # noqa
-            blocking_rule_sql=blocking_rule_for_training.blocking_rule_sql,
-            sqlglot_dialect_name=self.db_api.sql_dialect.sqlglot_name,
-            comparisons=core_model_settings.comparisons,
-        )
+        self._comparison_levels_to_reverse_blocking_rule: list[
+            ComparisonAndLevelDict
+        ] = Settings._get_comparison_levels_corresponding_to_training_blocking_rule(  # noqa
+            blocking_rule_sql=blocking_rule_for_training.blocking_rule_sql,
+            sqlglot_dialect_name=self.db_api.sql_dialect.sqlglot_name,
+            comparisons=core_model_settings.comparisons,
+        )
 
         # batch together fixed probabilities rather than keep hold of the bools
         self.training_fixed_probabilities: set[str] = {
@@ -104,19 +94,16 @@ class EMTrainingSession:
         }
 
         # Remove comparison columns which are either 'used up' by the blocking rules
-
-
-
-
-
-
-
-
-
-
-            cc_cols = [c.input_name for c in cc_cols]
-            if set(br_cols).intersection(cc_cols):
-                comparisons_to_deactivate.append(cc)
+        comparisons_to_deactivate = []
+        br_cols = get_columns_used_from_sql(
+            blocking_rule_for_training.blocking_rule_sql,
+            self.db_api.sql_dialect.sqlglot_name,
+        )
+        for cc in core_model_settings.comparisons:
+            cc_cols = cc._input_columns_used_by_case_statement
+            cc_cols = [c.input_name for c in cc_cols]
+            if set(br_cols).intersection(cc_cols):
+                comparisons_to_deactivate.append(cc)
         cc_names_to_deactivate = [
             cc.output_column_name for cc in comparisons_to_deactivate
         ]
```
```diff
--- splink-4.0.0.dev8/splink/internals/linker.py
+++ splink-4.0.0.dev9/splink/internals/linker.py
@@ -74,7 +74,7 @@ class Linker:
         self,
         input_table_or_tables: str | list[str],
         settings: SettingsCreator | dict[str, Any] | Path | str,
-
+        db_api: DatabaseAPISubClass,
         set_up_basic_logging: bool = True,
         input_table_aliases: str | list[str] | None = None,
         validate_settings: bool = True,
@@ -112,10 +112,12 @@ class Linker:
                 database) for link_only or link_and_dedupe. For some linkers, such as
                 the DuckDBLinker and the SparkLinker, it's also possible to pass in
                 dataframes (Pandas and Spark respectively) rather than strings.
-            settings_dict (dict | Path
-                path
-
-
+            settings_dict (dict | Path | str): A Splink settings dictionary,
+                or a path (either as a pathlib.Path object, or a string) to a json file
+                defining a settings dictionary or pre-trained model.
+            db_api (DatabaseAPI): A `DatabaseAPI` object, which manages interactions
+                with the database. You can import these for use from
+                `splink.backends.{your_backend}`
             set_up_basic_logging (bool, optional): If true, sets ups up basic logging
                 so that Splink sends messages at INFO level to stdout. Defaults to True.
             input_table_aliases (Union[str, list], optional): Labels assigned to
@@ -133,7 +135,7 @@ class Linker:
         splink_logger = logging.getLogger("splink")
         splink_logger.setLevel(logging.INFO)
 
-        self._db_api =
+        self._db_api = db_api
 
         # TODO: temp hack for compat
         self._intermediate_table_cache: CacheDictWithLogging = (
@@ -154,9 +156,7 @@ class Linker:
         # or overwrite it with the db api dialect?
         # Maybe overwrite it here and incompatibilities have to be dealt with
         # by comparisons/ blocking rules etc??
-        self._settings_obj = settings_creator.get_settings(
-            database_api.sql_dialect.name
-        )
+        self._settings_obj = settings_creator.get_settings(db_api.sql_dialect.name)
 
         # TODO: Add test of what happens if the db_api is for a different backend
         # to the sql_dialect set in the settings dict
```