veridica 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
veridica-0.1.0/LICENCE ADDED
@@ -0,0 +1,7 @@
1
+ Copyright 2025 Sebastiaan Swanenberg
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
4
+
5
+ The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
6
+
7
+ THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,97 @@
1
+ Metadata-Version: 2.3
2
+ Name: veridica
3
+ Version: 0.1.0
4
+ Summary: Fuzzy matching for the use in gherkin based automated tests.
5
+ License: MIT
6
+ Author: Sebastiaan Swanenberg
7
+ Author-email: Sebastiaan@staeble.it
8
+ Requires-Python: >=3.12,<4.0
9
+ Classifier: License :: OSI Approved :: MIT License
10
+ Classifier: Programming Language :: Python :: 3
11
+ Classifier: Programming Language :: Python :: 3.12
12
+ Classifier: Programming Language :: Python :: 3.13
13
+ Description-Content-Type: text/markdown
14
+
15
+ # Veridica
16
+
17
+ This project provides fuzzy string-comparison algorithms for use in Gherkin-based test automation.
18
+
19
+ We found that it was required to have full control over the system to limit dependencies. However that does not mean that others cannot use the code that I have developed and will use.
20
+
21
+ ## Overview
22
+
23
+ Veridica provides a set of pure Python algorithms to compare strings reliably based on fixed math based algorithms.
24
+
25
+ It includes:
26
+
27
+ - Classic string similarity metrics: classic string-to-string matching based on algorithms like Levenshtein.
28
+
29
+ - One-to-many batch comparisons: while one-to-one comparisons can give you a sense of what you are looking for, most use cases involve the comparison of one item to a set. Therefore an implementation was made using classic and recursive techniques that can be directly implemented.
30
+
31
+ - Timing utilities for measuring comparison performance: for now limited to a wrapper function that can give back the time it took to complete, for comparative data.
32
+
33
+ ## Installation
34
+
35
+ You can install the package via **PyPI** or from **source**.
36
+
37
+ ### Install from PyPI
38
+
39
+ ```bash
40
+ pip install veridica
41
+
42
+ ```
43
+
44
+ ### Install from Source (GitHub)
45
+
46
+ ```bash
47
+ git clone https://github.com/sebastiaanswanenberg/veridica.git
48
+ cd veridica
49
+ pip install .
50
+ ```
51
+
52
+ ## Usage
53
+
54
+ After installation, you can use `veridica` to compare strings one to one or compare a string to a set.
55
+
56
+ ### Example: Comparing strings one-to-one.
57
+
58
+ ```python
59
+ from veridica.similarity import levenshtein_distance
60
+
61
+ ```
62
+
63
+ ### Example: Comparing string to a set of strings.
64
+
65
+ ```python
66
+ from veridica.similarity import levenshtein_distance
67
+ from veridica.batch import compare_many
68
+
69
+
70
+ ```
71
+
72
+ ### Example: Time your string to set calculations.
73
+
74
+ ```python
75
+ from veridica.similarity import levenshtein_distance
76
+ from veridica.batch import compare_many
77
+ from veridica.timing import timed
78
+
79
+ ```
80
+
81
+ ## Testing
82
+
83
+ This project includes a test suite. You can run tests using `pytest`:
84
+
85
+ ```bash
86
+ pip install pytest
87
+ pytest tests
88
+ ```
89
+
90
+ ## License
91
+
92
+ This project is licensed under the MIT License. See the [LICENCE](LICENCE) file for more details.
93
+
94
+ ## Contributing
95
+
96
+ Contributions are welcome! Please feel free to submit pull requests or open issues if you have any suggestions or find bugs.
97
+
@@ -0,0 +1,82 @@
1
+ # Veridica
2
+
3
+ This project provides fuzzy string-comparison algorithms for use in Gherkin-based test automation.
4
+
5
+ We found that it was required to have full control over the system to limit dependencies. However that does not mean that others cannot use the code that I have developed and will use.
6
+
7
+ ## Overview
8
+
9
+ Veridica provides a set of pure Python algorithms to compare strings reliably based on fixed math based algorithms.
10
+
11
+ It includes:
12
+
13
+ - Classic string similarity metrics: classic string-to-string matching based on algorithms like Levenshtein.
14
+
15
+ - One-to-many batch comparisons: while one-to-one comparisons can give you a sense of what you are looking for, most use cases involve the comparison of one item to a set. Therefore an implementation was made using classic and recursive techniques that can be directly implemented.
16
+
17
+ - Timing utilities for measuring comparison performance: for now limited to a wrapper function that can give back the time it took to complete, for comparative data.
18
+
19
+ ## Installation
20
+
21
+ You can install the package via **PyPI** or from **source**.
22
+
23
+ ### Install from PyPI
24
+
25
+ ```bash
26
+ pip install veridica
27
+
28
+ ```
29
+
30
+ ### Install from Source (GitHub)
31
+
32
+ ```bash
33
+ git clone https://github.com/sebastiaanswanenberg/veridica.git
34
+ cd veridica
35
+ pip install .
36
+ ```
37
+
38
+ ## Usage
39
+
40
+ After installation, you can use `veridica` to compare strings one to one or compare a string to a set.
41
+
42
+ ### Example: Comparing strings one-to-one.
43
+
44
+ ```python
45
+ from veridica.similarity import levenshtein_distance
46
+
47
+ ```
48
+
49
+ ### Example: Comparing string to a set of strings.
50
+
51
+ ```python
52
+ from veridica.similarity import levenshtein_distance
53
+ from veridica.batch import compare_many
54
+
55
+
56
+ ```
57
+
58
+ ### Example: Time your string to set calculations.
59
+
60
+ ```python
61
+ from veridica.similarity import levenshtein_distance
62
+ from veridica.batch import compare_many
63
+ from veridica.timing import timed
64
+
65
+ ```
66
+
67
+ ## Testing
68
+
69
+ This project includes a test suite. You can run tests using `pytest`:
70
+
71
+ ```bash
72
+ pip install pytest
73
+ pytest tests
74
+ ```
75
+
76
+ ## License
77
+
78
+ This project is licensed under the MIT License. See the [LICENCE](LICENCE) file for more details.
79
+
80
+ ## Contributing
81
+
82
+ Contributions are welcome! Please feel free to submit pull requests or open issues if you have any suggestions or find bugs.
@@ -0,0 +1,19 @@
1
+ [tool.poetry]
2
+ name = "veridica"
3
+ version = "0.1.0"
4
+ description = "Fuzzy matching for the use in gherkin based automated tests."
5
+ authors = ["Sebastiaan Swanenberg <Sebastiaan@staeble.it>"]
6
+ license = "MIT"
7
+ readme = "README.md"
8
+
9
+ [tool.poetry.dependencies]
10
+ python = "^3.12"
11
+
12
+ [tool.poetry.dev-dependencies]
13
+ pytest = "^7.4"
14
+ black = "^24.9"
15
+ isort = "^6.0"
16
+
17
+ [build-system]
18
+ requires = ["poetry-core"]
19
+ build-backend = "poetry.core.masonry.api"
File without changes
@@ -0,0 +1,18 @@
1
+ """One-to-many string comparison helpers."""
2
+
3
+ from typing import Iterable, Callable
4
+
5
def compare_many(
    target: str,
    candidates: Iterable[str],
    comparator: Callable[[str, str], float],
    *,
    descending: bool = True,
) -> list[tuple[str, float]]:
    """
    Score a single target string against many candidates and rank the results.

    Args:
        target (str): string to compare
        candidates (Iterable[str]): strings to compare against; consumed once,
            so a generator is fine
        comparator (Callable[[str, str], float]): called as
            ``comparator(target, candidate)``; its result is converted with
            ``float()``
        descending (bool): sort order of the result. ``True`` (the default)
            puts the highest score first, which suits similarity scores.
            Pass ``False`` when the comparator returns a distance, where a
            lower value means a closer match.

    Returns:
        list[tuple[str, float]]: (candidate, score) pairs ordered by score.
        Candidates with equal scores keep their input order.
    """
    scored = [
        (candidate, float(comparator(target, candidate)))
        for candidate in candidates
    ]
    # sorted() is stable, also with reverse=True, so ties keep input order.
    return sorted(scored, key=lambda pair: pair[1], reverse=descending)
@@ -0,0 +1 @@
1
+ """Helper functions for the normalisation of scores when using multiple algorithms."""
@@ -0,0 +1,107 @@
1
+ """String similarity measurement functions."""
2
+
3
+ from collections import defaultdict
4
+
5
def levenshtein_distance(s1: str, s2: str) -> int:
    """Calculate the Levenshtein distance between two strings.

    The result is the minimum number of single-character insertions,
    deletions and substitutions needed to turn ``s1`` into ``s2``.
    Only two rows of the edit table are kept at any time.
    """
    # Trivial cases: identical inputs, or one side empty.
    if s1 == s2:
        return 0
    if not s1:
        return len(s2)
    if not s2:
        return len(s1)

    # Row 0 of the table: cost of building each prefix of s2 from "".
    previous_row = list(range(len(s2) + 1))
    for row_index, left_char in enumerate(s1, start=1):
        # Column 0: cost of deleting the first row_index characters of s1.
        current_row = [row_index]
        for col_index, right_char in enumerate(s2, start=1):
            substitution = previous_row[col_index - 1] + (
                0 if left_char == right_char else 1
            )
            deletion = previous_row[col_index] + 1
            insertion = current_row[col_index - 1] + 1
            current_row.append(min(substitution, deletion, insertion))
        previous_row = current_row

    return previous_row[-1]
29
+
30
def jaccard_distance(s1: str, s2: str) -> float:
    """Calculate the Jaccard distance between two strings.

    Each string is split on whitespace into a set of tokens, so repeated
    tokens and token order do not matter. The distance is
    ``1 - |intersection| / |union|`` of the two token sets.

    Args:
        s1 (str): first string
        s2 (str): second string

    Returns:
        float: 0.0 when the token sets are equal (including when both are
        empty), 1.0 when they share no token.
    """
    tokens1 = set(s1.split())
    tokens2 = set(s2.split())

    union = tokens1 | tokens2
    # The union is empty exactly when both inputs have no tokens; treat that
    # as identical and avoid dividing by zero.
    if not union:
        return 0.0

    return 1.0 - (len(tokens1 & tokens2) / len(union))
48
+
49
def hamming_distance(s1: str, s2: str) -> int:
    """Calculate the Hamming distance between two strings.

    Strings of unequal length are accepted: the characters are compared
    position by position over the shorter length, and every surplus
    character of the longer string counts as one more difference.

    Args:
        s1 (str): first string
        s2 (str): second string

    Returns:
        int: number of differing positions plus the difference in length.
    """
    length_gap = abs(len(s1) - len(s2))
    # zip() stops at the shorter string, so only the shared positions are compared.
    mismatches = sum(left != right for left, right in zip(s1, s2))
    return length_gap + mismatches
62
+
63
def damerau_levenshtein_distance(s1: str, s2: str) -> int:
    """Calculate the Damerau-Levenshtein distance between two strings.

    Counts insertions, deletions, substitutions and transpositions of
    characters as edit operations.

    Returns 0 when either argument is not a ``str`` (no exception is raised).
    """
    if not (isinstance(s1, str) and isinstance(s2, str)):
        return 0

    rows = len(s1)
    cols = len(s2)
    # Larger than any real distance; fills the outer border of the table so
    # that a transposition anchored there is never chosen.
    sentinel = rows + cols

    # For each character, the last (1-based) row of s1 in which it was seen;
    # 0 means "not seen yet".
    last_seen_row = defaultdict(int)

    # Table has one border row/column more than the classic Levenshtein
    # table, hence the +2.
    table = [[0] * (cols + 2) for _ in range(rows + 2)]

    table[0][0] = sentinel
    for row in range(rows + 1):
        table[row + 1][0] = sentinel
        table[row + 1][1] = row
    for col in range(cols + 1):
        table[0][col + 1] = sentinel
        table[1][col + 1] = col

    for row, left_char in enumerate(s1, start=1):
        # Last column in this row where the two characters matched.
        last_match_col = 0
        for col, right_char in enumerate(s2, start=1):
            # Anchors must be read before last_match_col is updated below.
            anchor_row = last_seen_row[right_char]
            anchor_col = last_match_col
            if left_char == right_char:
                substitution_cost = 0
                last_match_col = col
            else:
                substitution_cost = 1

            transposition = (
                table[anchor_row][anchor_col]
                + (row - anchor_row - 1)
                + 1
                + (col - anchor_col - 1)
            )
            table[row + 1][col + 1] = min(
                table[row][col] + substitution_cost,
                table[row + 1][col] + 1,
                table[row][col + 1] + 1,
                transposition,
            )
        last_seen_row[left_char] = row

    return table[rows + 1][cols + 1]
@@ -0,0 +1,26 @@
1
+ """Timing decorators and utilities for measuring comparison performance."""
2
+
3
+ import time
4
+ from functools import wraps
5
+ from typing import Callable, Any
6
+
7
def timed(func: Callable) -> Callable:
    """
    Decorator to measure the execution time of a function.

    Usage:
        @timed
        def my_func(...):
            ...

        elapsed, result = my_func(...)

    Args:
        func (Callable): function to wrap

    Returns:
        function: wrapped function that returns ``(elapsed, result)``, where
        ``elapsed`` is the wall-clock duration of the call in seconds as
        measured by ``time.perf_counter`` and ``result`` is what ``func``
        returned. Exceptions raised by ``func`` propagate unchanged.
    """
    @wraps(func)
    def wrapper(*args, **kwargs) -> tuple[float, Any]:
        start = time.perf_counter()
        # The call must sit between the two clock readings; otherwise the
        # measured interval does not include the function at all.
        result = func(*args, **kwargs)
        elapsed = time.perf_counter() - start
        return elapsed, result
    return wrapper