stark-engine 4.2.0__tar.gz → 4.2.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {stark_engine-4.2.0 → stark_engine-4.2.1}/PKG-INFO +1 -1
- {stark_engine-4.2.0 → stark_engine-4.2.1}/pyproject.toml +1 -1
- stark_engine-4.2.1/stark/tools/sliding_window_parser.py +139 -0
- {stark_engine-4.2.0 → stark_engine-4.2.1}/LICENSE.md +0 -0
- {stark_engine-4.2.0 → stark_engine-4.2.1}/README.md +0 -0
- {stark_engine-4.2.0 → stark_engine-4.2.1}/stark/__init__.py +0 -0
- {stark_engine-4.2.0 → stark_engine-4.2.1}/stark/core/__init__.py +0 -0
- {stark_engine-4.2.0 → stark_engine-4.2.1}/stark/core/command.py +0 -0
- {stark_engine-4.2.0 → stark_engine-4.2.1}/stark/core/commands_context.py +0 -0
- {stark_engine-4.2.0 → stark_engine-4.2.1}/stark/core/commands_manager.py +0 -0
- {stark_engine-4.2.0 → stark_engine-4.2.1}/stark/core/patterns/__init__.py +0 -0
- {stark_engine-4.2.0 → stark_engine-4.2.1}/stark/core/patterns/parsing.py +0 -0
- {stark_engine-4.2.0 → stark_engine-4.2.1}/stark/core/patterns/pattern.py +0 -0
- {stark_engine-4.2.0 → stark_engine-4.2.1}/stark/core/patterns/rules.py +0 -0
- {stark_engine-4.2.0 → stark_engine-4.2.1}/stark/core/types/__init__.py +0 -0
- {stark_engine-4.2.0 → stark_engine-4.2.1}/stark/core/types/number.py +0 -0
- {stark_engine-4.2.0 → stark_engine-4.2.1}/stark/core/types/object.py +0 -0
- {stark_engine-4.2.0 → stark_engine-4.2.1}/stark/core/types/slots.py +0 -0
- {stark_engine-4.2.0 → stark_engine-4.2.1}/stark/core/types/string.py +0 -0
- {stark_engine-4.2.0 → stark_engine-4.2.1}/stark/core/types/time.py +0 -0
- {stark_engine-4.2.0 → stark_engine-4.2.1}/stark/core/types/time_interval.py +0 -0
- {stark_engine-4.2.0 → stark_engine-4.2.1}/stark/core/types/word.py +0 -0
- {stark_engine-4.2.0 → stark_engine-4.2.1}/stark/general/blockage_detector.py +0 -0
- {stark_engine-4.2.0 → stark_engine-4.2.1}/stark/general/classproperty.py +0 -0
- {stark_engine-4.2.0 → stark_engine-4.2.1}/stark/general/dependencies.py +0 -0
- {stark_engine-4.2.0 → stark_engine-4.2.1}/stark/general/json_encoder.py +0 -0
- {stark_engine-4.2.0 → stark_engine-4.2.1}/stark/interfaces/gcloud.py +0 -0
- {stark_engine-4.2.0 → stark_engine-4.2.1}/stark/interfaces/protocols.py +0 -0
- {stark_engine-4.2.0 → stark_engine-4.2.1}/stark/interfaces/silero.py +0 -0
- {stark_engine-4.2.0 → stark_engine-4.2.1}/stark/interfaces/vosk.py +0 -0
- {stark_engine-4.2.0 → stark_engine-4.2.1}/stark/tools/common/span.py +0 -0
- {stark_engine-4.2.0 → stark_engine-4.2.1}/stark/tools/dictionary/!examples.py +0 -0
- {stark_engine-4.2.0 → stark_engine-4.2.1}/stark/tools/dictionary/__init__.py +0 -0
- {stark_engine-4.2.0 → stark_engine-4.2.1}/stark/tools/dictionary/dictionary.py +0 -0
- {stark_engine-4.2.0 → stark_engine-4.2.1}/stark/tools/dictionary/models.py +0 -0
- {stark_engine-4.2.0 → stark_engine-4.2.1}/stark/tools/dictionary/nl_dictionary_name.py +0 -0
- {stark_engine-4.2.0 → stark_engine-4.2.1}/stark/tools/dictionary/storage/__init__.py +0 -0
- {stark_engine-4.2.0 → stark_engine-4.2.1}/stark/tools/dictionary/storage/storage_memory.py +0 -0
- {stark_engine-4.2.0 → stark_engine-4.2.1}/stark/tools/dictionary/storage/storage_sqlite.py +0 -0
- {stark_engine-4.2.0 → stark_engine-4.2.1}/stark/tools/levenshtein/__init__.py +0 -0
- {stark_engine-4.2.0 → stark_engine-4.2.1}/stark/tools/levenshtein/levenshtein.pyi +0 -0
- {stark_engine-4.2.0 → stark_engine-4.2.1}/stark/tools/levenshtein/levenshtein.pyx +0 -0
- {stark_engine-4.2.0 → stark_engine-4.2.1}/stark/tools/phonetic/espeak_ng.py +0 -0
- {stark_engine-4.2.0 → stark_engine-4.2.1}/stark/tools/phonetic/ipa.py +0 -0
- {stark_engine-4.2.0 → stark_engine-4.2.1}/stark/tools/phonetic/simplephone.py +0 -0
- {stark_engine-4.2.0 → stark_engine-4.2.1}/stark/tools/strtools.py +0 -0
- {stark_engine-4.2.0 → stark_engine-4.2.1}/stark/voice_assistant/__init__.py +0 -0
- {stark_engine-4.2.0 → stark_engine-4.2.1}/stark/voice_assistant/mode.py +0 -0
- {stark_engine-4.2.0 → stark_engine-4.2.1}/stark/voice_assistant/voice_assistant.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[tool.poetry]
|
|
2
2
|
name = "stark-engine"
|
|
3
|
-
version = "4.2.0"
|
|
3
|
+
version = "4.2.1"
|
|
4
4
|
description = "S.T.A.R.K - Speech and Text Algorithmic Recognition Kit. Modern framework for creating powerfull voice assistants."
|
|
5
5
|
authors = ["MarkParker5 <mark@parker-programs.com>"]
|
|
6
6
|
license = "CC BY-NC-SA 4.0"
|
|
@@ -0,0 +1,139 @@
|
|
|
1
|
+
import asyncio
|
|
2
|
+
from typing import Awaitable, Callable
|
|
3
|
+
from stark.core.patterns.parsing import ParseError
|
|
4
|
+
from stark.tools.common.span import Span
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def _token_span_to_char_span(tokens: list[str], span: Span, phrase: str) -> Span:
    """Translate a half-open span of token indices into character offsets of ``phrase``.

    Token offsets are recovered by walking ``phrase`` left to right: whitespace
    is skipped, then the cursor advances by ``len(token)``. The token text is
    never compared against ``phrase``, so the result is only meaningful when
    ``tokens`` are the whitespace-separated pieces of ``phrase`` in order
    (presumably ``phrase.split()`` -- verify against callers).

    Args:
        tokens: Tokens of ``phrase``, in order of appearance.
        span: Token-index span; ``span.start`` is inclusive, ``span.end`` exclusive.
        phrase: The original text the tokens were taken from.

    Returns:
        The character span covering tokens ``span.start`` .. ``span.end - 1``.
        An empty token span maps to a zero-width span at the start of token
        ``span.start``. ``Span(0, 0)`` is returned when there are no tokens,
        when the span is out of range or inverted, or when ``span.start`` points
        past the last token.
    """
    token_count = len(tokens)
    if (
        token_count == 0
        or span.start < 0
        or span.start > span.end
        or span.end > token_count
    ):
        return Span(0, 0)

    # Record the (start, end) character offsets of every token in one pass.
    offsets: list[tuple[int, int]] = []
    cursor = 0
    phrase_length = len(phrase)
    for token in tokens:
        # Skip the whitespace separating this token from the previous one.
        while cursor < phrase_length and phrase[cursor].isspace():
            cursor += 1
        token_end = cursor + len(token)
        offsets.append((cursor, token_end))
        cursor = token_end

    # span.start == len(tokens) passes the first guard but has no token to anchor to.
    if span.start >= len(offsets) or span.end > len(offsets):
        return Span(0, 0)

    first_char = offsets[span.start][0]
    if span.end > span.start:
        last_char = offsets[span.end - 1][1]
    else:
        # Empty token span: collapse to a zero-width span at the start token.
        last_char = first_char
    return Span(first_char, last_char)
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
async def _binary_cookie_trim[T](
|
|
34
|
+
tokens: list[str],
|
|
35
|
+
start: int,
|
|
36
|
+
end: int,
|
|
37
|
+
parser: Callable[[str], Awaitable[T]],
|
|
38
|
+
baseline_value: T,
|
|
39
|
+
phrase: str,
|
|
40
|
+
) -> tuple[Span, str, T]:
|
|
41
|
+
"""
|
|
42
|
+
Return minimal (char Span, substring, value) such that
|
|
43
|
+
parser(' '.join(tokens[span.start:span.end])) == baseline_value.
|
|
44
|
+
"""
|
|
45
|
+
# Binary search for the leftmost index such that tokens[left:end] still parses to baseline_value.
|
|
46
|
+
left = start
|
|
47
|
+
l_low, l_high = start, end - 1
|
|
48
|
+
while l_low <= l_high:
|
|
49
|
+
mid = (l_low + l_high) // 2
|
|
50
|
+
try:
|
|
51
|
+
r = await parser(" ".join(tokens[mid:end]))
|
|
52
|
+
except ParseError:
|
|
53
|
+
r = None
|
|
54
|
+
if r == baseline_value:
|
|
55
|
+
left = mid
|
|
56
|
+
l_low = mid + 1
|
|
57
|
+
else:
|
|
58
|
+
l_high = mid - 1
|
|
59
|
+
|
|
60
|
+
# Binary search for the rightmost index such that tokens[left:right] still parses to baseline_value.
|
|
61
|
+
right = end
|
|
62
|
+
r_low, r_high = left + 1, end
|
|
63
|
+
while r_low <= r_high:
|
|
64
|
+
mid = (r_low + r_high) // 2
|
|
65
|
+
try:
|
|
66
|
+
res = await parser(" ".join(tokens[left:mid]))
|
|
67
|
+
except ParseError:
|
|
68
|
+
res = None
|
|
69
|
+
if res == baseline_value:
|
|
70
|
+
right = mid
|
|
71
|
+
r_high = mid - 1
|
|
72
|
+
else:
|
|
73
|
+
r_low = mid + 1
|
|
74
|
+
token_span = Span(left, right)
|
|
75
|
+
char_span = _token_span_to_char_span(tokens, token_span, phrase)
|
|
76
|
+
substr = phrase[char_span.start : char_span.end]
|
|
77
|
+
return char_span, substr, baseline_value
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
async def sliding_window_parse[T](
|
|
81
|
+
phrase: str,
|
|
82
|
+
parser: Callable[[str], Awaitable[T]],
|
|
83
|
+
min_window: int = 1,
|
|
84
|
+
max_window: int | None = None,
|
|
85
|
+
concurrency: int | None = None,
|
|
86
|
+
find_one: bool = True,
|
|
87
|
+
) -> list[tuple[Span, str, T]]:
|
|
88
|
+
tokens: list[str] = phrase.split()
|
|
89
|
+
n: int = len(tokens)
|
|
90
|
+
if n == 0 or parser is None:
|
|
91
|
+
return None
|
|
92
|
+
if max_window is None:
|
|
93
|
+
max_window = n
|
|
94
|
+
|
|
95
|
+
if concurrency is not None and concurrency > 0:
|
|
96
|
+
# Use a semaphore to limit concurrency of parser calls.
|
|
97
|
+
sem = asyncio.Semaphore(concurrency)
|
|
98
|
+
|
|
99
|
+
async def try_window(i: int, j: int) -> T:
|
|
100
|
+
async with sem:
|
|
101
|
+
try:
|
|
102
|
+
return await parser(" ".join(tokens[i:j]))
|
|
103
|
+
except ParseError:
|
|
104
|
+
return None
|
|
105
|
+
else:
|
|
106
|
+
|
|
107
|
+
async def try_window(i: int, j: int) -> T:
|
|
108
|
+
try:
|
|
109
|
+
return await parser(" ".join(tokens[i:j]))
|
|
110
|
+
except ParseError:
|
|
111
|
+
return None
|
|
112
|
+
|
|
113
|
+
# Slide a window of decreasing size over the tokens, left to right.
|
|
114
|
+
# Try parsing for each window. Once successful, trim to minimal window.
|
|
115
|
+
results: list[tuple[Span, str, T]] = []
|
|
116
|
+
for window_size in range(min(max_window, n), min_window - 1, -1):
|
|
117
|
+
for start in range(0, n - window_size + 1):
|
|
118
|
+
end = start + window_size
|
|
119
|
+
try:
|
|
120
|
+
res = await try_window(start, end)
|
|
121
|
+
except ParseError:
|
|
122
|
+
res = None
|
|
123
|
+
if res is None:
|
|
124
|
+
continue
|
|
125
|
+
char_span, substr, value = await _binary_cookie_trim(
|
|
126
|
+
tokens, start, end, parser, res, phrase
|
|
127
|
+
)
|
|
128
|
+
result = (char_span, substr, value)
|
|
129
|
+
if find_one:
|
|
130
|
+
return [result]
|
|
131
|
+
else:
|
|
132
|
+
results.append(result)
|
|
133
|
+
# TODO: limit next windows left edge to char_span.end
|
|
134
|
+
|
|
135
|
+
if results:
|
|
136
|
+
return results
|
|
137
|
+
|
|
138
|
+
# If no valid window is found, raise an error.
|
|
139
|
+
raise ParseError(f"No valid window found using parser={parser} in phrase={phrase}")
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|