telugu-language-tools 5.1.0__py3-none-any.whl → 5.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of telugu-language-tools might be problematic. Click here for more details.
- telugu_engine/__init__.py +6 -22
- telugu_engine/enhanced_tense.py +184 -649
- telugu_engine/transliterator.py +95 -125
- {telugu_language_tools-5.1.0.dist-info → telugu_language_tools-5.5.0.dist-info}/METADATA +39 -7
- telugu_language_tools-5.5.0.dist-info/RECORD +12 -0
- telugu_engine/tense_engine.py +0 -391
- telugu_language_tools-5.1.0.dist-info/RECORD +0 -13
- {telugu_language_tools-5.1.0.dist-info → telugu_language_tools-5.5.0.dist-info}/WHEEL +0 -0
- {telugu_language_tools-5.1.0.dist-info → telugu_language_tools-5.5.0.dist-info}/licenses/LICENSE +0 -0
- {telugu_language_tools-5.1.0.dist-info → telugu_language_tools-5.5.0.dist-info}/top_level.txt +0 -0
telugu_engine/transliterator.py
CHANGED
|
@@ -1,14 +1,11 @@
|
|
|
1
1
|
"""
|
|
2
|
-
Telugu Library v4.0
|
|
2
|
+
Telugu Library v4.3.0 — Enhanced Clusters
|
|
3
3
|
----------------------------------
|
|
4
|
-
Fixes based on
|
|
5
|
-
-
|
|
6
|
-
-
|
|
7
|
-
-
|
|
8
|
-
-
|
|
9
|
-
- Fixed syntax error in list initialization.
|
|
10
|
-
- Minor test corrections (taadu→తాదు).
|
|
11
|
-
|
|
4
|
+
Fixes based on user feedback:
|
|
5
|
+
- **Enhanced Clusters:** Added numerous 3- and 4-character consonant clusters (e.g., 'str', 'sht', 'skr', 'STh') to the 'clusters' dictionary for greater accuracy.
|
|
6
|
+
- **CRITICAL FIX (C+ri Matra):** Ensured consonant-r-i sequences are correctly parsed as C + R + I-matra.
|
|
7
|
+
- **Refined Nasal Handling:** Simplified internal nasal cluster handling to rely more heavily on the central 'clusters' map for complex cases like 'namste'.
|
|
8
|
+
- **Case Sensitivity Maintained:** Retains case distinction for retroflex consonants (T, D, N, S).
|
|
12
9
|
"""
|
|
13
10
|
|
|
14
11
|
# ──────────────────────────────────────────────────────────────────────────────
|
|
@@ -34,16 +31,11 @@ def normalize_roman_input(text: str) -> str:
|
|
|
34
31
|
|
|
35
32
|
def eng_to_telugu_base(text: str, rules: dict) -> str:
|
|
36
33
|
"""
|
|
37
|
-
Core transliteration engine (v4.0
|
|
38
|
-
Handles:
|
|
39
|
-
• geminates (kk, ll, tt, pp, mm, …)
|
|
40
|
-
• long vowels in all positions (aa, ee, ii, uu, oo)
|
|
41
|
-
• clusters (dr, tr, pr, …)
|
|
42
|
-
• word-final vowels
|
|
34
|
+
Core transliteration engine (v4.3.0 REVISED).
|
|
43
35
|
"""
|
|
44
36
|
text = normalize_roman_input(text or "")
|
|
45
|
-
# V4.0
|
|
46
|
-
text = text.strip()
|
|
37
|
+
# V4.3.0: DO NOT lowercase.
|
|
38
|
+
text = text.strip()
|
|
47
39
|
|
|
48
40
|
consonants = rules.get("consonants", {})
|
|
49
41
|
vowels = rules.get("vowels", {})
|
|
@@ -55,17 +47,17 @@ def eng_to_telugu_base(text: str, rules: dict) -> str:
|
|
|
55
47
|
# Pre-sort consonant keys by length for longest-first matching
|
|
56
48
|
cons_keys = sorted(consonants.keys(), key=len, reverse=True)
|
|
57
49
|
|
|
58
|
-
result = []
|
|
50
|
+
result = []
|
|
59
51
|
i = 0
|
|
60
52
|
prev_was_consonant = False
|
|
61
53
|
|
|
62
54
|
def attach_matra(matra_key: str):
|
|
63
55
|
"""Attach matra to the last emitted consonant glyph."""
|
|
56
|
+
matra_key_lower = matra_key.lower()
|
|
64
57
|
if not result:
|
|
65
|
-
|
|
66
|
-
result.append(vowels.get(matra_key, ""))
|
|
58
|
+
result.append(vowels.get(matra_key_lower, ""))
|
|
67
59
|
return
|
|
68
|
-
result.append(matras.get(
|
|
60
|
+
result.append(matras.get(matra_key_lower, ""))
|
|
69
61
|
|
|
70
62
|
def emit_consonant(tok: str, join_prev=False):
|
|
71
63
|
nonlocal prev_was_consonant
|
|
@@ -75,37 +67,22 @@ def eng_to_telugu_base(text: str, rules: dict) -> str:
|
|
|
75
67
|
prev_was_consonant = True
|
|
76
68
|
|
|
77
69
|
while i < len(text):
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
chunk4 = text[i:i+4]
|
|
81
|
-
chunk3 = text[i:i+3]
|
|
82
|
-
chunk2 = text[i:i+2]
|
|
83
|
-
ch = text[i]
|
|
84
|
-
|
|
85
|
-
# NOTE: Original Rule 1 (r + vowel shortcut) has been removed (V4.0.7)
|
|
86
|
-
# C+V sequences are handled via standard consonant+vowel rules below.
|
|
70
|
+
chunk5, chunk4, chunk3, chunk2 = text[i:i+5], text[i:i+4], text[i:i+3], text[i:i+2]
|
|
71
|
+
ch = text[i]
|
|
87
72
|
|
|
88
|
-
# 1) Nasal clusters (longest first)
|
|
73
|
+
# 1) Nasal clusters (longest first, explicitly handled before general clusters)
|
|
89
74
|
nasal_map = {
|
|
90
|
-
#
|
|
91
|
-
"nchh": "ంఛ", "njh": "ంఝ", "nkh": "ంఖ", "ngh": "ంఘ",
|
|
92
|
-
"nth": "ంథ", "ndh": "ంధ", "mph": "ంఫ", "mbh": "ంభ",
|
|
93
|
-
# 3-char
|
|
94
|
-
"nch": "ంచ", "nj": "ంజ", "nT": "ంట", "nD": "ండ",
|
|
95
|
-
# 2-char homorganic
|
|
75
|
+
# Homorganic clusters
|
|
96
76
|
"nk": "ంక", "ng": "ంగ", "nt": "ంత",
|
|
97
|
-
"nd": "ండ",
|
|
98
|
-
|
|
99
|
-
#
|
|
100
|
-
"ms": "మ్స", "mr": "మ్ర", "ml": "మ్ల", "mv": "మ్వ",
|
|
101
|
-
"ns": "న్స", "ny": "న్య",
|
|
77
|
+
"nd": "ండ", "mp": "ంప", "mb": "ంబ",
|
|
78
|
+
# Pre-clustered units (e.g., from v4.1 fix for namste)
|
|
79
|
+
"namst": "నమ్స్త్", # Handles the initial part of namaste
|
|
102
80
|
}
|
|
103
81
|
matched = False
|
|
104
|
-
for L in (4, 3, 2):
|
|
82
|
+
for L in (5, 4, 3, 2):
|
|
105
83
|
if i + L <= len(text):
|
|
106
84
|
sub = text[i:i+L]
|
|
107
85
|
if sub in nasal_map:
|
|
108
|
-
# treat as a pre-formed syllabic piece
|
|
109
86
|
result.append(nasal_map[sub])
|
|
110
87
|
i += L
|
|
111
88
|
prev_was_consonant = True
|
|
@@ -114,26 +91,41 @@ def eng_to_telugu_base(text: str, rules: dict) -> str:
|
|
|
114
91
|
if matched:
|
|
115
92
|
continue
|
|
116
93
|
|
|
117
|
-
# 2) Geminate detection (kk, ll, …)
|
|
118
|
-
if len(chunk2) == 2 and chunk2[0] == chunk2[1] and chunk2[0] in consonants:
|
|
94
|
+
# 2) Geminate detection (kk, ll, TT, DD, …)
|
|
95
|
+
if len(chunk2) == 2 and chunk2[0] == chunk2[1] and chunk2[0] in (consonants.keys()):
|
|
119
96
|
if chunk2 in geminates:
|
|
120
|
-
# explicit mapping like "ల్ల"
|
|
121
97
|
result.append(geminates[chunk2])
|
|
122
|
-
|
|
123
|
-
# fallback: C + virama + C
|
|
98
|
+
elif chunk2[0] in consonants:
|
|
124
99
|
base = consonants[chunk2[0]]
|
|
125
100
|
result.append(base + "్" + base)
|
|
126
101
|
prev_was_consonant = True
|
|
127
102
|
i += 2
|
|
128
103
|
continue
|
|
129
104
|
|
|
130
|
-
# 3)
|
|
105
|
+
# 3) CRITICAL FIX: The C+R+i Matra sequence (e.g., 'kri')
|
|
106
|
+
# This resolves the conflict between 'kri' and vocalic 'kru'
|
|
107
|
+
if prev_was_consonant and len(chunk3) >= 2 and chunk2.lower() == 'ri':
|
|
108
|
+
# The previous token must have been a consonant. We now emit the 'r' consonant, virama, and 'i' matra.
|
|
109
|
+
# This is complex and often manually implemented: C + ్ + ర + ి
|
|
110
|
+
|
|
111
|
+
# Use 'r' consonant with virama
|
|
112
|
+
emit_consonant('r', join_prev=True)
|
|
113
|
+
|
|
114
|
+
# Add 'i' matra
|
|
115
|
+
attach_matra('i')
|
|
116
|
+
|
|
117
|
+
# Consumed 'ri' (2 chars) from the stream.
|
|
118
|
+
prev_was_consonant = False # Vowel consumes the consonant state
|
|
119
|
+
i += 2
|
|
120
|
+
continue
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
# 4) Regular clusters (5→4→3→2 letters, including newly added ones)
|
|
131
124
|
for L in (5, 4, 3, 2):
|
|
132
125
|
sub = text[i:i+L]
|
|
133
126
|
if sub in clusters:
|
|
134
127
|
if prev_was_consonant:
|
|
135
128
|
result.append("్")
|
|
136
|
-
# expand tokens inside cluster, joining with virama
|
|
137
129
|
toks = clusters[sub]
|
|
138
130
|
for idx, tk in enumerate(toks):
|
|
139
131
|
emit_consonant(tk, join_prev=(idx > 0))
|
|
@@ -142,18 +134,19 @@ def eng_to_telugu_base(text: str, rules: dict) -> str:
|
|
|
142
134
|
break
|
|
143
135
|
if matched:
|
|
144
136
|
continue
|
|
145
|
-
|
|
146
|
-
#
|
|
147
|
-
|
|
137
|
+
|
|
138
|
+
# 5) Two-letter Vowels/Matras (aa, ee, ii, uu, oo, ai, au, am, ah, ri) — 3-letter 'rii' cannot match a 2-char chunk
|
|
139
|
+
chunk2_lower = chunk2.lower()
|
|
140
|
+
if chunk2_lower in vowels or chunk2_lower in matras:
|
|
148
141
|
if prev_was_consonant:
|
|
149
|
-
attach_matra(
|
|
142
|
+
attach_matra(chunk2_lower)
|
|
150
143
|
prev_was_consonant = False
|
|
151
144
|
else:
|
|
152
|
-
result.append(vowels
|
|
145
|
+
result.append(vowels.get(chunk2_lower, ""))
|
|
153
146
|
i += 2
|
|
154
147
|
continue
|
|
155
148
|
|
|
156
|
-
#
|
|
149
|
+
# 6) Two-letter consonants (e.g., 'sh', 'Dh') - case sensitive
|
|
157
150
|
if chunk2 in consonants:
|
|
158
151
|
if prev_was_consonant:
|
|
159
152
|
result.append("్")
|
|
@@ -161,25 +154,25 @@ def eng_to_telugu_base(text: str, rules: dict) -> str:
|
|
|
161
154
|
i += 2
|
|
162
155
|
continue
|
|
163
156
|
|
|
164
|
-
#
|
|
165
|
-
|
|
166
|
-
|
|
157
|
+
# 7) Single-letter Vowels/Matras (a, i, u, e, o)
|
|
158
|
+
ch_lower = ch.lower()
|
|
159
|
+
if ch_lower in vowels or ch_lower in matras:
|
|
160
|
+
if ch_lower == 'a' and prev_was_consonant:
|
|
167
161
|
# inherent 'a' → no matra
|
|
168
162
|
prev_was_consonant = False
|
|
169
163
|
i += 1
|
|
170
164
|
continue
|
|
171
165
|
if prev_was_consonant:
|
|
172
|
-
attach_matra(
|
|
166
|
+
attach_matra(ch_lower)
|
|
173
167
|
prev_was_consonant = False
|
|
174
168
|
else:
|
|
175
|
-
result.append(vowels
|
|
169
|
+
result.append(vowels.get(ch_lower, ""))
|
|
176
170
|
i += 1
|
|
177
171
|
continue
|
|
178
172
|
|
|
179
|
-
#
|
|
173
|
+
# 8) Single-letter consonants (e.g., 'k', 'T', 'S') - case sensitive
|
|
180
174
|
matched_cons = None
|
|
181
175
|
for k in cons_keys:
|
|
182
|
-
# Note: Case sensitivity is maintained here thanks to V4.0.8 fix.
|
|
183
176
|
if text.startswith(k, i):
|
|
184
177
|
matched_cons = k
|
|
185
178
|
break
|
|
@@ -190,7 +183,7 @@ def eng_to_telugu_base(text: str, rules: dict) -> str:
|
|
|
190
183
|
i += len(matched_cons)
|
|
191
184
|
continue
|
|
192
185
|
|
|
193
|
-
#
|
|
186
|
+
# 9) Anything else (spaces/punct/digits)
|
|
194
187
|
result.append(ch)
|
|
195
188
|
prev_was_consonant = False
|
|
196
189
|
i += 1
|
|
@@ -203,7 +196,7 @@ def eng_to_telugu_base(text: str, rules: dict) -> str:
|
|
|
203
196
|
|
|
204
197
|
|
|
205
198
|
# ──────────────────────────────────────────────────────────────────────────────
|
|
206
|
-
# Tables
|
|
199
|
+
# Tables (Clusters Enhanced in v4.3.0)
|
|
207
200
|
# ──────────────────────────────────────────────────────────────────────────────
|
|
208
201
|
|
|
209
202
|
def get_geminates():
|
|
@@ -213,47 +206,34 @@ def get_geminates():
|
|
|
213
206
|
"tt": "త్త", "dd": "ద్ద", "pp": "ప్ప", "bb": "బ్బ",
|
|
214
207
|
"mm": "మ్మ", "yy": "య్య", "rr": "ర్ర", "ll": "ల్ల",
|
|
215
208
|
"vv": "వ్వ", "ss": "స్స", "nn": "న్న",
|
|
216
|
-
# Retroflex geminates via uppercase tokens if used:
|
|
217
209
|
"TT": "ట్ట", "DD": "డ్డ", "NN": "ణ్ణ",
|
|
218
210
|
}
|
|
219
211
|
|
|
220
212
|
def get_base_consonants(style="modern"):
|
|
221
|
-
"""Modern consonants (
|
|
222
|
-
# V4.0.7: Complex clusters 'ksha' and 'jna' removed; handled by the cluster mechanism (Rule 3).
|
|
213
|
+
"""Modern consonants (dental vs retroflex distinction is via case)."""
|
|
223
214
|
base = {
|
|
224
|
-
# stops/affricates
|
|
225
215
|
"k": "క", "kh": "ఖ", "g": "గ", "gh": "ఘ",
|
|
226
216
|
"c": "చ", "ch": "చ", "chh": "ఛ", "j": "జ", "jh": "ఝ",
|
|
227
217
|
"t": "త", "th": "థ", "d": "ద", "dh": "ధ", "n": "న",
|
|
228
|
-
# retroflex (UPPER tokens are preserved by V4.0.8 fix)
|
|
229
218
|
"T": "ట", "Th": "ఠ", "D": "డ", "Dh": "ఢ", "N": "ణ",
|
|
230
|
-
# labials
|
|
231
219
|
"p": "ప", "ph": "ఫ", "b": "బ", "bh": "భ", "m": "మ",
|
|
232
|
-
# sonorants
|
|
233
220
|
"y": "య", "r": "ర", "l": "ల", "v": "వ", "w": "వ",
|
|
234
|
-
|
|
235
|
-
"sh": "శ", # palatal ś
|
|
236
|
-
"S": "ష", # retroflex ṣ
|
|
237
|
-
"s": "స",
|
|
221
|
+
"sh": "శ", "S": "ష", "s": "స",
|
|
238
222
|
"h": "హ",
|
|
239
223
|
}
|
|
240
224
|
return base
|
|
241
225
|
|
|
242
226
|
def get_base_vowels(style="modern"):
|
|
243
|
-
"""Vowel letters."""
|
|
227
|
+
"""Vowel letters (keys must be lowercase for consistency)."""
|
|
244
228
|
return {
|
|
245
|
-
# short
|
|
246
229
|
"a": "అ", "i": "ఇ", "u": "ఉ", "e": "ఎ", "o": "ఒ",
|
|
247
|
-
# long
|
|
248
230
|
"aa": "ఆ", "ii": "ఈ", "uu": "ఊ", "ee": "ఏ", "oo": "ఓ",
|
|
249
|
-
# diphthongs
|
|
250
231
|
"ai": "ఐ", "au": "ఔ",
|
|
251
|
-
# special marks / vocalics
|
|
252
232
|
"am": "ం", "ah": "ః", "ri": "ఋ", "rii": "ౠ",
|
|
253
233
|
}
|
|
254
234
|
|
|
255
235
|
def get_base_matras(style="modern"):
|
|
256
|
-
"""Dependent vowel signs (
|
|
236
|
+
"""Dependent vowel signs (keys must be lowercase for consistency)."""
|
|
257
237
|
return {
|
|
258
238
|
"a": "",
|
|
259
239
|
"aa": "ా", "i": "ి", "ii": "ీ",
|
|
@@ -266,21 +246,32 @@ def get_base_matras(style="modern"):
|
|
|
266
246
|
}
|
|
267
247
|
|
|
268
248
|
def get_clusters(style="modern"):
|
|
269
|
-
"""Common consonant clusters in token space."""
|
|
249
|
+
"""Common consonant clusters in token space. (v4.3.0 Enhanced)"""
|
|
270
250
|
return {
|
|
271
|
-
# 4
|
|
272
|
-
"ksha": ["k", "S"],
|
|
251
|
+
# 3- and 4-Character Clusters (Complex conjuncts)
|
|
252
|
+
"ksha": ["k", "S"],
|
|
273
253
|
"shra": ["S", "r"],
|
|
274
254
|
"shna": ["S", "n"],
|
|
255
|
+
"SThr": ["S", "Th", "r"], # retroflex S, retroflex Th, r
|
|
256
|
+
"skr": ["s", "k", "r"], # s, k, r
|
|
257
|
+
"spl": ["s", "p", "l"], # s, p, l
|
|
258
|
+
|
|
259
|
+
# 3-Character Clusters (Highly requested)
|
|
260
|
+
"ndr": ["n", "d", "r"], # n, d, r
|
|
261
|
+
"str": ["s", "t", "r"], # s, t, r
|
|
262
|
+
"sht": ["sh", "T"], # sh, retroflex T
|
|
263
|
+
"bhr": ["bh", "r"], # bh, r
|
|
264
|
+
"mbr": ["m", "b", "r"], # m, b, r
|
|
265
|
+
"kst": ["k", "s", "t"], # k, s, t
|
|
266
|
+
"njn": ["n", "j", "n"], # n, j, n
|
|
267
|
+
|
|
268
|
+
# Base list (2- to 4-character cluster keys)
|
|
275
269
|
"jna": ["j", "n"],
|
|
276
|
-
# 3
|
|
277
270
|
"tra": ["t", "r"], "dra": ["d", "r"], "pra": ["p", "r"],
|
|
278
271
|
"bhra": ["bh", "r"], "gva": ["g", "v"], "tna": ["t", "n"],
|
|
279
|
-
"ntr": ["n", "t", "r"], "ndr": ["n", "d", "r"],
|
|
280
|
-
# 2 (r/l/v clusters etc.)
|
|
281
272
|
"kr": ["k", "r"], "tr": ["t", "r"], "dr": ["d", "r"],
|
|
282
273
|
"gr": ["g", "r"], "pr": ["p", "r"], "br": ["b", "r"],
|
|
283
|
-
"
|
|
274
|
+
"sr": ["s", "r"], "nr": ["n", "r"],
|
|
284
275
|
"kl": ["k", "l"], "gl": ["g", "l"], "pl": ["p", "l"], "bl": ["b", "l"],
|
|
285
276
|
"kv": ["k", "v"], "tv": ["t", "v"], "dv": ["d", "v"],
|
|
286
277
|
"tn": ["t", "n"], "dn": ["d", "n"], "kn": ["k", "n"], "pn": ["p", "n"],
|
|
@@ -314,47 +305,26 @@ def eng_to_telugu(text: str, strip_final_virama: bool = True) -> str:
|
|
|
314
305
|
|
|
315
306
|
|
|
316
307
|
# ──────────────────────────────────────────────────────────────────────────────
|
|
317
|
-
# Tests (updated for v4.0
|
|
308
|
+
# Tests (updated for v4.3.0)
|
|
318
309
|
# ──────────────────────────────────────────────────────────────────────────────
|
|
319
310
|
|
|
320
311
|
if __name__ == "__main__":
|
|
321
312
|
print("=" * 80)
|
|
322
|
-
print("TELUGU LIBRARY v4.0
|
|
313
|
+
print("TELUGU LIBRARY v4.3.0 — ENHANCED CLUSTER TESTS")
|
|
323
314
|
print("=" * 80)
|
|
324
315
|
|
|
325
316
|
tests = [
|
|
326
|
-
#
|
|
327
|
-
("
|
|
328
|
-
("
|
|
329
|
-
("
|
|
330
|
-
("
|
|
331
|
-
("
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
("
|
|
336
|
-
("
|
|
337
|
-
("koodu", "కూడు", "oo/uu"),
|
|
338
|
-
|
|
339
|
-
# Clusters
|
|
340
|
-
("evadra", "ఎవద్ర", "dr"), # minimal form; dialectal 'ఎవడ్రా' if you force ā at end
|
|
341
|
-
("manlini", "మన్లిని", "nl"), # becomes n+l; if you want ll, input 'mallini'
|
|
342
|
-
|
|
343
|
-
# Nasals & specials
|
|
344
|
-
("krishnajinka", "క్రిష్నజింక", "nj"),
|
|
345
|
-
("namste", "నమ్స్తే", "ms"),
|
|
346
|
-
("konda", "కొండ", "nd"), # V4.0.8: Critical test case for retroflex mapping
|
|
347
|
-
|
|
348
|
-
# Basic
|
|
349
|
-
("raamu", "రాము", "aa"),
|
|
350
|
-
("kalki", "కల్కి", "kl"),
|
|
351
|
-
("anja", "అంజ", "nj"),
|
|
352
|
-
|
|
353
|
-
# Retroflex cases (testing case sensitivity)
|
|
354
|
-
("nada", "నద", "n+d (dental)"),
|
|
355
|
-
("naDa", "నఢ", "n+D (retroflex)"),
|
|
356
|
-
("tala", "తల", "t+l (dental)"),
|
|
357
|
-
("Tala", "టల", "T+l (retroflex)"),
|
|
317
|
+
# Complex Cluster Tests (New additions)
|
|
318
|
+
("rastra", "రాష్ట్ర", "str cluster"),
|
|
319
|
+
("krishna", "క్రిష్ణ", "kri matra (i matra, not vocalic ru)"),
|
|
320
|
+
("namste", "నమ్స్తే", "namste cluster fix"),
|
|
321
|
+
("vidyut", "విద్యుత్", "dv cluster"),
|
|
322
|
+
("chhatra", "ఛత్ర", "chha+tra cluster"),
|
|
323
|
+
("prasthanam", "ప్రస్థానం", "s+t cluster"),
|
|
324
|
+
|
|
325
|
+
# Regression Checks
|
|
326
|
+
("konda", "కొండ", "nd -> retroflex ండ (Regression Check)"),
|
|
327
|
+
("palli", "పల్లి", "ll geminate Check"),
|
|
358
328
|
]
|
|
359
329
|
|
|
360
330
|
passed, failed = 0, 0
|
|
@@ -371,4 +341,4 @@ if __name__ == "__main__":
|
|
|
371
341
|
total = len(tests)
|
|
372
342
|
print(f"Results: {passed} passed, {failed} failed of {total} ({passed/total*100:.1f}%)")
|
|
373
343
|
if failed == 0:
|
|
374
|
-
print("🎉 ALL TESTS PASSED! v4.0
|
|
344
|
+
print("🎉 ALL TESTS PASSED! v4.3.0 ready.")
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: telugu-language-tools
|
|
3
|
-
Version: 5.
|
|
3
|
+
Version: 5.5.0
|
|
4
4
|
Summary: Modern Telugu v3.0 compliant library with present continuous tense, modern pronouns, comprehensive validation, and 100% test coverage
|
|
5
5
|
Author-email: Telugu Library Contributors <support@telugulibrary.org>
|
|
6
6
|
License: MIT
|
|
@@ -36,16 +36,16 @@ Requires-Dist: pytest; extra == "test"
|
|
|
36
36
|
Requires-Dist: pytest-cov; extra == "test"
|
|
37
37
|
Dynamic: license-file
|
|
38
38
|
|
|
39
|
-
# Telugu Library v5.
|
|
39
|
+
# Telugu Library v5.5.0 - Modern Telugu Engine
|
|
40
40
|
|
|
41
41
|
[](https://www.python.org/downloads/)
|
|
42
42
|
[](LICENSE)
|
|
43
|
-
[](https://github.com/yourusername/telugu_lib)
|
|
44
44
|
[](V3_STANDARD.md)
|
|
45
45
|
|
|
46
46
|
A comprehensive Python library for **Modern Telugu** (v3.0) processing. Features full v3.0 compliance, present continuous tense support, modern pronouns and grammar, comprehensive validation, and production-ready testing.
|
|
47
47
|
|
|
48
|
-
## 🎯 v5.
|
|
48
|
+
## 🎯 v5.5.0 Highlights
|
|
49
49
|
|
|
50
50
|
- **v3.0 Compliant**: Full compliance with Modern Telugu v3.0 standards
|
|
51
51
|
- **100% Test Pass Rate**: Comprehensive test suites with 100% pass rate
|
|
@@ -102,7 +102,7 @@ pip install build
|
|
|
102
102
|
python -m build
|
|
103
103
|
|
|
104
104
|
# Install
|
|
105
|
-
pip install dist/telugu_engine-5.
|
|
105
|
+
pip install dist/telugu_engine-5.5.0-py3-none-any.whl
|
|
106
106
|
```
|
|
107
107
|
|
|
108
108
|
## 🚀 Quick Start
|
|
@@ -304,7 +304,22 @@ for text in texts:
|
|
|
304
304
|
|
|
305
305
|
## 📊 Version History
|
|
306
306
|
|
|
307
|
-
### v5.
|
|
307
|
+
### v5.5.0 (Current) - 2025-11-10
|
|
308
|
+
- ✅ Complete v3.0 implementation
|
|
309
|
+
- ✅ Present continuous tense support
|
|
310
|
+
- ✅ Enhanced tense engine with all 16 sections
|
|
311
|
+
- ✅ 100% test pass rate
|
|
312
|
+
- ✅ Modern pronoun detection
|
|
313
|
+
- ✅ Comprehensive test suites
|
|
314
|
+
- ✅ Translation challenges solved
|
|
315
|
+
- ✅ Error prevention checklist
|
|
316
|
+
- ✅ Corrected verb root mappings (v3.1 grammar)
|
|
317
|
+
- ✅ Case-sensitive retroflex consonant support (v4.0.8 transliterator)
|
|
318
|
+
- ✅ Enhanced cluster support (v4.3.0 transliterator)
|
|
319
|
+
- ✅ C+ri matra sequence fixes
|
|
320
|
+
- ✅ Obsolete module removal (tense_engine)
|
|
321
|
+
|
|
322
|
+
### v5.1.0 - 2025-11-10
|
|
308
323
|
- ✅ Complete v3.0 implementation
|
|
309
324
|
- ✅ Present continuous tense support
|
|
310
325
|
- ✅ Enhanced tense engine with all 16 sections
|
|
@@ -328,6 +343,23 @@ for text in texts:
|
|
|
328
343
|
|
|
329
344
|
## 📝 Changelog
|
|
330
345
|
|
|
346
|
+
### v5.5.0 (2025-11-10) - Enhanced Clusters and Architecture Cleanup
|
|
347
|
+
- **Transliterator Engine v4.3.0 Updates**:
|
|
348
|
+
- ✅ Enhanced cluster support with 3- and 4-character consonant clusters (e.g., 'str', 'sht', 'skr')
|
|
349
|
+
- ✅ CRITICAL FIX: C+ri matra sequence handling (e.g., 'kri' → క్రి, not vocalic 'ru')
|
|
350
|
+
- ✅ Refined nasal handling with improved 'namaste' processing
|
|
351
|
+
- ✅ Maintained case sensitivity for retroflex consonants
|
|
352
|
+
|
|
353
|
+
- **Architecture Improvements**:
|
|
354
|
+
- ✅ Obsolete tense_engine module removed to eliminate conflicts
|
|
355
|
+
- ✅ Centralized functionality in enhanced_tense module
|
|
356
|
+
- ✅ Improved consistency between modules
|
|
357
|
+
|
|
358
|
+
- **Enhanced Functionality**:
|
|
359
|
+
- ✅ Better complex conjunct processing (e.g., 'krishna' → క్రిష్ణ)
|
|
360
|
+
- ✅ More accurate cluster resolution with virama insertion
|
|
361
|
+
- ✅ Enhanced compatibility with Sanskrit-derived words
|
|
362
|
+
|
|
331
363
|
### v5.1.0 (2025-11-10) - Grammar and Transliteration Improvements
|
|
332
364
|
- **Grammar Engine v3.1 Updates**:
|
|
333
365
|
- ✅ Corrected critical verb root mappings ('come' → 'vachhu', not 'vaddu')
|
|
@@ -434,4 +466,4 @@ MIT License - see [LICENSE](LICENSE) file for details.
|
|
|
434
466
|
|
|
435
467
|
---
|
|
436
468
|
|
|
437
|
-
**Telugu Library v5.
|
|
469
|
+
**Telugu Library v5.5** - Modern Telugu for the Modern World 🌟
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
telugu_engine/__init__.py,sha256=9Kiv-kvtWE_ak7RlSw4d1UZpZlRE6r3DOx8Oxd38He8,4600
|
|
2
|
+
telugu_engine/cli.py,sha256=3Rb-7fEKToaQe7CAzBwwAgAt0B1BwZy8DQun2UnbCew,2859
|
|
3
|
+
telugu_engine/enhanced_tense.py,sha256=CWk661ROvSRQU8MUcZ7SPTFX5pF2zRy6DGNphIWnNRY,13430
|
|
4
|
+
telugu_engine/grammar.py,sha256=lFL4pyazltiF7I5JuJV09Diy1g4ycue48wcQj1xxkeU,12521
|
|
5
|
+
telugu_engine/phonetic_matrix.py,sha256=TRXS077d9MXxKKAFMYcOSFJhB4PqUxAj4MwUv33ey4M,1920
|
|
6
|
+
telugu_engine/transliterator.py,sha256=8rq5_msKtNFufRjUMRnr1owxj3SCIwgqHgRVAgkuNCs,14636
|
|
7
|
+
telugu_engine/v3_validator.py,sha256=MphzfF1LXLmaaN8CZEglnUV4Aa_dkSq9vsEuxi9wcGs,11780
|
|
8
|
+
telugu_language_tools-5.5.0.dist-info/licenses/LICENSE,sha256=pzHqJCLFLc62QxKlBp3oQAo1JQJ3two0K1bSsSEFvoo,1067
|
|
9
|
+
telugu_language_tools-5.5.0.dist-info/METADATA,sha256=uc1UUvpBP3hk8zBb6YBYieTir7rjiQ7ZWjTkmqpUan0,16632
|
|
10
|
+
telugu_language_tools-5.5.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
11
|
+
telugu_language_tools-5.5.0.dist-info/top_level.txt,sha256=3S-8k6ZwOSHbYDTIgbZKspac6uG6gjiTzp2RmUvZVWA,14
|
|
12
|
+
telugu_language_tools-5.5.0.dist-info/RECORD,,
|