telugu-language-tools 5.1.0__py3-none-any.whl → 5.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of telugu-language-tools might be problematic. Click here for more details.
- telugu_engine/__init__.py +6 -22
- telugu_engine/enhanced_tense.py +184 -649
- telugu_engine/transliterator.py +95 -125
- {telugu_language_tools-5.1.0.dist-info → telugu_language_tools-5.5.0.dist-info}/METADATA +39 -7
- telugu_language_tools-5.5.0.dist-info/RECORD +12 -0
- telugu_engine/tense_engine.py +0 -391
- telugu_language_tools-5.1.0.dist-info/RECORD +0 -13
- {telugu_language_tools-5.1.0.dist-info → telugu_language_tools-5.5.0.dist-info}/WHEEL +0 -0
- {telugu_language_tools-5.1.0.dist-info → telugu_language_tools-5.5.0.dist-info}/licenses/LICENSE +0 -0
- {telugu_language_tools-5.1.0.dist-info → telugu_language_tools-5.5.0.dist-info}/top_level.txt +0 -0
telugu_engine/tense_engine.py
DELETED
|
@@ -1,391 +0,0 @@
|
|
|
1
|
-
"""
|
|
2
|
-
Tense Engine v3.0 - Integrated with Modern Grammar
|
|
3
|
-
==================================================
|
|
4
|
-
|
|
5
|
-
Fully integrated with grammar.py for modern Telugu patterns:
|
|
6
|
-
- Modern verb conjugation (Past Participle + Person Marker)
|
|
7
|
-
- 4-case system
|
|
8
|
-
- SOV syntax processing
|
|
9
|
-
- Vowel harmony
|
|
10
|
-
- Sandhi rules
|
|
11
|
-
|
|
12
|
-
Based on v3.0 standards - uses modern forms only!
|
|
13
|
-
"""
|
|
14
|
-
|
|
15
|
-
from typing import Dict, List, Optional
|
|
16
|
-
from .grammar import (
|
|
17
|
-
conjugate_verb, apply_case, convert_svo_to_soV,
|
|
18
|
-
build_telugu_sentence, apply_sandhi, check_vowel_harmony,
|
|
19
|
-
PERSON_MARKERS, CASE_MARKERS
|
|
20
|
-
)
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
# ============================================================================
|
|
24
|
-
# SECTION 1: TENSE DETECTION
|
|
25
|
-
# ============================================================================
|
|
26
|
-
|
|
27
|
-
def detect_tense(text: str) -> str:
    """
    Detect the tense of an English sentence using whole-word heuristics.

    Matching is done on whitespace-separated, punctuation-stripped words
    rather than on raw substrings, so "need" is not mistaken for a past
    form because it contains "ed", and "Ramu" is not mistaken for a
    present form because it contains "am".

    Checks run in this order (first hit wins):
      1. explicit future auxiliaries ("will", "shall", "won't", "...'ll")
      2. past markers (common irregular forms, or a regular "-ed" ending)
      3. the "going to" construction (future)
      4. an "-ing" ending or a common present-tense word
      5. weak future hints ("tomorrow", "next")

    Args:
        text: English text

    Returns:
        'past', 'present', 'future', or 'unknown'
    """
    # Strip surrounding punctuation so "came." still matches "came".
    tokens = [tok.strip('.,!?;:"()[]') for tok in text.lower().split()]
    tokens = [tok for tok in tokens if tok]
    token_set = set(tokens)

    # Explicit future auxiliaries must be tested before the present-tense
    # word list, otherwise "will go" is classified by "go" alone.
    if token_set & {'will', 'shall', "won't"}:
        return 'future'
    if any(tok.endswith("'ll") for tok in tokens):
        return 'future'

    # Past tense: known irregular forms, or a regular "-ed" ending.
    # Words ending in "-eed" (need, feed, speed) are not past forms.
    past_words = {'was', 'were', 'did', 'had', 'went', 'came', 'ate', 'saw'}
    if token_set & past_words:
        return 'past'
    for tok in tokens:
        if len(tok) > 3 and tok.endswith('ed') and not tok.endswith('eed'):
            return 'past'

    # "going to <verb>" expresses the future.
    for current, following in zip(tokens, tokens[1:]):
        if current == 'going' and following == 'to':
            return 'future'

    # Present continuous: an "-ing" word longer than the suffix itself.
    if any(len(tok) > 4 and tok.endswith('ing') for tok in tokens):
        return 'present'

    # Simple present, including the third-person "-s" forms that the old
    # substring test used to catch implicitly ("reads" contains "read").
    present_words = {
        'is', 'are', 'am', 'do', 'does',
        'go', 'goes', 'eat', 'eats', 'read', 'reads',
    }
    if token_set & present_words:
        return 'present'

    # Weak future hints, only used when nothing stronger matched.
    if token_set & {'tomorrow', 'next'}:
        return 'future'

    return 'unknown'
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
def detect_person(text: str) -> str:
    """
    Detect grammatical person from the pronouns in an English sentence.

    Words are lower-cased and stripped of surrounding punctuation before
    matching, so "I." or "you," are still recognised. The plural check for
    second person also works on whole words, so "called" no longer counts
    as the plural hint "all".

    Categories are tested in a fixed priority order (first person, then
    second, then third); the position of the pronoun in the sentence is
    not considered.

    Args:
        text: English text

    Returns:
        One of '1ps', '1pp', '2ps', '2pp', '3ps', '3pp'. Falls back to
        '3ps' when no pronoun is found.
    """
    words = {tok.strip('.,!?;:"()[]') for tok in text.lower().split()}

    # First person
    if words & {'i', "i'm", "i've", "i'll"}:
        return '1ps'
    if words & {'we', "we're", "we've", "we'll"}:
        return '1pp'

    # Second person: plural only when an explicit group word is present
    if words & {'you', "you're", "you've", "you'll"}:
        if words & {'all', 'group', 'team', 'people'}:
            return '2pp'
        return '2ps'

    # Third person
    if words & {'he', "he's", 'she', "she's", 'it', "it's"}:
        return '3ps'
    if words & {'they', "they're", "they've", "they'll"}:
        return '3pp'

    # Default to 3rd person singular (e.g. a proper-noun subject)
    return '3ps'
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
# ============================================================================
|
|
101
|
-
# SECTION 2: MODERN VERB CONJUGATION
|
|
102
|
-
# ============================================================================
|
|
103
|
-
|
|
104
|
-
# English verb -> romanised Telugu form handed to the conjugator.
# Built once at import time instead of on every call.
#
# The previous inline table listed 'come' and 'know' twice; in a dict
# literal the last entry silently wins, so 'know' resolved to 'mariyu'
# and the earlier 'telisi' entry was dead. Each key now appears once.
# NOTE(review): several values look like inflected forms rather than
# dictionary roots (e.g. 'untundi', 'istunnaru', 'vachcharu'); they are
# kept as-is because the conjugator's expectations are not visible here.
# Please confirm with a Telugu speaker.
_VERB_ROOTS = {
    'read': 'chaduvu',
    'eat': 'tinu',
    'come': 'vachcharu',  # effective value of the old duplicated key
    'go': 'velli',
    'do': 'cheyyu',
    'be': 'raavu',
    'have': 'untundi',
    'say': 'annanu',
    'give': 'istunnaru',
    'take': 'tesukunnaru',
    'see': 'chusi',
    # The old override 'mariyu' is presumably the conjunction "and", not a
    # verb; the first entry is restored. TODO confirm.
    'know': 'telisi',
    # Was a mis-encoded value containing a CJK character; replaced with a
    # plain ASCII romanisation. TODO confirm the intended form.
    'think': 'alochinchu',
    'look': 'chusi',
    'work': 'pani',
    'make': 'make',
}


def get_verb_root(verb: str) -> str:
    """
    Get verb root for conjugation.

    Looks up a common English verb in a fixed table and returns the
    romanised Telugu form. The lookup ignores case and surrounding
    whitespace.

    Args:
        verb: English verb (e.g., 'read', 'eat', 'come')

    Returns:
        The mapped Telugu form, or the normalised (stripped, lower-cased)
        input when the verb is not in the table.
    """
    key = verb.strip().lower()
    return _VERB_ROOTS.get(key, key)
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
def conjugate_english_verb(verb: str, tense: str, person: str) -> str:
    """
    Conjugate an English verb into Telugu.

    The verb is first mapped through get_verb_root(). Past tense is
    delegated directly to conjugate_verb(). For present and future, only
    '1ps', '3ps' and '3pp' are handled: the result of conjugate_verb() has
    its participle marker replaced by a tense/person suffix. Every other
    combination returns the bare root unchanged.

    Args:
        verb: English verb (base form)
        tense: 'past', 'present', 'future'
        person: '1ps', '1pp', '2ps', '2pp', '3ps', '3pp'

    Returns:
        Conjugated Telugu verb, or the root when no rule applies
    """
    telugu_root = get_verb_root(verb)

    # Past tense uses the participle + person-marker pattern as-is.
    if tense == 'past':
        return conjugate_verb(telugu_root, 'past', person)

    # (tense, person) -> text substituted for the participle marker.
    # TODO: Implement proper present/future conjugation for all persons.
    marker_swaps = {
        ('present', '1ps'): 'ి తున్నాను',
        ('present', '3ps'): 'ి తున్నాడు',
        ('present', '3pp'): 'ి తున్నారు',
        ('future', '1ps'): 'ు తాను',
        ('future', '3ps'): 'ు తాడు',
        ('future', '3pp'): 'ు తారు',
    }
    swap = marker_swaps.get((tense, person))
    if swap is None:
        # Fallback: no rule for this tense/person, just return the root
        return telugu_root

    return conjugate_verb(telugu_root, tense, person).replace('ిన', swap)
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
# ============================================================================
|
|
183
|
-
# SECTION 3: SENTENCE PROCESSING
|
|
184
|
-
# ============================================================================
|
|
185
|
-
|
|
186
|
-
def process_simple_sentence(sentence: str) -> str:
    """
    Process simple English sentence to Telugu.

    Pipeline: split the sentence into subject/object/verb, detect tense and
    person, transliterate subject and object, apply case markers, conjugate
    the verb, join the parts in subject-object-verb order, and run the
    final sandhi pass.

    Example:
        "Ramu reads book" → "రాము పుస్తకం చదువుతాడు"

    Args:
        sentence: English sentence (SVO order)

    Returns:
        Telugu sentence (SOV order with grammar). The input is returned
        unchanged when no subject can be extracted.
    """
    # Step 1: Detect parts
    parts = convert_svo_to_soV(sentence)
    if not parts or not parts.get('subject'):
        return sentence

    subject = parts['subject']
    obj = parts.get('object', '')
    verb = parts.get('verb', '')

    # Step 2: Detect tense and person
    tense = detect_tense(sentence)
    person = detect_person(sentence)

    # Step 3: Transliterate the noun parts.
    # The import is kept local as in the original, presumably to avoid a
    # circular import between the two modules. TODO confirm.
    from .transliterator import eng_to_telugu

    subject_telugu = eng_to_telugu(subject)
    obj_telugu = eng_to_telugu(obj) if obj else ''
    # The verb is not transliterated: it goes through get_verb_root() and
    # conjugate_verb() below. The previous transliterated copy was computed
    # and never used.

    # Step 4: Apply case markers
    subject_telugu = apply_case(subject_telugu, 'nominative')
    if obj_telugu:
        obj_telugu = apply_case(obj_telugu, 'accusative')

    # Step 5: Conjugate verb
    verb_root = get_verb_root(verb)
    verb_conjugated = conjugate_verb(verb_root, tense, person)

    # Step 6: Build sentence in SOV order
    telugu_parts = [subject_telugu]
    if obj_telugu:
        telugu_parts.append(obj_telugu)
    telugu_parts.append(verb_conjugated)

    result = ' '.join(telugu_parts)

    # Step 7: Apply sandhi
    return apply_final_sandhi(result)
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
def apply_final_sandhi(text: str) -> str:
    """
    Placeholder for the sentence-level sandhi pass.

    No sandhi rules are implemented yet, so the input is handed back
    exactly as received.

    Args:
        text: Telugu text

    Returns:
        The same text, unmodified
    """
    # TODO: Add proper sandhi rules
    unchanged = text
    return unchanged
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
# ============================================================================
|
|
260
|
-
# SECTION 4: ADVANCED SENTENCE PROCESSING
|
|
261
|
-
# ============================================================================
|
|
262
|
-
|
|
263
|
-
def process_complex_sentence(sentence: str) -> str:
    """
    Translate a complex English sentence (with modifiers, etc.).

    There is no dedicated handling for complex sentences yet; the call is
    forwarded to process_simple_sentence().

    Args:
        sentence: Complex English sentence

    Returns:
        Telugu translation
    """
    translated = process_simple_sentence(sentence)
    return translated
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
def apply_formality(text: str, formality: str = 'informal') -> str:
    """
    Append a formality suffix to Telugu text.

    'formal' appends 'గారు' unless the text already ends with 'గారు' or
    'వారు'. 'honorific' appends 'వారు' unless the text already ends with
    it. Any other formality value leaves the text untouched.

    Args:
        text: Telugu text
        formality: 'informal', 'formal', 'honorific'

    Returns:
        Text with the formality suffix applied
    """
    respectful_suffix = 'గారు'
    honorific_suffix = 'వారు'

    if formality == 'formal':
        # Either suffix already counts as respectful enough.
        if text.endswith((respectful_suffix, honorific_suffix)):
            return text
        return text + respectful_suffix

    if formality == 'honorific':
        if text.endswith(honorific_suffix):
            return text
        return text + honorific_suffix

    return text
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
# ============================================================================
|
|
301
|
-
# SECTION 5: VALIDATION
|
|
302
|
-
# ============================================================================
|
|
303
|
-
|
|
304
|
-
def validate_tense_conjugation(verb: str, tense: str, person: str) -> bool:
    """
    Validate that a conjugated form avoids known archaic patterns.

    The check is purely a blacklist: the form is rejected when it contains
    any of a small set of archaic substrings, and accepted otherwise.
    `tense` and `person` are accepted for interface compatibility but do
    not influence the result; the earlier past-tense participle check had
    no effect (its branch was an empty `pass`) and has been removed along
    with an unused list of modern patterns.

    Args:
        verb: Verb form
        tense: Tense (currently unused)
        person: Person (currently unused)

    Returns:
        False if the form contains an archaic pattern, True otherwise
    """
    # Archaic patterns to avoid
    archaic_patterns = ('చేసితిని', 'చేసితిరి', 'వాండ్రు', 'ఏను')
    return not any(pattern in verb for pattern in archaic_patterns)
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
# ============================================================================
|
|
335
|
-
# SECTION 6: PUBLIC API
|
|
336
|
-
# ============================================================================
|
|
337
|
-
|
|
338
|
-
__all__ = [
|
|
339
|
-
'detect_tense',
|
|
340
|
-
'detect_person',
|
|
341
|
-
'conjugate_english_verb',
|
|
342
|
-
'process_simple_sentence',
|
|
343
|
-
'process_complex_sentence',
|
|
344
|
-
'apply_formality',
|
|
345
|
-
'validate_tense_conjugation',
|
|
346
|
-
]
|
|
347
|
-
|
|
348
|
-
|
|
349
|
-
# ============================================================================
|
|
350
|
-
# SECTION 7: EXAMPLE USAGE
|
|
351
|
-
# ============================================================================
|
|
352
|
-
|
|
353
|
-
if __name__ == "__main__":
    # Demo run: prints sample conjugations, sentence translations and a
    # formality example to stdout.
    rule = "="*70

    print("\n" + rule)
    print(" TENSE ENGINE v3.0 - EXAMPLES")
    print(rule + "\n")

    # (sentence, tense, person) triples; the last word is used as the verb
    conjugation_demos = [
        ("I read", "past", "1ps"),
        ("He came", "past", "3ps"),
        ("They ate", "past", "3pp"),
        ("You go", "present", "2ps"),
    ]

    print("1. Verb Conjugation:")
    for sentence, tense, person in conjugation_demos:
        tokens = sentence.split()
        # Fall back to 'read' when the sentence has no words
        verb = tokens[-1] if tokens else 'read'
        result = conjugate_english_verb(verb, tense, person)
        print(f" {sentence} ({tense}, {person}) → {result}")

    print("\n2. Sentence Processing:")
    for sentence in ("Ramu reads book", "Sita ate rice", "I came", "They will go"):
        result = process_simple_sentence(sentence)
        print(f" '{sentence}' → '{result}'")

    print("\n3. Formality:")
    text = process_simple_sentence("You came")
    print(f" Informal: {text}")
    print(f" Formal: {apply_formality(text, 'formal')}")

    print("\n" + rule + "\n")
|
|
@@ -1,13 +0,0 @@
|
|
|
1
|
-
telugu_engine/__init__.py,sha256=A6QWYDMO33F1_HHnegaOhIKigJz41E4tCR1tPsSjHA0,4988
|
|
2
|
-
telugu_engine/cli.py,sha256=3Rb-7fEKToaQe7CAzBwwAgAt0B1BwZy8DQun2UnbCew,2859
|
|
3
|
-
telugu_engine/enhanced_tense.py,sha256=V9IkNw-vC2Xr8pRBwze72u5sSjwxjqAju6CGGRDd-Wk,28149
|
|
4
|
-
telugu_engine/grammar.py,sha256=lFL4pyazltiF7I5JuJV09Diy1g4ycue48wcQj1xxkeU,12521
|
|
5
|
-
telugu_engine/phonetic_matrix.py,sha256=TRXS077d9MXxKKAFMYcOSFJhB4PqUxAj4MwUv33ey4M,1920
|
|
6
|
-
telugu_engine/tense_engine.py,sha256=zN3ee2gBIkB1mHkYV9XWync4rVALdQvfA935kUve1bE,11217
|
|
7
|
-
telugu_engine/transliterator.py,sha256=duPMWFoZPTd5gQ1wXuk3K0L_HkBtVGae34i8Nz-hVLk,15228
|
|
8
|
-
telugu_engine/v3_validator.py,sha256=MphzfF1LXLmaaN8CZEglnUV4Aa_dkSq9vsEuxi9wcGs,11780
|
|
9
|
-
telugu_language_tools-5.1.0.dist-info/licenses/LICENSE,sha256=pzHqJCLFLc62QxKlBp3oQAo1JQJ3two0K1bSsSEFvoo,1067
|
|
10
|
-
telugu_language_tools-5.1.0.dist-info/METADATA,sha256=Q6Tloikow65vndw4STEYSv3IjwW7rFqzD0QieCPwNtg,15154
|
|
11
|
-
telugu_language_tools-5.1.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
12
|
-
telugu_language_tools-5.1.0.dist-info/top_level.txt,sha256=3S-8k6ZwOSHbYDTIgbZKspac6uG6gjiTzp2RmUvZVWA,14
|
|
13
|
-
telugu_language_tools-5.1.0.dist-info/RECORD,,
|
|
File without changes
|
{telugu_language_tools-5.1.0.dist-info → telugu_language_tools-5.5.0.dist-info}/licenses/LICENSE
RENAMED
|
File without changes
|
{telugu_language_tools-5.1.0.dist-info → telugu_language_tools-5.5.0.dist-info}/top_level.txt
RENAMED
|
File without changes
|