tamilstring 0.0.0.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
tamilstring/utf8.py ADDED
@@ -0,0 +1,1125 @@
1
+ from .constant import SA_ROOT_LETTERS, TA_ROOT_CONSONENT, TAMIL_NUMURALS, TAMIL_SYMBOLS, TAMIL_VOWEL_CHARS, sanskrit_vowel_letters,tamil_consonent_letters, tamil_vowel_unicode_symbols,sanskrit_vowel_letters
2
+ import re
3
+
4
+
5
# Dependent vowel signs that can follow a consonant. They are listed in the
# same order as the independent vowels in `vowel[1:]`; split_letter and
# make_letter zip the two strings together and rely on that alignment.
suffics = "ாிீுூெேைொோௌ"
# Independent vowels. The first one ("அ") has no counterpart in `suffics`.
vowel = "அஆஇஈஉஊஎஏஐஒஓஔ"
# Tamil digits, number signs and other symbols matched as stand-alone letters.
charector = "௦௧௨௩௪௫௬௭௮௯௰௱௲௳௴௵௶௷௹௺"
# Base consonant code points that may start a letter.
prefixs = "கஙசஞடணதநபமயரலவழளறனஶஜஷஸஹ"
# Regex alternation of consonant clusters that get_letters treats as one base.
clusters = "க்ஷ|ஶ்ர|ஸ்ர"
# Range fragment meant to be placed inside a regex character class. This is not
# a raw string, so it holds the actual U+0000 and U+007F characters around "-".
english = "\x00-\x7F"
11
+
12
def remove_non_ta_en(string):
    """
    Keep only ASCII characters and characters of the Tamil Unicode block.

    Args:
        string(str): text to filter
    Return:
        str: the characters of `string` that fall in U+0000-U+007F or
             U+0B80-U+0BFF, in their original order
    eg:
        remove_non_ta_en("Hanzi汉字/漢字")    -> "Hanzi/"
        remove_non_ta_en("Hindiहिन्दी")        -> "Hindi"
        remove_non_ta_en("Tamil,English")     -> "Tamil,English"
        remove_non_ta_en("تاميلی")             -> ""
    """
    kept_chars = re.findall(r'[\x00-\x7F\u0B80-\u0BFF]', string)
    return ''.join(kept_chars)
40
+
41
def get_letters(string,skip_en=False,only=None):
    """
    Split a string into letters (a base plus its optional vowel sign or pulli).

    Args:
        string(str): text to split
        skip_en(bool): when true, ASCII characters are not extracted at all
        only: when not None, the pattern is built by regex_generator(only)
              and skip_en is ignored
    Return:
        list: the matched letters in order; characters that match nothing
              are silently left out
    eg:
        get_letters("தமிழ்")      -> ['த','மி','ழ்']
        get_letters("ஶ்ரீனி")      -> ['ஶ்ரீ','னி']
        get_letters("English")    -> ['E','n','g','l','i','s','h']
        get_letters("செம்மொழி (Classical language)",skip_en=True)
                                  -> ['செ', 'ம்', 'மொ', 'ழி']
    """
    # An explicit `only` selection takes precedence over skip_en.
    if only is not None:
        return re.findall(regex_generator(only), string)

    if skip_en:
        tamil_only = rf"(?:(?:{clusters})[{suffics}்]?|[{prefixs}][{suffics}்]?|[{vowel}{charector}ஃ])"
        return re.findall(tamil_only, string)

    with_english = rf"(?:(?:{clusters})[{suffics}்]?|[{prefixs}][{suffics}்]?|[{vowel}{charector}ஃ{english}])"
    return re.findall(with_english, string)
83
+
84
def unmatch_indeces(string,skip_en=False):
    """
    Return the index ranges of `string` that are not part of any letter,
    which helps to locate stray marks in OCR text.

    Args:
        string(str): text to scan
        skip_en(bool): when true, ASCII characters count as matched and are
                       therefore not reported; when false they are reported
                       as unmatched
    Return:
        list(list): [start, end] pairs (end exclusive) of the unmatched spans
    eg:
        unmatch_indeces("தைைமிாாழ்்")                    -> [[2, 3], [5, 7], [9, 10]]
        unmatch_indeces("தைைமிாாழ்்1#w)")                -> [[2, 3], [5, 7], [9, 14]]
        unmatch_indeces("தைைமிாாழ்்1#w)",skip_en=True)   -> [[2, 3], [5, 7], [9, 10]]
    """
    pattern = "([{0}](?:[{1}்](?:[ஷர](?:[{1}்])?)?)?|[{2}{3}ஃ])".format(prefixs,suffics,vowel,charector)

    if skip_en:
        # The previous code prefixed the pattern with a "non-ASCII character"
        # class, which consumed one extra code point before every letter and
        # made valid Tamil text show up as unmatched. Accepting ASCII as an
        # extra alternative skips English without disturbing letter matching.
        pattern = pattern + r"|[\x00-\x7F]"

    cursor, unmatches = 0, []
    for match in re.finditer(pattern, string):
        start, end = match.start(), match.end()
        # Anything between the previous match and this one matched nothing.
        if cursor < start:
            unmatches.append([cursor, start])
        cursor = end
    # Trailing text after the last match.
    if cursor < len(string):
        unmatches.append([cursor, len(string)])
    return unmatches
118
+
119
def unmatched_unicode(string,skip_en=False):
    """
    Return the pieces of `string` that are not part of any letter, which
    helps to locate stray marks in OCR text.

    Args:
        string(str): text to scan
        skip_en(bool): forwarded to unmatch_indeces
    Return:
        list(str): the substrings covered by the spans that
                   unmatch_indeces(string, skip_en) reports
    eg:
        unmatched_unicode("தமிழ்்")  -> ['்']
        unmatched_unicode("தமிழ்")   -> []
        unmatched_unicode("漢字")    -> ['漢字']
    """
    # unmatch_indeces yields [start, end] pairs, so each one has to be used
    # as a slice; indexing the string with the pair itself raises TypeError.
    spans = unmatch_indeces(string, skip_en=skip_en)
    return [string[start:end] for start, end in spans]
142
+
143
def regex_generator(only):
    """
    Build the letter-matching regex used by get_letters(..., only=...).

    Args:
        only: either the boolean True (Tamil-block letters only, no ASCII),
              or a container of selector strings. Recognised selectors:
                "EN_LET" - ASCII letters and digits
                "EN_SYM" - ASCII punctuation and symbols
                "EN_SPC" - the space character
                "SA_LET" - also allow a trailing ஷ/ர cluster after a letter
    Return:
        str: a regular expression with one capturing group per letter
    """
    if isinstance(only, bool) and only:
        return "([{0}](?:[{1}்])?|[{2}{3}ஃ])".format(prefixs,suffics,vowel,charector)

    # Each ASCII category is switched on by its own selector. Previously all
    # three tested "EN_SYM" and the combined result was never used, so every
    # selection matched the whole ASCII range.
    # \x41-\x5A => A-Z, \x61-\x7A => a-z, \x30-\x39 => 0-9
    en_let = r"\x41-\x5A\x61-\x7A\x30-\x39" if "EN_LET" in only else ""
    # \x21-\x2F => ! " # $ % & ' ( ) * + , - . /
    # \x3A-\x40 => : ; < = > ? @
    # \x5B-\x60 => [ \ ] ^ _ `
    # \x7B-\x7E => { | } ~
    en_sym = r"\x21-\x2F\x3A-\x40\x5B-\x60\x7B-\x7E" if "EN_SYM" in only else ""
    # \x20 => space
    en_spc = r"\x20" if "EN_SPC" in only else ""

    in_en = en_let + en_sym + en_spc

    if "SA_LET" in only:
        template = "([{0}](?:[{1}்](?:[ஷர](?:[{1}்])?)?)?|[{2}{3}ஃ{4}])"
    else:
        template = "([{0}](?:[{1}்])?|[{2}{3}ஃ{4}])"
    return template.format(prefixs,suffics,vowel,charector,in_en)
171
+
172
def split_letter(letter):
    """
    Break a composite letter into its consonant and its vowel.

    Args:
        letter(str): a single letter
    Return:
        tuple: (consonant with pulli, independent vowel), or (None, None)
               when is_composite(letter) is not true
    eg:
        split_letter("மா")   -> ('ம்','ஆ')
        split_letter("கோ")   -> ('க்','ஓ')
        split_letter("க்ஷி")  -> ('க்ஷ்','இ')
        split_letter("ஶ")    -> ('ஶ்','அ')
        split_letter("ழ்")   -> (None,None)
        split_letter("ஃ")    -> (None,None)
    """
    if not is_composite(letter):
        return (None,None)

    # `suffics` and `vowel[1:]` are aligned position by position.
    sign_to_vowel = dict(zip(suffics, vowel[1:]))
    last = letter[-1]
    if last in sign_to_vowel:
        return (letter[:-1]+'்', sign_to_vowel[last])
    # No vowel sign at the end: the letter carries the inherent "அ".
    return (letter+'்', "அ")
207
+
208
+
209
def make_letter(letter1,letter2):
    """
    Join a consonant (with pulli) and an independent vowel into one letter.
    The two arguments may be given in either order.

    Args:
        letter1(str): a consonant or a vowel
        letter2(str): the other one of the pair
    Return:
        str: the combined letter, or None when the arguments are not exactly
             one consonant and one vowel
    eg:
        make_letter("ழ்","ஔ")  -> 'ழௌ'
        make_letter("ப்","ஊ")  -> 'பூ'
        make_letter("ஆ","ச்")  -> 'சா'
        make_letter("க்","அ")  -> 'க'
        make_letter("அ","க்")  -> 'க'
        make_letter("ச","ஆ")   -> None
        make_letter("ப்","ப்")  -> None
    """
    if is_vowel(letter1) and is_consonent(letter2):
        vowel_, consonant_ = letter1, letter2
    elif is_vowel(letter2) and is_consonent(letter1):
        vowel_, consonant_ = letter2, letter1
    else:
        # TODO raise error
        return None

    # Drop the trailing pulli to get the bare base.
    base = consonant_[:-1]
    for vowel_let, vowel_sym in zip(vowel[1:], suffics):
        if vowel_ == vowel_let:
            return base + vowel_sym
    # "அ" has no vowel sign: the bare base is the combined letter. The
    # vowel-first order used to return the consonant with its pulli here.
    return base
256
+
257
+
258
def is_vowel(unicodes):
    """
    Check whether the input is a single independent Tamil vowel.

    Args:
        unicodes(str): text to test
    Return:
        bool: True when verify(unicodes) holds and the text occurs in `vowel`;
              False otherwise (including input that is not a single letter,
              which used to produce None)
    eg:
        is_vowel("அ")   -> True
        is_vowel("ஔ")   -> True
        is_vowel("a")   -> False
        is_vowel("க்ஷ்") -> False
        is_vowel("௩")   -> False
    """
    if not verify(unicodes):
        return False
    return unicodes in vowel
289
+
290
+
291
def is_consonent(unicodes):
    """
    Check whether the input is a single consonant, i.e. a base followed by
    a pulli.

    Args:
        unicodes(str): text to test
    Return:
        bool: True when the text is one letter that ends with "்" and whose
              remainder is found in `prefixs` or equals "க்ஷ"; False otherwise
              (including input that is not a single letter, which used to
              produce None)
    eg:
        is_consonent("க்")   -> True
        is_consonent("க்ஷ்")  -> True
        is_consonent("a")    -> False
        is_consonent("ஷி")   -> False
        is_consonent("௩")    -> False
    """
    if not verify(unicodes):
        return False
    base, last = unicodes[:-1], unicodes[-1]
    return last == "்" and (base in prefixs or base == "க்ஷ")
322
+
323
+
324
def is_composite(unicodes):
    """
    Check whether the input is a single composite letter: a bare base, or a
    base followed by a vowel sign.

    Args:
        unicodes(str): text to test
    Return:
        bool: True for a base from `prefixs` (or "க்ஷ") on its own or followed
              by one character of `suffics`; False otherwise (including input
              that is not a single letter, which used to produce None)
    eg:
        is_composite("க")    -> True
        is_composite("க்ஷ")   -> True
        is_composite("க்ஷூ")  -> True
        is_composite("a")    -> False
        is_composite("ஶ்")   -> False
        is_composite("௩")    -> False
    """
    if not verify(unicodes):
        return False
    # Bare base: carries the inherent vowel.
    if unicodes in prefixs or unicodes == "க்ஷ":
        return True
    base = unicodes[:-1]
    return (base in prefixs or base == "க்ஷ") and unicodes[-1] in suffics
357
+
358
+
359
def verify(unicodes):
    """
    Check whether get_letters finds exactly one letter in the input.

    Args:
        unicodes(str): text to test
    Return:
        bool: True when get_letters(unicodes) has exactly one element
    eg:
        verify("க")      -> True
        verify("a")      -> True
        verify("௩")      -> True
        verify("க்ஷூ")    -> True
        verify("English") -> False
        verify("தமிழ்")   -> False
    """
    return len(get_letters(unicodes)) == 1
395
+
396
def is_sa(letter):
    """
    Check whether the given letter counts as a Sanskrit letter.

    Args:
        letter(str): a single letter
    Return:
        bool: True when sa_composite(letter) or sa_consonent(letter) is
              truthy, or when the letter is exactly 'ஶ்ரீ'
    """
    if sa_composite(letter):
        return True
    if sa_consonent(letter):
        return True
    return letter == 'ஶ்ரீ'
432
+
433
def is_ta(letter):
    """
    Check whether the given letter is classified as Tamil.

    Args:
        letter(str): a single letter
    Return:
        bool: True when tamil_specific(letter) returns a kind that starts
              with "TA-", False when it returns None or anything else
    """
    # Classify once; the result was previously computed twice.
    letter_kind = tamil_specific(letter)
    return letter_kind is not None and letter_kind.startswith("TA-")
480
+
481
def is_en(char):
    """
    Check whether the input is a single ASCII character.

    Args:
        char(str): text to test
    Return:
        bool: True for exactly one character in U+0000-U+007F, False for
              anything else (longer or empty input used to produce None)
    eg:
        is_en("a")    -> True
        is_en("B")    -> True
        is_en("3")    -> True
        is_en("?")    -> True
        is_en("ஃ")    -> False
        is_en("கை")   -> False
        is_en("我")   -> False
    """
    # The upper bound is U+007F so that this agrees with the `english` range
    # and with english_specific, which both treat every code point below
    # 0x80 as English.
    return len(char) == 1 and ord(char) <= 0x7F
531
+
532
def in_tamil(unicodes):
    """
    Check whether every character of the input lies in the Tamil Unicode
    block (U+0B80-U+0BFF). Letters the file calls Sanskrit are written with
    characters of the same block, so they pass as well.

    Args:
        unicodes(str): text to test
    Return:
        bool: True when the text is non-empty and all of its characters are
              inside the block, False otherwise
    eg:
        in_tamil("a")    -> False
        in_tamil("3")    -> False
        in_tamil("ஃ")    -> True
        in_tamil("௹")    -> True
        in_tamil("கை")   -> True
        in_tamil("க்ஷ்")  -> True
        in_tamil("ஶ்ரீ")  -> True
        in_tamil("我")   -> False
        in_tamil("ക")    -> False
    """
    # all() over an empty string is vacuously True; empty text is not Tamil.
    if not unicodes:
        return False
    return all(0x0B80 <= ord(char) <= 0x0BFF for char in unicodes)
583
+
584
def english_specific(char):
    """
    Classify a single character within the ASCII range.

    Args:
        char(str): exactly one character (ord() is applied to it directly)
    Return:
        str or None: 'EN-UPP' for A-Z, 'EN-LOW' for a-z, 'EN-NUM' for 0-9,
                     'EN-SYM' for any other code point below 0x80, and None
                     for everything else
    eg:
        english_specific("a")  -> 'EN-LOW'
        english_specific("B")  -> 'EN-UPP'
        english_specific("3")  -> 'EN-NUM'
        english_specific("?")  -> 'EN-SYM'
        english_specific("+")  -> 'EN-SYM'
        english_specific("ஃ")  -> None
    """
    code = ord(char)
    labelled_ranges = (
        (0x41, 0x5A, "EN-UPP"),
        (0x61, 0x7A, "EN-LOW"),
        (0x30, 0x39, "EN-NUM"),
    )
    for low, high, label in labelled_ranges:
        if low <= code <= high:
            return label
    # Whatever is left of ASCII is treated as a symbol.
    return "EN-SYM" if code < 0x80 else None
620
+
621
def tamil_specific(unicodes):
    """
    Tell which kind of Tamil letter the input is.

    Args:
        unicodes(str): a single letter
    Return:
        str or None: 'TA-VOL' (vowel), 'TA-CON' (consonant with pulli),
                     'TA-COM' (composite letter, including a bare root
                     consonant), 'TA-NUM' (digit U+0BE6-U+0BEF), 'TA-AUT'
                     ("ஃ"), 'TA-SYM' (member of TAMIL_SYMBOLS), or None when
                     none of these applies
    eg:
        tamil_specific("அ")   -> 'TA-VOL'
        tamil_specific("௩")   -> 'TA-NUM'
        tamil_specific("ஃ")   -> 'TA-AUT'
        tamil_specific("我")  -> None
    """
    if len(unicodes) == 1:
        if is_vowel(unicodes):
            return "TA-VOL"
        if unicodes in TA_ROOT_CONSONENT:
            # A bare root letter carries the inherent vowel. ta_composite
            # accepts it and ta_consonent rejects it (no pulli), so it is
            # reported as a composite here rather than as 'TA-CON'.
            return "TA-COM"
        if 0x0BE6 <= ord(unicodes) <= 0x0BEF:
            return "TA-NUM"
        if unicodes == "ஃ":
            return "TA-AUT"
        if unicodes in TAMIL_SYMBOLS:
            return "TA-SYM"
        return None

    # Multi-character input: consonant with pulli, or base plus vowel sign.
    if ta_consonent(unicodes):
        return "TA-CON"
    if ta_composite(unicodes):
        return "TA-COM"
    return None
677
+
678
def sanskrit_specific(unicodes):
    """
    Tell which kind of Sanskrit letter the input is.

    Args:
        unicodes(str): a single letter
    Return:
        str or None: 'SA-CON' when sa_consonent accepts it, otherwise
                     'SA-COM' when sa_composite accepts it, otherwise
                     'SA-SYM' when it is exactly "ஶ்ரீ", otherwise None
    """
    if sa_consonent(unicodes):
        return "SA-CON"
    if sa_composite(unicodes):
        return "SA-COM"
    if unicodes == "ஶ்ரீ":  # TODO
        return "SA-SYM"
    return None
707
+
708
def ta_consonent(unicodes):
    """
    Check whether the input is a Tamil consonant: a root letter from
    TA_ROOT_CONSONENT followed by a pulli.

    Args:
        unicodes(str): text to test
    Return:
        bool: True only when the text is a single letter, ends with "்" and
              the part before it is in TA_ROOT_CONSONENT
    """
    if not verify(unicodes):
        return False
    root, mark = unicodes[:-1], unicodes[-1]
    return bool(mark == "்" and root in TA_ROOT_CONSONENT)
736
+
737
def ta_composite(unicodes):
    """
    Check whether the input is a Tamil composite letter: a root letter from
    TA_ROOT_CONSONENT on its own or followed by a vowel sign.

    Args:
        unicodes(str): text to test
    Return:
        bool: True when the text is a single letter that is either in
              TA_ROOT_CONSONENT itself, or ends with a member of
              TAMIL_VOWEL_CHARS while the rest is in TA_ROOT_CONSONENT
    """
    if not verify(unicodes):
        return False
    if unicodes in TA_ROOT_CONSONENT:
        return True
    root, sign = unicodes[:-1], unicodes[-1]
    return bool(sign in TAMIL_VOWEL_CHARS and root in TA_ROOT_CONSONENT)
765
+
766
+
767
def sa_consonent(unicodes):
    """
    Check whether the input is a Sanskrit consonant: a root from
    SA_ROOT_LETTERS followed by a pulli.

    Args:
        unicodes(str): text to test
    Return:
        bool: True only when the text is a single letter, ends with "்" and
              the part before it is in SA_ROOT_LETTERS
    """
    if not verify(unicodes):
        return False
    root, mark = unicodes[:-1], unicodes[-1]
    return bool(mark == "்" and root in SA_ROOT_LETTERS)
795
+
796
+
797
def sa_composite(unicodes):
    """
    Check whether the input is a Sanskrit composite letter: a root from
    SA_ROOT_LETTERS on its own or followed by a vowel sign.

    Args:
        unicodes(str): text to test
    Return:
        bool: True when the text is a single letter that is either in
              SA_ROOT_LETTERS itself, or does not end with "்" and consists
              of a root from SA_ROOT_LETTERS plus a member of
              TAMIL_VOWEL_CHARS; False otherwise (input that is not a single
              letter used to produce None)
    """
    if not verify(unicodes):
        return False
    if unicodes in SA_ROOT_LETTERS:
        return True
    # A trailing pulli makes it a consonant, not a composite.
    if unicodes[-1] == "்":
        return False
    return unicodes[-1] in TAMIL_VOWEL_CHARS and unicodes[:-1] in SA_ROOT_LETTERS
828
+
829
+
830
def kind(unicodes):
    """
    Classify a letter as English, Tamil or Sanskrit.

    Args:
        unicodes(str): a single letter
    Return:
        str or None: the english_specific label for a single ASCII character;
                     for text inside the Tamil block the tamil_specific
                     label, or the sanskrit_specific result when
                     tamil_specific gives None; 'UN-LAN' for anything else
    eg:
        kind("a")   -> 'EN-LOW'
        kind("அ")   -> 'TA-VOL'
        kind("௩")   -> 'TA-NUM'
        kind("ஃ")   -> 'TA-AUT'
        kind("我")  -> 'UN-LAN'
        kind("ക")   -> 'UN-LAN'
    """
    if is_en(unicodes):
        return english_specific(unicodes)
    if not in_tamil(unicodes):
        return "UN-LAN"

    tamil_kind = tamil_specific(unicodes)
    if tamil_kind is not None:
        return tamil_kind
    # Inside the Tamil block but not a Tamil letter: try the Sanskrit kinds.
    return sanskrit_specific(unicodes)
875
+
876
+
877
+
878
+
879
+ # numbers
880
+
881
# Maps each ASCII digit to its Tamil digit; the decimal point maps to itself.
en_to_ta_digit_dict = {
    '0': "௦",
    '1': "௧",
    '2': "௨",
    '3': "௩",
    '4': "௪",
    '5': "௫",
    '6': "௬",
    '7': "௭",
    '8': "௮",
    '9': "௯",
    ".":'.'
}

def en_to_ta_integer(integer):
    """
    Convert a number into Tamil digits.

    Args:
        integer(int or float): the number to convert
    Return:
        str: the digits (and decimal point) of str(integer) written with
             Tamil digits, prefixed with '-' for negative values
    eg:
        en_to_ta_integer(0)    -> '௦'
        en_to_ta_integer(11)   -> '௧௧'
        en_to_ta_integer(-40)  -> '-௪௦'
        en_to_ta_integer(249)  -> '௨௪௯'
        en_to_ta_integer(-2)   -> '-௨'
    """
    # Only strictly negative values get a sign; zero used to come out as '-௦'.
    sign = '-' if integer < 0 else ""
    # Characters without a mapping (such as the minus sign) are skipped.
    ta_num = ''.join(en_to_ta_digit_dict[d] for d in str(integer) if d in en_to_ta_digit_dict)
    return sign + ta_num
920
+
921
# Maps each Tamil digit to its ASCII digit; the decimal point maps to itself.
ta_to_en_digit_dict = dict(zip("௦௧௨௩௪௫௬௭௮௯", "0123456789"))
ta_to_en_digit_dict["."] = '.'
934
+
935
def ta_to_en_integer(integer):
    """
    Convert a string of Tamil digits into a Python number.

    Args:
        integer(str): Tamil digits, optionally with a leading '-' and a '.'
    Return:
        int or float: float when the text contains '.', int otherwise
    Raises:
        ValueError: when is_ta_numbers rejects the text
    eg:
        ta_to_en_integer('௧௧')   -> 11
        ta_to_en_integer('-௪௦')  -> -40
        ta_to_en_integer('௨௪௯')  -> 249
        ta_to_en_integer('-௨')   -> -2
    """
    if not is_ta_numbers(integer):
        raise ValueError("unexpected character in tamil numurals")

    factor = -1 if integer.startswith('-') else 1
    # Translate digit by digit; characters without a mapping are skipped.
    translated = []
    for symbol in str(integer):
        if symbol in ta_to_en_digit_dict:
            translated.append(ta_to_en_digit_dict[symbol])
    en_num = ''.join(translated)

    magnitude = float(en_num) if '.' in integer else int(en_num)
    return factor * magnitude
965
+
966
def is_ta_numbers(integer):
    """
    Check whether a string consists only of Tamil numerals and dots.

    Args:
        integer(str): text to test; one leading '-' is ignored
    Return:
        bool: True when every remaining character is in TAMIL_NUMURALS or is
              ".", False otherwise
    """
    body = str(integer)[1:] if integer.startswith('-') else str(integer)[:]
    for num in body:
        if num != "." and num not in TAMIL_NUMURALS:
            return False
    return True
978
+
979
def add(int1,int2):
    """
    Add two numbers written in Tamil digits.

    Args:
        int1(str): first operand in Tamil digits
        int2(str): second operand in Tamil digits
    Return:
        str: the sum in Tamil digits; each operand is converted with
             ta_to_en_integer and truncated to int first
    eg:
        add('௧௧','௧௧')   -> '௨௨'
        add('-௪௦','-௪௦') -> '-௮௦'
        add('௮௧','௮௧')   -> '௧௬௨'
    """
    left = int(ta_to_en_integer(int1))
    right = int(ta_to_en_integer(int2))
    return en_to_ta_integer(left + right)
1001
+
1002
def subract(int1,int2):
    """
    Subtract one number written in Tamil digits from another.

    Args:
        int1(str): minuend in Tamil digits
        int2(str): subtrahend in Tamil digits
    Return:
        str: int1 - int2 in Tamil digits; each operand is converted with
             ta_to_en_integer and truncated to int first
    eg:
        subract('௪௯','௧௧')  -> '௩௮'
        subract('௧௧','௪௯')  -> '-௩௮'
    """
    left = int(ta_to_en_integer(int1))
    right = int(ta_to_en_integer(int2))
    return en_to_ta_integer(left - right)
1024
+
1025
def multiplay(int1,int2):
    """
    Multiply two numbers written in Tamil digits.

    Args:
        int1(str): first factor in Tamil digits
        int2(str): second factor in Tamil digits
    Return:
        str: the product in Tamil digits; each operand is converted with
             ta_to_en_integer and truncated to int first
    eg:
        multiplay('௧௧','௧௧')   -> '௧௨௧'
        multiplay('-௨','-௨')   -> '௪'
        multiplay('௪௯','௪௯')   -> '௨௪௦௧'
    """
    left = int(ta_to_en_integer(int1))
    right = int(ta_to_en_integer(int2))
    return en_to_ta_integer(left * right)
1047
+
1048
def divide(int1,int2):
    """
    Divide two numbers written in Tamil digits, rounding down to a whole
    number (the implementation uses the `//` operator).

    Args:
        int1(str): dividend in Tamil digits
        int2(str): divisor in Tamil digits
    Return:
        str: the floored quotient in Tamil digits; each operand is converted
             with ta_to_en_integer and truncated to int first
    eg:
        divide('௧௧','௧௧')  -> '௧'
        divide('-௨','-௨')  -> '௧'
        divide('௪௯','௭')   -> '௭'
    """
    dividend = int(ta_to_en_integer(int1))
    divisor = int(ta_to_en_integer(int2))
    return en_to_ta_integer(dividend // divisor)
1072
+
1073
def flore_division(int1,int2):
    """
    Floor-divide two numbers written in Tamil digits.

    Args:
        int1(str): dividend in Tamil digits
        int2(str): divisor in Tamil digits
    Return:
        str: the quotient rounded down to a whole number, in Tamil digits;
             each operand is converted with ta_to_en_integer and truncated
             to int first
    eg:
        flore_division('௧௧','௧௧')  -> '௧'
        flore_division('௪௯','௭')   -> '௭'
        flore_division('௧௧','௪')   -> '௨'
    """
    dividend = int(ta_to_en_integer(int1))
    divisor = int(ta_to_en_integer(int2))
    # `//` is floor division; the previous `/` produced a float quotient
    # (for example 1.0), which is not what the function name promises.
    return en_to_ta_integer(dividend // divisor)
1096
+
1097
def power(int1,int2):
    """
    Raise one number written in Tamil digits to the power of another.

    Args:
        int1(str): base in Tamil digits
        int2(str): exponent in Tamil digits
    Return:
        str: int1 ** int2 in Tamil digits; each operand is converted with
             ta_to_en_integer and truncated to int first
    eg:
        power('௧','௧')  -> '௧'
        power('௨','௨')  -> '௪'
        power('௪','௪')  -> '௨௫௬'
        power('௯','௪')  -> '௬௫௬௧'
    """
    base = int(ta_to_en_integer(int1))
    exponent = int(ta_to_en_integer(int2))
    return en_to_ta_integer(base ** exponent)
1119
+
1120
def trange(start, stop=None, step=1):
    """
    Counterpart of range() that yields Tamil-digit strings.

    Args:
        start(int): first value, or the stop value when `stop` is omitted
        stop(int): end of the range (exclusive)
        step(int): increment between values
    Return:
        list(str): en_to_ta_integer applied to every value of the range
    """
    # With a single argument the range runs from 0 up to that argument.
    if stop is None:
        numbers = range(0, start, step)
    else:
        numbers = range(start, stop, step)
    return list(map(en_to_ta_integer, numbers))