tamilstring 0.3.25__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tamilstring/__init__.py +3 -0
- tamilstring/constant.py +18 -0
- tamilstring/func.py +227 -0
- tamilstring/helper.py +395 -0
- tamilstring/utf8.py +150 -0
- tamilstring-0.3.25.dist-info/LICENCE +26 -0
- tamilstring-0.3.25.dist-info/METADATA +31 -0
- tamilstring-0.3.25.dist-info/RECORD +10 -0
- tamilstring-0.3.25.dist-info/WHEEL +5 -0
- tamilstring-0.3.25.dist-info/top_level.txt +1 -0
tamilstring/__init__.py
ADDED
tamilstring/constant.py
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
# Tamil vowel letters and the dependent vowel signs. VOULES_SYM has no entry
# for the first vowel, so VOULES_LET[1:] lines up index-for-index with it.
VOULES_LET = ["அ", "ஆ", "இ", "ஈ", "உ", "ஊ", "எ", "ஏ", "ஐ", "ஒ", "ஓ", "ஔ"]
VOULES_SYM = ["ா", "ி", "ீ", "ு", "ூ", "ெ", "ே", "ை", "ொ", "ோ", "ௌ"]
EXACT_VOULE = ["அ", "ஆ", "இ", "ஈ", "உ", "ஊ", "எ", "ஏ", "ஐ", "ஒ", "ஓ", "ஔ"]
EXACT_CONSTANT = ["க", "ங", "ச", "ஞ", "ட", "ண", "த", "ந", "ப", "ம", "ய", "ர", "ல", "வ", "ழ", "ள", "ற", "ன"]
# Pulli followed by the vowel signs.
UTF_VOULE = ['்', "ா", "ி", "ீ", "ு", "ூ", "ெ", "ே", "ை", "ொ", "ோ", "ௌ"]
AUTHAM = "ஃ"
sanskrit_letters = ["ஶ", "ஜ", "ஷ", "ஸ", "ஹ", "க்ஷ"]
sanskrit_mei_letters = ["ஶ்", "ஜ்", "ஷ்", "ஸ்", "ஹ்", "க்ஷ்"]
# Fixed: a missing comma used to fuse "௰" and "௱" into one two-character item.
tamil_num = ["௦", "௧", "௨", "௩", "௪", "௫", "௬", "௭", "௮", "௯", "௰", "௱", "௲"]
tamil_sym = ["௳", "௴", "௵", "௶", "௷", "௹", "௺"]
english_alp_sm = ["a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z"]
english_alp_cp = ["A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z"]
english_num = ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9"]
english_sym = [".", ",", "?", ":", ";"]

# HARD_CONSTAND[i] and SOFT_CONSTANT[i] are zipped together by
# hard_constant() / soft_constant(), so the two lists must stay index-aligned.
HARD_CONSTAND = ["க்", "ச்", "ட்", "த்", "ப்", "ற்"]
SOFT_CONSTANT = ["ங்", "ஞ்", "ண்", "ந்", "ம்", "ன்"]
|
|
18
|
+
|
tamilstring/func.py
ADDED
|
@@ -0,0 +1,227 @@
|
|
|
1
|
+
from .helper import String, Letter
|
|
2
|
+
from .constant import SOFT_CONSTANT,HARD_CONSTAND
|
|
3
|
+
|
|
4
|
+
# Shared Letter instance that the helper functions below load values into,
# and the letter it held when this module was imported.
letter_singleton_source = Letter(singleton = True)
|
|
5
|
+
letter_singleton_backup = letter_singleton_source.letter
|
|
6
|
+
|
|
7
|
+
# Shared String instance and the string it held at import time.
string_singleton_source = String(singleton = True)
|
|
8
|
+
string_singleton_backup = string_singleton_source.string
|
|
9
|
+
|
|
10
|
+
def is_english(vowel):
    """Return True when ``vowel`` is tagged with the language code ``"en"``.

    The value is loaded into the shared letter singleton, its ``lang`` is
    read, and ``restore_letter_backup()`` is called before returning.
    """
    take_letter_backup(vowel)
    outcome = letter_singleton_source.lang == "en"
    restore_letter_backup()
    return outcome
|
|
15
|
+
|
|
16
|
+
def is_tamil(vowel):
    """Return True when ``vowel`` is tagged with the language code ``"ta"``.

    The value is loaded into the shared letter singleton, its ``lang`` is
    read, and ``restore_letter_backup()`` is called before returning.
    """
    take_letter_backup(vowel)
    outcome = letter_singleton_source.lang == "ta"
    restore_letter_backup()
    return outcome
|
|
21
|
+
|
|
22
|
+
def is_sanskrit(vowel):
    """Return True when ``vowel`` is tagged with the language code ``"sa"``.

    The value is loaded into the shared letter singleton, its ``lang`` is
    read, and ``restore_letter_backup()`` is called before returning.
    """
    take_letter_backup(vowel)
    outcome = letter_singleton_source.lang == "sa"
    restore_letter_backup()
    return outcome
|
|
27
|
+
|
|
28
|
+
def is_tamil_num(vowel):
    """Return True when ``vowel`` has language ``"ta"`` and kind ``"NUM"``."""
    take_letter_backup(vowel)
    current = letter_singleton_source
    outcome = current.lang == "ta" and current.kind == "NUM"
    restore_letter_backup()
    return outcome
|
|
33
|
+
|
|
34
|
+
def is_english_num(vowel):
    """Return True when ``vowel`` has language ``"en"`` and kind ``"NUM"``."""
    take_letter_backup(vowel)
    current = letter_singleton_source
    outcome = current.lang == "en" and current.kind == "NUM"
    restore_letter_backup()
    return outcome
|
|
39
|
+
|
|
40
|
+
def is_tamil_char(vowel):
    """Return True when the kind of ``vowel`` is ``"CAR"``.

    Only the kind is inspected; the language tag is not checked here.
    """
    take_letter_backup(vowel)
    outcome = letter_singleton_source.kind == "CAR"
    restore_letter_backup()
    return outcome
|
|
45
|
+
|
|
46
|
+
def is_vowel(vowel):
    """Return True when the shared letter reports ``is_voule`` for ``vowel``."""
    take_letter_backup(vowel)
    outcome = bool(letter_singleton_source.is_voule)
    restore_letter_backup()
    return outcome
|
|
51
|
+
|
|
52
|
+
def is_constant(constant):
    """Return True when the shared letter reports ``is_constant`` for the value."""
    take_letter_backup(constant)
    outcome = bool(letter_singleton_source.is_constant)
    restore_letter_backup()
    return outcome
|
|
57
|
+
|
|
58
|
+
def is_compound(compound):
    """Return True when the shared letter reports ``is_compound`` for the value."""
    take_letter_backup(compound)
    outcome = bool(letter_singleton_source.is_compound)
    restore_letter_backup()
    return outcome
|
|
63
|
+
|
|
64
|
+
def is_aytham(aytham):
    """Return True only when ``aytham`` compares equal to the aytham sign."""
    return bool(aytham == "ஃ")
|
|
69
|
+
|
|
70
|
+
def constant(letter):
    """Return the ``constant`` property of ``letter`` via the shared letter.

    ``restore_letter_backup()`` is called before the value is returned.
    """
    take_letter_backup(letter)
    consonant_part = letter_singleton_source.constant
    restore_letter_backup()
    return consonant_part
|
|
75
|
+
|
|
76
|
+
def vowel(letter):
    """Return the ``voule`` property of ``letter`` via the shared letter.

    ``restore_letter_backup()`` is called before the value is returned.
    """
    take_letter_backup(letter)
    vowel_part = letter_singleton_source.voule
    restore_letter_backup()
    return vowel_part
|
|
81
|
+
|
|
82
|
+
def hard_constant(letter):
    """Return the HARD_CONSTAND entry paired with the consonant of ``letter``.

    The consonant is looked up in SOFT_CONSTANT and the entry at the same
    index of HARD_CONSTAND is returned; None when there is no match.
    """
    take_letter_backup(letter)
    base = letter_singleton_source.constant
    paired = next(
        (hard for hard, soft in zip(HARD_CONSTAND, SOFT_CONSTANT) if base == soft),
        None,
    )
    restore_letter_backup()
    return paired
|
|
92
|
+
|
|
93
|
+
def soft_constant(letter):
    """Return the SOFT_CONSTANT entry paired with the consonant of ``letter``.

    The consonant is looked up in HARD_CONSTAND and the entry at the same
    index of SOFT_CONSTANT is returned; None when there is no match.
    """
    take_letter_backup(letter)
    base = letter_singleton_source.constant
    paired = next(
        (soft for soft, hard in zip(SOFT_CONSTANT, HARD_CONSTAND) if base == hard),
        None,
    )
    restore_letter_backup()
    return paired
|
|
103
|
+
|
|
104
|
+
def get_constants(value, index=False):
    """Collect the letters of ``value`` whose kind is ``"CON"``.

    Args:
        value: text (or String) loaded into the shared string singleton.
        index: when true, return ``[position, letter]`` pairs; otherwise
            return only the letters.

    Returns:
        A list of pairs or a list of letters, in order of appearance.
    """
    take_string_backup(value)
    found = [
        [position, capsule[1]]
        for position, capsule in enumerate(string_singleton_source.capsules)
        if capsule[-1] == "CON"
    ]
    # Fixed: this used to call restore_letter_backup(), which left the
    # string singleton holding ``value``.
    restore_string_backup()
    if index:
        return found
    return [pair[-1] for pair in found]
|
|
115
|
+
|
|
116
|
+
def get_vowels(value, index=False):
    """Collect the letters of ``value`` whose kind is ``"VOL"``.

    Args:
        value: text (or String) loaded into the shared string singleton.
        index: when true, return ``[position, letter]`` pairs; otherwise
            return only the letters.

    Returns:
        A list of pairs or a list of letters, in order of appearance.
    """
    take_string_backup(value)
    found = [
        [position, capsule[1]]
        for position, capsule in enumerate(string_singleton_source.capsules)
        if capsule[-1] == "VOL"
    ]
    # Fixed: this used to call restore_letter_backup(), which left the
    # string singleton holding ``value``.
    restore_string_backup()
    if index:
        return found
    return [pair[-1] for pair in found]
|
|
127
|
+
|
|
128
|
+
def get_compounds(value, index=False):
    """Collect the letters of ``value`` whose kind is ``"COM"``.

    Args:
        value: text (or String) loaded into the shared string singleton.
        index: when true, return ``[position, letter]`` pairs; otherwise
            return only the letters.

    Returns:
        A list of pairs or a list of letters, in order of appearance.
    """
    take_string_backup(value)
    found = [
        [position, capsule[1]]
        for position, capsule in enumerate(string_singleton_source.capsules)
        if capsule[-1] == "COM"
    ]
    # Fixed: this used to call restore_letter_backup(), which left the
    # string singleton holding ``value``.
    restore_string_backup()
    if index:
        return found
    return [pair[-1] for pair in found]
|
|
139
|
+
|
|
140
|
+
def get_tamil(string, only=None):
    """Return the letters of ``string`` whose language tag is ``"ta"``.

    Args:
        string: text (or String) loaded into the shared string singleton.
        only: optional collection of kind tags (e.g. ``"VOL"``, ``"CON"``).
            When empty or None every Tamil letter is returned; otherwise only
            letters whose kind is in ``only``. Previously both branches
            appended, so this filter had no effect.

    Returns:
        A list of letter strings in order of appearance.
    """
    take_string_backup(string)
    chosen = [
        capsule
        for capsule in string_singleton_source.capsules
        if capsule[0] == "ta" and (not only or capsule[-1] in only)
    ]
    restore_string_backup()
    return capsule_letter(chosen)
|
|
151
|
+
|
|
152
|
+
def get_english(string, only=None):
    """Return the letters of ``string`` whose language tag is ``"en"``.

    Args:
        string: text (or String) loaded into the shared string singleton.
        only: optional collection of kind tags (e.g. ``"UPP"``, ``"NUM"``).
            When empty or None every English letter is returned; otherwise
            only letters whose kind is in ``only``. Previously both branches
            appended, so this filter had no effect.

    Returns:
        A list of letter strings in order of appearance.
    """
    take_string_backup(string)
    chosen = [
        capsule
        for capsule in string_singleton_source.capsules
        if capsule[0] == "en" and (not only or capsule[-1] in only)
    ]
    restore_string_backup()
    return capsule_letter(chosen)
|
|
163
|
+
|
|
164
|
+
def get_sanskrit(string, only=None):
    """Return the letters of ``string`` whose language tag is ``"sa"``.

    Args:
        string: text (or String) loaded into the shared string singleton.
        only: optional collection of kind tags. When empty or None every
            matching letter is returned; otherwise only letters whose kind is
            in ``only``. Previously both branches appended, so this filter
            had no effect.

    Returns:
        A list of letter strings in order of appearance.
    """
    take_string_backup(string)
    chosen = [
        capsule
        for capsule in string_singleton_source.capsules
        if capsule[0] == "sa" and (not only or capsule[-1] in only)
    ]
    restore_string_backup()
    return capsule_letter(chosen)
|
|
175
|
+
|
|
176
|
+
def get_tamil_numerals(string):
    """Return the letters of ``string`` tagged ``"ta"`` with kind ``"NUM"``."""
    take_string_backup(string)
    numerals = [
        capsule
        for capsule in string_singleton_source.capsules
        if capsule[0] == "ta" and capsule[-1] == "NUM"
    ]
    restore_string_backup()
    return capsule_letter(numerals)
|
|
185
|
+
|
|
186
|
+
def get_tamil_symbols(string):
    """Return the letters of ``string`` tagged ``"ta"`` with kind ``"CAR"``."""
    take_string_backup(string)
    symbols = [
        capsule
        for capsule in string_singleton_source.capsules
        if capsule[0] == "ta" and capsule[-1] == "CAR"
    ]
    restore_string_backup()
    return capsule_letter(symbols)
|
|
195
|
+
|
|
196
|
+
def capsule_letter(capsules):
    """Extract the letter text (index 1) from one capsule or many.

    A single tuple yields its own element 1; any other iterable yields a
    list holding element 1 of each item.
    """
    if not isinstance(capsules, tuple):
        return [entry[1] for entry in capsules]
    return capsules[1]
|
|
204
|
+
|
|
205
|
+
def take_letter_backup(value):
    """Save the shared letter's current content, then load ``value`` into it.

    Args:
        value: a letter string, or a ``Letter`` whose text is used.

    The saved content is what ``restore_letter_backup()`` later writes back.
    """
    # Fixed: without ``global`` the backup was bound to a throw-away local, so
    # restore_letter_backup() always wrote back the import-time value.
    global letter_singleton_backup
    letter_singleton_backup = letter_singleton_source.letter
    if isinstance(value, Letter):
        # Fixed: the ``letter`` setter parses text, so hand it the Letter's
        # text rather than the Letter object itself.
        value = value.letter
    letter_singleton_source.letter = value
|
|
213
|
+
|
|
214
|
+
def restore_letter_backup():
    """Write the module-level ``letter_singleton_backup`` into the shared letter."""
    saved = letter_singleton_backup
    letter_singleton_source.letter = saved
|
|
216
|
+
|
|
217
|
+
def take_string_backup(value):
    """Save the shared string's current content, then load ``value`` into it.

    Args:
        value: plain text, or a ``String`` whose text is used.

    The saved content is what ``restore_string_backup()`` later writes back.
    """
    # Fixed: without ``global`` the backup was bound to a throw-away local, so
    # restore_string_backup() always wrote back the import-time value.
    global string_singleton_backup
    string_singleton_backup = string_singleton_source.string
    if isinstance(value, String):
        # Fixed: store the String's text, not the String object, so the
        # singleton's root stays plain text.
        value = value.string
    string_singleton_source.string = value
|
|
225
|
+
|
|
226
|
+
def restore_string_backup():
    """Write the module-level ``string_singleton_backup`` into the shared string."""
    saved = string_singleton_backup
    string_singleton_source.string = saved
|
tamilstring/helper.py
ADDED
|
@@ -0,0 +1,395 @@
|
|
|
1
|
+
from . import utf8
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
class Letter:
    """One letter plus its classification.

    The classification is the ``(lang, text, kind)`` tuple produced by
    ``utf8.get_letters(..., capsule=True)``; it is stored privately and read
    through the properties below. Constructing with ``singleton=True``
    returns one shared instance, re-initialised on every such call.

    Keyword ``obj`` (default False) sets ``self.output``: when true,
    ``+`` / ``-`` return ``Letter`` / ``String`` objects instead of plain
    strings.
    """

    _instance = None

    def __new__(cls, *args, **kwargs):
        if not kwargs.pop('singleton', False):
            return super().__new__(cls)
        if cls._instance is None:
            cls._instance = super().__new__(cls)
            cls._instance._initialized = True
        return cls._instance

    def __init__(self, *args, **kwargs):
        self._initialized = True
        if len(args) == 1:
            # Only the first letter of the argument is kept.
            self.__root = utf8.get_letters(args[0], capsule=True)[0]
        else:
            self.__root = None
        self.output = kwargs.pop('obj', False)

    def __add__(self, other):
        """Join this letter with a letter or a longer text.

        A consonant followed by a vowel is merged into one compound letter;
        anything else is concatenated. Fresh ``Letter`` / ``String`` objects
        are used for the right-hand side so the shared singletons (which may
        be ``self``) are not overwritten while the result is computed.
        """
        if not isinstance(other, Letter):
            if len(utf8.get_letters(other, capsule=True)) != 1:
                rest = String(other)
                if self.is_constant and rest.letter(0).is_voule:
                    rest.string = utf8.make_compound(self.letter, rest.letter(0).letter) + rest[1:]
                else:
                    rest.string = self.letter + rest[:]
                return rest if self.output else rest.string
            other = Letter(other)
        if self.is_constant and other.is_voule:
            joined = utf8.make_compound(self.letter, other.letter)
            return Letter(joined) if self.output else joined
        joined = self.letter + other.letter
        return String(joined) if self.output else joined

    def __sub__(self, other):
        """Remove a vowel or consonant from a compound letter.

        Subtracting a vowel returns the consonant part and vice versa.

        Raises:
            ValueError: ``other`` is not exactly one letter, ``self`` is not
                a compound, or ``other`` is neither vowel nor consonant.
        """
        if not isinstance(other, Letter):
            if len(utf8.get_letters(other)) != 1:
                raise ValueError("only tamil letter can be add.")
            other = Letter(other)
        if not self.is_compound:
            raise ValueError("non compound kind can not subractable")
        if other.is_voule:
            return Letter(self.constant) if self.output else self.constant
        if other.is_constant:
            return Letter(self.voule) if self.output else self.voule
        raise ValueError("voule or constant can subract only from compound")

    def __contains__(self, item):
        return item in self.letter

    def __str__(self):
        return self.letter

    def root(self, index):
        """Return element ``index`` of the stored capsule, or None if unset."""
        if self.__root is None:
            return None
        return self.__root[index]

    @property
    def kind(self):
        return self.root(-1)

    @property
    def lang(self):
        return self.root(0)

    @property
    def letter(self):
        return self.root(1)

    @letter.setter
    def letter(self, value):
        if value is None:
            self.__root = None
        else:
            self.__root = utf8.get_letters(value, capsule=True)[0]

    @property
    def capsule(self):
        return self.__root

    @capsule.setter
    def capsule(self, value):
        self.__root = value

    @property
    def is_voule(self):
        return self.kind == 'VOL'

    @property
    def is_constant(self):
        return self.kind == 'CON'

    @property
    def is_compound(self):
        return self.kind == 'COM'

    @property
    def has_voule(self):
        return self.kind in ("VOL", "COM")

    @property
    def has_constant(self):
        # Fixed: this compared against "TA_CON", a kind nothing produces; the
        # consonant kind used everywhere else in the class is "CON".
        return self.kind in ("CON", "COM")

    @property
    def voule(self):
        """The vowel of a vowel or compound letter, otherwise None."""
        if self.kind == "VOL":
            return self.letter
        if self.kind == "COM":
            return utf8.split_compound(self.letter)[1]
        return None

    @property
    def constant(self):
        """The consonant of a consonant or compound letter, otherwise None."""
        if self.kind == "CON":
            return self.letter
        if self.kind == "COM":
            return utf8.split_compound(self.letter)[0]
        return None

    @property
    def compound(self):
        return self.letter if self.kind == "COM" else None

    @property
    def split_letter(self):
        return utf8.split_compound(self.letter)

    def contain(self, other):
        """Compare ``other`` against this letter.

        Returns:
            True/False when both have the same kind (text equality) or when
            ``self`` is a compound and ``other`` is one of its parts; None
            when ``other`` is a compound and ``self`` is not, or when no rule
            applies.

        Raises:
            ValueError: ``other`` is text longer than two characters.
        """
        if not isinstance(other, Letter):
            # Fixed: the length check ran before the type check and called
            # len() on Letter objects, which define no __len__.
            if len(other) > 2:
                raise ValueError("it does not look like a seperate letter")
            other = Letter(other)
        if other.kind == self.kind:
            return other.letter == self.letter
        if other.is_compound and not self.is_compound:
            return None
        if self.is_compound and not other.is_compound:
            # Fixed: this read the nonexistent attribute ``splitLetter``.
            return other.letter in self.split_letter
        return None

    def get_match(self, other, output=False):
        """Report whether ``other`` matches this letter exactly or partially.

        Returns:
            With ``output`` true, a tuple ``(matched, kind of other, "EXACT"
            or "CONTAINS")``, or ``(False, None, None)``; otherwise only the
            boolean.
        """
        if not isinstance(other, Letter):
            other = Letter(other)
        verdict = (False, None, None)
        if self.letter == other.letter:
            verdict = (True, other.kind, "EXACT")
        elif other.is_compound and not self.is_compound:
            if self.letter in other.split_letter[0]:
                verdict = (True, other.kind, "CONTAINS")
        elif self.is_compound and not other.is_compound:
            # NOTE(review): only the vowel half is compared here - confirm
            # whether the consonant half should match too.
            if other.letter == self.split_letter[1]:
                verdict = (True, other.kind, "CONTAINS")
        return verdict if output else verdict[0]
|
|
211
|
+
|
|
212
|
+
|
|
213
|
+
class String:
    """Text that is indexed, sliced and measured letter by letter.

    The raw text is kept privately; ``letters`` / ``capsules`` re-split it
    with ``utf8.get_letters`` on every access. Constructing with
    ``singleton=True`` returns one shared instance, re-initialised on every
    such call. Keyword ``obj`` (default False) sets ``self.output``: when
    true, ``+`` / ``-`` return ``String`` objects instead of plain strings.
    """

    _instance = None

    def __new__(cls, *args, **kwargs):
        if not kwargs.pop('singleton', False):
            return super(String, cls).__new__(cls)
        if cls._instance is None:
            cls._instance = super(String, cls).__new__(cls)
            cls._instance._initialized = True
        return cls._instance

    def __init__(self, *args, **kwargs):
        self._initialized = True
        self.__root = args[0] if len(args) == 1 else None
        self.output = kwargs.pop('obj', False)
        self.position = 0

    def __add__(self, other):
        """Concatenate, merging a trailing consonant with a leading vowel."""
        if not isinstance(other, String):
            other = String(other)
        if self.singleton(-1).is_constant and other.singleton(0).is_voule:
            merged = utf8.make_compound(self.letters[-1], other.letters[0])
            ret = String("".join(self.letters[:-1] + [merged] + other.letters[1:]))
        else:
            ret = String("".join(self.letters + other.letters))
        return ret if self.output else ret.string

    def __sub__(self, other):
        """Strip a vowel or consonant from the final compound letter.

        Raises:
            ValueError: the last letter is not a compound, or ``other`` is
                neither a vowel nor a consonant.
        """
        if not isinstance(other, Letter):
            other = Letter(other)
        last = self.singleton(-1)
        if not (last.is_compound and (other.is_voule or other.is_constant)):
            raise ValueError("can only subract string endings with voule or constant")
        final_letter = last.constant if other.kind == "VOL" else last.voule
        ret = String("".join(self.letters[:-1]) + final_letter)
        return ret if self.output else ret.string

    @property
    def letters(self):
        return [l[1] for l in self.capsules]

    @property
    def capsules(self):
        return utf8.get_letters(self.__root, capsule=True)

    @property
    def string(self):
        return self.__root

    @string.setter
    def string(self, value):
        self.__root = value

    def has_contain(self, substring,):
        """Scan this text for ``substring`` letter by letter.

        Returns:
            A list of ``(matched, text)`` tuples describing matched and
            unmatched runs.

        NOTE(review): the original indentation of this method was not
        available; the nesting below is the most plausible reading and
        should be checked against the upstream file. A match that completes
        on the very last letter is only flushed on a following iteration, as
        in the original - confirm whether that is intended.
        """
        subString = substring if isinstance(substring, String) else String(substring)
        matchValue, all_matches = [], []
        matchCount, tracer = 0, 0
        # Scratch Letter whose capsule is overwritten on every iteration.
        letter = Letter('ஆ', obj=True)
        # The debug print() of both strings was removed here.
        for index, letter_ in enumerate(self.capsules):
            letter.capsule = letter_
            if matchCount == len(subString.letters):
                subString.position, matchCount = 0, 0
                all_matches.append((True, matchValue))
                matchValue = []
                tracer = index
            checkMatch = letter.get_match(subString[subString.position], output=True)
            if checkMatch[0]:
                if checkMatch[-1] == "EXACT":
                    matchValue.append(letter_[1])
                    subString.position += 1
                    matchCount += 1

                if checkMatch[-1] == "CONTAINS":
                    constant, voule = letter.split_letter
                    if checkMatch[1] == "VOL":
                        matchValue.append(voule)
                        if len(all_matches) != 0:
                            if all_matches[-1][0] == True:
                                all_matches.append((False, [constant]))
                            else:
                                # Fixed: this added a list to element 0 (a
                                # bool) and always raised TypeError; the
                                # collected text is element 1.
                                all_matches[-1] = (False, list(all_matches[-1][1]) + [constant])
                        subString.position += 1
                        matchCount += 1
            else:
                if index == tracer:
                    all_matches.append((False, [l for l in self.letters[tracer:index+1]]))
                else:
                    all_matches[-1] = (False, [l for l in self.letters[tracer:index+1]])
            self.position = index
        return [(am[0], "".join(am[1])) for am in all_matches]

    def index_obj(self, index):
        return Letter(self.letters[index])

    def singleton(self, index, singleton=False):
        # NOTE(review): the ``singleton`` argument is ignored; the shared
        # Letter instance is always returned, as in the original.
        return Letter(self.letters[index], singleton=True)

    def letter(self, index):
        return Letter(self.letters[index])

    def __getitem__(self, index):
        """Return one letter for an int index, or joined text for a slice."""
        if isinstance(index, slice):
            return "".join(self.letters[index])
        return self.letters[index]

    def __setitem__(self, index, value):
        """Replace the letter(s) at ``index`` and rebuild the text."""
        current = self.letters
        if isinstance(index, slice):
            start, stop, step = index.indices(len(current))
            # Fixed: ``other`` was unbound when ``value`` was already a
            # String, and building it with singleton=True overwrote the
            # shared String instance.
            other = value if isinstance(value, String) else String(value)
            current[start:stop:step] = other.letters
        else:
            current[index] = value
        self.string = "".join(current)

    def __delattr__(self, name=None):
        # Fixed: the original took no attribute name, so ``del obj.attr``
        # raised TypeError. A bare call without a name stays a no-op.
        if name is not None:
            object.__delattr__(self, name)

    def __iter__(self):
        return iter(self.letters)

    def __len__(self):
        return len(self.letters)

    def __contains__(self, other):
        """Support ``other in string_obj`` as a substring test."""
        if not isinstance(other, str):
            other = str(other)
        if self.__root is None:
            return False
        # Fixed: the operands were reversed (it tested whether the whole
        # text occurred inside ``other``).
        return other in self.__root
|
|
370
|
+
|
|
371
|
+
|
|
372
|
+
# Shared Letter instance used by take_letter_backup() / restore_letter_backup(),
# and the value that restore_letter_backup() writes back into it.
letter_singleton_source = Letter(singleton = True)
|
|
373
|
+
letter_singleton_backup = None
|
|
374
|
+
|
|
375
|
+
def take_letter_backup(letter):
    """Save the shared letter's content, load ``letter``, and return the singleton.

    Args:
        letter: letter text handed to the singleton's ``letter`` setter.

    Returns:
        The shared ``Letter`` instance, now holding ``letter``.
    """
    # Fixed: without ``global`` the backup was bound to a throw-away local, so
    # restore_letter_backup() always wrote back None.
    global letter_singleton_backup
    letter_singleton_backup = letter_singleton_source.letter
    letter_singleton_source.letter = letter
    return letter_singleton_source
|
|
380
|
+
|
|
381
|
+
def restore_letter_backup():
    """Write the module-level ``letter_singleton_backup`` into the shared letter."""
    saved = letter_singleton_backup
    letter_singleton_source.letter = saved
|
|
383
|
+
|
|
384
|
+
|
|
385
|
+
# Shared String instance used by take_string_backup() / restore_string_backup(),
# and the value that restore_string_backup() writes back into it.
string_singleton_source = String(singleton = True)
|
|
386
|
+
string_singleton_backup = None
|
|
387
|
+
|
|
388
|
+
def take_string_backup(sting):
    """Save the shared string's content, load ``sting``, and return the singleton.

    Args:
        sting: text stored as the singleton's ``string``.

    Returns:
        The shared ``String`` instance, now holding ``sting``.
    """
    # Fixed: without ``global`` the backup was bound to a throw-away local, so
    # restore_string_backup() always wrote back None.
    global string_singleton_backup
    string_singleton_backup = string_singleton_source.string
    string_singleton_source.string = sting
    return string_singleton_source
|
|
393
|
+
|
|
394
|
+
def restore_string_backup():
    """Write the module-level ``string_singleton_backup`` into the shared string."""
    saved = string_singleton_backup
    string_singleton_source.string = saved
|
tamilstring/utf8.py
ADDED
|
@@ -0,0 +1,150 @@
|
|
|
1
|
+
from .constant import VOULES_LET,VOULES_SYM
|
|
2
|
+
|
|
3
|
+
def english_range(code, specify=False):
    """Classify a code point in the range 0x00-0x7E.

    Args:
        code: integer code point.
        specify: when true, return the kind tag instead of a plain boolean.

    Returns:
        ``"UPP"``, ``"LOW"``, ``"NUM"`` or ``"SYM"`` when ``specify`` is true
        and the code is in range; True when ``specify`` is false and the code
        is in range; False otherwise.
    """
    bands = ((0x41, 0x5A, "UPP"), (0x61, 0x7A, "LOW"), (0x30, 0x39, "NUM"))
    tag = None
    if 0x0000 <= code <= 0x007E:
        # Anything in range that is not a letter or digit is a symbol.
        tag = "SYM"
        for low, high, name in bands:
            if low <= code <= high:
                tag = name
                break
    if tag is None:
        return False
    return tag if specify else True
|
|
18
|
+
|
|
19
|
+
def in_range(code):
    """Return True when ``code`` lies in 0x0B80-0x0BFF inclusive."""
    return 0x0B80 <= code <= 0x0BFF
|
|
24
|
+
|
|
25
|
+
def tamil_range(code, specify=False):
    """Classify a code point of the Tamil block (0x0B80-0x0BFF).

    Kind tags: ``"CON"`` (0x0BCD, 0x0B82), ``"VOL"`` (0x0B85-0x0B94),
    ``"COM"`` (0x0B95-0x0BB9 except 0x0B9C and 0x0BB6-0x0BB9), ``"UNI"``
    (0x0BBE-0x0BCC), ``"NUM"`` (0x0BE6-0x0BF2), ``"CAR"`` (0x0BF3-0x0BFA)
    and ``"AUT"`` (0x0B83).

    The number range now includes 0x0BF0-0x0BF2 (ten, hundred, thousand).
    Previously 0x0BF0 was unclassified and 0x0BF1-0x0BF2 were tagged
    ``"CAR"``. Every tested range lies inside the block, so no separate
    block-range check is needed.

    Args:
        code: integer code point.
        specify: when true, return the kind tag instead of a plain boolean.

    Returns:
        The kind tag (``specify`` true) or True (``specify`` false) for a
        classified code point; False otherwise.
    """
    kind = None
    if code == 0x0BCD or code == 0x0B82:
        kind = "CON"
    elif 0x0B85 <= code <= 0x0B94:
        kind = "VOL"
    elif 0x0B95 <= code <= 0x0BB9:
        # 0x0B9C and 0x0BB6-0x0BB9 are left for sanskrit_range().
        if not (code == 0x0B9C or 0x0BB6 <= code <= 0x0BB9):
            kind = "COM"
    elif 0x0BBE <= code <= 0x0BCC:
        kind = "UNI"
    elif 0x0BE6 <= code <= 0x0BF2:
        kind = "NUM"
    elif 0x0BF3 <= code <= 0x0BFA:
        kind = "CAR"
    elif code == 0x0B83:
        kind = "AUT"

    if kind is None:
        return False
    return kind if specify else True
|
|
50
|
+
|
|
51
|
+
def sanskrit_range(code, specify=False):
    """Detect the code points 0x0B9C and 0x0BB6-0x0BB9.

    Args:
        code: integer code point.
        specify: when false, return ``"COM"`` or None; when true, return
            True or False.

    NOTE(review): the meaning of ``specify`` is the reverse of
    english_range() / tamil_range(), where ``specify=True`` yields the tag.
    Confirm before relying on the return type.
    """
    # Both tested ranges lie inside 0x0B80-0x0BFF, so the block-range check
    # is implied by the comparison itself.
    matched = code == 0x0B9C or 0x0BB6 <= code <= 0x0BB9
    if specify:
        return bool(matched)
    return "COM" if matched else None
|
|
60
|
+
|
|
61
|
+
def get_letters(string, capsule=False, error=True):
    """Split ``string`` into letters, merging combining signs into their base.

    Args:
        string: iterable of single characters (normally a ``str``).
        capsule: when true, return ``(lang, text, kind)`` tuples; otherwise
            return only the text of each letter.
        error: accepted for compatibility; not used.

    Returns:
        A list of tuples or strings, one per recognised letter. Characters
        that no classifier recognises are tagged ``("~", char, "UNK")``.

    Vowel signs and the pulli are appended to a preceding ``"COM"`` letter.
    A sign with no such predecessor is dropped, as before. NOTE(review):
    confirm that dropping stray signs is intended.
    """
    # Multi-character sequences that get special treatment below.
    sha_pulli = "\u0bb6\u0bcd"            # ஶ்
    sha_pulli_ra = "\u0bb6\u0bcd\u0bb0"   # ஶ்ர
    ka_pulli = "\u0b95\u0bcd"             # க்

    rt = []
    for char in string:
        code = ord(char)

        english = english_range(code, specify=True)
        if english:
            rt.append(("en", char, english))
            continue

        tamil = tamil_range(code, specify=True)
        if tamil:
            last = rt[-1] if rt else None
            if tamil == "VOL":
                rt.append(("ta", char, "VOL"))
            elif tamil == "UNI":
                if last is not None:
                    if last[-1] == "COM":
                        rt[-1] = (last[0], last[1] + char, "COM")
                    if last[1] == sha_pulli_ra:
                        rt[-1] = (last[0], last[1] + char, "SYM")
            elif tamil == "CON":
                if last is not None and last[-1] == "COM":
                    rt[-1] = (last[0], last[1] + char, "CON")
            elif code == 0x0BB0 and last is not None and last[1] == sha_pulli:
                rt[-1] = ("sa", last[1] + char, "SYM")
            else:
                # Fixed: 0x0BB0 now also lands here when it does not follow
                # the sequence above, instead of being silently dropped.
                rt.append(("ta", char, tamil))
            continue

        # Fixed: the old test ``!= None`` was always true because
        # sanskrit_range(specify=True) returns a bool, so every unclassified
        # character was tagged ("sa", ..., "COM") and "UNK" was unreachable.
        if sanskrit_range(code, specify=True):
            last = rt[-1] if rt else None
            if code == 0x0BB7 and last is not None and last[1] == ka_pulli:
                rt[-1] = ("sa", last[1] + char, "COM")
            else:
                # Fixed: 0x0BB7 is no longer dropped when it does not follow
                # the consonant-plus-pulli sequence above.
                rt.append(("sa", char, "COM"))
        else:
            rt.append(("~", char, "UNK"))

    if capsule:
        return rt
    return [entry[1] for entry in rt]
|
|
117
|
+
|
|
118
|
+
def remove_voule(letter):
    """Placeholder: not implemented, always returns None."""
    return None
|
|
120
|
+
|
|
121
|
+
def split_compound(letter, strict=True):
    """Split a compound letter into ``(consonant, vowel)``.

    Only the first letter of ``letter`` is examined. When its kind is not
    ``"COM"`` the result is None. A compound ending in a vowel sign yields
    the base plus pulli and the matching vowel letter; a compound with no
    sign yields the letter plus pulli and the first vowel. ``strict`` is not
    used.
    """
    first = get_letters(letter, capsule=True)[0]
    if first[-1] != "COM":
        return None
    text = first[1]
    for vowel_letter, vowel_sign in zip(VOULES_LET[1:], VOULES_SYM):
        if text[-1] == vowel_sign:
            return (text[:-1] + '்', vowel_letter)
    return (text + '்', "அ")
|
|
131
|
+
|
|
132
|
+
def make_compound(letter1, letter2):
    """Combine one consonant and one vowel (in either order) into a compound.

    Returns:
        The consonant without its final character plus the vowel's sign, or
        just the shortened consonant when the vowel has no sign.

    Raises:
        ValueError: either argument is not exactly one letter, or the pair
            is not one ``"CON"`` and one ``"VOL"``.
    """
    left = get_letters(letter1, capsule=True)
    right = get_letters(letter2, capsule=True)
    if len(left) != 1 or len(right) != 1:
        raise ValueError("more then two letter in any one parameter")
    left, right = left[0], right[0]
    kinds = (left[-1], right[-1])
    if kinds != ("CON", "VOL") and kinds != ("VOL", "CON"):
        raise ValueError("unjoinable letter types")
    consonant, vowel = (left, right) if kinds[0] == "CON" else (right, left)
    stem = consonant[1][:-1]
    vowel_char = vowel[1][-1]
    for vowel_letter, vowel_sign in zip(VOULES_LET[1:], VOULES_SYM):
        if vowel_char == vowel_letter:
            return stem + vowel_sign
    return stem
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
|
|
2
|
+
### 4. Add a `LICENSE`
|
|
3
|
+
Choose a license for your project and add a `LICENSE` file. For example, if you choose the MIT License, your `LICENSE` file might look like this:
|
|
4
|
+
|
|
5
|
+
```text
|
|
6
|
+
MIT License
|
|
7
|
+
|
|
8
|
+
Copyright (c) [year] [fullname]
|
|
9
|
+
|
|
10
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
11
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
12
|
+
in the Software without restriction, including without limitation the rights
|
|
13
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
14
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
15
|
+
furnished to do so, subject to the following conditions:
|
|
16
|
+
|
|
17
|
+
The above copyright notice and this permission notice shall be included in all
|
|
18
|
+
copies or substantial portions of the Software.
|
|
19
|
+
|
|
20
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
21
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
22
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
23
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
24
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
25
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
26
|
+
SOFTWARE.
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
Metadata-Version: 2.2
|
|
2
|
+
Name: tamilstring
|
|
3
|
+
Version: 0.3.25
|
|
4
|
+
Summary: tamilstring makes handling Tamil Unicode characters much easier
|
|
5
|
+
Home-page: https://gitlab.com/boopalan-dev/tamilstring
|
|
6
|
+
Author: boopalan
|
|
7
|
+
Author-email: contact.boopalan@gmail.com
|
|
8
|
+
Classifier: Programming Language :: Python :: 3
|
|
9
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
10
|
+
Classifier: Operating System :: OS Independent
|
|
11
|
+
Requires-Python: >=3.6
|
|
12
|
+
Description-Content-Type: text/markdown
|
|
13
|
+
License-File: LICENCE
|
|
14
|
+
Provides-Extra: dev
|
|
15
|
+
Requires-Dist: pytest>=6.0; extra == "dev"
|
|
16
|
+
Dynamic: author
|
|
17
|
+
Dynamic: author-email
|
|
18
|
+
Dynamic: classifier
|
|
19
|
+
Dynamic: description
|
|
20
|
+
Dynamic: description-content-type
|
|
21
|
+
Dynamic: home-page
|
|
22
|
+
Dynamic: provides-extra
|
|
23
|
+
Dynamic: requires-python
|
|
24
|
+
Dynamic: summary
|
|
25
|
+
|
|
26
|
+
# TamilString
|
|
27
|
+
|
|
28
|
+
tamilstring makes handling Tamil Unicode characters much easier.
|
|
29
|
+
|
|
30
|
+
Raise your issues here => https://gitlab.com/boopalan-dev/tamilstring
|
|
31
|
+
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
tamilstring/__init__.py,sha256=DJloBmU0XnBf-3XH_RD5rtoYHwWEqzRW_LzlRq-xEao,66
|
|
2
|
+
tamilstring/constant.py,sha256=zxyD3AtyF0sQY5RiN56up7Hf1MTLFMwhU-B99gM7rIo,1305
|
|
3
|
+
tamilstring/func.py,sha256=nQmk8Dl_BBmbzqxAwvjQxxJhGgfiBakhaoCyN4_hwww,7125
|
|
4
|
+
tamilstring/helper.py,sha256=rwT_0R6So4DSbTO0XI0RuwW2bDFLb-I-CBcwG6reWas,13161
|
|
5
|
+
tamilstring/utf8.py,sha256=S6vSvwVvHD3TCPrtCF36937mve2zCrDUic4CnoDcgUs,4946
|
|
6
|
+
tamilstring-0.3.25.dist-info/LICENCE,sha256=AAxMH13pEByqrNic6bn1_UTH-u8QSNfjTdRgleNTd9Y,1247
|
|
7
|
+
tamilstring-0.3.25.dist-info/METADATA,sha256=I9Z-RmjxAXcov57eHGRSWg1fOoSSbNKDEhkN1qii-Rk,897
|
|
8
|
+
tamilstring-0.3.25.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
|
|
9
|
+
tamilstring-0.3.25.dist-info/top_level.txt,sha256=rS4SPFfzZjjtC3Cd-q0hatYgminewYspKC0xjimajHI,12
|
|
10
|
+
tamilstring-0.3.25.dist-info/RECORD,,
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
tamilstring
|