telugu-language-tools 4.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,108 @@
1
def count_telugu_chars(text):
    """
    Return how many characters of ``text`` lie in the Telugu Unicode block.

    A character is counted when its code point is between U+0C00 and
    U+0C7F inclusive.

    Args:
        text (str): Input text

    Returns:
        int: Number of Telugu characters; 0 when ``text`` is not a string
            (including ``None``)
    """
    # isinstance() already rejects None, so a single check is enough.
    if not isinstance(text, str):
        return 0

    total = 0
    for symbol in text:
        if 0x0C00 <= ord(symbol) <= 0x0C7F:
            total += 1
    return total
14
+
15
+
16
def count_english_chars(text):
    """
    Return how many unaccented Latin letters ``text`` contains.

    Only the 52 ASCII letters a-z and A-Z are counted; accented or other
    non-ASCII letters are ignored.

    Args:
        text (str): Input text

    Returns:
        int: Number of English characters (a-z, A-Z); 0 when ``text`` is
            not a string (including ``None``)
    """
    if not isinstance(text, str):
        return 0

    letters = [
        symbol
        for symbol in text
        if 'a' <= symbol <= 'z' or 'A' <= symbol <= 'Z'
    ]
    return len(letters)
30
+
31
+
32
def count_digits(text):
    """
    Return how many digit characters ``text`` contains.

    A character is a digit when ``str.isdigit()`` is true for it, so the
    count is not limited to ASCII 0-9.

    Args:
        text (str): Input text

    Returns:
        int: Number of digit characters; 0 when ``text`` is not a string
            (including ``None``)
    """
    if not isinstance(text, str):
        return 0

    digit_total = 0
    for symbol in text:
        if symbol.isdigit():
            digit_total += 1
    return digit_total
45
+
46
+
47
def is_telugu_text(text):
    """
    Report whether ``text`` contains at least one Telugu character.

    A Telugu character is one whose code point is between U+0C00 and
    U+0C7F inclusive. Scanning stops at the first match.

    Args:
        text (str): Input text

    Returns:
        bool: True if text contains Telugu characters; False otherwise,
            and False when ``text`` is not a string (including ``None``)
    """
    if not isinstance(text, str):
        return False

    for symbol in text:
        if 0x0C00 <= ord(symbol) <= 0x0C7F:
            return True
    return False
60
+
61
+
62
def split_telugu_words(text):
    """
    Extract the Telugu words from a string.

    A word is a maximal run of characters from the Telugu Unicode block
    (U+0C00-U+0C7F). Zero-width joiners and non-joiners (U+200D / U+200C)
    that sit *between* Telugu characters are kept inside the word, because
    they only control how conjuncts are rendered and do not mark a word
    boundary. Joiners at the start or end of a run are not included.

    Args:
        text (str): Input text

    Returns:
        list: List of Telugu words, in order of appearance; an empty list
            when ``text`` is not a string (including ``None``) or contains
            no Telugu characters
    """
    # isinstance() already rejects None, so a single check is enough.
    if not isinstance(text, str):
        return []
    import re
    # One or more Telugu characters, optionally continued by
    # "joiner(s) + more Telugu characters" so that a ZWNJ/ZWJ inside a
    # word does not split it into two tokens.
    telugu_word = r'[\u0C00-\u0C7F]+(?:[\u200C\u200D]+[\u0C00-\u0C7F]+)*'
    return re.findall(telugu_word, text)
78
+
79
+
80
def get_text_stats(text):
    """
    Collect character and word statistics for ``text`` in one dictionary.

    Args:
        text (str): Input text

    Returns:
        dict: Dictionary with text statistics, with these keys:
            'total_chars' (len of the text), 'telugu_chars',
            'english_chars', 'digits', 'telugu_words' (number of Telugu
            words) and 'is_telugu' (bool). When ``text`` is not a string
            (including ``None``) every count is 0 and 'is_telugu' is False.
    """
    if not isinstance(text, str):
        # Non-string input: all counters are zero, nothing is Telugu.
        blank = dict.fromkeys(
            ('total_chars', 'telugu_chars', 'english_chars', 'digits',
             'telugu_words'),
            0,
        )
        blank['is_telugu'] = False
        return blank

    return dict(
        total_chars=len(text),
        telugu_chars=count_telugu_chars(text),
        english_chars=count_english_chars(text),
        digits=count_digits(text),
        telugu_words=len(split_telugu_words(text)),
        is_telugu=is_telugu_text(text),
    )