stringextn 1.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
stringextn/__init__.py ADDED
@@ -0,0 +1,7 @@
1
+ from .cases import *
2
+ from .contains import *
3
+ from .replace import *
4
+ from .clean import *
5
+ from .security import *
6
+ from .fuzzy import *
7
+ from .slug import *
stringextn/cases.py ADDED
@@ -0,0 +1,105 @@
1
+ import re
2
+
3
def to_snake(s: str) -> str:
    """Convert a string to snake_case.

    Accepts camelCase, PascalCase, kebab-case, space-separated and already
    snake_cased input. Word boundaries are detected at spaces, hyphens and
    lowercase/digit-to-uppercase transitions.

    Args:
        s: The input string to convert.

    Returns:
        The lowercased string with words separated by underscores.

    Edge cases:
        - A run of uppercase letters is kept together as one word
          ("HTTPServer" -> "http_server").
        - Each space or hyphen becomes exactly one underscore; a capitalised
          word that already follows a separator gets no extra underscore
          ("Hello World" -> "hello_world").
        - Empty string returns an empty string.
        - Only ASCII uppercase letters are recognised as word boundaries.
    """
    # Spaces and hyphens are word separators: map each one to an underscore.
    s = re.sub(r'[ \-]', '_', s)
    # Break before a capitalised word ("HTTPServer" -> "HTTP_Server"), unless it
    # already follows an underscore or whitespace, which avoids "__".
    s = re.sub(r'(?<=[^_\s])([A-Z][a-z]+)', r'_\1', s)
    # Break between a lowercase letter or digit and the uppercase letter after it.
    s = re.sub(r'([a-z0-9])([A-Z])', r'\1_\2', s)
    return s.lower()
29
+
30
def to_camel(s: str) -> str:
    """Convert a string to camelCase.

    Words are found by splitting on underscores, hyphens and whitespace, and
    at case boundaries inside camelCase/PascalCase input. The first word is
    lowercased; every following word is capitalised and appended without a
    separator.

    Args:
        s: The input string to convert.

    Returns:
        The camelCase form of the input.

    Edge cases:
        - Empty string, or a string made only of separators, returns "".
        - A single word is returned in lowercase.
        - Leading, trailing and repeated separators are ignored.
        - Existing camelCase/PascalCase boundaries are preserved
          ("MyVariableName" -> "myVariableName").
        - Only ASCII uppercase letters are recognised as case boundaries.
    """
    # Mark case boundaries ("myVar" -> "my_Var", "HTTPServer" -> "HTTP_Server")
    # so that already-cased input is split into words instead of flattened.
    marked = re.sub(
        r'(?<=[a-z0-9])(?=[A-Z])|(?<=[A-Z])(?=[A-Z][a-z])', '_', s
    )
    words = [w for w in re.split(r'[_\-\s]+', marked) if w]
    if not words:
        return ""
    # capitalize() rather than title(): title() would also upper-case letters
    # that follow digits or apostrophes ("2nd" -> "2Nd").
    return words[0].lower() + "".join(w.capitalize() for w in words[1:])
55
+
56
def to_pascal(s: str) -> str:
    """Convert a string to PascalCase.

    Words are found by splitting on underscores, hyphens and whitespace, and
    at case boundaries inside camelCase/PascalCase input. Every word is
    capitalised and the words are joined without a separator.

    Args:
        s: The input string to convert.

    Returns:
        The PascalCase form of the input.

    Edge cases:
        - Empty string, or a string made only of separators, returns "".
        - A single word is returned with only its first character capitalised.
        - Leading, trailing and repeated separators are ignored.
        - Existing camelCase boundaries are preserved
          ("myVariableName" -> "MyVariableName").
        - Only ASCII uppercase letters are recognised as case boundaries.
    """
    # Mark case boundaries ("myVar" -> "my_Var", "HTTPServer" -> "HTTP_Server")
    # so that already-cased input is split into words instead of flattened.
    marked = re.sub(
        r'(?<=[a-z0-9])(?=[A-Z])|(?<=[A-Z])(?=[A-Z][a-z])', '_', s
    )
    words = (w for w in re.split(r'[_\-\s]+', marked) if w)
    # capitalize() rather than title(): title() would also upper-case letters
    # that follow digits or apostrophes ("2nd" -> "2Nd").
    return "".join(w.capitalize() for w in words)
81
+
82
def to_kebab(s: str) -> str:
    """Convert a string to kebab-case.

    The input is first passed through to_snake; every underscore in that
    result is then turned into a hyphen.

    Args:
        s: The input string to convert.

    Returns:
        The to_snake form of the input with "-" in place of each "_".

    Edge cases:
        - Empty string returns an empty string.
        - Underscores already present in the input also become hyphens.
    """
    snake_form = to_snake(s)
    # Splitting on "_" and re-joining with "-" swaps every underscore.
    return "-".join(snake_form.split("_"))
stringextn/clean.py ADDED
@@ -0,0 +1,135 @@
1
+ import re
2
+ import html
3
+ import unicodedata
4
+
5
# Matches one or more consecutive emoji code points. Multi-code-point
# sequences joined by U+200D (zero-width joiner) are not treated specially:
# the joiner itself is left in place because it is also used in ordinary text.
EMOJI_PATTERN = re.compile(
    "["
    "\U0001F600-\U0001F64F"  # emoticons
    "\U0001F300-\U0001F5FF"  # miscellaneous symbols and pictographs
    "\U0001F680-\U0001F6FF"  # transport and map symbols
    "\U0001F1E0-\U0001F1FF"  # regional indicator symbols (flags)
    "\U0001F900-\U0001F9FF"  # supplemental symbols and pictographs
    "\U0001FA70-\U0001FAFF"  # symbols and pictographs extended-A
    "\u2600-\u26FF"          # miscellaneous symbols
    "\u2700-\u27BF"          # dingbats
    "\uFE0F"                 # variation selector-16 (emoji presentation)
    "]+", flags=re.UNICODE
)
13
+
14
def remove_html(s: str) -> str:
    """Remove HTML/XML tags from a string.

    Deletes every span that starts at "<" and ends at the next ">",
    including tags whose attributes are spread over several lines. Text
    between tags is kept.

    Args:
        s: The input string potentially containing HTML tags.

    Returns:
        The string with all "<...>" spans removed.

    Edge cases:
        - Empty string returns an empty string.
        - A string without "<...>" spans is returned unchanged.
        - A "<" with no later ">" is left untouched.
        - This is a pattern match, not an HTML parser: text such as
          "a < b and c > d" loses the part between the brackets.
        - HTML entities (e.g., &lt;) are NOT unescaped; use html.unescape
          separately.
    """
    # [^>] also matches newlines, so tags broken across lines are removed too
    # (a "." based pattern would stop at the line break).
    return re.sub(r'<[^>]*>', '', s)
36
+
37
def remove_emoji(s: str) -> str:
    """Strip emoji characters from a string.

    Every run of characters matched by the module-level EMOJI_PATTERN is
    deleted; all other characters are kept as they are.

    Args:
        s: The input string potentially containing emoji characters.

    Returns:
        The input with all EMOJI_PATTERN matches removed.

    Edge cases:
        - Empty string returns an empty string.
        - A string with no matching characters is returned unchanged.
        - Whitespace around a removed emoji is kept, so doubled spaces may
          remain.
        - Characters outside the ranges in EMOJI_PATTERN are not removed.
    """
    # Split on the emoji runs and glue the remaining pieces back together.
    kept_pieces = EMOJI_PATTERN.split(s)
    return ''.join(kept_pieces)
59
+
60
def normalize_spaces(s: str) -> str:
    """Collapse whitespace runs and trim the ends of a string.

    Each run of whitespace matched by the regex class ``\\s`` (spaces, tabs,
    newlines and other Unicode whitespace) becomes one space, and whitespace
    at the start or end of the string is dropped.

    Args:
        s: The input string with potentially irregular whitespace.

    Returns:
        The words of the input separated by single spaces.

    Edge cases:
        - Empty string returns an empty string.
        - A whitespace-only string returns an empty string.
        - Non-breaking spaces count as whitespace.
    """
    # Splitting on whitespace runs leaves empty pieces only at the very start
    # or end; dropping them is what trims the result.
    words = (piece for piece in re.split(r'\s+', s) if piece)
    return ' '.join(words)
82
+
83
def normalize_unicode(s: str) -> str:
    """Return the NFKD-normalized form of a string.

    NFKD is Unicode compatibility decomposition: composed characters are
    split into a base character plus combining marks, and compatibility
    characters are replaced by their plain equivalents.

    Args:
        s: The input string.

    Returns:
        The result of unicodedata.normalize("NFKD", s).

    Edge cases:
        - Empty string returns an empty string.
        - ASCII-only strings are unchanged.
        - Accented letters become base letter + combining mark
          ("\\u00e9" -> "e\\u0301"); the marks are not removed.
        - Ligatures are expanded ("\\ufb01" -> "fi").
    """
    decomposed = unicodedata.normalize("NFKD", s)
    return decomposed
106
+
107
def clean_text(s: str) -> str:
    """Run the full text-cleaning pipeline on a string.

    The steps are applied in this fixed order, each receiving the previous
    step's output: html.unescape, remove_html, remove_emoji,
    normalize_unicode, normalize_spaces.

    Args:
        s: The input string to clean.

    Returns:
        The string after all five steps have been applied.

    Edge cases:
        - Empty string returns an empty string.
        - Entities are decoded before tags are stripped, so escaped markup
          such as "&lt;tag&gt;" becomes "<tag>" and is then removed.
        - Whitespace is normalized last, so gaps left by removed tags or
          emoji are collapsed.
    """
    # Order matters; see the docstring.
    pipeline = (
        html.unescape,
        remove_html,
        remove_emoji,
        normalize_unicode,
        normalize_spaces,
    )
    for step in pipeline:
        s = step(s)
    return s
stringextn/contains.py ADDED
@@ -0,0 +1,48 @@
1
def contains_any(s: str, items) -> bool:
    """Report whether at least one item occurs in a string.

    Each item is tested with ``item in s`` (case-sensitive substring
    matching); the search stops at the first hit.

    Args:
        s: The string to search in.
        items: An iterable of items to look for.

    Returns:
        True as soon as one item is found in the string, otherwise False.

    Raises:
        TypeError: If items is not iterable.

    Edge cases:
        - An empty iterable returns False.
        - An empty-string item is contained in every string, including "".
    """
    for candidate in items:
        if candidate in s:
            return True
    return False
24
+
25
def contains_all(s: str, items) -> bool:
    """Report whether every item occurs in a string.

    Each item is tested with ``item in s`` (case-sensitive substring
    matching); the search stops at the first miss. The position and order
    of the items inside the string are irrelevant.

    Args:
        s: The string to search in.
        items: An iterable of items to look for.

    Returns:
        False as soon as one item is missing from the string, otherwise True.

    Raises:
        TypeError: If items is not iterable.

    Edge cases:
        - An empty iterable returns True.
        - An empty-string item is contained in every string, including "".
    """
    for candidate in items:
        if candidate not in s:
            return False
    return True
stringextn/fuzzy.py ADDED
@@ -0,0 +1,28 @@
1
+ from difflib import SequenceMatcher
2
+
3
def similarity(a: str, b: str) -> float:
    """Calculate the similarity ratio between two strings.

    Uses difflib.SequenceMatcher and returns its ratio(), i.e.
    2 * (number of matching characters) / (len(a) + len(b)), rounded to
    3 decimal places.

    Args:
        a: The first string to compare.
        b: The second string to compare.

    Returns:
        A float between 0.0 and 1.0; 1.0 means the strings are identical.

    Edge cases:
        - Two empty strings return 1.0.
        - Exactly one empty string returns 0.0.
        - Comparison is case-sensitive and whitespace is significant.
    """
    # autojunk=False: by default SequenceMatcher treats frequently occurring
    # characters as junk once a sequence reaches 200 items, which can make
    # long, nearly identical strings score far too low.
    matcher = SequenceMatcher(None, a, b, autojunk=False)
    return round(matcher.ratio(), 3)
stringextn/replace.py ADDED
@@ -0,0 +1,30 @@
1
+ import re
2
+
3
def multi_replace(s: str, mapping: dict) -> str:
    """Replace multiple substrings in one pass using a mapping.

    All non-empty keys are combined into a single regex alternation (each key
    escaped, so it is matched literally) and the string is scanned once.
    Replacement text is never re-scanned, so replacements do not cascade.

    Args:
        s: The input string to perform replacements on.
        mapping: A dictionary whose keys are the substrings to find and whose
            values are the replacement strings.

    Returns:
        The string with every occurrence of a key replaced by its value.

    Edge cases:
        - An empty mapping, or one with only an empty-string key, returns
          the input unchanged.
        - Empty-string keys are ignored.
        - When one key is a prefix of another, the longer key wins
          ({"a": "1", "ab": "2"} turns "ab" into "2").
        - Empty input string returns an empty string.
    """
    # Longest keys first so the alternation prefers "ab" over its prefix "a".
    # Empty keys are dropped: an empty alternative would match at every
    # position in the string.
    keys = sorted((k for k in mapping if k), key=len, reverse=True)
    if not keys:
        return s
    pattern = re.compile("|".join(map(re.escape, keys)))
    return pattern.sub(lambda m: mapping[m.group(0)], s)
stringextn/security.py ADDED
@@ -0,0 +1,48 @@
1
def mask_email(email: str) -> str:
    """Mask the local part of an email address.

    Keeps the first character of the part before "@", replaces the rest of
    that part with a fixed "***", and leaves the domain as it is. The number
    of asterisks does not depend on the length of the local part.

    Args:
        email: An email address string containing exactly one "@".

    Returns:
        The masked address in the form [first_char]***@[domain].

    Raises:
        ValueError: If the string does not contain exactly one "@".
        IndexError: If the part before "@" is empty.

    Edge cases:
        - A one-character local part still gets "***" appended
          ("a@b.c" -> "a***@b.c").
        - An empty domain is accepted ("user@" -> "u***@").
        - No validation beyond the single "@" is performed.
    """
    # Unpacking into two names enforces "exactly one @" via ValueError.
    local_part, host = email.split("@")
    initial = local_part[0]
    return "".join((initial, "***@", host))
26
+
27
+ def mask_phone(phone: str) -> str:
28
+ """Mask a phone number for privacy by hiding all but the last 4 digits.
29
+
30
+ Replaces all but the final 4 characters with asterisks. Useful for displaying
31
+ phone numbers in logs or UI while maintaining minimal identifier information.
32
+
33
+ Args:
34
+ phone: A phone number string (any length, typically 10+ digits).
35
+
36
+ Returns:
37
+ The masked phone number with format: ****[last_4_chars]
38
+
39
+ Raises:
40
+ None
41
+
42
+ Edge cases:
43
+ - Phone number with 4 or fewer characters returns the original string unchanged.
44
+ - Phone number with 5 characters returns one asterisk plus last 4 chars.
45
+ - No validation is performed on phone format; any string is accepted.
46
+ - Special characters and spaces are preserved (treated as regular characters).
47
+ """
48
+ return "*" * (len(phone) - 4) + phone[-4:]
stringextn/slug.py ADDED
@@ -0,0 +1,33 @@
1
+ import re
2
+ from .clean import clean_text
3
+
4
def slugify(s: str) -> str:
    """Convert a string to a URL-friendly slug.

    The text is cleaned with clean_text, lowercased, stripped of combining
    marks, and every run of characters other than a-z and 0-9 is replaced by
    a single hyphen. Hyphens at either end are removed.

    Args:
        s: The input string to convert to a slug.

    Returns:
        A slug of lowercase ASCII letters and digits separated by single
        hyphens, with no leading or trailing hyphen.

    Edge cases:
        - Empty string returns an empty string.
        - A string with no ASCII letters or digits returns an empty string.
        - Accented letters keep their base letter ("Crème" -> "creme").
        - Characters with no ASCII base letter act as separators.
    """
    # Imported locally so the block does not depend on the module's imports.
    import unicodedata

    s = clean_text(s).lower()
    # clean_text applies NFKD, which leaves accents behind as separate
    # combining marks; drop them so they do not turn into hyphens inside a
    # word ("cre\u0300me" would otherwise become "cre-me").
    s = "".join(ch for ch in s if not unicodedata.combining(ch))
    s = re.sub(r'[^a-z0-9]+', '-', s)
    return s.strip('-')
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Balaji Katta Venkatarathnam
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,170 @@
1
+ Metadata-Version: 2.1
2
+ Name: stringextn
3
+ Version: 1.0.1
4
+ Summary: Pragmatic string utilities for APIs and data cleaning
5
+ Author: Balaji Katta Venkatarathnam
6
+ License: MIT
7
+ Project-URL: Homepage, https://github.com/balaji-kv/stringextn
8
+ Project-URL: Source, https://github.com/balaji-kv/stringextn
9
+ Project-URL: Issues, https://github.com/balaji-kv/stringextn/issues
10
+ Keywords: string,text,utilities,slug,mask,cleaning
11
+ Classifier: Development Status :: 5 - Production/Stable
12
+ Classifier: Intended Audience :: Developers
13
+ Classifier: Topic :: Software Development :: Libraries
14
+ Classifier: License :: OSI Approved :: MIT License
15
+ Classifier: Programming Language :: Python :: 3
16
+ Classifier: Programming Language :: Python :: 3.8
17
+ Classifier: Programming Language :: Python :: 3.9
18
+ Classifier: Programming Language :: Python :: 3.10
19
+ Classifier: Programming Language :: Python :: 3.11
20
+ Classifier: Programming Language :: Python :: 3.12
21
+ Requires-Python: >=3.8
22
+ Description-Content-Type: text/markdown
23
+ License-File: LICENSE
24
+
25
+ # stringextn
26
+
27
+ A pragmatic, zero-dependency Python library for practical string manipulation and text cleaning. `stringextn` provides battle-tested utilities for case conversion, HTML/emoji removal, substring matching, fuzzy comparison, security masking, URL slug generation, and multi-string replacement—all designed with real-world edge cases in mind.
28
+
29
+ ## Installation
30
+
31
+ Install via pip:
32
+
33
+ ```bash
34
+ pip install stringextn
35
+ ```
36
+
37
+ Requires Python 3.8 or higher. No external dependencies.
38
+
39
+ ## Quick Start
40
+
41
+ ```python
42
+ from stringextn import (
43
+ to_snake, to_camel, to_pascal, to_kebab,
44
+ clean_text, remove_html, remove_emoji,
45
+ contains_any, contains_all,
46
+ similarity,
47
+ multi_replace,
48
+ mask_email, mask_phone,
49
+ slugify
50
+ )
51
+
52
+ # Case conversion
53
+ to_snake("myVariableName") # "my_variable_name"
54
+ to_camel("my_variable_name") # "myVariableName"
55
+ to_pascal("my-variable-name") # "MyVariableName"
56
+ to_kebab("myVariableName") # "my-variable-name"
57
+
58
+ # Text cleaning
59
+ clean_text("<p>Hello &amp; goodbye!</p>") # "Hello & goodbye!"
60
+ remove_html("<div>Content</div>") # "Content"
61
+ remove_emoji("Hello 👋 World 🌍") # "Hello  World "
62
+
63
+ # Substring matching
64
+ contains_any("hello world", ["world", "foo"]) # True
65
+ contains_all("hello world", ["hello", "world"]) # True
66
+
67
+ # Fuzzy string matching
68
+ similarity("kitten", "sitting") # 0.615
69
+
70
+ # Multi-replace
71
+ multi_replace("abc abc abc", {"a": "X", "b": "Y"}) # "XYc XYc XYc"
72
+
73
+ # Privacy masking
74
+ mask_email("user@example.com") # "u***@example.com"
75
+ mask_phone("5551234567") # "******1234"
76
+
77
+ # URL-safe slugs
78
+ slugify("Hello, World! ✨") # "hello-world"
79
+ ```
80
+
81
+ ## Features
82
+
83
+ ### Case Conversion
84
+ - **`to_snake(s)`** – Converts to snake_case
85
+ - **`to_camel(s)`** – Converts to camelCase
86
+ - **`to_pascal(s)`** – Converts to PascalCase
87
+ - **`to_kebab(s)`** – Converts to kebab-case
88
+
89
+ Supports mixed input formats (camelCase, PascalCase, kebab-case, snake_case, space-separated).
90
+
91
+ ### Text Cleaning
92
+ - **`clean_text(s)`** – Comprehensive cleaning pipeline: HTML entity unescaping, tag removal, emoji removal, Unicode normalization, and whitespace normalization
93
+ - **`remove_html(s)`** – Strips HTML/XML tags
94
+ - **`remove_emoji(s)`** – Removes emoji characters
95
+ - **`normalize_spaces(s)`** – Collapses whitespace and trims
96
+ - **`normalize_unicode(s)`** – Applies NFKD normalization for consistent character representation
97
+
98
+ ### Substring Operations
99
+ - **`contains_any(s, items)`** – Returns True if string contains any item
100
+ - **`contains_all(s, items)`** – Returns True if string contains all items
101
+
102
+ Case-sensitive substring matching using Python's `in` operator.
103
+
104
+ ### Fuzzy Matching
105
+ - **`similarity(a, b)`** – Returns similarity score (0.0–1.0) using difflib's SequenceMatcher
106
+ - 1.0 = identical strings
107
+ - 0.0 = no similarity
108
+ - Rounded to 3 decimal places
109
+
110
+ ### String Replacement
111
+ - **`multi_replace(s, mapping)`** – Performs simultaneous multi-string replacement
112
+ - All keys are treated as literal strings (regex special chars auto-escaped)
113
+ - Non-cascading: each substring is replaced exactly once
114
+
115
+ ### Security & Privacy
116
+ - **`mask_email(email)`** – Hides all but first character of email local part
117
+ - Format: `u***@example.com`
118
+ - Raises `ValueError` if email doesn't contain exactly one `@`
119
+ - **`mask_phone(phone)`** – Hides all but last 4 digits
120
+ - Format: `****1234`
121
+
122
+ ### URL Slugs
123
+ - **`slugify(s)`** – Generates URL-safe slugs
124
+ - Cleans text, lowercases, replaces non-alphanumeric with hyphens
125
+ - Strips leading/trailing hyphens
126
+ - Example: `"Hello, World! ✨"` → `"hello-world"`
127
+
128
+ ## Performance & Behavior Notes
129
+
130
+ ### Unicode Handling
131
+ - **NFKD Normalization**: The `clean_text()` and `slugify()` functions apply NFKD (Compatibility Decomposition) normalization, which:
132
+ - Decomposes accented characters (é → e + ´)
133
+ - Applies compatibility mappings (ﬁ → fi)
134
+ - Ensures consistent character representation across different input encodings
135
+ - Emoji removal uses Unicode ranges and handles most emoticons and symbols; complex emoji sequences (skin tones, zero-width-joiner) may not be fully removed
136
+ - Non-ASCII characters in `to_snake()` and `to_camel()` are preserved but not affected by case conversion
137
+
138
+ ### Edge Cases
139
+ - **Empty strings**: Most functions return empty strings; `contains_all("", [])` returns True (vacuous truth)
140
+ - **Whitespace**: Leading/trailing whitespace is preserved in case conversion; use `normalize_spaces()` first if needed
141
+ - **Consecutive separators**: `multi_replace()` and `slugify()` handle consecutive delimiters correctly (collapsed in slugs, replaced individually in multi_replace)
142
+ - **Special regex characters**: `multi_replace()` automatically escapes all regex special characters in mapping keys
143
+ - **Email masking**: No format validation; only checks for single `@` symbol
144
+ - **Phone masking**: Works with any string; no validation of format
145
+
146
+ ### Performance
147
+ - All functions use compiled regular expressions or built-in operations for efficiency
148
+ - No external dependencies; pure Python implementation
149
+ - Suitable for high-volume text processing in APIs and data pipelines
150
+
151
+ ## Testing
152
+
153
+ Run the test suite with pytest:
154
+
155
+ ```bash
156
+ pytest tests/
157
+ ```
158
+
159
+ ## License
160
+
161
+ MIT License. See LICENSE file for details.
162
+
163
+ ## Contributing
164
+
165
+ Contributions are welcome. Please ensure all tests pass and add tests for new functionality.
166
+
167
+ ---
168
+
169
+ **Package**: stringextn v1.0.1
170
+ **GitHub**: [stringextn](https://github.com/balaji-kv/stringextn)
@@ -0,0 +1,13 @@
1
+ stringextn/__init__.py,sha256=tFSBvaarjSukVEPA7SjThaJNnjRt0F5f8HCq5wFxau4,154
2
+ stringextn/cases.py,sha256=lQ_SkuW1_im1szFHBD2Fw4uKXbwMrpWg0JWNDZjVYgM,3419
3
+ stringextn/clean.py,sha256=b6aAf0v8TIUnaaTBgPo9o2H_Sd46prwEaR5sYtrVOVI,4311
4
+ stringextn/contains.py,sha256=yV91oA-qMC6FkY1NLDa_c_38ZOHhQPZEWPkBUQwouEc,1603
5
+ stringextn/fuzzy.py,sha256=OeSuNjEkaXs6SmhZ1AVOIl3ipUgA6NFQ9ZUdT9vomX0,1122
6
+ stringextn/replace.py,sha256=ylqCw2DoRWrbR6SEaNkVyKZRl58Y16BsC5Tf6YZaN5U,1276
7
+ stringextn/security.py,sha256=jQRV8lF6hwoKsX8MpiAgrmV9hPrwVdrjGl7xtfKWygc,1887
8
+ stringextn/slug.py,sha256=MYDmAGlm0N8et-Njnik-deVn45EZ9MrVD5wtLH2M4bE,1208
9
+ stringextn-1.0.1.dist-info/LICENSE,sha256=PWxsZvoTQzW_6Oo3dR9F4Xc3B0UzzMyC55PVfOQb5r8,1084
10
+ stringextn-1.0.1.dist-info/METADATA,sha256=9SDLfhOReJOC4geE1nCl-0Fh7FZEhlNYinhjJMkdwwU,6476
11
+ stringextn-1.0.1.dist-info/WHEEL,sha256=WnJ8fYhv8N4SYVK2lLYNI6N0kVATA7b0piVUNvqIIJE,91
12
+ stringextn-1.0.1.dist-info/top_level.txt,sha256=tCN4hz31Y52rDem2AoOHJKNVhD-RdPysvksM9HpJMXI,11
13
+ stringextn-1.0.1.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (75.3.3)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1 @@
1
+ stringextn