sqlobjects 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,644 @@
1
+ import re
2
+
3
+
4
# Names exported by ``from <module> import *``.
# NOTE(review): smart_pluralize and format_count are defined in this module
# but are not listed here - confirm that omission is intentional.
__all__ = ["pluralize", "singularize", "is_plural"]
5
+
6
+
7
# Nouns whose plural cannot be produced by the suffix rules.
# Keys are singular forms, values are the matching plural forms.
IRREGULAR_PLURALS: dict[str, str] = {
    # People
    "person": "people", "man": "men", "woman": "women",
    "child": "children", "human": "humans",
    # Animals and body parts
    "mouse": "mice", "goose": "geese", "foot": "feet",
    "tooth": "teeth", "ox": "oxen", "louse": "lice",
    # Latin -um / Greek -on / Greek -is nouns
    "datum": "data", "medium": "media", "curriculum": "curricula",
    "memorandum": "memoranda", "bacterium": "bacteria",
    "criterion": "criteria", "phenomenon": "phenomena",
    "analysis": "analyses", "basis": "bases", "crisis": "crises",
    "diagnosis": "diagnoses", "ellipsis": "ellipses",
    "hypothesis": "hypotheses", "oasis": "oases",
    "parenthesis": "parentheses", "synopsis": "synopses",
    "thesis": "theses",
    # Miscellaneous irregular forms
    "die": "dice", "penny": "pennies", "quiz": "quizzes",
    "genus": "genera", "corpus": "corpora", "opus": "opera",
    "octopus": "octopi", "platypus": "platypuses",
    "hippopotamus": "hippopotamuses", "rhinoceros": "rhinoceroses",
    # Latin -us -> -i and -ex/-ix -> -ices
    "alumnus": "alumni", "cactus": "cacti", "focus": "foci",
    "fungus": "fungi", "nucleus": "nuclei", "radius": "radii",
    "stimulus": "stimuli", "syllabus": "syllabi", "terminus": "termini",
    "index": "indices", "matrix": "matrices", "vertex": "vertices",
    "appendix": "appendices",
    # -f / -fe nouns that become -ves
    "knife": "knives", "life": "lives", "wife": "wives",
    "half": "halves", "leaf": "leaves", "loaf": "loaves",
    "shelf": "shelves", "thief": "thieves", "wolf": "wolves",
    "calf": "calves", "elf": "elves", "scarf": "scarves",
    "wharf": "wharves",
}
80
+
81
# Nouns treated as having a single form: pluralize() and singularize()
# hand them back unchanged and is_plural() reports False for them.
UNCOUNTABLE_WORDS: set[str] = {
    # Animals (plus "aircraft", which behaves the same way)
    "sheep", "deer", "fish", "moose", "swine", "bison", "buffalo",
    "duck", "pike", "salmon", "trout", "squid", "aircraft",
    # Food and substances
    "rice", "wheat", "corn", "barley", "oats", "sugar", "salt", "water",
    "milk", "oil", "butter", "cheese", "bread", "meat", "beef", "pork",
    "chicken", "seafood",
    # Abstract concepts
    "information", "news", "advice", "progress", "research", "knowledge",
    "wisdom", "intelligence", "experience", "evidence", "proof", "truth",
    "justice", "peace", "happiness", "sadness", "anger", "fear", "love",
    "hate", "beauty", "ugliness", "strength", "weakness",
    # Activities and fields of study
    "homework", "housework", "work", "employment", "business",
    "economics", "mathematics", "physics", "chemistry", "biology",
    "history", "geography", "literature", "music", "art", "politics",
    "athletics", "gymnastics",
    # Materials
    "gold", "silver", "copper", "iron", "steel", "wood", "paper",
    "plastic", "glass", "cotton", "wool", "silk", "leather",
    # Weather
    "weather", "rain", "snow", "sunshine", "wind", "fog",
    # Collective nouns and plural-only forms
    "equipment", "furniture", "luggage", "baggage", "clothing", "jewelry",
    "machinery", "software", "hardware", "data", "staff", "personnel",
    "police", "cattle", "poultry", "scissors", "glasses", "pants",
    "shorts", "jeans", "series", "species", "means", "headquarters",
    # Further uncountable nouns
    "traffic", "feedback", "merchandise", "livestock", "wildlife",
    "offspring", "crossroads",
}
215
+
216
# Suffix rules for regular plurals, as (regex, replacement) pairs.
# pluralize() walks the list in order and applies the first pattern that
# matches, so more specific endings must come before the catch-all.
#
# Latin/Greek rewrites (-us -> -i, -is -> -es, -on -> -a, -um -> -a) are
# intentionally not listed here:
#   * "-on"/"-um" mangled everyday nouns ("button" -> "butta",
#     "album" -> "alba", "transaction" -> "transactia");
#   * "-us"/"-is" could never fire, because every word ending in "s" is
#     already claimed by the sibilant rule at the top of the list.
# Genuine Latin/Greek nouns are handled through IRREGULAR_PLURALS instead.
#
# NOTE: pluralize() identifies the "consonant + o" and "-f/-fe" rules by
# comparing the pattern string itself, so those two patterns must be kept
# character-for-character.
PLURALIZATION_RULES: list[tuple[str, str]] = [
    # Sibilant endings (-s, -ss, -sh, -ch, -x, -z) take -es
    (r"(s|ss|sh|ch|x|z)$", r"\1es"),
    # consonant + y -> -ies
    (r"([bcdfghjklmnpqrstvwxz])y$", r"\1ies"),
    # vowel + y -> -ys
    (r"([aeiou])y$", r"\1ys"),
    # non-vowel + f / fe -> -ves (exceptions are filtered by pluralize())
    (r"([^aeiou])fe?$", r"\1ves"),
    # consonant + o -> -oes (exceptions are filtered by pluralize())
    (r"([bcdfghjklmnpqrstvwxz])o$", r"\1oes"),
    # vowel + o -> -os
    (r"([aeiou])o$", r"\1os"),
    # Catch-all: append -s (always matches, so it must stay last)
    (r"$", r"s"),
]
241
+
242
# Nouns ending in -o whose plural is a plain -s rather than -es.
# pluralize() consults this set only when the "consonant + o" rule matched.
O_EXCEPTIONS: set[str] = {
    "photo", "piano", "halo", "solo", "soprano", "alto", "disco",
    "casino", "studio", "radio", "stereo", "video", "audio",
    "portfolio", "scenario", "embryo", "memo", "logo", "ego", "zero",
    "auto", "metro", "macro", "micro", "retro", "tempo", "contralto",
}
272
+
273
# Nouns ending in -o that are explicitly pluralized with -es.
O_ES_WORDS: set[str] = {
    "hero", "potato", "tomato", "echo", "veto", "torpedo", "volcano",
    "tornado", "mosquito", "buffalo", "domino", "mango", "flamingo",
}
289
+
290
# Nouns ending in -f / -fe whose plural is a plain -s rather than -ves.
# pluralize() consults this set only when the "-f/-fe" rule matched.
F_EXCEPTIONS: set[str] = {
    "roof", "proof", "chief", "cliff", "staff", "golf", "safe",
    "belief", "chef", "reef", "grief", "brief", "handkerchief",
}
306
+
307
+
308
def pluralize(word: str) -> str:
    """
    Convert a singular English word to its plural form.

    Resolution order: words that are uncountable or already look plural are
    returned untouched; then the irregular table is consulted; finally the
    first matching entry of ``PLURALIZATION_RULES`` is applied.

    Args:
        word: The singular word to pluralize. Surrounding whitespace is
            ignored when matching.

    Returns:
        The plural form, re-cased to follow the (whitespace-stripped) input.
        Falsy or non-string input, blank strings, uncountable words and
        words that already look plural are returned exactly as given.

    Examples:
        >>> pluralize("cat")
        'cats'
        >>> pluralize("child")
        'children'
        >>> pluralize("mouse")
        'mice'
        >>> pluralize("box")
        'boxes'
        >>> pluralize("city")
        'cities'
    """
    if not word or not isinstance(word, str):
        return word

    original_word = word
    # Case is re-applied position by position, so it must be aligned against
    # the stripped text; otherwise leading whitespace shifts every capital.
    stripped = word.strip()
    word = stripped.lower()

    if not word:
        return original_word

    # Nothing to do for uncountable words or words that are already plural
    if is_plural(word) or word in UNCOUNTABLE_WORDS:
        return original_word

    # Irregular nouns take precedence over the suffix rules
    if word in IRREGULAR_PLURALS:
        return _preserve_case(stripped, IRREGULAR_PLURALS[word])

    for pattern, replacement in PLURALIZATION_RULES:
        if not re.search(pattern, word):
            continue

        if pattern == r"([bcdfghjklmnpqrstvwxz])o$":
            # consonant + o: a known list takes -s, everything else -es
            if word in O_EXCEPTIONS:
                plural = word + "s"
            elif word in O_ES_WORDS:
                plural = word + "es"
            else:
                plural = re.sub(pattern, replacement, word)
        elif pattern == r"([^aeiou])fe?$":
            # Double-f endings never become -ves ("sheriff" -> "sheriffs",
            # "giraffe" -> "giraffes"); without this guard the rule would
            # emit "sherifves" / "girafves".
            keeps_f = (
                word in F_EXCEPTIONS
                or word.rstrip("e") in F_EXCEPTIONS
                or word.endswith(("ff", "ffe"))
            )
            plural = word + "s" if keeps_f else re.sub(pattern, replacement, word)
        else:
            plural = re.sub(pattern, replacement, word)

        return _preserve_case(stripped, plural)

    # Only reachable if the rule table has no catch-all entry
    return stripped + "s"
374
+
375
+
376
def singularize(word: str) -> str:
    """
    Convert a plural English word to its singular form.

    Resolution order: words that do not look plural (per ``is_plural``) or
    are uncountable are returned untouched; then the irregular table is
    searched in reverse; then a few explicit word lists are checked; finally
    the first matching suffix rule is applied.

    Args:
        word: The plural word to singularize. Surrounding whitespace is
            ignored when matching.

    Returns:
        The singular form, re-cased to follow the (whitespace-stripped)
        input. Falsy or non-string input, blank strings, and words that are
        uncountable or not recognised as plural are returned exactly as
        given.

    Examples:
        >>> singularize("cats")
        'cat'
        >>> singularize("children")
        'child'
        >>> singularize("mice")
        'mouse'
    """
    if not word or not isinstance(word, str):
        return word

    original_word = word
    # Align case preservation with the stripped text so that leading
    # whitespace does not shift the capital letters.
    stripped = word.strip()
    word = stripped.lower()

    if not word:
        return original_word

    # Nothing to do for singular or uncountable words
    if not is_plural(word) or word in UNCOUNTABLE_WORDS:
        return original_word

    # Irregular plurals: invert the singular -> plural table
    reverse_irregulars = {v: k for k, v in IRREGULAR_PLURALS.items()}
    if word in reverse_irregulars:
        return _preserve_case(stripped, reverse_irregulars[word])

    # Latin/Greek plurals in -a map back to -um or -on
    latin_um_words = {
        "datum", "medium", "curriculum", "memorandum", "bacterium",
        "stadium", "aquarium", "terrarium",
    }
    greek_on_words = {"criterion", "phenomenon", "automaton"}
    if word.endswith("a"):
        base = word[:-1]
        if base + "um" in latin_um_words:
            return _preserve_case(stripped, base + "um")
        if base + "on" in greek_on_words:
            return _preserve_case(stripped, base + "on")

    # Singulars ending in a single "s" whose plural adds "-es"; stripping
    # only the "s" would leave a stray "e" ("gases" -> "gase").
    # The list is not exhaustive.
    es_stems = {"alias", "atlas", "bias", "canvas", "gas", "iris", "lens"}
    if word.endswith("es") and word[:-2] in es_stems:
        return _preserve_case(stripped, word[:-2])

    # Singulars ending in "-e" that the suffix rules below would otherwise
    # over-trim ("movies" -> "movy", "shoes" -> "sho", "curves" -> "curf").
    # The list is not exhaustive.
    keep_e_singulars = {
        "movie", "cookie", "zombie", "calorie", "genie", "rookie", "selfie",
        "pie", "tie", "lie", "shoe", "toe", "canoe", "cache", "curve",
        "nerve", "valve",
    }
    if word.endswith("es") and word[:-1] in keep_e_singulars:
        return _preserve_case(stripped, word[:-1])

    # Suffix rules, first match wins. There is deliberately no generic
    # "-i -> -us" rule: it turned singular words such as "taxi" into
    # "taxus", and the known Latin plurals are covered by the irregular table.
    singularization_rules = [
        # consonant + ies -> y
        (r"([bcdfghjklmnpqrstvwxz])ies$", r"\1y"),
        # consonant + ves -> f
        (r"([bcdfghjklmnpqrstvwxz])ves$", r"\1f"),
        # consonant + oes -> o
        (r"([bcdfghjklmnpqrstvwxz])oes$", r"\1o"),
        # -sses/-shes/-ches/-xes and non-vowel + zes drop the "es".
        # A single s or z after a vowel is left for the default rule, so
        # "houses" -> "house" and "sizes" -> "size" (not "hous" / "siz").
        (r"(ss|sh|ch|x|[^aeiou]z)es$", r"\1"),
        # consonant + uses -> us ("statuses" -> "status", "buses" -> "bus")
        (r"([bcdfghjklmnpqrstvwxz]us)es$", r"\1"),
        # default: drop the trailing s
        (r"s$", r""),
    ]

    for pattern, replacement in singularization_rules:
        if re.search(pattern, word):
            singular = re.sub(pattern, replacement, word)
            return _preserve_case(stripped, singular)

    return original_word
448
+
449
+
450
def is_plural(word: str) -> bool:
    """
    Check if a word is in plural form.

    Known words are decided by lookup (uncountable set, irregular table, a
    list of singular nouns ending in "s"). Anything else is judged by its
    ending: a trailing "s" marks a plural unless the word ends in "ss",
    "us" or "is". Words ending in "a" or "i" are only plural when they are
    explicitly known, because most such nouns ("banana", "schema", "taxi")
    are singular.

    Args:
        word: The word to check. Case and surrounding whitespace are ignored.

    Returns:
        True if the word appears to be plural, False otherwise. Falsy or
        non-string input, blank strings and uncountable words yield False.

    Examples:
        >>> is_plural("cats")
        True
        >>> is_plural("cat")
        False
        >>> is_plural("children")
        True
    """
    if not word or not isinstance(word, str):
        return False

    word = word.strip().lower()

    if not word:
        return False

    # Uncountable words are neither singular nor plural
    if word in UNCOUNTABLE_WORDS:
        return False

    # Known irregular plural forms
    if word in IRREGULAR_PLURALS.values():
        return True

    # Known irregular singular forms
    if word in IRREGULAR_PLURALS:
        return False

    # Singular words that end in "s" but are not plural
    singular_s_words = {
        "bus", "gas", "glass", "class", "pass", "mass", "grass", "bass",
        "kiss", "miss", "boss", "loss", "cross", "dress", "stress", "press",
        "chess", "mess", "less", "guess", "bless", "process", "success",
        "address", "access", "express", "progress", "congress", "princess",
        "business", "witness", "fitness", "illness", "darkness", "happiness",
        "this", "yes", "us", "plus", "minus", "focus", "campus", "virus",
        "status", "bonus", "genus", "census", "chorus", "circus",
        # Singulars ending in vowel + s that the suffix test below would
        # otherwise take for plurals, plus "lens"
        "alias", "atlas", "bias", "canvas", "chaos", "cosmos", "lens",
    }
    if word in singular_s_words:
        return False

    # Latin/Greek plurals in -a that singularize() maps back to -um / -on
    # but that are missing from the irregular table.
    latin_greek_plurals = {"stadia", "aquaria", "terraria", "automata"}
    if word in latin_greek_plurals:
        return True

    # One- and two-letter words ("s", "as", "os") are never treated as plural
    if len(word) < 3 or not word.endswith("s"):
        return False

    # A trailing "s" marks a plural (cats, tables, cities, boxes, videos)
    # except after "s", "u" or "i": "-ss" is never a plural ending, and
    # "-us"/"-is" words are overwhelmingly singular (apparatus, axis).
    return not word.endswith(("ss", "us", "is"))
562
+
563
+
564
+ def _preserve_case(original: str, converted: str) -> str:
565
+ """
566
+ Preserve the case pattern of the original word in the converted word.
567
+
568
+ Args:
569
+ original: The original word with its case pattern
570
+ converted: The converted word in lowercase
571
+
572
+ Returns:
573
+ The converted word with the original's case pattern applied
574
+ """
575
+ if not original or not converted:
576
+ return converted
577
+
578
+ # If original is all uppercase
579
+ if original.isupper():
580
+ return converted.upper()
581
+
582
+ # If original is title case (first letter uppercase)
583
+ if original[0].isupper() and len(original) > 1 and original[1:].islower():
584
+ return converted.capitalize()
585
+
586
+ # If original has mixed case, try to preserve pattern
587
+ if any(c.isupper() for c in original):
588
+ result = list(converted.lower())
589
+ for i, char in enumerate(original):
590
+ if i < len(result) and char.isupper():
591
+ result[i] = result[i].upper()
592
+ return "".join(result)
593
+
594
+ # Default: return as lowercase
595
+ return converted
596
+
597
+
598
+ # Convenience functions for common use cases
599
def smart_pluralize(word: str, count: int) -> str:
    """
    Pick the singular or plural form of ``word`` to agree with ``count``.

    Args:
        word: The base word, in either singular or plural form
        count: The quantity the word has to agree with

    Returns:
        The singular form when ``count`` equals 1, the plural form for any
        other count; the word is returned as-is when it is already in the
        requested form

    Examples:
        >>> smart_pluralize("cat", 1)
        'cat'
        >>> smart_pluralize("cat", 2)
        'cats'
        >>> smart_pluralize("cat", 0)
        'cats'
    """
    wants_singular = count == 1
    looks_plural = is_plural(word)

    if wants_singular:
        # Only convert when the word is currently plural
        if looks_plural:
            return singularize(word)
        return word

    # Any other count needs the plural; only convert a non-plural word
    if looks_plural:
        return word
    return pluralize(word)
622
+
623
+
624
def format_count(word: str, count: int) -> str:
    """
    Render ``count`` followed by ``word`` in the matching grammatical number.

    Args:
        word: The base word
        count: The quantity to display

    Returns:
        The count, a single space, and the word form chosen by
        ``smart_pluralize`` for that count

    Examples:
        >>> format_count("cat", 1)
        '1 cat'
        >>> format_count("cat", 2)
        '2 cats'
        >>> format_count("mouse", 3)
        '3 mice'
    """
    noun = smart_pluralize(word, count)
    return f"{count} {noun}"