tugaphone 0.0.2a1__py3-none-any.whl → 0.1.0a1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
tugaphone/version.py CHANGED
@@ -1,7 +1,7 @@
1
1
  # START_VERSION_BLOCK
2
2
  VERSION_MAJOR = 0
3
- VERSION_MINOR = 0
4
- VERSION_BUILD = 2
3
+ VERSION_MINOR = 1
4
+ VERSION_BUILD = 0
5
5
  VERSION_ALPHA = 1
6
6
  # END_VERSION_BLOCK
7
7
 
@@ -0,0 +1,12 @@
1
+ Metadata-Version: 2.4
2
+ Name: tugaphone
3
+ Version: 0.1.0a1
4
+ Home-page: https://github.com/TigreGotico/tugaphone
5
+ Author: JarbasAi
6
+ Author-email: jarbasai@mailfence.com
7
+ Requires-Dist: brill-postagger
8
+ Requires-Dist: unicode-rbnf
9
+ Dynamic: author
10
+ Dynamic: author-email
11
+ Dynamic: home-page
12
+ Dynamic: requires-dist
@@ -0,0 +1,12 @@
1
+ tugaphone/__init__.py,sha256=7VnC2a-ou5Y0mYxHtHOSBsidiYFklZVRZVLr78tViVY,5282
2
+ tugaphone/lexicon.py,sha256=rwZQtU1mP7PboMu2480e9BSMr5J9oAgfEVdQX93iev0,6596
3
+ tugaphone/number_utils.py,sha256=XhU4gUxaX5Vz3hFoP-Vkx3C75hmoKGvwZ1jbOEbZAPA,14727
4
+ tugaphone/pos.py,sha256=d3rBcSUySjvynJcCLyDXKNy1sRK3LAzloImrEin9izw,5666
5
+ tugaphone/regional_dict.csv,sha256=173QZgoDrCYVlDIXDIq70fn56zJ9f3dfLfA95_kQvhY,49216388
6
+ tugaphone/syl.py,sha256=VtVP-BqFF3wnikBGxhBK9PkVHcqj6gE1tkrn62CNFnA,51050
7
+ tugaphone/tokenizer.py,sha256=5tgXuIS_aDe6xJwPPQ3rprBC4KX7Kh3J_AGcpdobUnM,137800
8
+ tugaphone/version.py,sha256=mTZ0SIYsV1IVEOlxa2pu3LKW4dZCFy4l30LadddrvaQ,237
9
+ tugaphone-0.1.0a1.dist-info/METADATA,sha256=3tPens-TOyUxkk37WCUnrEoV1PEGKbuL4nQFDBaVvIY,300
10
+ tugaphone-0.1.0a1.dist-info/WHEEL,sha256=SmOxYU7pzNKBqASvQJ7DjX3XGUF92lrGhMb3R6_iiqI,91
11
+ tugaphone-0.1.0a1.dist-info/top_level.txt,sha256=HJb1X0j2g_NMNmGEHF6IfJEPLPWseI3ShEwCYfYK0dk,10
12
+ tugaphone-0.1.0a1.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: bdist_wheel (0.45.1)
2
+ Generator: setuptools (79.0.1)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
tugaphone/espeak.py DELETED
@@ -1,164 +0,0 @@
1
- """multilingual phonemizers"""
2
-
3
- import json
4
- import os
5
- from langcodes import tag_distance
6
- import subprocess
7
- from typing import List, Dict, Optional
8
-
9
- import numpy as np
10
- import onnxruntime
11
- import requests
12
-
13
-
14
- class EspeakError(Exception):
15
- """Custom exception for espeak-ng related errors."""
16
- pass
17
-
18
-
19
- class EspeakPhonemizer:
20
- """
21
- A phonemizer class that uses the espeak-ng command-line tool to convert text into phonemes.
22
- It segments the input text heuristically based on punctuation to mimic clause-by-clause processing.
23
- """
24
- ESPEAK_LANGS = ['es-419', 'ca', 'qya', 'ga', 'et', 'ky', 'io', 'fa-latn', 'en-gb', 'fo', 'haw', 'kl',
25
- 'ta', 'ml', 'gd', 'sd', 'es', 'hy', 'ur', 'ro', 'hi', 'or', 'ti', 'ca-va', 'om', 'tr', 'pa',
26
- 'smj', 'mk', 'bg', 'cv', "fr", 'fi', 'en-gb-x-rp', 'ru', 'mt', 'an', 'mr', 'pap', 'vi', 'id',
27
- 'fr-be', 'ltg', 'my', 'nl', 'shn', 'ba', 'az', 'cmn', 'da', 'as', 'sw',
28
- 'piqd', 'en-us', 'hr', 'it', 'ug', 'th', 'mi', 'cy', 'ru-lv', 'ia', 'tt', 'hu', 'xex', 'te', 'ne',
29
- 'eu', 'ja', 'bpy', 'hak', 'cs', 'en-gb-scotland', 'hyw', 'uk', 'pt', 'bn', 'mto', 'yue',
30
- 'be', 'gu', 'sv', 'sl', 'cmn-latn-pinyin', 'lfn', 'lv', 'fa', 'sjn', 'nog', 'ms',
31
- 'vi-vn-x-central', 'lt', 'kn', 'he', 'qu', 'ca-ba', 'quc', 'nb', 'sk', 'tn', 'py', 'si', 'de',
32
- 'ar', 'en-gb-x-gbcwmd', 'bs', 'qdb', 'sq', 'sr', 'tk', 'en-029', 'ht', 'ru-cl', 'af', 'pt-br',
33
- 'fr-ch', 'ka', 'en-gb-x-gbclan', 'ko', 'is', 'ca-nw', 'gn', 'kok', 'la', 'lb', 'am', 'kk', 'ku',
34
- 'kaa', 'jbo', 'eo', 'uz', 'nci', 'vi-vn-x-south', 'el', 'pl', 'grc', ]
35
-
36
- @classmethod
37
- def get_lang(cls, target_lang: str) -> str:
38
- """
39
- Validates and returns the closest supported language code.
40
-
41
- Args:
42
- target_lang (str): The language code to validate.
43
-
44
- Returns:
45
- str: The validated language code.
46
-
47
- Raises:
48
- ValueError: If the language code is unsupported.
49
- """
50
- if target_lang.lower() == "en-gb":
51
- return "en-gb-x-rp"
52
- if target_lang in cls.ESPEAK_LANGS:
53
- return target_lang
54
- if target_lang.lower().split("-")[0] in cls.ESPEAK_LANGS:
55
- return target_lang.lower().split("-")[0]
56
- return cls.match_lang(target_lang, cls.ESPEAK_LANGS)
57
-
58
- @staticmethod
59
- def match_lang(target_lang: str, valid_langs: List[str]) -> str:
60
- """
61
- Validates and returns the closest supported language code.
62
-
63
- Args:
64
- target_lang (str): The language code to validate.
65
-
66
- Returns:
67
- str: The validated language code.
68
-
69
- Raises:
70
- ValueError: If the language code is unsupported.
71
- """
72
- if target_lang in valid_langs:
73
- return target_lang
74
- best_lang = "und"
75
- best_distance = 10000000
76
- for l in valid_langs:
77
- try:
78
- distance: int = tag_distance(l, target_lang)
79
- except:
80
- try:
81
- l = f"{l.split('-')[0]}-{l.split('-')[1]}"
82
- distance: int = tag_distance(l, target_lang)
83
- except:
84
- try:
85
- distance: int = tag_distance(l.split('-')[0], target_lang)
86
- except:
87
- continue
88
- if distance < best_distance:
89
- best_lang, best_distance = l, distance
90
-
91
- # If the score is low (meaning a good match), return the language
92
- if best_distance <= 10:
93
- return best_lang
94
- # Otherwise, raise an error for unsupported language
95
- raise ValueError(f"unsupported language code: {target_lang}")
96
-
97
- @staticmethod
98
- def _run_espeak_command(args: List[str], input_text: str = None, check: bool = True) -> str:
99
- """
100
- Helper function to run espeak-ng commands via subprocess.
101
- Executes 'espeak-ng' with the given arguments and input text.
102
- Captures stdout and stderr, and raises EspeakError on failure.
103
-
104
- Args:
105
- args (List[str]): A list of command-line arguments for espeak-ng.
106
- input_text (str, optional): The text to pass to espeak-ng's stdin. Defaults to None.
107
- check (bool, optional): If True, raises a CalledProcessError if the command returns a non-zero exit code. Defaults to True.
108
-
109
- Returns:
110
- str: The stripped standard output from the espeak-ng command.
111
-
112
- Raises:
113
- EspeakError: If espeak-ng command is not found, or if the subprocess call fails.
114
- """
115
- command: List[str] = ['espeak-ng'] + args
116
- try:
117
- process: subprocess.CompletedProcess = subprocess.run(
118
- command,
119
- input=input_text,
120
- capture_output=True,
121
- text=True,
122
- check=check,
123
- encoding='utf-8',
124
- errors='replace' # Replaces unencodable characters with a placeholder
125
- )
126
- return process.stdout.strip()
127
- except FileNotFoundError:
128
- raise EspeakError(
129
- "espeak-ng command not found. Please ensure espeak-ng is installed "
130
- "and available in your system's PATH."
131
- )
132
- except subprocess.CalledProcessError as e:
133
- raise EspeakError(
134
- f"espeak-ng command failed with error code {e.returncode}:\n"
135
- f"STDOUT: {e.stdout}\n"
136
- f"STDERR: {e.stderr}"
137
- )
138
- except Exception as e:
139
- raise EspeakError(f"An unexpected error occurred while running espeak-ng: {e}")
140
-
141
- def phonemize(self, text: str, lang: str) -> str:
142
- lang = self.get_lang(lang)
143
- return self._run_espeak_command(
144
- ['-q', '-x', '--ipa', '-v', lang],
145
- input_text=text
146
- ).replace("\n", " . ")
147
-
148
-
149
-
150
-
151
- if __name__ == "__main__":
152
-
153
- espeak = EspeakPhonemizer()
154
-
155
- lang = "en-gb"
156
-
157
- text1 = "Hello, world. How are you?"
158
-
159
- print("\n--- Getting phonemes for 'Hello, world. How are you?' ---")
160
- phonemes1 = espeak.phonemize(text1, lang)
161
-
162
- print(f" Espeak Phonemes: {phonemes1}")
163
-
164
-