teleutils 0.0.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- teleutils-0.0.1/PKG-INFO +101 -0
- teleutils-0.0.1/README.md +89 -0
- teleutils-0.0.1/pyproject.toml +40 -0
- teleutils-0.0.1/src/teleutils/__init__.py +2 -0
- teleutils-0.0.1/src/teleutils/_logging.py +42 -0
- teleutils-0.0.1/src/teleutils/preprocessing/__init__.py +6 -0
- teleutils-0.0.1/src/teleutils/preprocessing/_number_format.py +299 -0
- teleutils-0.0.1/src/teleutils/robocalls/__init__.py +8 -0
- teleutils-0.0.1/src/teleutils/robocalls/_analyzers.py +154 -0
- teleutils-0.0.1/src/teleutils/robocalls/_extractors.py +332 -0
- teleutils-0.0.1/src/teleutils/robocalls/_transformers.py +600 -0
teleutils-0.0.1/PKG-INFO
ADDED
|
@@ -0,0 +1,101 @@
|
|
|
1
|
+
Metadata-Version: 2.3
|
|
2
|
+
Name: teleutils
|
|
3
|
+
Version: 0.0.1
|
|
4
|
+
Summary: Python Utility Functions Library for Call Details Records ( CDRs ) from Brazilian Telecommunications Carriers
|
|
5
|
+
Author: Maxwel de Souza Freitas
|
|
6
|
+
Author-email: Maxwel de Souza Freitas <maxwel@maxwelfreitas.com.br>
|
|
7
|
+
Requires-Dist: pandas==2.3.3
|
|
8
|
+
Requires-Dist: pyarrow==21.0.0
|
|
9
|
+
Requires-Dist: pyspark==3.5.5
|
|
10
|
+
Requires-Python: >=3.9
|
|
11
|
+
Description-Content-Type: text/markdown
|
|
12
|
+
|
|
13
|
+
<details>
|
|
14
|
+
<summary>Table of Contents</summary>
|
|
15
|
+
<ol>
|
|
16
|
+
<li><a href="#about">About</a></li>
|
|
17
|
+
<li><a href="#scripts-and-files">Scripts and Files</a></li>
|
|
18
|
+
<li><a href="#setup">Setup</a></li>
|
|
19
|
+
<li><a href="#roadmap">Roadmap</a></li>
|
|
20
|
+
<li><a href="#contributing">Contributing</a></li>
|
|
21
|
+
<li><a href="#license">License</a></li>
|
|
22
|
+
</ol>
|
|
23
|
+
</details>
|
|
24
|
+
|
|
25
|
+
# About
|
|
26
|
+
|
|
27
|
+
<img align="left" width="100" height="100" src="" alt="Imagem ou logo do Projeto">
|
|
28
|
+
|
|
29
|
+
<br>
|
|
30
|
+
|
|
31
|
+
Project Description
|
|
32
|
+
|
|
33
|
+
<p align="right">(<a href="#indexerd-md-top">back to top</a>)</p>
|
|
34
|
+
|
|
35
|
+
## Scripts and Files
|
|
36
|
+
|
|
37
|
+
| Script module | Description |
|
|
38
|
+
| --- | --- |
|
|
39
|
+
| <link_to_file> | description |
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
Description of execution/workflow of files and scripts.
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
<p align="right">(<a href="#indexerd-md-top">back to top</a>)</p>
|
|
46
|
+
|
|
47
|
+
# Setup
|
|
48
|
+
|
|
49
|
+
Scripts were intended to be used on a Windows machine with UV package and environment management.
|
|
50
|
+
|
|
51
|
+
For more details, see the [UV documentation](https://docs.astral.sh/uv/)
|
|
52
|
+
|
|
53
|
+
Tests are described in the [tests folder](./tests/README.md)
|
|
54
|
+
|
|
55
|
+
# Roadmap
|
|
56
|
+
|
|
57
|
+
This section presents a simplified view of the roadmap and known issues.
|
|
58
|
+
|
|
59
|
+
For more details, see the [open issues](<put_link_here_to_issues_page>)
|
|
60
|
+
|
|
61
|
+
* [ ] Initial deployment
|
|
62
|
+
* [x] Create the project structure
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
<p align="right">(<a href="#indexerd-md-top">back to top</a>)</p>
|
|
66
|
+
|
|
67
|
+
<!-- CONTRIBUTING -->
|
|
68
|
+
# Contributing
|
|
69
|
+
|
|
70
|
+
Contributions are what make the open source community such an amazing place to learn, inspire, and create. Any contributions you make are **greatly appreciated**.
|
|
71
|
+
|
|
72
|
+
If you have a suggestion that would make this better, please fork the repo and create a pull request. You can also simply open an issue with the tag "enhancement".
|
|
73
|
+
|
|
74
|
+
<p align="right">(<a href="#indexerd-md-top">back to top</a>)</p>
|
|
75
|
+
|
|
76
|
+
<!-- LICENSE -->
|
|
77
|
+
# License
|
|
78
|
+
|
|
79
|
+
Distributed under the GNU General Public License (GPL), version 3. See [`LICENSE.txt`](../../LICENSE).
|
|
80
|
+
|
|
81
|
+
For additional information, please check <https://www.gnu.org/licenses/quick-guide-gplv3.html>
|
|
82
|
+
|
|
83
|
+
This license model was selected with the idea of enabling collaboration of anyone interested in projects listed within this group.
|
|
84
|
+
|
|
85
|
+
It is in line with the Brazilian Public Software directives, as published at: <https://softwarepublico.gov.br/social/articles/0004/5936/Manual_do_Ofertante_Temporario_04.10.2016.pdf>
|
|
86
|
+
|
|
87
|
+
Further reading material can be found at:
|
|
88
|
+
|
|
89
|
+
* <http://copyfree.org/policy/copyleft>
|
|
90
|
+
* <https://opensource.stackexchange.com/questions/9805/can-i-license-my-project-with-an-open-source-license-but-disallow-commercial-use>
|
|
91
|
+
* <https://opensource.stackexchange.com/questions/21/whats-the-difference-between-permissive-and-copyleft-licenses/42#42>
|
|
92
|
+
|
|
93
|
+
<p align="right">(<a href="#indexerd-md-top">back to top</a>)</p>
|
|
94
|
+
|
|
95
|
+
<!-- REFERENCES -->
|
|
96
|
+
## References
|
|
97
|
+
|
|
98
|
+
* [UV Short Guide](https://www.saaspegasus.com/guides/uv-deep-dive/)
|
|
99
|
+
* [Readme Template](https://github.com/othneildrew/Best-README-Template)
|
|
100
|
+
|
|
101
|
+
<p align="right">(<a href="#indexerd-md-top">back to top</a>)</p>
|
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
<details>
|
|
2
|
+
<summary>Table of Contents</summary>
|
|
3
|
+
<ol>
|
|
4
|
+
<li><a href="#about">About</a></li>
|
|
5
|
+
<li><a href="#scripts-and-files">Scripts and Files</a></li>
|
|
6
|
+
<li><a href="#setup">Setup</a></li>
|
|
7
|
+
<li><a href="#roadmap">Roadmap</a></li>
|
|
8
|
+
<li><a href="#contributing">Contributing</a></li>
|
|
9
|
+
<li><a href="#license">License</a></li>
|
|
10
|
+
</ol>
|
|
11
|
+
</details>
|
|
12
|
+
|
|
13
|
+
# About
|
|
14
|
+
|
|
15
|
+
<img align="left" width="100" height="100" src="" alt="Imagem ou logo do Projeto">
|
|
16
|
+
|
|
17
|
+
<br>
|
|
18
|
+
|
|
19
|
+
Project Description
|
|
20
|
+
|
|
21
|
+
<p align="right">(<a href="#indexerd-md-top">back to top</a>)</p>
|
|
22
|
+
|
|
23
|
+
## Scripts and Files
|
|
24
|
+
|
|
25
|
+
| Script module | Description |
|
|
26
|
+
| --- | --- |
|
|
27
|
+
| <link_to_file> | description |
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
Description of execution/workflow of files and scripts.
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
<p align="right">(<a href="#indexerd-md-top">back to top</a>)</p>
|
|
34
|
+
|
|
35
|
+
# Setup
|
|
36
|
+
|
|
37
|
+
Scripts were intended to be used on a Windows machine with UV package and environment management.
|
|
38
|
+
|
|
39
|
+
For more details, see the [UV documentation](https://docs.astral.sh/uv/)
|
|
40
|
+
|
|
41
|
+
Tests are described in the [tests folder](./tests/README.md)
|
|
42
|
+
|
|
43
|
+
# Roadmap
|
|
44
|
+
|
|
45
|
+
This section presents a simplified view of the roadmap and known issues.
|
|
46
|
+
|
|
47
|
+
For more details, see the [open issues](<put_link_here_to_issues_page>)
|
|
48
|
+
|
|
49
|
+
* [ ] Initial deployment
|
|
50
|
+
* [x] Create the project structure
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
<p align="right">(<a href="#indexerd-md-top">back to top</a>)</p>
|
|
54
|
+
|
|
55
|
+
<!-- CONTRIBUTING -->
|
|
56
|
+
# Contributing
|
|
57
|
+
|
|
58
|
+
Contributions are what make the open source community such an amazing place to learn, inspire, and create. Any contributions you make are **greatly appreciated**.
|
|
59
|
+
|
|
60
|
+
If you have a suggestion that would make this better, please fork the repo and create a pull request. You can also simply open an issue with the tag "enhancement".
|
|
61
|
+
|
|
62
|
+
<p align="right">(<a href="#indexerd-md-top">back to top</a>)</p>
|
|
63
|
+
|
|
64
|
+
<!-- LICENSE -->
|
|
65
|
+
# License
|
|
66
|
+
|
|
67
|
+
Distributed under the GNU General Public License (GPL), version 3. See [`LICENSE.txt`](../../LICENSE).
|
|
68
|
+
|
|
69
|
+
For additional information, please check <https://www.gnu.org/licenses/quick-guide-gplv3.html>
|
|
70
|
+
|
|
71
|
+
This license model was selected with the idea of enabling collaboration of anyone interested in projects listed within this group.
|
|
72
|
+
|
|
73
|
+
It is in line with the Brazilian Public Software directives, as published at: <https://softwarepublico.gov.br/social/articles/0004/5936/Manual_do_Ofertante_Temporario_04.10.2016.pdf>
|
|
74
|
+
|
|
75
|
+
Further reading material can be found at:
|
|
76
|
+
|
|
77
|
+
* <http://copyfree.org/policy/copyleft>
|
|
78
|
+
* <https://opensource.stackexchange.com/questions/9805/can-i-license-my-project-with-an-open-source-license-but-disallow-commercial-use>
|
|
79
|
+
* <https://opensource.stackexchange.com/questions/21/whats-the-difference-between-permissive-and-copyleft-licenses/42#42>
|
|
80
|
+
|
|
81
|
+
<p align="right">(<a href="#indexerd-md-top">back to top</a>)</p>
|
|
82
|
+
|
|
83
|
+
<!-- REFERENCES -->
|
|
84
|
+
## References
|
|
85
|
+
|
|
86
|
+
* [UV Short Guide](https://www.saaspegasus.com/guides/uv-deep-dive/)
|
|
87
|
+
* [Readme Template](https://github.com/othneildrew/Best-README-Template)
|
|
88
|
+
|
|
89
|
+
<p align="right">(<a href="#indexerd-md-top">back to top</a>)</p>
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "teleutils"
|
|
3
|
+
version = "0.0.1"
|
|
4
|
+
description = "Python Utility Functions Library for Call Details Records ( CDRs ) from Brazilian Telecommunications Carriers"
|
|
5
|
+
readme = "README.md"
|
|
6
|
+
authors = [
|
|
7
|
+
{ name = "Maxwel de Souza Freitas", email = "maxwel@maxwelfreitas.com.br" }
|
|
8
|
+
]
|
|
9
|
+
requires-python = ">=3.9"
|
|
10
|
+
dependencies = [
|
|
11
|
+
"pandas==2.3.3",
|
|
12
|
+
"pyarrow==21.0.0",
|
|
13
|
+
"pyspark==3.5.5",
|
|
14
|
+
]
|
|
15
|
+
|
|
16
|
+
[project.scripts]
|
|
17
|
+
teleutils = "teleutils:main"
|
|
18
|
+
|
|
19
|
+
[build-system]
|
|
20
|
+
requires = ["uv_build>=0.9.2,<0.10.0"]
|
|
21
|
+
build-backend = "uv_build"
|
|
22
|
+
|
|
23
|
+
[dependency-groups]
|
|
24
|
+
dev = [
|
|
25
|
+
"jupyter>=1.1.1",
|
|
26
|
+
"matplotlib>=3.9.4",
|
|
27
|
+
"pre-commit>=4.3.0",
|
|
28
|
+
]
|
|
29
|
+
|
|
30
|
+
[[tool.uv.index]]
|
|
31
|
+
name = "pypi"
|
|
32
|
+
url = "https://pypi.org/simple/"
|
|
33
|
+
publish-url = "https://upload.pypi.org/legacy/"
|
|
34
|
+
default = true
|
|
35
|
+
|
|
36
|
+
[[tool.uv.index]]
|
|
37
|
+
name = "testpypi"
|
|
38
|
+
url = "https://test.pypi.org/simple/"
|
|
39
|
+
publish-url = "https://test.pypi.org/legacy/"
|
|
40
|
+
explicit = true
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import logging
|
|
4
|
+
from functools import wraps
|
|
5
|
+
from typing import Callable
|
|
6
|
+
|
|
7
|
+
# Convenção para bibliotecas: NullHandler no pacote raiz.
|
|
8
|
+
# Evita o aviso "No handlers could be found" quando o consumidor
|
|
9
|
+
# não configura logging. A configuração real (handlers, formatters,
|
|
10
|
+
# níveis) é responsabilidade da aplicação, não da biblioteca.
|
|
11
|
+
logging.getLogger("teleutils").addHandler(logging.NullHandler())
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def log_operation(method: Callable) -> Callable:
    """
    Wrap a method so that its start, success and failure are logged.

    The logger is resolved on every call from the module name of the
    instance's class (``self.__class__.__module__``), so records are emitted
    under the logger of the module that defines that class.

    Args:
        method: Method to wrap. It is invoked as
            ``method(self, source_file, *args, **kwargs)``.

    Returns:
        The wrapping function, carrying ``method``'s metadata through
        ``functools.wraps``.

    Raises:
        Exception: Anything raised by ``method`` is logged together with its
            traceback and then re-raised unchanged.
    """

    @wraps(method)
    def wrapper(self, source_file: str, *args, **kwargs):
        log = logging.getLogger(self.__class__.__module__)
        operation = method.__name__
        log.info("Iniciando operação [%s]: %s", operation, source_file)
        try:
            outcome = method(self, source_file, *args, **kwargs)
            log.info("Operação [%s] concluída com sucesso.", operation)
            return outcome
        except Exception as error:
            # Logger.exception records at ERROR level and appends the traceback.
            log.exception(
                "Falha na operação [%s]: %s %s", operation, source_file, error
            )
            raise

    return wrapper
|
|
@@ -0,0 +1,299 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Brazilian Phone Number Normalization Module.
|
|
3
|
+
|
|
4
|
+
This module provides functionality for normalizing and validating Brazilian phone numbers
|
|
5
|
+
according to ANATEL (Brazilian National Telecommunications Agency) numbering plans and
|
|
6
|
+
ITU-T E.164 international standard.
|
|
7
|
+
|
|
8
|
+
The module handles various Brazilian number formats including:
|
|
9
|
+
- SMP (Serviço Móvel Pessoal) - Mobile services
|
|
10
|
+
- STFC (Serviço Telefônico Fixo Comutado) - Fixed-line services
|
|
11
|
+
- SME (Serviço Móvel Especializado) - Specialized mobile services
|
|
12
|
+
- SUP (Serviço de Utilidade Pública) - Public utility services
|
|
13
|
+
- CNG (Código Nacional de Gratuidade) - National free-call codes
|
|
14
|
+
|
|
15
|
+
Functions:
|
|
16
|
+
normalize_number: Normalizes a single Brazilian phone number.
|
|
17
|
+
normalize_number_pair: Normalizes a pair of related phone numbers with context.
|
|
18
|
+
|
|
19
|
+
Private Functions:
|
|
20
|
+
_clean_numbers: Removes letters and punctuation from number strings.
|
|
21
|
+
|
|
22
|
+
Constants:
|
|
23
|
+
E164_FULL_NUMBERS: Regex pattern for numbers with length >= 10 digits.
|
|
24
|
+
SMALL_NUMBERS: Regex pattern for numbers with length <= 9 digits.
|
|
25
|
+
PREFFIX: Regex pattern for removing call prefixes.
|
|
26
|
+
|
|
27
|
+
Example:
|
|
28
|
+
>>> normalize_number("(11) 99999-9999")
|
|
29
|
+
('11999999999', True)
|
|
30
|
+
>>> normalize_number("0800-123-4567")
|
|
31
|
+
('8001234567', True)
|
|
32
|
+
|
|
33
|
+
References:
|
|
34
|
+
- ANATEL Numbering Plan: https://www.anatel.gov.br/
|
|
35
|
+
- ITU-T E.164 Standard: https://handle.itu.int/11.1002/1000/10688
|
|
36
|
+
"""
|
|
37
|
+
|
|
38
|
+
import re
|
|
39
|
+
import string
|
|
40
|
+
|
|
41
|
+
#: Regex pattern for matching Brazilian phone numbers with length >= 10 digits.
#: Covers full E.164 format numbers including country code (55), area codes,
#: and various service types (SMP, STFC, CNG, SME) with their specific patterns.
#:
#: The single capturing group holds the number without the optional leading
#: ``55`` and without the optional two-digit code labelled "CSP" in the pattern.
#: Every alternative ends with ``$`` but the pattern has no ``^``, so a search
#: can start anywhere in the string and at most one match is possible.
E164_FULL_NUMBERS = re.compile(
    r"""# (BRAZIL COUNTRY CODE) (CSP) (optional)
    (?:55)?(?:1[2-8]|2[12469]|3[16789]|4[1235679]|5[3568]|6[1235]|7[12456]|8[157]|9[18])?(
    # CN+PREFIXO+MCDU
    # SMP
    (?:1[1-9]9[0-9]{8})$|
    (?:2[12478]9[0-9]{8})$|
    (?:3[1-578]9[0-9]{8})$|
    (?:4[1-9]9[0-9]{8})$|
    (?:5[1345]9[0-9]{8})$|
    (?:6[1-9]9[0-9]{8})$|
    (?:7[134579]9[0-9]{8})$|
    (?:8[1-9]9[0-9]{8})$|
    (?:9[1-9]9[0-9]{8})$|
    # STFC
    (?:1[1-9][2345][0-9]{7})$|
    (?:2[12478][2345][0-9]{7})$|
    (?:3[1-578][2345][0-9]{7})$|
    (?:4[1-9][2345][0-9]{7})$|
    (?:5[1345][2345][0-9]{7})$|
    (?:6[1-9][2345][0-9]{7})$|
    (?:7[134579][2345][0-9]{7})$|
    (?:8[1-9][2345][0-9]{7})$|
    (?:9[1-9][2345][0-9]{7})$|
    # CNG
    (?:[589]00[0-9]{7})$|
    (?:30[03][0-9]{7})$|
    # SME
    (?:1[1-9]7[0789][0-9]{6})$|
    (?:2[124]7[078][0-9]{6})$|
    (?:2778[0-9]{6})$|
    (?:3[147]7[78][0-9]{6})$|
    (?:4[1-478]78[0-9]{6})$|
    (?:5[14]78[0-9]{6})$|
    (?:6[125]78[0-9]{6})$|
    (?:7[135]78[0-9]{6})$|
    (?:8[15]78[0-9]{6})$
    )""",
    re.VERBOSE,
)
|
|
84
|
+
|
|
85
|
+
#: Regex pattern for matching Brazilian phone numbers with length <= 9 digits.
#: Covers local numbers without area codes including mobile (SMP), fixed-line (STFC),
#: specialized mobile (SME), and public utility services (SUP) patterns.
#:
#: The single capturing group holds only the local part: an optional leading
#: ``55`` and an optional two-digit area code (CN) are matched but not captured.
#: Every alternative ends with ``$`` but the pattern has no ``^``, so a search
#: can start anywhere in the string and at most one match is possible.
SMALL_NUMBERS = re.compile(
    r"""# (BRAZIL COUNTRY CODE) (CN) (optional)
    (?:55)?(?:1[1-9]|2[12478]|3[1-578]|4[1-9]|5[1345]|6[1-9]|7[134579]|8[1-9]|9[1-9])?(
    # PREFIXO+MCDU
    # SMP
    (?:9[0-9]{8})$|
    # STFC
    (?:[2345][0-9]{7})$|
    # SME
    (?:7[0789][0-9]{6})$|
    # SUP
    (?:10[024])$|
    (?:1031[234579])$|
    (?:1032[13-9])$|
    (?:1033[124-9])$|
    (?:1034[123578])$|
    (?:1035[1-468])$|
    (?:1036[139])$|
    (?:1038[149])$|
    (?:1039[168])$|
    (?:105[012356789])$|
    (?:106[012467])$|
    (?:1061[0-35-8])$|
    (?:1062[0145])$|
    (?:1063[0137])$|
    (?:1064[4789])$|
    (?:1065[01235])$|
    (?:1066[016])$|
    (?:1067[137])$|
    (?:1068[5-8])$|
    (?:1069[1359])$|
    (?:11[125-8])$|
    (?:12[135789])$|
    (?:13[024568])$|
    (?:133[12])$|
    (?:1358)$|
    (?:14[25678])$|
    (?:15[0-9])$|
    (?:16[0-8])$|
    (?:18[0158])$|
    (?:1746)$|
    (?:19[0-9])$|
    (?:911)$
    )""",
    re.VERBOSE,
)
|
|
134
|
+
|
|
135
|
+
#: Regex pattern for removing call prefixes from Brazilian phone numbers.
#: Removes collect call prefixes (90, 9090), international prefix (00),
#: and national long-distance prefix (0) to normalize numbers.
#:
#: Each alternative is anchored with ``^`` and the alternatives are tried in
#: the order written, so a substitution strips a single leading prefix only.
PREFFIX = re.compile(
    r"""(
    ^90(?:90)?| # collect call preffix
    ^00| # international preffix
    ^0 # national preffix
    )""",
    re.VERBOSE,
)
|
|
146
|
+
|
|
147
|
+
|
|
148
|
+
def _clean_numbers(text):
|
|
149
|
+
"""
|
|
150
|
+
Remove letters and punctuation from a text string, keeping only digits.
|
|
151
|
+
|
|
152
|
+
This private function uses string translation to efficiently remove all
|
|
153
|
+
ASCII letters and punctuation characters, leaving only numeric digits.
|
|
154
|
+
|
|
155
|
+
Args:
|
|
156
|
+
text (str): Input string that may contain letters, punctuation, and digits.
|
|
157
|
+
|
|
158
|
+
Returns:
|
|
159
|
+
str: String containing only numeric digits.
|
|
160
|
+
|
|
161
|
+
Example:
|
|
162
|
+
>>> _clean_numbers("(11) 99999-9999")
|
|
163
|
+
'11999999999'
|
|
164
|
+
>>> _clean_numbers("abc123def456")
|
|
165
|
+
'123456'
|
|
166
|
+
"""
|
|
167
|
+
letters = string.ascii_letters
|
|
168
|
+
punctuation = string.punctuation
|
|
169
|
+
remove_table = str.maketrans("", "", letters + punctuation + " ")
|
|
170
|
+
return text.translate(remove_table)
|
|
171
|
+
|
|
172
|
+
|
|
173
|
+
def normalize_number(subscriber_number, national_destination_code=""):
    """
    Normalize a Brazilian phone number against the module's numbering patterns.

    Args:
        subscriber_number (str or int): The phone number to normalize. It is
            converted with ``str()`` and may contain letters, punctuation and
            call prefixes.
        national_destination_code (str, optional): Area code to prepend when
            the matched number is 8 or 9 digits long. Defaults to "".

    Returns:
        tuple: A two-element tuple containing:
            - str: The normalized number when a pattern matched. Otherwise
              the input text reduced to the part before the first ``;`` and
              with every ``f`` removed.
            - bool: True if the number matched a pattern, False otherwise.

    Processing Steps:
        1. Keeps only the text before the first semicolon
        2. Removes filler characters ('f')
        3. Cleans the text with ``_clean_numbers``
        4. Strips one leading call prefix (collect call, international, national)
        5. Rejects anything longer than 13 digits
        6. Matches ``E164_FULL_NUMBERS`` (10+ digits) or ``SMALL_NUMBERS``
        7. Prepends the area code to 8-9 digit results when one was provided

    Examples:
        >>> normalize_number("(11) 99999-9999")
        ('11999999999', True)
        >>> normalize_number("0800-123-4567")
        ('8001234567', True)
        >>> normalize_number("99999-9999", "11")
        ('11999999999', True)
        >>> normalize_number("invalid")
        ('invalid', False)

    """
    # Only the first ';'-separated field is used; 'f' is treated as filler.
    subscriber_number = str(subscriber_number).partition(";")[0].replace("f", "")

    clean_subscriber_number = _clean_numbers(subscriber_number)
    # remove collect call indicator or the international/national prefix
    clean_subscriber_number = PREFFIX.sub("", clean_subscriber_number)

    if len(clean_subscriber_number) > 13:
        return (subscriber_number, False)

    if len(clean_subscriber_number) >= 10:
        pattern = E164_FULL_NUMBERS
    else:
        pattern = SMALL_NUMBERS

    # Every alternative in both patterns ends with `$`, so there can be at
    # most one match and a single search is enough.
    # NOTE(review): the patterns are not anchored at the start, so digits in
    # front of a valid number are silently ignored (e.g. "7771133334444"
    # yields "1133334444"). Confirm whether that leniency is intended.
    match = pattern.search(clean_subscriber_number)
    if match is None:
        return (subscriber_number, False)

    normalized_subscriber_number = match.group(1)
    if len(normalized_subscriber_number) in (8, 9) and national_destination_code:
        normalized_subscriber_number = (
            f"{national_destination_code}{normalized_subscriber_number}"
        )
    return (normalized_subscriber_number, True)
|
|
241
|
+
|
|
242
|
+
|
|
243
|
+
def normalize_number_pair(number_a, number_b, national_destination_code=""):
    """
    Normalize two related numbers, letting number_a supply number_b's area code.

    ``number_a`` is normalized first, without any area code. When it is valid
    and 10 or 11 digits long, ``number_b`` is normalized with an area code:
    the ``national_destination_code`` argument if one was given, otherwise the
    first two digits of the normalized ``number_a``. A leading pair ending in
    ``0`` is never inferred: among the numbers this module accepts, only the
    non-geographic CNG forms (0800/0300 style) start that way, so the pair is
    not an area code.

    Args:
        number_a (str or int): First phone number, often the calling/originating number.
        number_b (str or int): Second phone number, often the called/destination number.
        national_destination_code (str, optional): Two-digit area code to prepend
            to 8-9 digit local numbers. It is only applied when ``number_a`` is
            valid and 10 or 11 digits long. Defaults to "".

    Returns:
        tuple: A four-element tuple containing:
            - str: Normalized number_a (or the text returned by
              ``normalize_number`` if invalid)
            - bool: True if number_a was successfully normalized
            - str: Normalized number_b (or the text returned by
              ``normalize_number`` if invalid)
            - bool: True if number_b was successfully normalized

    Examples:
        >>> normalize_number_pair("11999999999", "33334444")
        ('11999999999', True, '1133334444', True)
        >>> normalize_number_pair("invalid", "11999999999")
        ('invalid', False, '11999999999', True)
        >>> normalize_number_pair("1133334444", "22225555")
        ('1133334444', True, '1122225555', True)
        >>> normalize_number_pair("0800-123-4567", "33334444")
        ('8001234567', True, '33334444', True)

    Use Case:
        Particularly useful for Call Detail Records (CDRs) where the originating
        number can provide geographic context for local destination numbers.
    """
    normalized_number_a, is_number_a_valid = normalize_number(number_a)

    if is_number_a_valid and len(normalized_number_a) in (10, 11):
        if not national_destination_code:
            inferred_code = normalized_number_a[:2]
            # "80", "30", "50" and "90" come from non-geographic numbers and
            # would turn number_b into a bogus number reported as valid.
            if not inferred_code.endswith("0"):
                national_destination_code = inferred_code
    else:
        # NOTE(review): this also discards an area code passed explicitly by
        # the caller whenever number_a is unusable. Kept as-is; confirm that
        # this is the intended behavior.
        national_destination_code = ""

    normalized_number_b, is_number_b_valid = normalize_number(
        number_b, national_destination_code
    )

    return (
        normalized_number_a,
        is_number_a_valid,
        normalized_number_b,
        is_number_b_valid,
    )
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
# O NullHandler já é registrado ao importar _logging.
|
|
2
|
+
# Aqui apenas garantimos que isso acontece ao carregar o pacote.
|
|
3
|
+
import teleutils._logging # noqa: F401
|
|
4
|
+
from teleutils.robocalls._analyzers import RoboCallsAnalyzer
|
|
5
|
+
from teleutils.robocalls._extractors import RoboCallsExtractor
|
|
6
|
+
from teleutils.robocalls._transformers import RoboCallsTransformer
|
|
7
|
+
|
|
8
|
+
__all__ = ["RoboCallsExtractor", "RoboCallsTransformer", "RoboCallsAnalyzer"]
|