table-stream 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,8 @@
1
+ from .base import HashMap, hashMapName, HashMapDataFrame, HashMapDict, ArrayList, K, T
2
+ from .types import (
3
+ WorkbookData, MessageNotification, Provider, Listener, EVENT_TYPE, VALUE,
4
+ Mediator, ComponentMediator
5
+ )
6
+ from .thread import ControlledThread, ThreadConsumer
7
+ from .sheet import SheetLoader, InterfaceSheetLoad, ParserData
8
+
@@ -0,0 +1,2 @@
1
+ from .mapping import HashMap, K, T, hashMapName, ArrayList
2
+ from .hash_map import HashMapDict, HashMapDataFrame
@@ -0,0 +1,160 @@
1
+ from typing import Any, Literal
2
+ from pandas.core.api import DataFrame
3
+ from table_stream.base.mapping import ArrayList, HashMap, T
4
+ import json
5
+
6
+
7
+ class HashMapDict[K, T](HashMap):
8
+
9
+ def __init__(self, _mapping: dict[K, T] | None = None) -> None:
10
+ super().__init__()
11
+ if _mapping is None:
12
+ _mapping = dict()
13
+ self._mapping: dict[K, T] = _mapping
14
+
15
+ def size_values(self) -> int:
16
+ return len(self._mapping.values())
17
+
18
+ def size_header(self) -> int:
19
+ return self.header().size()
20
+
21
+ def __getitem__(self, key: K) -> T:
22
+ return self._mapping[key]
23
+
24
+ def __setitem__(self, key: K, value: T) -> None:
25
+ self._mapping[key] = value
26
+
27
+ def get_hash_map_name(self) -> Literal['DICT']:
28
+ return "DICT"
29
+
30
+ def get_real_hash_map(self) -> dict[K, T]:
31
+ return self._mapping
32
+
33
+ def set_real_hash_map(self, hash_map: dict[K, T]) -> None:
34
+ self._mapping = hash_map
35
+
36
+ def clear(self) -> None:
37
+ self._mapping.clear()
38
+
39
+ def get_first(self) -> T:
40
+ return self._mapping[self.header().get_first()]
41
+
42
+ def set_first(self, value: T) -> None:
43
+ self._mapping[self.header().get_first()] = value
44
+
45
+ def get_last(self) -> T:
46
+ return self._mapping[self.header().get_last()]
47
+
48
+ def set_last(self, value: T) -> None:
49
+ self._mapping[self.header().get_last()] = value
50
+
51
+ def set_value(self, key: K, value: T) -> None:
52
+ self._mapping[key] = value
53
+
54
+ def get_value(self, key: K) -> T:
55
+ return self._mapping[key]
56
+
57
+ def header(self) -> ArrayList[K]:
58
+ return ArrayList(list(self._mapping.keys()))
59
+
60
+ def values(self) -> ArrayList[T]:
61
+ return ArrayList(list(self._mapping.values()))
62
+
63
+ def delete_items(self, keys: list[K]) -> None:
64
+ for k in keys:
65
+ self._mapping.pop(k)
66
+
67
+ def to_json(self) -> str:
68
+ """
69
+ Converte o mapa real para uma string JSON.
70
+ Nota: As chaves e valores devem ser serializáveis pelo módulo json.
71
+ """
72
+ return json.dumps(self.to_map_str(), ensure_ascii=False, indent=4)
73
+
74
+ def to_map_str(self) -> dict[str, Any]:
75
+ final = dict()
76
+ for i in self.header():
77
+ final[str(i)] = str(self.get_value(i))
78
+ return final
79
+
80
+
81
+ class HashMapDataFrame[K, Series](HashMap):
82
+
83
+ def __init__(self, data: DataFrame | None) -> None:
84
+ super().__init__()
85
+ if data is None:
86
+ data = DataFrame()
87
+ self._data: DataFrame = data
88
+
89
+ def size_values(self) -> int:
90
+ return len(self.values())
91
+
92
+ def size_header(self) -> int:
93
+ return len(self.header())
94
+
95
+ def __getitem__(self, key: K) -> Series:
96
+ return self._data[key]
97
+
98
+ def __setitem__(self, key: K, value: Series) -> None:
99
+ self._data[key] = value
100
+
101
+ def get_hash_map_name(self) -> Literal['DATAFRAME']:
102
+ return "DATAFRAME"
103
+
104
+ def get_real_hash_map(self) -> DataFrame:
105
+ return self._data
106
+
107
+ def set_real_hash_map(self, hash_map: DataFrame) -> None:
108
+ self._data = hash_map
109
+
110
+ def clear(self) -> None:
111
+ self._data = DataFrame()
112
+
113
+ def get_first(self) -> Series:
114
+ return self._data[self.header().get_first()]
115
+
116
+ def set_first(self, value: Series) -> None:
117
+ self._data[self.header().get_first()] = value
118
+
119
+ def get_last(self) -> Series:
120
+ return self._data[self.header().get_last()]
121
+
122
+ def set_last(self, value: T) -> None:
123
+ raise NotImplementedError
124
+
125
+ def set_value(self, key: K, value: Series) -> None:
126
+ self._data[key] = value
127
+
128
+ def get_value(self, key: K) -> Series:
129
+ return self._data[key]
130
+
131
+ def header(self) -> ArrayList[K]:
132
+ return ArrayList(self._data.columns.tolist())
133
+
134
+ def values(self) -> ArrayList[Any]:
135
+ return ArrayList(self._data.values.tolist())
136
+
137
+ def delete_items(self, keys: list[K]) -> None:
138
+ """
139
+ Remove as colunas informadas.
140
+ """
141
+ # axis=1 indica que queremos remover colunas (as chaves do seu mapa)
142
+ # errors='ignore' evita que o código quebre caso uma chave não exista
143
+ #self._data.drop(columns=keys, axis=1, inplace=True, errors='ignore'
144
+ self._data.drop(columns=keys, inplace=True, errors="ignore",)
145
+
146
+ def to_json(self) -> str:
147
+ """
148
+ Serializa o DataFrame para JSON.
149
+ Mantém estrutura por colunas.
150
+ """
151
+ return self._data.astype(str).to_json(orient="columns", force_ascii=False, indent=4)
152
+
153
+ def to_map_str(self) -> dict[str, Any]:
154
+ return self._data.astype('str').to_dict()
155
+
156
+
157
+
158
+
159
+
160
+
@@ -0,0 +1,153 @@
1
+
2
+ from __future__ import annotations
3
+ from abc import abstractmethod, ABC, ABCMeta
4
+ from collections.abc import Iterator
5
+ from typing import Any, Callable, TypeVar, Generic, Union, Literal
6
+ #import pandas as pd
7
+ #from pandas import Series
8
+
9
+
10
# Generic placeholder for the element/value type of the containers below.
T = TypeVar('T')
# Generic placeholder for the key type of a HashMap.
K = TypeVar('K')
# Identifiers a HashMap implementation may return from get_hash_map_name().
hashMapName = Literal['DATAFRAME', 'DICT']
13
+
14
+
15
+ class ArrayList[T](list):
16
+
17
+ def __init__(self, items: list[T] | None = None):
18
+ if items is None:
19
+ items = list()
20
+ super().__init__(items)
21
+
22
+ def set_items(self, items: list[T]):
23
+ self.clear()
24
+ super().__init__(items)
25
+
26
+ def for_each(self, func: Callable[[T], Any | None]) -> None:
27
+ for x in self: func(x)
28
+
29
+ def apply_command(self, func: Callable[[T], Any]) -> ArrayList[Any]:
30
+ return ArrayList([func(x) for x in self])
31
+
32
+ def size(self) -> int:
33
+ return len(self)
34
+
35
+ def empty(self) -> bool:
36
+ return self.size() == 0
37
+
38
+ def get_first(self) -> T:
39
+ return self[0]
40
+
41
+ def get_last(self) -> T:
42
+ return self[-1]
43
+
44
+ def hash(self) -> int:
45
+ return hash(tuple(self))
46
+
47
+ def contains(self, _o: T) -> bool:
48
+ for i in self:
49
+ if i == _o:
50
+ return True
51
+ return False
52
+
53
+
54
+ class HashMap[K, T](metaclass=ABCMeta):
55
+
56
+ def __repr__(self) -> str:
57
+ return f"<HashMap()> {self.header()}"
58
+
59
+ @abstractmethod
60
+ def size_values(self) -> int:
61
+ pass
62
+
63
+ @abstractmethod
64
+ def size_header(self) -> int:
65
+ pass
66
+
67
+ @abstractmethod
68
+ def __getitem__(self, key: K) -> T:
69
+ pass
70
+
71
+ @abstractmethod
72
+ def __setitem__(self, key: K, value: T) -> None:
73
+ pass
74
+
75
+ @abstractmethod
76
+ def get_hash_map_name(self) -> hashMapName:
77
+ pass
78
+
79
+ @abstractmethod
80
+ def get_real_hash_map(self) -> Union[dict[K, T], pd.DataFrame]:
81
+ pass
82
+
83
+ @abstractmethod
84
+ def set_real_hash_map(self, hash_map: dict[K, T] | pd.DataFrame) -> None:
85
+ pass
86
+
87
+ @abstractmethod
88
+ def clear(self) -> None:
89
+ pass
90
+
91
+ @abstractmethod
92
+ def get_first(self) -> T:
93
+ pass
94
+
95
+ @abstractmethod
96
+ def set_first(self, value: T) -> None:
97
+ pass
98
+
99
+ @abstractmethod
100
+ def get_last(self) -> T:
101
+ pass
102
+
103
+ @abstractmethod
104
+ def set_last(self, value: T) -> None:
105
+ pass
106
+
107
+ @abstractmethod
108
+ def set_value(self, key: K, value: T) -> None:
109
+ pass
110
+
111
+ @abstractmethod
112
+ def get_value(self, key: K) -> T:
113
+ pass
114
+
115
+ @abstractmethod
116
+ def header(self) -> ArrayList[K]:
117
+ """
118
+ Retornar as chaves de um dicionário ou columns de um DataFrame()
119
+ """
120
+ pass
121
+
122
+ @abstractmethod
123
+ def values(self) -> ArrayList[T]:
124
+ pass
125
+
126
+ @abstractmethod
127
+ def delete_items(self, keys: list[K]) -> None:
128
+ """
129
+ Apaga chaves e valores, semelhante ao método pop().
130
+ """
131
+ pass
132
+
133
+ @abstractmethod
134
+ def to_json(self) -> str:
135
+ """
136
+ Converte o HashMap() para uma representação no formato json.
137
+ """
138
+ pass
139
+
140
+ @abstractmethod
141
+ def to_map_str(self) -> dict[str, Any]:
142
+ """
143
+ Converte o HashMap() para uma representação no formato dict() python.
144
+ """
145
+ pass
146
+
147
+ @classmethod
148
+ def from_json(cls, data: str) -> HashMap:
149
+ pass
150
+
151
+ @classmethod
152
+ def from_map(cls, data: Any) -> HashMap:
153
+ pass
@@ -0,0 +1,90 @@
1
class CoreException(Exception):
    """
    Base class of the package exceptions.

    The message is kept in ``_message`` in addition to the regular
    exception arguments.
    """

    def __init__(self, message: str = 'Erro') -> None:
        super().__init__(message)
        self._message = message

    def get_message(self) -> str:
        """Return the message as last given to ``__init__``/``set_message``."""
        return self._message

    def set_message(self, message: str) -> None:
        """
        Replace the message.

        ``str(exc)`` becomes ``"<ClassName>: <message>"`` while
        ``get_message()`` returns ``message`` unprefixed.
        """
        self._message = message
        # type(self) instead of __class__: __class__ is bound to the class
        # that defines this method, so subclasses were all reported as
        # "CoreException".
        self.args = (f"{type(self).__name__}: {message}",)

    def launch_exception_handler(self) -> None:
        """Raise this exception instance."""
        raise self
16
+
17
+
18
+ #============================================================#
19
+ # Erros relacionados a HashMap
20
+ #============================================================#
21
class InvalidHashMapTableError(CoreException):
    """HashMap-related error; the default message reads "invalid HashMap"."""

    def __init__(self, message: str = "HashMap inválido") -> None:
        super().__init__(message)
25
+
26
+
27
class InvalidBodyTableError(InvalidHashMapTableError):
    """HashMap error whose default message reports an invalid table body."""

    def __init__(self, message: str = "Erro corpo de tabela inválido") -> None:
        super().__init__(message)
31
+
32
+
33
class SizeTableError(InvalidHashMapTableError):
    """HashMap error whose default message reports an invalid table size."""

    def __init__(self, message: str = "Tamanho de tabela inválido") -> None:
        super().__init__(message)
37
+
38
+
39
+ #============================================================#
40
+ # Erros relacionados a planilhas
41
+ #============================================================#
42
class UndefinedSheetIndex(CoreException):
    """Spreadsheet error; the default message says SheetIndexNames was not defined."""

    def __init__(self, message: str = "SheetIndexNames não foi definido") -> None:
        super().__init__(message)
46
+
47
+
48
class LoadWorkbookError(CoreException):
    """Spreadsheet error; the default message reports a failed Workbook read."""

    def __init__(self, message: str = "Erro ao tentar ler Workbook") -> None:
        super().__init__(message)
52
+
53
+ #============================================================#
54
+ # Erros relacionados a arquivos/bytes de imagem (.png, .jpg, ...)
55
+ #============================================================#
56
class InvalidSourceImageError(CoreException):
    """Image-source error; the default message asks for image bytes."""

    def __init__(self, message: str = "Erro, use bytes de imagem") -> None:
        super().__init__(message)
60
+
61
+
62
+ #============================================================#
63
+ # Erros relacionados a módulos
64
+ #============================================================#
65
class NotImplementedModuleError(CoreException):
    """Module error; the default message says the module is not implemented."""

    def __init__(self, message: str = "Erro, módulo não implementado") -> None:
        super().__init__(message)
69
+
70
+
71
class NotImplementedModuleImageError(NotImplementedModuleError):
    """Module error; the default message names the IMAGEM module as not implemented."""

    def __init__(self, message: str = "Erro, módulo IMAGEM não implementado") -> None:
        super().__init__(message)
75
+
76
+
77
class NotImplementedModulePdfError(NotImplementedModuleError):
    """Module error; the default message names the PDF module as not implemented."""

    def __init__(self, message: str = "Erro, módulo PDF não implementado") -> None:
        super().__init__(message)
81
+
82
+
83
class NotImplementedInvertColor(NotImplementedError):
    """
    Built-in ``NotImplementedError`` subclass whose default message says the
    InvertColor adapter is not implemented.
    """

    def __init__(self, message: str = "Adaptador InvertColor não implementado...") -> None:
        super().__init__(message)
87
+
88
+
89
+
90
+
@@ -0,0 +1,10 @@
1
+ from .excel import ExcelLoadPandasInterface
2
+ from .ods import ODSLoadPandasInterface
3
+ from .csv import (
4
+ CsvLoadPandasInterface, csvEncoding, CsvMapping, csvSeparator, create_csv_mapping,
5
+ CsvSeparatorList, CsvEncodingList,
6
+ )
7
+ from .parse import FilterData, ParserData
8
+ from .interface import InterfaceSheetLoad
9
+ from .load_adapter import SheetLoader
10
+
@@ -0,0 +1,97 @@
1
+ from __future__ import annotations
2
+ from io import BytesIO
3
+ import pandas as pd
4
+ from typing import Literal, Union, TypedDict
5
+
6
+ from table_stream import ArrayList
7
+ from table_stream.erros import LoadWorkbookError
8
+ from table_stream.types.workbook import WorkbookData
9
+ from table_stream.sheet.interface import InterfaceSheetLoad
10
+
11
# Static type of the text encodings accepted by the CSV loader.
csvEncoding = Literal['utf-8', 'iso-8859-1', 'latin1', 'cp1252']
# Runtime list with the same encodings, in the same order as csvEncoding.
CsvEncodingList: list[str] = ['utf-8', 'iso-8859-1', 'latin1', 'cp1252']
# Static type of the column separators accepted by the CSV loader.
csvSeparator = Literal[',', ';', '|', '\t', '_', ' ']
# Runtime list with the same separators; note that ';' comes first here.
CsvSeparatorList: list[str] = [';', ',', '|', '\t', '_', ' ']
15
+
16
+
17
class CsvMapping(TypedDict, total=True):
    """Typed dict with CSV settings; every key is required (``total=True``)."""

    # Text encoding.
    encoding: csvEncoding
    # Separator entry; create_csv_mapping() fills it with ';'.
    separator: csvSeparator
    # The remaining keys are Portuguese names for individual separator
    # characters: comma, semicolon, pipe, tab, space and underscore, going by
    # the values create_csv_mapping() assigns to them.
    virgula: csvSeparator
    ponto_virgula: csvSeparator
    pipe: csvSeparator
    tab: csvSeparator
    esp: csvSeparator
    under: csvSeparator
27
+
28
+
29
def create_csv_mapping() -> CsvMapping:
    """
    Build a new CsvMapping holding the default encoding, the default
    separator and one entry per named separator character.

    A fresh dict is returned on every call.
    """
    named_separators = {
        'virgula': ',',
        'ponto_virgula': ';',
        'pipe': '|',
        'tab': '\t',
        'esp': ' ',
        'under': '_',
    }
    return {'encoding': 'utf-8', 'separator': ';', **named_separators}
41
+
42
+
43
class CsvLoadPandasInterface(InterfaceSheetLoad):
    """
    CSV reader built on pandas.

    A CSV source is exposed as a workbook with a single sheet named
    "Sheet1".
    """

    def __init__(
            self,
            file_csv: Union[str, BytesIO] | None,
            delimiter: csvSeparator = "\t",
            encoding: csvEncoding = 'utf-8'
    ):
        """
        Args:
            file_csv: Path or in-memory buffer of the CSV; may be ``None``
                and provided later through ``set_file_sheet``.
            delimiter: Column separator handed to ``pandas.read_csv``.
            encoding: Text encoding handed to ``pandas.read_csv``.
        """
        super().__init__()
        self._file_csv: Union[str, BytesIO] | None = file_csv
        self.delimiter: csvSeparator = delimiter
        self.encoding: csvEncoding = encoding

    def get_sheet_names(self) -> ArrayList[str]:
        """Return the single fixed sheet name, "Sheet1"."""
        return ArrayList(["Sheet1"])

    def _check_file_csv(self):
        """Validate the configured source via the inherited ``check_file``."""
        self.check_file()

    def set_file_sheet(self, f: str | BytesIO) -> None:
        """Set the CSV source (path or buffer)."""
        self._file_csv = f

    def get_file_sheet(self) -> str | BytesIO:
        """Return the configured CSV source."""
        return self._file_csv

    def hash(self) -> int:
        """Return ``hash()`` of the configured source, after validating it."""
        self._check_file_csv()
        return hash(self._file_csv)

    def get_workbook_data(self, sheet_name: str | None = None) -> WorkbookData:
        """
        Read the CSV and return it under the "Sheet1" key.

        Every column is read as ``str`` and missing cells become ``''``.
        ``sheet_name`` is accepted for interface compatibility and ignored.

        Raises:
            FileNotFoundError: from ``check_file`` for a missing source.
            LoadWorkbookError: when reading or storing the data fails; the
                original exception is chained as its cause.
        """
        self._check_file_csv()
        source = self._file_csv
        if isinstance(source, BytesIO):
            # read_csv consumes the stream; rewind so a second call (every
            # get_sheet/get_sheet_at goes through here) reads the data again
            # instead of hitting end-of-stream.
            source.seek(0)
        workbook_data = WorkbookData()
        try:
            # Force everything to be read as str.
            workbook_data.set_value(
                "Sheet1",
                pd.read_csv(source, sep=self.delimiter, encoding=self.encoding, dtype=str).fillna('')
            )
        except Exception as e:
            raise LoadWorkbookError(f"{__class__.__name__} Error: {e}") from e
        return workbook_data

    def get_type_load(self) -> Literal[".csv"]:
        """Return the file extension handled by this loader."""
        return ".csv"
90
+
91
+
92
+ __all__ = [
93
+ 'csvEncoding', 'CsvMapping', 'csvSeparator', 'CsvLoadPandasInterface',
94
+ 'create_csv_mapping', 'CsvEncodingList', 'CsvSeparatorList',
95
+ ]
96
+
97
+
@@ -0,0 +1,45 @@
1
+ from __future__ import annotations
2
+ from io import BytesIO
3
+ from typing import Union, Literal
4
+ import pandas as pd
5
+ from table_stream.base.hash_map import ArrayList
6
+ from table_stream.types.workbook import WorkbookData
7
+ from table_stream.sheet.interface import InterfaceSheetLoad
8
+
9
+
10
class ExcelLoadPandasInterface(InterfaceSheetLoad):
    """Excel (.xlsx) reader built on pandas."""

    def __init__(self, xlsx_file: Union[str, BytesIO] | None):
        """
        Args:
            xlsx_file: Path or in-memory buffer of the workbook; may be
                ``None`` and provided later through ``set_file_sheet``.
        """
        super().__init__()
        self._xlsx_file: Union[str, BytesIO] | None = xlsx_file

    def get_sheet_names(self) -> ArrayList[str]:
        """Return the names of all sheets in the workbook, as ``str``."""
        self._check_file()
        # The context manager closes the ExcelFile once the names are read;
        # it was previously left open.
        with pd.ExcelFile(self._xlsx_file) as reader:
            return ArrayList([str(name) for name in reader.sheet_names])

    def _check_file(self) -> None:
        """Validate the configured source via the inherited ``check_file``."""
        self.check_file()

    def set_file_sheet(self, f: str | BytesIO) -> None:
        """Set the workbook source (path or buffer)."""
        self._xlsx_file = f

    def get_file_sheet(self) -> str | BytesIO:
        """Return the configured workbook source."""
        return self._xlsx_file

    def hash(self) -> int:
        """Return ``hash()`` of the configured source, after validating it."""
        self.check_file()
        return hash(self.get_file_sheet())

    def get_workbook_data(self, sheet_name: str | None = None) -> WorkbookData:
        """
        Read the workbook into a WorkbookData.

        Args:
            sheet_name: Sheet to read. ``None`` reads every sheet
                (``pandas.read_excel(..., sheet_name=None)``).

        Raises:
            FileNotFoundError: from ``check_file`` for a missing source.
        """
        # Validate first, as get_sheet_names does, so a missing source is
        # reported as FileNotFoundError rather than a pandas error.
        self._check_file()
        if sheet_name is None:
            return WorkbookData(pd.read_excel(self.get_file_sheet(), sheet_name=None))
        return WorkbookData({
            sheet_name: pd.read_excel(self.get_file_sheet(), sheet_name=sheet_name),
        })

    def get_type_load(self) -> Literal[".xlsx"]:
        """Return the file extension handled by this loader."""
        return ".xlsx"
42
+
43
+
44
+ __all__ = ['ExcelLoadPandasInterface']
45
+
@@ -0,0 +1,25 @@
1
+ from __future__ import annotations
2
+ import re
3
+
4
+
5
+ #===========================================================#
6
+ # Funções Auxiliares de Leitura XML
7
+ #===========================================================#
8
+
9
def column_coord_to_index(coord: str) -> int:
    """
    Convert a cell coordinate (e.g. 'AZ25') to its 1-based column index
    (AZ=52).

    Only the leading run of ASCII letters is used; letter case is ignored.

    Raises:
        ValueError: when ``coord`` does not start with a letter.
    """
    leading_letters = re.match(r'([A-Za-z]+)', coord)
    if leading_letters is None:
        raise ValueError(f"Coordenada inválida: {coord}")

    letters = leading_letters.group(1).upper()
    # Bijective base 26: the rightmost letter weighs 26**0, the next one
    # 26**1, and so on, with A=1 ... Z=26.
    return sum(
        (ord(letter) - ord('A') + 1) * 26 ** position
        for position, letter in enumerate(reversed(letters))
    )
23
+
24
+
25
+
@@ -0,0 +1,63 @@
1
+ from __future__ import annotations
2
+ import os
3
+ from abc import ABCMeta, abstractmethod
4
+ from typing import Literal
5
+ from io import BytesIO
6
+ from table_stream.types.workbook import WorkbookData
7
+ from table_stream.base.hash_map import ArrayList
8
+ from pandas import DataFrame
9
+
10
+
11
# File extensions a sheet loader may report from get_type_load().
sheetExtension = Literal['.csv', '.xlsx', '.ods']
12
+
13
+
14
class InterfaceSheetLoad(metaclass=ABCMeta):
    """
    Abstract interface of a spreadsheet loader.

    Subclasses supply the source handling and the actual reading; this base
    adds source validation and single-sheet helpers on top.
    """

    def check_file(self):
        """
        Validate the configured source.

        Raises:
            FileNotFoundError: when no source is set, or when the source is
                a ``str`` path that does not exist.
        """
        # Read the source once: the getter is abstract and was previously
        # called up to three times.
        source = self.get_file_sheet()
        if source is None:
            raise FileNotFoundError("Nenhum arquivo de planilha foi definido")
        if isinstance(source, str) and not os.path.exists(source):
            raise FileNotFoundError(f"Arquivo não encontrado: {source}")

    @abstractmethod
    def get_type_load(self) -> sheetExtension:
        """Return the file extension handled by the loader."""
        pass

    @abstractmethod
    def set_file_sheet(self, f: str | BytesIO) -> None:
        """Set the source (path or in-memory buffer)."""
        pass

    @abstractmethod
    def get_file_sheet(self) -> str | BytesIO:
        """Return the configured source."""
        pass

    @abstractmethod
    def hash(self) -> int:
        """Return an integer hash for the loader's source."""
        pass

    @abstractmethod
    def get_workbook_data(self, sheet_name: str | None = None) -> WorkbookData:
        """
        Return a key:value set where the name of each sheet (tab) of the
        spreadsheet points to the corresponding DataFrame.
        """
        pass

    @abstractmethod
    def get_sheet_names(self) -> ArrayList[str]:
        """Return the names of the available sheets."""
        pass

    def get_sheet_at(self, idx: int) -> DataFrame:
        """Return the sheet whose name is at position ``idx`` of ``get_sheet_names()``."""
        sheet_name = self.get_sheet_names()[idx]
        return self.get_sheet(sheet_name)

    def get_sheet(self, sheet_name: str) -> DataFrame:
        """Return the first entry of ``get_workbook_data(sheet_name)``."""
        return self.get_workbook_data(sheet_name).get_first()
57
+
58
+
59
+ __all__ = ['InterfaceSheetLoad', 'sheetExtension']
60
+
61
+
62
+
63
+