supertable 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- supertable-0.1.0/LICENSE +84 -0
- supertable-0.1.0/MANIFEST.in +17 -0
- supertable-0.1.0/PKG-INFO +93 -0
- supertable-0.1.0/README.md +73 -0
- supertable-0.1.0/pyproject.toml +25 -0
- supertable-0.1.0/setup.cfg +4 -0
- supertable-0.1.0/setup.py +21 -0
- supertable-0.1.0/supertable/__init__.py +0 -0
- supertable-0.1.0/supertable/config/__init__.py +0 -0
- supertable-0.1.0/supertable/config/defaults.py +81 -0
- supertable-0.1.0/supertable/config/homedir.py +34 -0
- supertable-0.1.0/supertable/data_reader.py +213 -0
- supertable-0.1.0/supertable/data_writer.py +100 -0
- supertable-0.1.0/supertable/history_cleaner.py +99 -0
- supertable-0.1.0/supertable/locking/__init__.py +3 -0
- supertable-0.1.0/supertable/locking/file_lock.py +104 -0
- supertable-0.1.0/supertable/locking/locking.py +162 -0
- supertable-0.1.0/supertable/locking/locking_backend.py +6 -0
- supertable-0.1.0/supertable/locking/redis_lock.py +70 -0
- supertable-0.1.0/supertable/meta_reader.py +182 -0
- supertable-0.1.0/supertable/plan_extender.py +97 -0
- supertable-0.1.0/supertable/processing.py +334 -0
- supertable-0.1.0/supertable/query_plan_manager.py +29 -0
- supertable-0.1.0/supertable/rbac/__init__.py +0 -0
- supertable-0.1.0/supertable/rbac/access_control.py +229 -0
- supertable-0.1.0/supertable/rbac/filter_builder.py +67 -0
- supertable-0.1.0/supertable/rbac/permissions.py +33 -0
- supertable-0.1.0/supertable/rbac/role_manager.py +149 -0
- supertable-0.1.0/supertable/rbac/row_column_security.py +53 -0
- supertable-0.1.0/supertable/rbac/user_manager.py +236 -0
- supertable-0.1.0/supertable/simple_table.py +192 -0
- supertable-0.1.0/supertable/staging_area.py +65 -0
- supertable-0.1.0/supertable/storage/__init__.py +0 -0
- supertable-0.1.0/supertable/storage/azure_storage.py +199 -0
- supertable-0.1.0/supertable/storage/gcp_storage.py +0 -0
- supertable-0.1.0/supertable/storage/local_storage.py +128 -0
- supertable-0.1.0/supertable/storage/minio_storage.py +218 -0
- supertable-0.1.0/supertable/storage/s3_storage.py +233 -0
- supertable-0.1.0/supertable/storage/storage_factory.py +29 -0
- supertable-0.1.0/supertable/storage/storage_interface.py +105 -0
- supertable-0.1.0/supertable/super_table.py +275 -0
- supertable-0.1.0/supertable/utils/__init__.py +0 -0
- supertable-0.1.0/supertable/utils/helper.py +118 -0
- supertable-0.1.0/supertable/utils/sql_parser.py +131 -0
- supertable-0.1.0/supertable/utils/timer.py +125 -0
- supertable-0.1.0/supertable.egg-info/PKG-INFO +93 -0
- supertable-0.1.0/supertable.egg-info/SOURCES.txt +47 -0
- supertable-0.1.0/supertable.egg-info/dependency_links.txt +1 -0
- supertable-0.1.0/supertable.egg-info/top_level.txt +1 -0
supertable-0.1.0/LICENSE
ADDED
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
SUPER TABLE PUBLIC USE LICENSE (STPUL) v1.0
|
|
2
|
+
|
|
3
|
+
1. Definitions
|
|
4
|
+
1.1. “Software” refers to the source code, binary code, documentation,
|
|
5
|
+
and related materials for the project known as “SuperTable.”
|
|
6
|
+
1.2. “Licensor” refers to Kladna Soft Kft.
|
|
7
|
+
1.3. “You” (or “Your”) means the individual or entity exercising the
|
|
8
|
+
rights granted under this License.
|
|
9
|
+
|
|
10
|
+
2. Grant of License
|
|
11
|
+
2.1. Personal, Non-Commercial, and Evaluation Use
|
|
12
|
+
- You are granted a worldwide, royalty-free, non-exclusive,
|
|
13
|
+
non-transferable license to download, install, and use the
|
|
14
|
+
Software solely for personal, non-commercial, or evaluation
|
|
15
|
+
purposes at no cost.
|
|
16
|
+
- Evaluation use is limited to internal testing and shall not
|
|
17
|
+
exceed 30 days unless otherwise authorized in writing by the
|
|
18
|
+
Licensor.
|
|
19
|
+
|
|
20
|
+
2.2. Commercial or Enterprise Use
|
|
21
|
+
- If You wish to use the Software for commercial purposes,
|
|
22
|
+
within an enterprise environment, or as part of any product
|
|
23
|
+
or service provided to a third party, You must obtain a
|
|
24
|
+
separate commercial license from the Licensor. Fees may apply.
|
|
25
|
+
|
|
26
|
+
3. No Modification
|
|
27
|
+
3.1. You may not modify, reverse engineer, decompile, disassemble,
|
|
28
|
+
create derivative works from, or otherwise alter the Software
|
|
29
|
+
in whole or in part without the prior written consent of the
|
|
30
|
+
Licensor.
|
|
31
|
+
|
|
32
|
+
4. No Redistribution
|
|
33
|
+
4.1. You may not publish, distribute, sublicense, or otherwise make
|
|
34
|
+
the Software available to any third party, unless You have
|
|
35
|
+
obtained explicit written permission from the Licensor.
|
|
36
|
+
|
|
37
|
+
5. Ownership and Reservation of Rights
|
|
38
|
+
5.1. All rights, title, and interest in the Software remain with the
|
|
39
|
+
Licensor. This License does not transfer any ownership rights.
|
|
40
|
+
5.2. Access to the source code is provided for transparency and
|
|
41
|
+
evaluation only. It does not constitute a grant of rights
|
|
42
|
+
beyond those explicitly stated in this License.
|
|
43
|
+
5.3. The Licensor reserves all rights not expressly granted to You
|
|
44
|
+
under this License.
|
|
45
|
+
|
|
46
|
+
6. Disclaimer of Warranties
|
|
47
|
+
6.1. THE SOFTWARE IS PROVIDED “AS IS,” WITHOUT WARRANTY OF ANY KIND,
|
|
48
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
|
49
|
+
OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, AND
|
|
50
|
+
NON-INFRINGEMENT. THE ENTIRE RISK AS TO THE QUALITY AND
|
|
51
|
+
PERFORMANCE OF THE SOFTWARE IS WITH YOU.
|
|
52
|
+
|
|
53
|
+
7. Limitation of Liability
|
|
54
|
+
7.1. IN NO EVENT SHALL THE LICENSOR BE LIABLE FOR ANY INDIRECT,
|
|
55
|
+
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
|
56
|
+
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
|
|
57
|
+
GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
|
58
|
+
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
|
|
59
|
+
WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
|
60
|
+
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
|
|
61
|
+
THE SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
62
|
+
|
|
63
|
+
8. Termination
|
|
64
|
+
8.1. This License and the rights granted hereunder will terminate
|
|
65
|
+
automatically if You fail to comply with any term or condition
|
|
66
|
+
of this License. Upon termination, You must cease all use of
|
|
67
|
+
the Software and destroy any copies of the Software in Your
|
|
68
|
+
possession or control.
|
|
69
|
+
8.2. The Licensor reserves the right to revoke this License in the
|
|
70
|
+
event of a violation or at its sole discretion, subject to
|
|
71
|
+
applicable law.
|
|
72
|
+
|
|
73
|
+
9. Governing Law
|
|
74
|
+
9.1. This License shall be governed by and construed in accordance
|
|
75
|
+
with the laws of Hungary, without regard to its conflict of
|
|
76
|
+
law principles.
|
|
77
|
+
|
|
78
|
+
10. Contact Information
|
|
79
|
+
10.1. For inquiries about commercial licensing or any other questions
|
|
80
|
+
regarding this License, please contact:
|
|
81
|
+
lkupas@kladnasoft.com
|
|
82
|
+
|
|
83
|
+
BY USING THE SOFTWARE, YOU ACKNOWLEDGE THAT YOU HAVE READ THIS LICENSE,
|
|
84
|
+
UNDERSTAND IT, AND AGREE TO BE BOUND BY ITS TERMS AND CONDITIONS.
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
# MANIFEST.in
|
|
2
|
+
|
|
3
|
+
# Essential top‑level docs
|
|
4
|
+
include README.md
|
|
5
|
+
include LICENSE
|
|
6
|
+
include pyproject.toml
|
|
7
|
+
#include setup.py
|
|
8
|
+
|
|
9
|
+
# Remove bulky or dev‑only content
|
|
10
|
+
prune examples
|
|
11
|
+
prune tests
|
|
12
|
+
prune tmp
|
|
13
|
+
prune generator
|
|
14
|
+
prune */__pycache__
|
|
15
|
+
|
|
16
|
+
# Exclude compiled artifacts and large sample data
|
|
17
|
+
global-exclude *.py[cod] *.parquet *.csv *.json
|
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: supertable
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: SuperTable revolutionizes data management by integrating multiple basic tables into a single, cohesive framework.
|
|
5
|
+
Home-page: https://github.com/kladnasoft/supertable
|
|
6
|
+
Author: Levente Kupas
|
|
7
|
+
Author-email: Levente Kupas <lkupas@kladnasoft.com>
|
|
8
|
+
License: Super Table Public Use License (STPUL) v1.0
|
|
9
|
+
Project-URL: Homepage, https://github.com/kladnasoft/supertable
|
|
10
|
+
Classifier: Programming Language :: Python :: 3
|
|
11
|
+
Classifier: License :: Other/Proprietary License
|
|
12
|
+
Classifier: Operating System :: OS Independent
|
|
13
|
+
Requires-Python: >=3.10
|
|
14
|
+
Description-Content-Type: text/markdown
|
|
15
|
+
License-File: LICENSE
|
|
16
|
+
Dynamic: author
|
|
17
|
+
Dynamic: home-page
|
|
18
|
+
Dynamic: license-file
|
|
19
|
+
Dynamic: requires-python
|
|
20
|
+
|
|
21
|
+
# SuperTable
|
|
22
|
+
|
|
23
|
+

|
|
24
|
+

|
|
25
|
+
|
|
26
|
+
**SuperTable — The simplest data warehouse & cataloging system.**
|
|
27
|
+
A high‑performance, lightweight transaction catalog that integrates multiple
|
|
28
|
+
basic tables into a single, cohesive framework designed for ultimate
|
|
29
|
+
efficiency.
|
|
30
|
+
|
|
31
|
+
Whether you are processing massive datasets or continuously evolving data
|
|
32
|
+
structures, SuperTable lets you query your information effortlessly. It
|
|
33
|
+
automatically creates and manages tables so you can start running SQL queries
|
|
34
|
+
immediately—no complicated schemas or manual joins required.
|
|
35
|
+
|
|
36
|
+
By holding all your data in one place, SuperTable removes complexity, boosts
|
|
37
|
+
efficiency, and provides a unified view of your information. From startups to
|
|
38
|
+
enterprises, SuperTable helps you make better decisions, faster.
|
|
39
|
+
|
|
40
|
+
---
|
|
41
|
+
|
|
42
|
+
## Key Features
|
|
43
|
+
|
|
44
|
+
- **Automatic table creation**
|
|
45
|
+
Load your data and SuperTable instantly builds the required tables and
|
|
46
|
+
columns—no predefined schema or extra setup.
|
|
47
|
+
|
|
48
|
+
- **Self‑referencing architecture**
|
|
49
|
+
Combine and analyze data across tables without writing manual joins; tables
|
|
50
|
+
can dynamically reference each other for richer insights.
|
|
51
|
+
|
|
52
|
+
- **Staging module with history**
|
|
53
|
+
Upload files to a staging area and reload any version at any time, keeping a
|
|
54
|
+
complete audit trail for tracking and compliance.
|
|
55
|
+
|
|
56
|
+
- **Columnar storage for speed**
|
|
57
|
+
Fully denormalized, column‑partitioned tables deliver lightning‑fast queries,
|
|
58
|
+
even with thousands of columns.
|
|
59
|
+
|
|
60
|
+
- **Built‑in RBAC security**
|
|
61
|
+
Define users and roles to control row‑ and column‑level access—no external
|
|
62
|
+
security tools required.
|
|
63
|
+
|
|
64
|
+
- **Platform independent**
|
|
65
|
+
Deploy on any major cloud provider or on‑premise. SuperTable is a pure Python
|
|
66
|
+
library with no hidden costs.
|
|
67
|
+
|
|
68
|
+
---
|
|
69
|
+
|
|
70
|
+
## Benefits
|
|
71
|
+
|
|
72
|
+
- **Quick start**
|
|
73
|
+
Go from raw data to query‑ready in minutes—faster than spreadsheets or
|
|
74
|
+
traditional databases.
|
|
75
|
+
|
|
76
|
+
- **Higher efficiency**
|
|
77
|
+
Eliminate manual steps and rework so you spend more time analyzing and less
|
|
78
|
+
time wrangling.
|
|
79
|
+
|
|
80
|
+
- **Holistic insights**
|
|
81
|
+
Analyze datasets individually or together to uncover trends, outliers, and
|
|
82
|
+
cross‑dependencies.
|
|
83
|
+
|
|
84
|
+
- **Cost savings**
|
|
85
|
+
Consolidate licenses, simplify support, and reinvest the savings in deeper
|
|
86
|
+
analytics.
|
|
87
|
+
|
|
88
|
+
---
|
|
89
|
+
|
|
90
|
+
SuperTable provides a flexible, high‑performance solution that grows with your
|
|
91
|
+
business. Cut complexity, save time, and gain deeper insights—all in a single,
|
|
92
|
+
streamlined platform.
|
|
93
|
+
|
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
# SuperTable
|
|
2
|
+
|
|
3
|
+

|
|
4
|
+

|
|
5
|
+
|
|
6
|
+
**SuperTable — The simplest data warehouse & cataloging system.**
|
|
7
|
+
A high‑performance, lightweight transaction catalog that integrates multiple
|
|
8
|
+
basic tables into a single, cohesive framework designed for ultimate
|
|
9
|
+
efficiency.
|
|
10
|
+
|
|
11
|
+
Whether you are processing massive datasets or continuously evolving data
|
|
12
|
+
structures, SuperTable lets you query your information effortlessly. It
|
|
13
|
+
automatically creates and manages tables so you can start running SQL queries
|
|
14
|
+
immediately—no complicated schemas or manual joins required.
|
|
15
|
+
|
|
16
|
+
By holding all your data in one place, SuperTable removes complexity, boosts
|
|
17
|
+
efficiency, and provides a unified view of your information. From startups to
|
|
18
|
+
enterprises, SuperTable helps you make better decisions, faster.
|
|
19
|
+
|
|
20
|
+
---
|
|
21
|
+
|
|
22
|
+
## Key Features
|
|
23
|
+
|
|
24
|
+
- **Automatic table creation**
|
|
25
|
+
Load your data and SuperTable instantly builds the required tables and
|
|
26
|
+
columns—no predefined schema or extra setup.
|
|
27
|
+
|
|
28
|
+
- **Self‑referencing architecture**
|
|
29
|
+
Combine and analyze data across tables without writing manual joins; tables
|
|
30
|
+
can dynamically reference each other for richer insights.
|
|
31
|
+
|
|
32
|
+
- **Staging module with history**
|
|
33
|
+
Upload files to a staging area and reload any version at any time, keeping a
|
|
34
|
+
complete audit trail for tracking and compliance.
|
|
35
|
+
|
|
36
|
+
- **Columnar storage for speed**
|
|
37
|
+
Fully denormalized, column‑partitioned tables deliver lightning‑fast queries,
|
|
38
|
+
even with thousands of columns.
|
|
39
|
+
|
|
40
|
+
- **Built‑in RBAC security**
|
|
41
|
+
Define users and roles to control row‑ and column‑level access—no external
|
|
42
|
+
security tools required.
|
|
43
|
+
|
|
44
|
+
- **Platform independent**
|
|
45
|
+
Deploy on any major cloud provider or on‑premise. SuperTable is a pure Python
|
|
46
|
+
library with no hidden costs.
|
|
47
|
+
|
|
48
|
+
---
|
|
49
|
+
|
|
50
|
+
## Benefits
|
|
51
|
+
|
|
52
|
+
- **Quick start**
|
|
53
|
+
Go from raw data to query‑ready in minutes—faster than spreadsheets or
|
|
54
|
+
traditional databases.
|
|
55
|
+
|
|
56
|
+
- **Higher efficiency**
|
|
57
|
+
Eliminate manual steps and rework so you spend more time analyzing and less
|
|
58
|
+
time wrangling.
|
|
59
|
+
|
|
60
|
+
- **Holistic insights**
|
|
61
|
+
Analyze datasets individually or together to uncover trends, outliers, and
|
|
62
|
+
cross‑dependencies.
|
|
63
|
+
|
|
64
|
+
- **Cost savings**
|
|
65
|
+
Consolidate licenses, simplify support, and reinvest the savings in deeper
|
|
66
|
+
analytics.
|
|
67
|
+
|
|
68
|
+
---
|
|
69
|
+
|
|
70
|
+
SuperTable provides a flexible, high‑performance solution that grows with your
|
|
71
|
+
business. Cut complexity, save time, and gain deeper insights—all in a single,
|
|
72
|
+
streamlined platform.
|
|
73
|
+
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=61.0"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "supertable"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "SuperTable revolutionizes data management by integrating multiple basic tables into a single, cohesive framework."
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
requires-python = ">=3.10"
|
|
11
|
+
license = { text = "Super Table Public Use License (STPUL) v1.0" }
|
|
12
|
+
authors = [
|
|
13
|
+
{ name = "Levente Kupas", email = "lkupas@kladnasoft.com" }
|
|
14
|
+
]
|
|
15
|
+
classifiers = [
|
|
16
|
+
"Programming Language :: Python :: 3",
|
|
17
|
+
"License :: Other/Proprietary License",
|
|
18
|
+
"Operating System :: OS Independent"
|
|
19
|
+
]
|
|
20
|
+
|
|
21
|
+
[tool.setuptools]
|
|
22
|
+
include-package-data = true
|
|
23
|
+
|
|
24
|
+
[project.urls]
|
|
25
|
+
Homepage = "https://github.com/kladnasoft/supertable"
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
from setuptools import setup, find_packages


def _read_long_description() -> str:
    """Return README.md for PyPI's long description.

    Uses a context manager with an explicit UTF-8 encoding; the original
    bare open() leaked the file handle and depended on the locale encoding.
    """
    with open("README.md", encoding="utf-8") as readme:
        return readme.read()


setup(
    name="supertable",
    version="0.1.0",
    packages=find_packages(include=["supertable", "supertable.*"]),
    include_package_data=True,
    author="Levente Kupas",
    author_email="lkupas@kladnasoft.com",
    description="A high-performance, lightweight transaction cataloging system designed for ultimate efficiency.",
    license="Super Table Public Use License (STPUL) v1.0",
    long_description=_read_long_description(),
    long_description_content_type="text/markdown",
    url="https://github.com/kladnasoft/supertable",
    classifiers=[
        "Programming Language :: Python :: 3",
        "License :: Other/Proprietary License",
        "Operating System :: OS Independent",
    ],
    python_requires=">=3.10",
)
|
|
File without changes
|
|
File without changes
|
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import logging
|
|
3
|
+
import sys
|
|
4
|
+
|
|
5
|
+
from dotenv import load_dotenv
|
|
6
|
+
from dataclasses import dataclass
|
|
7
|
+
|
|
8
|
+
import colorlog
|
|
9
|
+
|
|
10
|
+
# Configure colors for all levels
# Module-level logging setup: a single colorized stream handler installed
# on the root logger, so every supertable module logs with color.
handler = colorlog.StreamHandler()
handler.setFormatter(colorlog.ColoredFormatter(
    '%(log_color)s%(asctime)s - %(levelname)-8s - %(message)s',
    datefmt='%Y-%m-%d %H:%M:%S',
    log_colors={
        'DEBUG': 'cyan',
        'INFO': 'green',
        'WARNING': 'yellow',
        'ERROR': 'red',
        'CRITICAL': 'red,bg_white',  # White background with red text
    },
    secondary_log_colors={},
    style='%'
))

# basicConfig replaces any default handlers; initial level is refined later
# by load_defaults_from_env() / Default._update_log_level().
logging.basicConfig(level=logging.INFO, handlers=[handler])
logger = logging.getLogger(__name__)
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
@dataclass
class Default:
    """Runtime configuration defaults for supertable.

    Instances are normally created by load_defaults_from_env(); individual
    fields can be overridden afterwards with update_default().
    """

    MAX_MEMORY_CHUNK_SIZE: int = 16 * 1024 * 1024
    DEFAULT_TIMEOUT_SEC: int = 10
    DEFAULT_LOCK_DURATION_SEC: int = 60
    LOG_LEVEL: str = "INFO"  # Replaced IS_DEBUG with LOG_LEVEL
    IS_SHOW_TIMING: bool = True
    STORAGE_TYPE: str = "LOCAL"

    def update_default(self, **kwargs):
        """Update matching fields of this instance in-place.

        Keys that are not existing fields are silently ignored; changing
        LOG_LEVEL also re-applies the level to the root logger.
        """
        for name, new_value in kwargs.items():
            if not hasattr(self, name):
                continue
            setattr(self, name, new_value)
            if name == "LOG_LEVEL":
                self._update_log_level()

    def _update_log_level(self):
        """Apply the current LOG_LEVEL to the root logger and log the change."""
        logging.getLogger().setLevel(self.LOG_LEVEL)
        logger.info(f"Log level changed to {self.LOG_LEVEL}")
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def load_defaults_from_env(env_file: str = ".env") -> Default:
    """Build a Default instance from environment variables.

    Loads `env_file` via python-dotenv first. An unrecognized LOG_LEVEL
    falls back to INFO with a warning, and the chosen level is applied to
    the root logger immediately.
    """
    load_dotenv(env_file)

    valid_levels = ["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"]
    log_level = os.getenv("LOG_LEVEL", "INFO").upper()
    if log_level not in valid_levels:
        log_level = "INFO"
        logger.warning(f"Invalid LOG_LEVEL in .env. Using default INFO. Valid levels are: {valid_levels}")

    # Apply right away so everything imported afterwards logs at this level.
    logging.getLogger().setLevel(log_level)

    chunk_size = int(os.getenv("MAX_MEMORY_CHUNK_SIZE", 16 * 1024 * 1024))
    timeout_sec = int(os.getenv("DEFAULT_TIMEOUT_SEC", 10))
    lock_duration = int(os.getenv("DEFAULT_LOCK_DURATION_SEC", 60))
    show_timing = os.getenv("IS_SHOW_TIMING", "True").lower() == "true"
    storage_type = os.getenv("STORAGE_TYPE", "LOCAL").upper()

    return Default(
        MAX_MEMORY_CHUNK_SIZE=chunk_size,
        DEFAULT_TIMEOUT_SEC=timeout_sec,
        DEFAULT_LOCK_DURATION_SEC=lock_duration,
        LOG_LEVEL=log_level,
        IS_SHOW_TIMING=show_timing,
        STORAGE_TYPE=storage_type,
    )
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
# Module-level singleton consumed by the rest of the package; environment
# variables are therefore read once at import time.
default = load_defaults_from_env()
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import sys
|
|
3
|
+
|
|
4
|
+
from supertable.config.defaults import default, logger
|
|
5
|
+
|
|
6
|
+
# If this file is located in a subdirectory, adjust the path logic as needed.
# Currently appending ".." from __file__ to add the project root directory
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))

# Home directory for supertable state; the "~" is expanded later inside
# change_to_app_home() via os.path.expanduser.
app_home = os.getenv("SUPERTABLE_HOME", "~/supertable")
|
|
11
|
+
|
|
12
|
+
def change_to_app_home(home_dir: str) -> None:
    """
    Attempt to change the current working directory to `home_dir`.

    `~` is expanded before the chdir. The outcome is logged (debug on
    success, error on failure); failures are swallowed rather than raised.
    """
    expanded_dir = os.path.expanduser(home_dir)
    try:
        os.chdir(expanded_dir)
        logger.debug(f"Changed working directory to {expanded_dir}")
    except Exception as e:
        # Best-effort: leave the process in its current directory on failure.
        logger.error(f"Failed to change working directory to {expanded_dir}: {e}")
|
|
23
|
+
|
|
24
|
+
# NOTE(review): app_home is assigned above with a non-empty default
# ("~/supertable"), so this condition is always truthy and the else branch
# appears unreachable as written — confirm whether the getenv default is
# intended.
if app_home:
    change_to_app_home(app_home)
else:
    logger.error("SUPERTABLE_HOME environment variable is not set")


logger.info(f"Current working directory: {os.getcwd()}")
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def get_app_home():
    """Return the configured app home directory (as set from the
    SUPERTABLE_HOME env var or its default; not tilde-expanded)."""
    return app_home
|
|
@@ -0,0 +1,213 @@
|
|
|
1
|
+
from enum import Enum
|
|
2
|
+
|
|
3
|
+
import duckdb
|
|
4
|
+
import pandas as pd
|
|
5
|
+
|
|
6
|
+
from supertable.config.defaults import logger
|
|
7
|
+
from supertable.utils.timer import Timer
|
|
8
|
+
from supertable.super_table import SuperTable
|
|
9
|
+
from supertable.query_plan_manager import QueryPlanManager
|
|
10
|
+
from supertable.utils.sql_parser import SQLParser
|
|
11
|
+
from supertable.utils.helper import dict_keys_to_lowercase
|
|
12
|
+
from supertable.plan_extender import PlanStats, extend_execution_plan
|
|
13
|
+
from supertable.rbac.access_control import restrict_read_access
|
|
14
|
+
|
|
15
|
+
class Status(Enum):
    """Outcome of a DataReader.execute() call."""
    OK = "ok"
    ERROR = "error"
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class DataReader:
    """Executes a parsed SQL query against a SuperTable using DuckDB.

    Flow: read super-table metadata under a shared lock, filter the relevant
    table snapshots, enforce RBAC read restrictions, then run the query over
    the snapshots' parquet files in an in-memory DuckDB session.
    """

    def __init__(self, super_name, organization, query):
        # Parse the SQL up front; per-call state (timer, plan stats, query
        # plan manager) is initialized inside execute().
        self.super_table = SuperTable(super_name=super_name, organization=organization)
        self.parser = SQLParser(query)
        self.parser.parse_sql()
        self.timer = None
        self.plan_stats = None
        self.query_plan_manager = None

    def filter_snapshots(self, super_table_data, super_table_meta):
        """Select the snapshots relevant to the queried table.

        Side effect: records table-level stats (file count, total size,
        total rows) into self.plan_stats. When the query targets the super
        table itself, all snapshots are returned except internal tables
        (names wrapped in double underscores); otherwise only snapshots
        whose table_name matches the queried table (case-insensitive).
        """
        snapshots = super_table_data.get("snapshots")
        file_count = super_table_meta.get("file_count", 0)
        total_rows = super_table_meta.get("total_rows", 0)
        total_file_size = super_table_meta.get("total_file_size", 0)
        self.plan_stats.add_stat({"TABLE_FILES": file_count})
        self.plan_stats.add_stat({"TABLE_SIZE": total_file_size})
        self.plan_stats.add_stat({"TABLE_ROWS": total_rows})

        if self.super_table.super_name.lower() == self.parser.original_table.lower():
            # Querying the super table: include everything except internal
            # bookkeeping tables named __like_this__.
            filtered_snapshots = [
                s for s in snapshots
                if not (s["table_name"].startswith("__") and s["table_name"].endswith("__"))
            ]
            return filtered_snapshots
        else:
            filtered_snapshots = [
                entry
                for entry in snapshots
                if entry["table_name"].lower() == self.parser.original_table.lower()
            ]

            return filtered_snapshots

    # Class-level Timer used only as a decorator to time execute() as a
    # whole; distinct from the per-call self.timer created inside execute().
    timer = Timer()
    @timer
    def execute(self, user_hash: str, with_scan: bool=False):
        """Run the parsed query for `user_hash`.

        Returns a (result DataFrame, Status, message) tuple; on failure the
        DataFrame is empty, status is Status.ERROR and message holds the
        error text. `with_scan=True` forces all parquet files to be included
        regardless of column pruning.
        """
        status = Status.ERROR
        message = None
        self.timer = Timer()
        self.plan_stats = PlanStats()

        try:
            # Shared lock: readers may proceed concurrently with each other.
            super_table_data, super_table_path, super_table_meta = self.super_table.get_super_table_and_path_with_shared_lock()

            self.timer.capture_and_reset_timing(event="META")

            self.query_plan_manager = QueryPlanManager(super_name=self.super_table.super_name,
                                                       organization=self.super_table.organization,
                                                       current_meta_path=super_table_path,
                                                       parser=self.parser)

            snapshots = self.filter_snapshots(super_table_data=super_table_data,
                                              super_table_meta=super_table_meta)
            logger.debug(f"Filtered snapshots: {snapshots}")

            parquet_files, schema = self.process_snapshots(snapshots=snapshots,
                                                           with_scan=with_scan)
            logger.debug(f"Parquet Files: {parquet_files}")

            # Requested columns not present in the union schema; the literal
            # "*" wildcard is excluded from the check (set("*") == {"*"}).
            missing_columns = (
                set([column.lower() for column in self.parser.columns_list])
                - set("*")
                - schema
            )
            logger.debug(f"Mising Columns: {missing_columns}")

            if len(snapshots) == 0 or missing_columns or not parquet_files:
                message = (
                    f"Missing column(s): {', '.join ( missing_columns )}"
                    if missing_columns
                    else "No parquet files found"
                )
                logger.warning(f"Filter Result: {message}")
                # Early return still flows through the @timer decorator;
                # status remains ERROR here.
                return pd.DataFrame(), status, message

            # Raises if the user lacks read access to the requested
            # table/columns.
            restrict_read_access(super_name=self.super_table.super_name,
                                 organization=self.super_table.organization,
                                 user_hash=user_hash,
                                 table_name=self.parser.reflection_table,
                                 table_schema=schema,
                                 parsed_columns=self.parser.columns_list,
                                 parser=self.parser)

            self.timer.capture_and_reset_timing(event="FILTERING")

            result = self.execute_with_duckdb(parquet_files=parquet_files,
                                              query_manager=self.query_plan_manager)

            status = Status.OK
        except Exception as e:
            # Catch-all boundary: report the error in the returned tuple
            # instead of propagating to the caller.
            message = str(e)
            logger.error(f"Exception: {e}")
            result = pd.DataFrame()
        self.timer.capture_and_reset_timing(event="EXECUTING_QUERY")

        try:
            # Best-effort plan persistence; never allowed to fail the query.
            # NOTE(review): if the first try failed before QueryPlanManager
            # was built, query_plan_manager is still None here — confirm
            # extend_execution_plan tolerates that.
            extend_execution_plan(super_table=self.super_table,
                                  user_hash=user_hash,
                                  query_plan_manager=self.query_plan_manager,
                                  timing=self.timer.timings,
                                  status=status.value,
                                  message=message,
                                  result_shape=result.shape,
                                  plan_stats=self.plan_stats)
        except Exception as e:
            logger.error(f"Exception: {e}")

        self.timer.capture_and_reset_timing(event="EXTENDING_PLAN")
        self.timer.capture_duration(event="TOTAL_EXECUTE")
        return result, status, message

    def process_snapshots(self, snapshots, with_scan):
        """Collect parquet files and the union schema from snapshots.

        Returns (parquet_files, schema) where schema is a set of lowercase
        column names across all snapshots. A snapshot's files are included
        when with_scan is set, the query selects "*", or any requested
        column exists in that snapshot's schema. Also records reflection
        stats (count/size/rows) into self.plan_stats.
        """
        parquet_files = []
        reflection_file_size = 0
        reflection_rows = 0

        schema = set()
        for snapshot in snapshots:
            current_snapshot_path = snapshot["path"]
            current_snapshot_data = self.super_table.read_simple_table_snapshot(
                current_snapshot_path
            )

            current_schema = current_snapshot_data.get("schema", {})
            resources = current_snapshot_data.get("resources", {})
            schema.update(dict_keys_to_lowercase(current_schema).keys())

            for resource in resources:
                file_size = resource.get("file_size", 0)
                file_rows = resource.get("rows", 0)

                # NOTE(review): the condition is loop-invariant per snapshot
                # and dict_keys_to_lowercase is recomputed per resource —
                # could be hoisted, left as-is here.
                if (
                    with_scan
                    or self.parser.columns_csv == "*"
                    or any(
                        col in dict_keys_to_lowercase(current_schema).keys()
                        for col in [
                            column.lower() for column in self.parser.columns_list
                        ]
                    )
                ):
                    parquet_files.append(resource["file"])
                    reflection_file_size += file_size
                    reflection_rows += file_rows

        logger.debug(f"snapshots: {len ( snapshots )}")
        logger.debug(f"parquet_files: {len ( parquet_files )}")
        logger.debug(f"schema: {schema}")

        self.plan_stats.add_stat({"REFLECTIONS": len(parquet_files)})
        self.plan_stats.add_stat({"REFLECTION_SIZE": reflection_file_size})
        self.plan_stats.add_stat({"REFLECTION_ROWS": reflection_rows})

        return parquet_files, schema

    def execute_with_duckdb(self, parquet_files, query_manager: QueryPlanManager):
        """Run the query over `parquet_files` in a fresh DuckDB session.

        Builds a reflection table from the parquet files, an RBAC view over
        it, then executes the parsed query and returns a pandas DataFrame.
        NOTE(review): `con` is never closed; SQL is assembled via f-string
        interpolation of parser-derived identifiers — assumed to be
        sanitized upstream by SQLParser, confirm.
        """
        # Use DuckDB to read and query the parquet files directly
        con = duckdb.connect()

        con.execute("PRAGMA memory_limit='2GB';")
        con.execute(f"PRAGMA temp_directory='{query_manager.temp_dir}';")
        # Profiling output (JSON) is written to the query-plan path and later
        # consumed by extend_execution_plan.
        con.execute("PRAGMA enable_profiling='json';")
        #con.execute("SET profiling_mode = 'standard';")
        con.execute(f"PRAGMA profile_output = '{query_manager.query_plan_path}';")

        # Read and register parquet files directly with DuckDB
        parquet_files_str = ", ".join(f"'{file}'" for file in parquet_files)
        logger.debug(f"parquet files: {len(parquet_files)}")

        self.timer.capture_and_reset_timing("CONNECTING")

        # union_by_name merges differing file schemas by column name.
        create_table = f"""
        CREATE TABLE {self.parser.reflection_table}
        AS
        SELECT {self.parser.columns_csv}
        FROM parquet_scan([{parquet_files_str}], union_by_name=True, HIVE_PARTITIONING=TRUE);
        """

        logger.debug(f"create_table: {create_table}")
        con.execute(create_table)

        # RBAC view restricts what the executing query can see.
        create_view = f"""
        CREATE VIEW {self.parser.rbac_view}
        AS
        {self.parser.view_definition}
        """
        logger.debug(f"create_view: {create_view}")
        con.execute(create_view)

        self.timer.capture_and_reset_timing("CREATING_REFLECTION")
        logger.debug(f"Executing Query: {self.parser.executing_query}")
        result = con.execute(query=self.parser.executing_query).fetchdf()
        logger.debug(f"result.shape: {result.shape}")
        return result
|