universal-database-inspector 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- universal_database_inspector-0.1.0/PKG-INFO +141 -0
- universal_database_inspector-0.1.0/README.md +118 -0
- universal_database_inspector-0.1.0/pyproject.toml +39 -0
- universal_database_inspector-0.1.0/setup.cfg +4 -0
- universal_database_inspector-0.1.0/universal_database_inspector/__init__.py +59 -0
- universal_database_inspector-0.1.0/universal_database_inspector/__main__.py +36 -0
- universal_database_inspector-0.1.0/universal_database_inspector/ai.py +69 -0
- universal_database_inspector-0.1.0/universal_database_inspector/application.py +151 -0
- universal_database_inspector-0.1.0/universal_database_inspector/config.py +55 -0
- universal_database_inspector-0.1.0/universal_database_inspector/connection.py +72 -0
- universal_database_inspector-0.1.0/universal_database_inspector/describer.py +259 -0
- universal_database_inspector-0.1.0/universal_database_inspector/inspector.py +126 -0
- universal_database_inspector-0.1.0/universal_database_inspector/labeler.py +167 -0
- universal_database_inspector-0.1.0/universal_database_inspector/scaffold.py +41 -0
- universal_database_inspector-0.1.0/universal_database_inspector/table.py +140 -0
- universal_database_inspector-0.1.0/universal_database_inspector.egg-info/PKG-INFO +141 -0
- universal_database_inspector-0.1.0/universal_database_inspector.egg-info/SOURCES.txt +19 -0
- universal_database_inspector-0.1.0/universal_database_inspector.egg-info/dependency_links.txt +1 -0
- universal_database_inspector-0.1.0/universal_database_inspector.egg-info/entry_points.txt +2 -0
- universal_database_inspector-0.1.0/universal_database_inspector.egg-info/requires.txt +5 -0
- universal_database_inspector-0.1.0/universal_database_inspector.egg-info/top_level.txt +1 -0
|
@@ -0,0 +1,141 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: universal_database_inspector
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Automated MySQL schema decomposition with AI-powered labeling and descriptions.
|
|
5
|
+
Author: nailen1
|
|
6
|
+
License: MIT
|
|
7
|
+
Keywords: mysql,schema,openai,database,inspector,labeling
|
|
8
|
+
Classifier: Development Status :: 4 - Beta
|
|
9
|
+
Classifier: Intended Audience :: Developers
|
|
10
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
11
|
+
Classifier: Programming Language :: Python :: 3
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
15
|
+
Classifier: Topic :: Database
|
|
16
|
+
Requires-Python: >=3.10
|
|
17
|
+
Description-Content-Type: text/markdown
|
|
18
|
+
Requires-Dist: mysql-connector-python>=8.0.0
|
|
19
|
+
Requires-Dist: sqlalchemy>=2.0.0
|
|
20
|
+
Requires-Dist: pandas>=2.0.0
|
|
21
|
+
Requires-Dist: python-dotenv>=1.0.0
|
|
22
|
+
Requires-Dist: openai>=1.0.0
|
|
23
|
+
|
|
24
|
+
# universal_database_inspector
|
|
25
|
+
|
|
26
|
+
MySQL 데이터베이스의 스키마를 자동으로 분석하고, AI를 활용하여 컬럼 라벨링과 테이블 설명을 생성하는 Python 패키지입니다.
|
|
27
|
+
|
|
28
|
+
## Features
|
|
29
|
+
|
|
30
|
+
- **스키마 추출** — 데이터베이스의 전체 테이블·컬럼 구조를 JSON으로 저장
|
|
31
|
+
- **AI 컬럼 라벨링** — OpenAI 모델을 활용하여 각 컬럼의 한국어 라벨을 자동 생성
|
|
32
|
+
- **AI 테이블 설명** — 테이블의 목적과 용도를 한국어로 요약
|
|
33
|
+
- **테이블 통계** — 행 수, 컬럼 수, 날짜 범위 자동 수집
|
|
34
|
+
- **Table 객체** — 테이블별 구조·라벨·설명·데이터를 하나의 인터페이스로 접근
|
|
35
|
+
|
|
36
|
+
## Installation
|
|
37
|
+
|
|
38
|
+
```bash
|
|
39
|
+
pip install git+https://github.com/nailen1/universal_database_inspector.git
|
|
40
|
+
```
|
|
41
|
+
|
|
42
|
+
## Setup
|
|
43
|
+
|
|
44
|
+
`.env.example`을 참고하여 `.env` 파일을 생성합니다.
|
|
45
|
+
|
|
46
|
+
```
|
|
47
|
+
# MySQL Database Connection
|
|
48
|
+
DB_HOST=your_host
|
|
49
|
+
DB_PORT=3306
|
|
50
|
+
DB_USER=your_user
|
|
51
|
+
DB_PASSWORD=your_password
|
|
52
|
+
DB_NAME=your_database
|
|
53
|
+
|
|
54
|
+
# OpenAI API Key
|
|
55
|
+
OPENAI_API_KEY=your_openai_api_key
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
## Quick Start
|
|
59
|
+
|
|
60
|
+
```python
|
|
61
|
+
from universal_database_inspector import init_structure, inspect_all, describe_all_tables, Table
|
|
62
|
+
```
|
|
63
|
+
|
|
64
|
+
### 1. 출력 폴더 초기화
|
|
65
|
+
|
|
66
|
+
```python
|
|
67
|
+
init_structure()
|
|
68
|
+
```
|
|
69
|
+
|
|
70
|
+
```
|
|
71
|
+
database_structure/
|
|
72
|
+
├── labels/
|
|
73
|
+
└── description/
|
|
74
|
+
```
|
|
75
|
+
|
|
76
|
+
### 2. 데이터베이스 구조 추출
|
|
77
|
+
|
|
78
|
+
```python
|
|
79
|
+
structure = inspect_all()
|
|
80
|
+
list(structure.keys())
|
|
81
|
+
# ['bond', 'currency', 'daily_price', 'index', 'index_membership', 'stock_info']
|
|
82
|
+
```
|
|
83
|
+
|
|
84
|
+
`database_structure/structure.json`에 전체 테이블-컬럼 매핑이 저장됩니다.
|
|
85
|
+
|
|
86
|
+
### 3. 테이블 설명 생성
|
|
87
|
+
|
|
88
|
+
```python
|
|
89
|
+
describe_all_tables()
|
|
90
|
+
```
|
|
91
|
+
|
|
92
|
+
각 테이블에 대해 AI 라벨링과 설명 파일을 일괄 생성합니다.
|
|
93
|
+
|
|
94
|
+
```
|
|
95
|
+
[1/6] describing: bond
|
|
96
|
+
[2/6] describing: currency
|
|
97
|
+
...
|
|
98
|
+
done: 6 created, 0 skipped (total 6)
|
|
99
|
+
```
|
|
100
|
+
|
|
101
|
+
### 4. Table 객체로 접근
|
|
102
|
+
|
|
103
|
+
```python
|
|
104
|
+
table = Table('bond')
|
|
105
|
+
|
|
106
|
+
table.columns # ['code', 'date', 'open', 'high', 'low', 'close']
|
|
107
|
+
table.labels # {'code': '채권 코드', 'date': '거래일자', 'open': '시가', ...}
|
|
108
|
+
table.description # {'description': '...', 'first_date': '1962-01-02', 'last_date': '2026-02-20', ...}
|
|
109
|
+
table.df # 원본 데이터 DataFrame
|
|
110
|
+
table.labeled # 한국어 라벨이 적용된 DataFrame
|
|
111
|
+
```
|
|
112
|
+
|
|
113
|
+
## Output Structure
|
|
114
|
+
|
|
115
|
+
```
|
|
116
|
+
database_structure/
|
|
117
|
+
├── structure.json # 전체 테이블-컬럼 구조
|
|
118
|
+
├── labels/
|
|
119
|
+
│ ├── bond.json # {"code": "채권 코드", "date": "거래일자", ...}
|
|
120
|
+
│ ├── currency.json
|
|
121
|
+
│ └── ...
|
|
122
|
+
└── description/
|
|
123
|
+
├── bond.json # {"description": "...", "row_count": 85376, ...}
|
|
124
|
+
├── currency.json
|
|
125
|
+
└── ...
|
|
126
|
+
```
|
|
127
|
+
|
|
128
|
+
## CLI
|
|
129
|
+
|
|
130
|
+
```bash
|
|
131
|
+
python -m universal_database_inspector
|
|
132
|
+
python -m universal_database_inspector --overwrite
|
|
133
|
+
```
|
|
134
|
+
|
|
135
|
+
## Dependencies
|
|
136
|
+
|
|
137
|
+
- `mysql-connector-python` — MySQL 연결
|
|
138
|
+
- `sqlalchemy` — ORM 엔진
|
|
139
|
+
- `pandas` — 데이터 처리
|
|
140
|
+
- `python-dotenv` — 환경변수 로딩
|
|
141
|
+
- `openai` — OpenAI API 호출
|
|
@@ -0,0 +1,118 @@
|
|
|
1
|
+
# universal_database_inspector
|
|
2
|
+
|
|
3
|
+
MySQL 데이터베이스의 스키마를 자동으로 분석하고, AI를 활용하여 컬럼 라벨링과 테이블 설명을 생성하는 Python 패키지입니다.
|
|
4
|
+
|
|
5
|
+
## Features
|
|
6
|
+
|
|
7
|
+
- **스키마 추출** — 데이터베이스의 전체 테이블·컬럼 구조를 JSON으로 저장
|
|
8
|
+
- **AI 컬럼 라벨링** — OpenAI 모델을 활용하여 각 컬럼의 한국어 라벨을 자동 생성
|
|
9
|
+
- **AI 테이블 설명** — 테이블의 목적과 용도를 한국어로 요약
|
|
10
|
+
- **테이블 통계** — 행 수, 컬럼 수, 날짜 범위 자동 수집
|
|
11
|
+
- **Table 객체** — 테이블별 구조·라벨·설명·데이터를 하나의 인터페이스로 접근
|
|
12
|
+
|
|
13
|
+
## Installation
|
|
14
|
+
|
|
15
|
+
```bash
|
|
16
|
+
pip install git+https://github.com/nailen1/universal_database_inspector.git
|
|
17
|
+
```
|
|
18
|
+
|
|
19
|
+
## Setup
|
|
20
|
+
|
|
21
|
+
`.env.example`을 참고하여 `.env` 파일을 생성합니다.
|
|
22
|
+
|
|
23
|
+
```
|
|
24
|
+
# MySQL Database Connection
|
|
25
|
+
DB_HOST=your_host
|
|
26
|
+
DB_PORT=3306
|
|
27
|
+
DB_USER=your_user
|
|
28
|
+
DB_PASSWORD=your_password
|
|
29
|
+
DB_NAME=your_database
|
|
30
|
+
|
|
31
|
+
# OpenAI API Key
|
|
32
|
+
OPENAI_API_KEY=your_openai_api_key
|
|
33
|
+
```
|
|
34
|
+
|
|
35
|
+
## Quick Start
|
|
36
|
+
|
|
37
|
+
```python
|
|
38
|
+
from universal_database_inspector import init_structure, inspect_all, describe_all_tables, Table
|
|
39
|
+
```
|
|
40
|
+
|
|
41
|
+
### 1. 출력 폴더 초기화
|
|
42
|
+
|
|
43
|
+
```python
|
|
44
|
+
init_structure()
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
```
|
|
48
|
+
database_structure/
|
|
49
|
+
├── labels/
|
|
50
|
+
└── description/
|
|
51
|
+
```
|
|
52
|
+
|
|
53
|
+
### 2. 데이터베이스 구조 추출
|
|
54
|
+
|
|
55
|
+
```python
|
|
56
|
+
structure = inspect_all()
|
|
57
|
+
list(structure.keys())
|
|
58
|
+
# ['bond', 'currency', 'daily_price', 'index', 'index_membership', 'stock_info']
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
`database_structure/structure.json`에 전체 테이블-컬럼 매핑이 저장됩니다.
|
|
62
|
+
|
|
63
|
+
### 3. 테이블 설명 생성
|
|
64
|
+
|
|
65
|
+
```python
|
|
66
|
+
describe_all_tables()
|
|
67
|
+
```
|
|
68
|
+
|
|
69
|
+
각 테이블에 대해 AI 라벨링과 설명 파일을 일괄 생성합니다.
|
|
70
|
+
|
|
71
|
+
```
|
|
72
|
+
[1/6] describing: bond
|
|
73
|
+
[2/6] describing: currency
|
|
74
|
+
...
|
|
75
|
+
done: 6 created, 0 skipped (total 6)
|
|
76
|
+
```
|
|
77
|
+
|
|
78
|
+
### 4. Table 객체로 접근
|
|
79
|
+
|
|
80
|
+
```python
|
|
81
|
+
table = Table('bond')
|
|
82
|
+
|
|
83
|
+
table.columns # ['code', 'date', 'open', 'high', 'low', 'close']
|
|
84
|
+
table.labels # {'code': '채권 코드', 'date': '거래일자', 'open': '시가', ...}
|
|
85
|
+
table.description # {'description': '...', 'first_date': '1962-01-02', 'last_date': '2026-02-20', ...}
|
|
86
|
+
table.df # 원본 데이터 DataFrame
|
|
87
|
+
table.labeled # 한국어 라벨이 적용된 DataFrame
|
|
88
|
+
```
|
|
89
|
+
|
|
90
|
+
## Output Structure
|
|
91
|
+
|
|
92
|
+
```
|
|
93
|
+
database_structure/
|
|
94
|
+
├── structure.json # 전체 테이블-컬럼 구조
|
|
95
|
+
├── labels/
|
|
96
|
+
│ ├── bond.json # {"code": "채권 코드", "date": "거래일자", ...}
|
|
97
|
+
│ ├── currency.json
|
|
98
|
+
│ └── ...
|
|
99
|
+
└── description/
|
|
100
|
+
├── bond.json # {"description": "...", "row_count": 85376, ...}
|
|
101
|
+
├── currency.json
|
|
102
|
+
└── ...
|
|
103
|
+
```
|
|
104
|
+
|
|
105
|
+
## CLI
|
|
106
|
+
|
|
107
|
+
```bash
|
|
108
|
+
python -m universal_database_inspector
|
|
109
|
+
python -m universal_database_inspector --overwrite
|
|
110
|
+
```
|
|
111
|
+
|
|
112
|
+
## Dependencies
|
|
113
|
+
|
|
114
|
+
- `mysql-connector-python` — MySQL 연결
|
|
115
|
+
- `sqlalchemy` — ORM 엔진
|
|
116
|
+
- `pandas` — 데이터 처리
|
|
117
|
+
- `python-dotenv` — 환경변수 로딩
|
|
118
|
+
- `openai` — OpenAI API 호출
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=68.0", "wheel"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "universal_database_inspector"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "Automated MySQL schema decomposition with AI-powered labeling and descriptions."
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
requires-python = ">=3.10"
|
|
11
|
+
license = { text = "MIT" }
|
|
12
|
+
authors = [
|
|
13
|
+
{ name = "nailen1" },
|
|
14
|
+
]
|
|
15
|
+
keywords = ["mysql", "schema", "openai", "database", "inspector", "labeling"]
|
|
16
|
+
classifiers = [
|
|
17
|
+
"Development Status :: 4 - Beta",
|
|
18
|
+
"Intended Audience :: Developers",
|
|
19
|
+
"License :: OSI Approved :: MIT License",
|
|
20
|
+
"Programming Language :: Python :: 3",
|
|
21
|
+
"Programming Language :: Python :: 3.10",
|
|
22
|
+
"Programming Language :: Python :: 3.11",
|
|
23
|
+
"Programming Language :: Python :: 3.12",
|
|
24
|
+
"Topic :: Database",
|
|
25
|
+
]
|
|
26
|
+
dependencies = [
|
|
27
|
+
"mysql-connector-python>=8.0.0",
|
|
28
|
+
"sqlalchemy>=2.0.0",
|
|
29
|
+
"pandas>=2.0.0",
|
|
30
|
+
"python-dotenv>=1.0.0",
|
|
31
|
+
"openai>=1.0.0",
|
|
32
|
+
]
|
|
33
|
+
|
|
34
|
+
[project.scripts]
|
|
35
|
+
database-inspector = "universal_database_inspector.__main__:main"
|
|
36
|
+
|
|
37
|
+
[tool.setuptools.packages.find]
|
|
38
|
+
where = ["."]
|
|
39
|
+
include = ["universal_database_inspector*"]
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
"""universal_database_inspector - Automated MySQL schema decomposition."""
|
|
2
|
+
|
|
3
|
+
from universal_database_inspector.config import load_db_config
|
|
4
|
+
from universal_database_inspector.connection import (
|
|
5
|
+
get_engine,
|
|
6
|
+
get_connection,
|
|
7
|
+
get_list_tables,
|
|
8
|
+
)
|
|
9
|
+
from universal_database_inspector.inspector import (
|
|
10
|
+
load_structure,
|
|
11
|
+
get_columns,
|
|
12
|
+
get_structure,
|
|
13
|
+
save_structure,
|
|
14
|
+
inspect_all,
|
|
15
|
+
)
|
|
16
|
+
from universal_database_inspector.labeler import (
|
|
17
|
+
fetch_sample_rows,
|
|
18
|
+
generate_labels,
|
|
19
|
+
save_labels,
|
|
20
|
+
label_table,
|
|
21
|
+
)
|
|
22
|
+
from universal_database_inspector.application import (
|
|
23
|
+
label_all_tables,
|
|
24
|
+
load_labels,
|
|
25
|
+
get_labeled_table,
|
|
26
|
+
)
|
|
27
|
+
from universal_database_inspector.describer import (
|
|
28
|
+
get_table_stats,
|
|
29
|
+
generate_description,
|
|
30
|
+
save_description,
|
|
31
|
+
describe_all_tables,
|
|
32
|
+
)
|
|
33
|
+
from universal_database_inspector.table import Table
|
|
34
|
+
from universal_database_inspector.scaffold import init_structure
|
|
35
|
+
|
|
36
|
+
__all__ = [
|
|
37
|
+
"load_db_config",
|
|
38
|
+
"get_engine",
|
|
39
|
+
"get_connection",
|
|
40
|
+
"get_list_tables",
|
|
41
|
+
"load_structure",
|
|
42
|
+
"get_columns",
|
|
43
|
+
"get_structure",
|
|
44
|
+
"save_structure",
|
|
45
|
+
"inspect_all",
|
|
46
|
+
"fetch_sample_rows",
|
|
47
|
+
"generate_labels",
|
|
48
|
+
"save_labels",
|
|
49
|
+
"label_table",
|
|
50
|
+
"label_all_tables",
|
|
51
|
+
"load_labels",
|
|
52
|
+
"get_labeled_table",
|
|
53
|
+
"get_table_stats",
|
|
54
|
+
"generate_description",
|
|
55
|
+
"save_description",
|
|
56
|
+
"describe_all_tables",
|
|
57
|
+
"Table",
|
|
58
|
+
"init_structure",
|
|
59
|
+
]
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
"""CLI entrypoint for universal_database_inspector.
|
|
2
|
+
|
|
3
|
+
Usage::
|
|
4
|
+
|
|
5
|
+
python -m universal_database_inspector
|
|
6
|
+
python -m universal_database_inspector --overwrite
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
import argparse
|
|
10
|
+
|
|
11
|
+
from universal_database_inspector.inspector import inspect_all
|
|
12
|
+
from universal_database_inspector.describer import describe_all_tables
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def main():
    """Run the full inspection pipeline: structure dump, then labels/descriptions."""
    arg_parser = argparse.ArgumentParser(
        description="Inspect database structure, generate labels and descriptions.",
    )
    arg_parser.add_argument(
        "--overwrite",
        action="store_true",
        help="Overwrite existing label/description files.",
    )
    options = arg_parser.parse_args()

    # Step 1: dump the table/column structure to structure.json.
    print("\n[1/2] structure.json")
    inspect_all()

    # Step 2: generate AI labels and descriptions for every table.
    print("\n[2/2] labels + descriptions")
    describe_all_tables(overwrite=options.overwrite)

    print("\nall done.")


if __name__ == "__main__":
    main()
|
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
"""Minimal OpenAI API wrapper.
|
|
2
|
+
|
|
3
|
+
Provides a single ``prompt_to_model`` function used by the labeler
|
|
4
|
+
and describer modules. Replaces the external ``use_ai`` dependency.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import os
|
|
8
|
+
|
|
9
|
+
from dotenv import load_dotenv
|
|
10
|
+
from openai import OpenAI, BadRequestError
|
|
11
|
+
|
|
12
|
+
DEFAULT_MODEL_NAME = "gpt-4.1"
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def _get_client() -> OpenAI:
    """Build an OpenAI client using the ``OPENAI_API_KEY`` environment variable.

    Raises:
        EnvironmentError: If ``OPENAI_API_KEY`` is not set.
    """
    load_dotenv()  # pick up a local .env file if present
    key = os.getenv("OPENAI_API_KEY")
    if key is not None:
        return OpenAI(api_key=key)
    raise EnvironmentError("Missing environment variable: OPENAI_API_KEY")
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def _chat_completion(client: OpenAI, **kwargs) -> object:
    """Call chat completions, retrying with adjusted parameters on rejection.

    Handles two known incompatibilities across model generations:
    - max_tokens -> max_completion_tokens (newer models)
    - temperature not supported (reasoning models like o3-mini)
    """
    # Iterative retry loop: each rejected parameter is fixed at most once,
    # so the loop terminates after a bounded number of adjustments.
    while True:
        try:
            return client.chat.completions.create(**kwargs)
        except BadRequestError as exc:
            payload = exc.body or {}
            bad_param = payload.get("param", "") if isinstance(payload, dict) else ""

            if bad_param == "max_tokens" and "max_tokens" in kwargs:
                # Newer models expect max_completion_tokens instead.
                kwargs["max_completion_tokens"] = kwargs.pop("max_tokens")
                continue

            if bad_param == "temperature" and "temperature" in kwargs:
                # Reasoning models reject temperature entirely; drop it.
                kwargs.pop("temperature")
                continue

            raise
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def prompt_to_model(
    prompt: str,
    model_name: str = DEFAULT_MODEL_NAME,
    system_message: str | None = None,
    max_tokens: int = 1024,
    temperature: float = 0.7,
) -> str:
    """Send a prompt to an OpenAI model and return the response text."""
    # Optional system message first, then the user prompt.
    conversation = (
        [{"role": "system", "content": system_message}] if system_message else []
    )
    conversation.append({"role": "user", "content": prompt})

    result = _chat_completion(
        _get_client(),
        model=model_name,
        messages=conversation,
        max_tokens=max_tokens,
        temperature=temperature,
    )
    return result.choices[0].message.content
|
|
@@ -0,0 +1,151 @@
|
|
|
1
|
+
"""High-level database inspector workflows.
|
|
2
|
+
|
|
3
|
+
Combines inspector and labeler primitives into batch operations.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
import json
|
|
7
|
+
import os
|
|
8
|
+
|
|
9
|
+
import pandas as pd
|
|
10
|
+
|
|
11
|
+
from universal_database_inspector.connection import get_engine
|
|
12
|
+
from universal_database_inspector.inspector import load_structure
|
|
13
|
+
from universal_database_inspector.labeler import save_labels
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
_PROJECT_ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def _project_root():
    # Directory containing the installed package (parent of this module's
    # directory); relative output_dir paths are resolved against it.
    return _PROJECT_ROOT
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def _group_key(table_name: str) -> str | None:
|
|
24
|
+
"""Return the group key if the table belongs to a grouped prefix.
|
|
25
|
+
|
|
26
|
+
Only numeric-suffix tables (e.g. ``p_ks_000020``) are grouped.
|
|
27
|
+
Named variants like ``p_ks_market`` are treated individually.
|
|
28
|
+
"""
|
|
29
|
+
if table_name.startswith("p_ks_"):
|
|
30
|
+
suffix = table_name[len("p_ks_"):]
|
|
31
|
+
if suffix.isdigit():
|
|
32
|
+
return "p_ks"
|
|
33
|
+
return None
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def label_all_tables(
    output_dir: str = "database_structure",
    overwrite: bool = False,
) -> list[str]:
    """Generate AI labels for every table and save as JSON files.

    Tables sharing a common prefix (e.g. ``p_ks_*``) are labeled once
    using the union of their columns, saved as ``{prefix}.json``.

    Args:
        output_dir: Root output directory.
        overwrite: If False, skip tables whose label file already exists.

    Returns:
        list[str]: Paths to the saved label files.
    """
    structure = load_structure(output_dir=output_dir)

    # Column union per group key. Dicts preserve insertion order, so the
    # first-seen column order is kept, with O(1) membership checks instead
    # of the O(n) list scan per column.
    grouped_columns: dict[str, dict[str, None]] = {}
    normal_tables: dict[str, list[str]] = {}

    for table, columns in structure.items():
        gk = _group_key(table)
        if gk is None:
            normal_tables[table] = columns
        else:
            grouped_columns.setdefault(gk, {}).update(dict.fromkeys(columns))

    resolved_dir = output_dir
    if not os.path.isabs(resolved_dir):
        resolved_dir = os.path.join(_project_root(), resolved_dir)
    labels_dir = os.path.join(resolved_dir, "labels")

    # Normal tables first, then one entry per group (same order as before).
    targets = {
        **normal_tables,
        **{gk: list(cols) for gk, cols in grouped_columns.items()},
    }
    paths = []
    created = 0
    skipped = 0
    total = len(targets)
    for i, (name, columns) in enumerate(targets.items(), 1):
        label_file = os.path.join(labels_dir, f"{name}.json")
        already_exists = os.path.exists(label_file)

        if not overwrite and already_exists:
            skipped += 1
            print(f"[{i}/{total}] skip (exists): {name}")
            paths.append(label_file)
            continue

        print(f"[{i}/{total}] labeling: {name}")
        path = save_labels(
            name, columns,
            output_dir=output_dir, overwrite=overwrite,
        )
        paths.append(path)
        created += 1
    print(f"done: {created} created, {skipped} skipped (total {total})")
    return paths
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
def load_labels(
    table_name: str,
    output_dir: str = "database_structure",
) -> dict:
    """Load a saved label JSON file for a table.

    For grouped tables (e.g. ``p_ks_000020``), loads the shared label
    file (``p_ks.json``) instead.

    Args:
        table_name: Name of the target table.
        output_dir: Root output directory.

    Returns:
        dict: ``{column_name: label}`` mapping.

    Raises:
        FileNotFoundError: If the label file does not exist.
    """
    base_dir = (
        output_dir
        if os.path.isabs(output_dir)
        else os.path.join(_project_root(), output_dir)
    )

    # Grouped tables share one label file named after the group key.
    file_stem = _group_key(table_name) or table_name
    labels_path = os.path.join(base_dir, "labels", f"{file_stem}.json")
    with open(labels_path, "r", encoding="utf-8") as fh:
        return json.load(fh)
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
def get_labeled_table(
    table_name: str,
    option_label: bool = True,
    output_dir: str = "database_structure",
) -> pd.DataFrame:
    """Fetch a table and optionally rename columns to Korean labels.

    Args:
        table_name: Name of the target table.
        option_label: If True, rename columns using the label JSON file.
        output_dir: Root output directory containing label files.

    Returns:
        pd.DataFrame: Table data with original or labeled column names.
    """
    df = pd.read_sql(f"SELECT * FROM `{table_name}`", get_engine())

    if not option_label:
        return df

    labels = load_labels(table_name, output_dir=output_dir)
    # Rename only columns that have a non-empty label.
    mapping = {col: labels[col] for col in df.columns if labels.get(col)}
    return df.rename(columns=mapping)
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
"""Database configuration loader.
|
|
2
|
+
|
|
3
|
+
Reads MySQL connection parameters from environment variables:
|
|
4
|
+
DB_HOST, DB_PORT, DB_USER, DB_PASSWORD, DB_NAME.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import os
|
|
8
|
+
|
|
9
|
+
from dotenv import load_dotenv
|
|
10
|
+
|
|
11
|
+
load_dotenv()
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def load_db_config() -> dict:
    """Load MySQL connection configuration from environment variables.

    Required variables::

        DB_HOST
        DB_PORT
        DB_USER
        DB_PASSWORD
        DB_NAME

    Returns:
        dict with keys: host, port, user, password, database.

    Raises:
        EnvironmentError: If any required variable is missing.
    """
    env_names = {
        "host": "DB_HOST",
        "port": "DB_PORT",
        "user": "DB_USER",
        "password": "DB_PASSWORD",
        "database": "DB_NAME",
    }

    values = {key: os.getenv(var) for key, var in env_names.items()}
    absent = [env_names[key] for key, val in values.items() if val is None]

    if absent:
        raise EnvironmentError(
            f"Missing environment variables: {', '.join(absent)}"
        )

    # Port arrives as a string from the environment; callers expect an int.
    values["port"] = int(values["port"])
    return values
|