yeonjae-universal-data-storage 1.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- universal_data_storage/__init__.py +3 -0
- universal_data_storage/exceptions.py +92 -0
- universal_data_storage/models.py +248 -0
- universal_data_storage/service.py +504 -0
- yeonjae_universal_data_storage-1.0.1.dist-info/METADATA +39 -0
- yeonjae_universal_data_storage-1.0.1.dist-info/RECORD +8 -0
- yeonjae_universal_data_storage-1.0.1.dist-info/WHEEL +5 -0
- yeonjae_universal_data_storage-1.0.1.dist-info/top_level.txt +1 -0
@@ -0,0 +1,92 @@
|
|
1
|
+
"""
|
2
|
+
Universal Data Storage 예외 클래스
|
3
|
+
|
4
|
+
데이터 저장과 관련된 예외들을 정의합니다.
|
5
|
+
"""
|
6
|
+
|
7
|
+
from typing import Dict, Any, Optional
|
8
|
+
|
9
|
+
|
10
|
+
def _rebuild_exception(cls, args, state):
    """Recreate a storage exception from saved state without calling ``__init__``."""
    exc = cls.__new__(cls)
    exc.args = args
    exc.__dict__.update(state)
    return exc


class DataStorageException(Exception):
    """Base class for all data-storage errors.

    Attributes:
        message: Human-readable summary; also the text of ``str(exc)``.
        details: Structured context about the failure (empty dict when
            none was supplied).
    """

    def __init__(self, message: str, details: Optional[Dict[str, Any]] = None):
        """Store the message and optional details.

        Args:
            message: Human-readable description of the failure.
            details: Optional mapping with extra context.
        """
        self.message = message
        self.details = details or {}
        super().__init__(self.message)

    def __reduce__(self):
        """Support ``copy`` and ``pickle`` for this class and its subclasses.

        The default ``BaseException.__reduce__`` rebuilds the object by
        calling ``type(self)(*self.args)``, i.e. with the message only.
        Subclasses whose ``__init__`` takes different positional arguments
        would then raise ``TypeError``, so the instance is rebuilt from its
        saved attributes instead.
        """
        return (_rebuild_exception, (type(self), self.args, dict(self.__dict__)))
|
17
|
+
|
18
|
+
|
19
|
+
class DatabaseConnectionException(DataStorageException):
    """Database connection error.

    Attributes:
        original_error: The underlying exception that was wrapped.
    """

    def __init__(self, original_error: Exception):
        """Wrap the underlying connection error.

        Args:
            original_error: The exception raised by the failed connection attempt.
        """
        # Keep the exception object itself; ``details`` only carries its
        # type name and text, which is not enough to inspect or re-raise it.
        self.original_error = original_error
        details = {
            "error_type": type(original_error).__name__,
            "error_message": str(original_error),
        }
        super().__init__("Database connection failed", details)
|
29
|
+
|
30
|
+
|
31
|
+
class DuplicateDataException(DataStorageException):
    """Duplicate data error: commit data already exists for the given hash."""

    def __init__(self, commit_hash: str, existing_data: Dict[str, Any]):
        """Build the error for a duplicated commit.

        Args:
            commit_hash: Hash of the commit that was found to be a duplicate.
            existing_data: The data already present for that hash.
        """
        super().__init__(
            f"Duplicate commit data found for hash: {commit_hash}",
            {"commit_hash": commit_hash, "existing_data": existing_data},
        )
|
41
|
+
|
42
|
+
|
43
|
+
class StorageValidationException(DataStorageException):
    """Validation error for data that is about to be stored."""

    def __init__(self, field: str, value: Any, reason: str):
        """Build the error for a field that failed validation.

        Args:
            field: Name of the offending field.
            value: The rejected value.
            reason: Why the value was rejected.
        """
        context = {"field": field, "value": value, "reason": reason}
        super().__init__(
            f"Storage validation failed for field '{field}': {reason}",
            context,
        )
|
54
|
+
|
55
|
+
|
56
|
+
class StorageOperationException(DataStorageException):
    """Storage operation error.

    Attributes:
        original_error: The underlying exception that was wrapped.
    """

    def __init__(self, operation: str, original_error: Exception):
        """Wrap the error raised by a storage operation.

        Args:
            operation: Name of the operation that failed.
            original_error: The exception raised by that operation.
        """
        # Keep the exception object itself; ``details`` only carries its
        # type name and text, which is not enough to inspect or re-raise it.
        self.original_error = original_error
        details = {
            "operation": operation,
            "error_type": type(original_error).__name__,
            "error_message": str(original_error),
        }
        super().__init__(f"Storage operation '{operation}' failed", details)
|
67
|
+
|
68
|
+
|
69
|
+
class BatchStorageException(DataStorageException):
    """Batch storage error."""

    def __init__(self, failed_count: int, total_count: int, errors: list):
        """Build the error for a batch in which some items failed.

        Args:
            failed_count: Number of items that failed.
            total_count: Number of items in the batch.
            errors: The individual errors, stored as given under ``details["errors"]``.
        """
        summary = f"Batch storage failed: {failed_count}/{total_count} items failed"
        super().__init__(
            summary,
            {
                "failed_count": failed_count,
                "total_count": total_count,
                "errors": errors,
            },
        )
|
80
|
+
|
81
|
+
|
82
|
+
class CompressionException(DataStorageException):
    """Data compression error.

    Attributes:
        original_error: The underlying exception that was wrapped.
    """

    def __init__(self, data_type: str, original_error: Exception):
        """Wrap the error raised while compressing data.

        Args:
            data_type: Label of the kind of data being compressed.
            original_error: The exception raised by the compression step.
        """
        # Keep the exception object itself; ``details`` only carries its
        # type name and text, which is not enough to inspect or re-raise it.
        self.original_error = original_error
        details = {
            "data_type": data_type,
            "error_type": type(original_error).__name__,
            "error_message": str(original_error),
        }
        super().__init__(f"Data compression failed for {data_type}", details)
|
@@ -0,0 +1,248 @@
|
|
1
|
+
"""Data storage models for webhook events - MVP Version based on design spec."""
|
2
|
+
|
3
|
+
from __future__ import annotations
|
4
|
+
|
5
|
+
from datetime import datetime
|
6
|
+
from typing import Optional, Dict, Any, List
|
7
|
+
from enum import Enum
|
8
|
+
|
9
|
+
from sqlalchemy import (
|
10
|
+
Column, String, DateTime, LargeBinary, Integer,
|
11
|
+
UniqueConstraint, ForeignKey, Text, Index, create_engine
|
12
|
+
)
|
13
|
+
from sqlalchemy.sql import func
|
14
|
+
from sqlalchemy.orm import relationship, declarative_base
|
15
|
+
from pydantic import BaseModel, ConfigDict
|
16
|
+
|
17
|
+
# SQLAlchemy Base 독립적으로 생성
|
18
|
+
Base = declarative_base()
|
19
|
+
|
20
|
+
|
21
|
+
class StorageStatus(str, Enum):
    """Storage status.

    Members are also ``str`` instances, so they compare equal to their
    plain string values.
    """

    SUCCESS = "success"
    FAILED = "failed"
    DUPLICATE = "duplicate"
    PENDING = "pending"
|
27
|
+
|
28
|
+
|
29
|
+
# SQLAlchemy Models (Database Tables)
|
30
|
+
class CommitRecord(Base):
    """Commit table (``commits``) - MVP version."""

    __tablename__ = "commits"

    id = Column(Integer, primary_key=True, autoincrement=True)
    hash = Column(String(40), unique=True, nullable=False, index=True)
    message = Column(Text, nullable=False)
    author = Column(String(255), nullable=False, index=True)
    author_email = Column(String(255), nullable=True)
    timestamp = Column(DateTime, nullable=False, index=True)
    repository = Column(String(255), nullable=False, index=True)
    branch = Column(String(255), nullable=False)
    pusher = Column(String(255), nullable=True)
    commit_count = Column(Integer, nullable=False, default=1)
    created_at = Column(DateTime, nullable=False, default=func.now())

    # Relationship: the diffs belonging to this commit; the ORM cascade
    # ("all, delete-orphan") removes them together with the commit.
    diffs = relationship("DiffRecord", back_populates="commit", cascade="all, delete-orphan")

    def __repr__(self) -> str:
        """Return a short debug representation (id, first 8 hash characters, repository)."""
        # ``hash`` is None on an instance that has not been populated yet;
        # slicing None would make repr() itself raise TypeError.
        short_hash = self.hash[:8] if self.hash else None
        return f"<CommitRecord(id={self.id}, hash={short_hash}, repo={self.repository})>"
|
52
|
+
|
53
|
+
|
54
|
+
class DiffRecord(Base):
    """Diff table (``commit_diffs``) - MVP version: one row per changed file of a commit."""

    __tablename__ = "commit_diffs"

    id = Column(Integer, primary_key=True, autoincrement=True)
    commit_id = Column(Integer, ForeignKey("commits.id", ondelete="CASCADE"), nullable=False)
    file_path = Column(Text, nullable=False)
    additions = Column(Integer, nullable=False, default=0)
    deletions = Column(Integer, nullable=False, default=0)
    # Diff content; the original note says "compressed form", but the column is
    # plain Text - NOTE(review): confirm what callers actually store here.
    changes = Column(Text, nullable=True)
    # Compressed diff data
    diff_patch = Column(LargeBinary, nullable=True)
    # S3 URL (for large files)
    diff_url = Column(String, nullable=True)
    created_at = Column(DateTime, nullable=False, default=func.now())

    # Relationship back to the owning commit
    commit = relationship("CommitRecord", back_populates="diffs")

    # Additional indexes
    __table_args__ = (
        Index('idx_diffs_commit_id', 'commit_id'),
        Index('idx_diffs_file_path', 'file_path'),
    )

    def __repr__(self) -> str:
        """Return a short debug representation (id, commit id, file path)."""
        return f"<DiffRecord(id={self.id}, commit_id={self.commit_id}, file={self.file_path})>"
|
80
|
+
|
81
|
+
|
82
|
+
# Legacy Event Model (기존 호환성 유지)
|
83
|
+
class Event(Base):
    """Database model for GitHub webhook events - Legacy support."""

    __tablename__ = "events"

    id = Column(Integer, primary_key=True, autoincrement=True)
    platform = Column(String, nullable=False, default="github")
    repository = Column(String, nullable=False, index=True)
    commit_sha = Column(String, nullable=False, index=True)
    author_name = Column(String, nullable=True)
    author_email = Column(String, nullable=True)
    timestamp_utc = Column(DateTime, nullable=True)
    ref = Column(String, nullable=True)
    pusher = Column(String, nullable=False, index=True)
    commit_count = Column(Integer, nullable=False, default=1)
    # Compressed diff, or None if stored in S3
    diff_patch = Column(LargeBinary, nullable=True)
    # S3 URL if diff is too large
    diff_url = Column(String, nullable=True)
    added_lines = Column(Integer, nullable=True)
    deleted_lines = Column(Integer, nullable=True)
    files_changed = Column(Integer, nullable=True)
    # JSON string
    payload = Column(String, nullable=False)
    created_at = Column(DateTime, nullable=False, default=func.now())

    # Prevent duplicate events: one row per (repository, commit_sha)
    __table_args__ = (
        UniqueConstraint('repository', 'commit_sha', name='uq_repo_commit'),
    )

    def __repr__(self) -> str:
        """Return a short debug representation (id, repository, first 8 SHA characters)."""
        # ``commit_sha`` is None on an instance that has not been populated yet;
        # slicing None would make repr() itself raise TypeError.
        short_sha = self.commit_sha[:8] if self.commit_sha else None
        return f"<Event(id={self.id}, repo={self.repository}, sha={short_sha})>"
|
113
|
+
|
114
|
+
|
115
|
+
# Pydantic Models (API Request/Response)
|
116
|
+
class CommitData(BaseModel):
    """Input model for commit data."""

    # Required commit identity and content
    commit_hash: str
    message: str
    author: str
    author_email: Optional[str] = None
    timestamp: datetime
    # Where the commit was pushed
    repository: str
    branch: str
    pusher: Optional[str] = None
    commit_count: int = 1

    # from_attributes lets the model be built from attribute-bearing objects;
    # datetimes are JSON-encoded through isoformat().
    model_config = ConfigDict(
        from_attributes=True,
        json_encoders={datetime: lambda v: v.isoformat()},
    )
|
133
|
+
|
134
|
+
|
135
|
+
class DiffData(BaseModel):
    """Input model for the diff of a single file."""

    file_path: str
    additions: int = 0
    deletions: int = 0
    changes: Optional[str] = None
    # Original (raw) diff content
    diff_content: Optional[bytes] = None

    model_config = ConfigDict(from_attributes=True, arbitrary_types_allowed=True)
|
148
|
+
|
149
|
+
|
150
|
+
class StorageResult(BaseModel):
    """Response model describing the result of a storage attempt."""

    success: bool
    status: StorageStatus
    # Database id of the stored commit; None when nothing was stored
    commit_id: Optional[int] = None
    message: str
    timestamp: datetime
    # Free-form extra information about the attempt
    metadata: Dict[str, Any] = {}

    model_config = ConfigDict(from_attributes=True)
|
161
|
+
|
162
|
+
|
163
|
+
class CommitSummary(BaseModel):
    """Summary information for a commit."""

    # Commit identity and content
    id: int
    hash: str
    message: str
    author: str
    timestamp: datetime
    repository: str
    branch: str

    # Aggregates over the commit's diffs
    diff_count: int
    total_additions: int
    total_deletions: int

    model_config = ConfigDict(from_attributes=True)
|
178
|
+
|
179
|
+
|
180
|
+
class DiffSummary(BaseModel):
    """Summary information for a single diff."""

    id: int
    file_path: str
    additions: int
    deletions: int
    # Whether diff content is available for this file
    has_content: bool

    model_config = ConfigDict(from_attributes=True)
|
190
|
+
|
191
|
+
|
192
|
+
# Legacy Models (기존 호환성)
|
193
|
+
class EventCreate(BaseModel):
    """Pydantic model for creating new events (legacy)."""

    repository: str
    commit_sha: str
    event_type: str = "push"
    payload: str
    # Diff bytes and/or the S3 URL of the diff; both are optional
    diff_data: Optional[bytes] = None
    diff_s3_url: Optional[str] = None

    model_config = ConfigDict(from_attributes=True, arbitrary_types_allowed=True)
|
207
|
+
|
208
|
+
|
209
|
+
class EventResponse(BaseModel):
    """Pydantic model for event API responses (legacy)."""

    id: int
    repository: str
    commit_sha: str
    event_type: str
    # Payload as a mapping (EventCreate carries it as a string)
    payload: Dict[str, Any]
    diff_s3_url: Optional[str] = None
    created_at: datetime

    model_config = ConfigDict(from_attributes=True)
|
221
|
+
|
222
|
+
|
223
|
+
# Aggregated Models
|
224
|
+
class CommitWithDiffs(BaseModel):
    """Aggregate model: a commit summary together with its diff summaries."""

    commit: CommitSummary
    diffs: List[DiffSummary]

    model_config = ConfigDict(from_attributes=True)
|
231
|
+
|
232
|
+
|
233
|
+
class BatchStorageResult(BaseModel):
    """Result of a batch storage run."""

    total_commits: int
    successful_commits: int
    failed_commits: int
    # One StorageResult per processed commit
    results: List[StorageResult]
    duration_seconds: float

    @property
    def success_rate(self) -> float:
        """Fraction of commits stored successfully; 0.0 for an empty batch."""
        # Guard against division by zero when nothing was processed
        return self.successful_commits / self.total_commits if self.total_commits else 0.0

    model_config = ConfigDict(from_attributes=True)
|
@@ -0,0 +1,504 @@
|
|
1
|
+
"""Data storage service - MVP Version based on design specification."""
|
2
|
+
|
3
|
+
from __future__ import annotations
|
4
|
+
|
5
|
+
import gzip
|
6
|
+
import json
|
7
|
+
import logging
|
8
|
+
import os
|
9
|
+
from datetime import datetime
|
10
|
+
from io import BytesIO
|
11
|
+
from typing import Any, Dict, List, Optional
|
12
|
+
|
13
|
+
from sqlalchemy import func
|
14
|
+
from sqlalchemy.ext.asyncio import AsyncSession
|
15
|
+
from sqlalchemy.orm import Session, selectinload
|
16
|
+
from sqlalchemy.future import select
|
17
|
+
|
18
|
+
from modules.data_storage.models import (
|
19
|
+
Event, CommitRecord, DiffRecord,
|
20
|
+
CommitData, DiffData, StorageResult, StorageStatus,
|
21
|
+
CommitSummary, DiffSummary, CommitWithDiffs, BatchStorageResult
|
22
|
+
)
|
23
|
+
from universal_git_data_parser.models import DiffData as GitDiffData
|
24
|
+
# Database configuration - standalone implementation
|
25
|
+
from sqlalchemy import create_engine
|
26
|
+
from sqlalchemy.orm import sessionmaker, declarative_base
|
27
|
+
from sqlalchemy.ext.asyncio import create_async_engine, AsyncSession, async_sessionmaker
|
28
|
+
import os
|
29
|
+
|
30
|
+
# Create base for models
|
31
|
+
Base = declarative_base()
|
32
|
+
|
33
|
+
# One Engine (and therefore one connection pool) per database URL, created lazily.
_SYNC_ENGINES: Dict[str, Any] = {}


def get_session():
    """Get database session - standalone implementation.

    The URL is read from the ``DATABASE_URL`` environment variable and
    defaults to ``sqlite:///./test.db``. The Engine for a URL is created
    once and reused: building a new Engine on every call would also build a
    new connection pool each time, none of which is ever disposed.

    Returns:
        A new ``Session`` bound to the cached Engine, created with
        ``autocommit=False`` and ``autoflush=False``.
    """
    database_url = os.getenv("DATABASE_URL", "sqlite:///./test.db")

    engine = _SYNC_ENGINES.get(database_url)
    if engine is None:
        # setdefault keeps the first Engine if two callers get here together
        engine = _SYNC_ENGINES.setdefault(database_url, create_engine(database_url))

    session_factory = sessionmaker(autocommit=False, autoflush=False, bind=engine)
    return session_factory()
|
39
|
+
|
40
|
+
async def get_async_session():
    """Get async database session - standalone implementation.

    The URL is read from the ``ASYNC_DATABASE_URL`` environment variable and
    defaults to ``sqlite+aiosqlite:///./test.db``. A new async engine and
    session factory are built on every call.

    Returns:
        A new ``AsyncSession``. This is a coroutine function, so callers
        must ``await`` it to obtain the session.
    """
    url = os.getenv("ASYNC_DATABASE_URL", "sqlite+aiosqlite:///./test.db")
    session_factory = async_sessionmaker(create_async_engine(url), class_=AsyncSession)
    return session_factory()
|
46
|
+
|
47
|
+
# Simplified S3Client for standalone operation
|
48
|
+
class S3Client:
    """Simplified S3 client for standalone operation.

    Nothing is actually uploaded: the methods only return the URL the
    object would have.
    """

    def __init__(self):
        self.configured = False

    def upload_file(self, key: str, data: bytes) -> str:
        """Return the ``s3://bucket/<key>`` URL for ``key``; ``data`` is not used."""
        return f"s3://bucket/{key}"

    async def upload_diff(self, key: str, data: bytes) -> str:
        """Async counterpart of ``upload_file``.

        ``LegacyDataStorageService`` awaits ``s3_client.upload_diff(...)``;
        without this method that call fails with AttributeError.
        """
        return self.upload_file(key, data)
|
54
|
+
|
55
|
+
# Simple logging class
|
56
|
+
class ModuleIOLogger:
    """Simple input/output logger; every logging method is a no-op.

    The signatures accept the calls made by ``LegacyDataStorageService``:
    a ``metadata=`` keyword, ``log_output`` without ``result``, and
    ``log_error``. The earlier, narrower signatures raised TypeError or
    AttributeError on those calls.
    """

    def __init__(self, module_name: str):
        self.module_name = module_name

    def log_input(
        self,
        operation: str,
        data: Optional[dict] = None,
        metadata: Optional[dict] = None,
    ):
        """Record the input of ``operation`` (no-op)."""
        pass

    def log_output(
        self,
        operation: str,
        result: Optional[dict] = None,
        metadata: Optional[dict] = None,
    ):
        """Record the output of ``operation`` (no-op)."""
        pass

    def log_error(
        self,
        operation: str,
        error: Exception,
        metadata: Optional[dict] = None,
    ):
        """Record an error raised by ``operation`` (no-op)."""
        pass
|
65
|
+
|
66
|
+
logger = logging.getLogger(__name__)
|
67
|
+
|
68
|
+
GZIP_THRESHOLD = 256 * 1024 # 256 KiB
|
69
|
+
|
70
|
+
|
71
|
+
class DataStorageManager:
    """MVP data storage manager - implementation based on the design document.

    A manager built with an explicit session uses that session for every
    call and never closes it. A manager built without one opens a
    short-lived session from ``get_session()`` for each public call and
    releases it afterwards.
    """

    def __init__(self, db_session: Optional[Session] = None):
        """Initialise the manager.

        Args:
            db_session: Database session (optional, intended for tests).
        """
        self.db_session = db_session
        self.s3_client = S3Client() if self._s3_configured() else None
        self.logger = logging.getLogger(__name__)

    def _run_with_session(self, operation, *args):
        """Call ``operation(*args)`` with ``self.db_session`` guaranteed to be set.

        When no session was injected, a temporary one is bound for the
        duration of the call only, so the manager never stays attached to a
        session that has already been closed.
        """
        if self.db_session is not None:
            return operation(*args)

        with get_session() as session:
            self.db_session = session
            try:
                return operation(*args)
            finally:
                self.db_session = None

    def store_commit(
        self,
        commit_data: CommitData,
        diff_data: List[DiffData]
    ) -> StorageResult:
        """MVP: store commit data together with its diff data.

        Args:
            commit_data: Commit information.
            diff_data: List of per-file diff information.

        Returns:
            StorageResult: Outcome of the attempt. Errors are not raised;
            they are logged and reported as a FAILED result.
        """
        start_time = datetime.now()

        try:
            return self._run_with_session(
                self._store_commit_sync, commit_data, diff_data, start_time
            )
        except Exception as e:
            self.logger.error(f"Failed to store commit {commit_data.commit_hash}: {e}")
            return StorageResult(
                success=False,
                status=StorageStatus.FAILED,
                message=f"Storage failed: {str(e)}",
                timestamp=datetime.now()
            )

    def _prepare_diff_payload(self, commit_data: CommitData, diff: DiffData):
        """Decide where one diff's content is stored.

        Returns:
            A ``(diff_patch, diff_url)`` pair: compressed bytes for the DB
            column, or the S3 URL, or ``(None, None)`` when the diff has no
            content.
        """
        if not diff.diff_content:
            return None, None

        compressed_data = self._compress_bytes(diff.diff_content)

        if len(compressed_data) <= GZIP_THRESHOLD:
            return compressed_data, None

        if self.s3_client:
            # Upload to S3
            key = f"{commit_data.repository}/{commit_data.commit_hash}/{diff.file_path}.patch.gz"
            diff_url = self._upload_to_s3_sync(key, compressed_data)
            if diff_url is not None:
                return None, diff_url
            # A failed upload returns None; keep the content in the DB
            # instead of silently storing neither the patch nor a URL.
            self.logger.warning(
                f"S3 upload failed, storing large diff in DB: {len(compressed_data)} bytes"
            )
            return compressed_data, None

        # Without S3 the diff goes to the DB (with a warning)
        self.logger.warning(
            f"Large diff stored in DB (S3 not configured): {len(compressed_data)} bytes"
        )
        return compressed_data, None

    def _store_commit_sync(
        self,
        commit_data: CommitData,
        diff_data: List[DiffData],
        start_time: datetime
    ) -> StorageResult:
        """Synchronous commit storage; requires ``self.db_session`` to be set.

        Raises:
            Exception: Any error raised while storing; the session is rolled
                back before the error is re-raised.
        """
        try:
            # 1. Check for a duplicate commit
            if self._is_duplicate_commit(commit_data.commit_hash):
                return StorageResult(
                    success=False,
                    status=StorageStatus.DUPLICATE,
                    message="Commit already exists",
                    timestamp=datetime.now(),
                    metadata={"commit_hash": commit_data.commit_hash}
                )

            # 2. Create the commit record
            commit_record = CommitRecord(
                hash=commit_data.commit_hash,
                message=commit_data.message,
                author=commit_data.author,
                author_email=commit_data.author_email,
                timestamp=commit_data.timestamp,
                repository=commit_data.repository,
                branch=commit_data.branch,
                pusher=commit_data.pusher,
                commit_count=commit_data.commit_count
            )

            self.db_session.add(commit_record)
            self.db_session.flush()  # flush so the record gets its id

            # 3. Create and add the diff records
            total_additions = 0
            total_deletions = 0

            for diff in diff_data:
                compressed_diff, diff_url = self._prepare_diff_payload(commit_data, diff)

                self.db_session.add(DiffRecord(
                    commit_id=commit_record.id,
                    file_path=diff.file_path,
                    additions=diff.additions,
                    deletions=diff.deletions,
                    changes=diff.changes,
                    diff_patch=compressed_diff,
                    diff_url=diff_url
                ))
                total_additions += diff.additions
                total_deletions += diff.deletions

            # 4. Commit the transaction
            self.db_session.commit()

            # 5. Log success and return the result
            duration = (datetime.now() - start_time).total_seconds()
            self.logger.info(
                f"Stored commit {commit_data.commit_hash}: "
                f"files={len(diff_data)}, +{total_additions}/-{total_deletions}, "
                f"duration={duration:.2f}s"
            )

            return StorageResult(
                success=True,
                status=StorageStatus.SUCCESS,
                commit_id=commit_record.id,
                message="Commit stored successfully",
                timestamp=datetime.now(),
                metadata={
                    "commit_hash": commit_data.commit_hash,
                    "files_changed": len(diff_data),
                    "total_additions": total_additions,
                    "total_deletions": total_deletions,
                    "duration_seconds": duration
                }
            )

        except Exception:
            self.db_session.rollback()
            raise

    def _is_duplicate_commit(self, commit_hash: str) -> bool:
        """Return True when a commit with this hash is already stored."""
        result = self.db_session.query(CommitRecord).filter(
            CommitRecord.hash == commit_hash
        ).first()
        return result is not None

    def get_commit_by_hash(self, commit_hash: str) -> Optional[CommitWithDiffs]:
        """Look up a commit and its diffs by commit hash.

        Returns:
            The commit with its diff summaries, or None when no commit has
            that hash.
        """
        return self._run_with_session(self._get_commit_by_hash_sync, commit_hash)

    def _get_commit_by_hash_sync(self, commit_hash: str) -> Optional[CommitWithDiffs]:
        """Implementation of ``get_commit_by_hash``; requires ``self.db_session``."""
        commit = self.db_session.query(CommitRecord).filter(
            CommitRecord.hash == commit_hash
        ).first()

        if not commit:
            return None

        # Fetch the commit's diff rows
        diffs = self.db_session.query(DiffRecord).filter(
            DiffRecord.commit_id == commit.id
        ).all()

        # Build the summary objects
        commit_summary = CommitSummary(
            id=commit.id,
            hash=commit.hash,
            message=commit.message,
            author=commit.author,
            timestamp=commit.timestamp,
            repository=commit.repository,
            branch=commit.branch,
            diff_count=len(diffs),
            total_additions=sum(d.additions for d in diffs),
            total_deletions=sum(d.deletions for d in diffs)
        )

        diff_summaries = [
            DiffSummary(
                id=diff.id,
                file_path=diff.file_path,
                additions=diff.additions,
                deletions=diff.deletions,
                has_content=bool(diff.diff_patch or diff.diff_url)
            )
            for diff in diffs
        ]

        return CommitWithDiffs(
            commit=commit_summary,
            diffs=diff_summaries
        )

    def get_recent_commits(
        self,
        repository: str,
        limit: int = 10
    ) -> List[CommitSummary]:
        """List the most recent commits of a repository, newest first.

        Args:
            repository: Repository name to filter on.
            limit: Maximum number of commits to return.
        """
        return self._run_with_session(self._get_recent_commits_sync, repository, limit)

    def _get_recent_commits_sync(self, repository: str, limit: int) -> List[CommitSummary]:
        """Implementation of ``get_recent_commits``; requires ``self.db_session``."""
        commits = self.db_session.query(CommitRecord).filter(
            CommitRecord.repository == repository
        ).order_by(
            CommitRecord.timestamp.desc()
        ).limit(limit).all()

        results = []
        for commit in commits:
            # Diff statistics for this commit (one aggregate query per commit)
            diff_stats = self.db_session.query(
                func.count(DiffRecord.id).label('diff_count'),
                func.sum(DiffRecord.additions).label('total_additions'),
                func.sum(DiffRecord.deletions).label('total_deletions')
            ).filter(
                DiffRecord.commit_id == commit.id
            ).first()

            results.append(CommitSummary(
                id=commit.id,
                hash=commit.hash,
                message=commit.message,
                author=commit.author,
                timestamp=commit.timestamp,
                repository=commit.repository,
                branch=commit.branch,
                # The sums are NULL when the commit has no diffs
                diff_count=diff_stats.diff_count or 0,
                total_additions=diff_stats.total_additions or 0,
                total_deletions=diff_stats.total_deletions or 0
            ))

        return results

    def _compress_bytes(self, data: bytes) -> bytes:
        """Gzip-compress ``data`` and return the compressed bytes."""
        return gzip.compress(data)

    def _upload_to_s3_sync(self, key: str, data: bytes) -> Optional[str]:
        """Synchronous S3 upload.

        Currently nothing is uploaded: the method only returns the URL the
        object would have in the ``AWS_S3_BUCKET`` bucket (default
        ``codeping-diffs``). The real S3 client may be asynchronous, so a
        synchronous variant is needed; a real implementation has to adapt
        ``s3_client``.

        Returns:
            The object URL, or None on failure.
        """
        try:
            return f"s3://{os.getenv('AWS_S3_BUCKET', 'codeping-diffs')}/{key}"
        except Exception as e:
            self.logger.error(f"Failed to upload to S3: {e}")
            return None

    def _s3_configured(self) -> bool:
        """Return True when every required AWS environment variable is set and non-empty."""
        required_vars = ["AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY", "AWS_S3_BUCKET"]
        return all(os.environ.get(var) for var in required_vars)
|
336
|
+
|
337
|
+
|
338
|
+
class LegacyDataStorageService:
    """Legacy event storage service - kept for backward compatibility."""

    def __init__(self):
        self.s3_client = S3Client() if self._s3_configured() else None

        # Input/output logger
        self.io_logger = ModuleIOLogger("DataStorage")

    def store_event_with_diff(
        self,
        payload: Dict[str, Any],
        headers: Dict[str, str],
        diff_data: GitDiffData
    ) -> None:
        """Store a webhook event with its diff (legacy path).

        The async implementation is driven with ``asyncio.run``, so this
        method must not be called from code that is already running inside
        an event loop.

        Args:
            payload: Webhook payload.
            headers: Webhook request headers.
            diff_data: Parsed diff information for the event.

        Raises:
            Exception: Any error raised while storing is logged through the
                IO logger and re-raised.
        """
        # Input logging
        self.io_logger.log_input(
            "store_event_with_diff",
            data={"payload": payload, "diff_data": diff_data},
            metadata={
                "repository": diff_data.repository,
                "commit_sha": diff_data.commit_sha,
                "headers_count": len(headers),
                "payload_size": len(str(payload)),
                "diff_size": len(diff_data.diff_content) if diff_data.diff_content else 0,
                "added_lines": diff_data.added_lines,
                "deleted_lines": diff_data.deleted_lines,
                "files_changed": diff_data.files_changed
            }
        )

        try:
            import asyncio
            asyncio.run(self._store_event_async(payload, headers, diff_data))

            # Output logging (success)
            self.io_logger.log_output(
                "store_event_with_diff",
                metadata={
                    "storage_success": True,
                    "repository": diff_data.repository,
                    "commit_sha": diff_data.commit_sha,
                    "storage_location": "determined_in_async"
                }
            )

        except Exception as e:
            # Error logging
            self.io_logger.log_error(
                "store_event_with_diff",
                e,
                metadata={
                    "repository": diff_data.repository,
                    "commit_sha": diff_data.commit_sha
                }
            )
            raise

    def _build_event_data(
        self,
        payload: Dict[str, Any],
        headers: Dict[str, str],
        diff_data: GitDiffData,
        diff_patch: Optional[bytes],
        diff_url: Optional[str]
    ) -> Dict[str, Any]:
        """Build the keyword arguments used to create the ``Event`` row."""
        # Header names are compared case-insensitively; anything that is not
        # a GitHub event is labelled "gitlab".
        platform = "github" if "x-github-event" in {k.lower() for k in headers} else "gitlab"

        # ``pusher`` / ``commits`` may be missing or explicitly null in the
        # payload; treat both cases as empty instead of crashing.
        pusher = payload.get("pusher") or {}
        commits = payload.get("commits") or []

        return {
            "platform": platform,
            "repository": diff_data.repository,
            "commit_sha": diff_data.commit_sha,
            "author_name": pusher.get("name"),
            "author_email": pusher.get("email"),
            "timestamp_utc": None,
            "ref": payload.get("ref"),
            "pusher": pusher.get("name", "unknown"),
            "commit_count": len(commits),
            "diff_patch": diff_patch,
            "diff_url": diff_url,
            "added_lines": diff_data.added_lines,
            "deleted_lines": diff_data.deleted_lines,
            "files_changed": diff_data.files_changed,
            "payload": json.dumps(payload),
        }

    async def _store_event_async(
        self,
        payload: Dict[str, Any],
        headers: Dict[str, str],
        diff_data: GitDiffData
    ) -> None:
        """Async implementation of the legacy event storage."""
        # Compression and storage-location decision
        diff_patch = None
        diff_url = None
        gzip_size = "0 KB"

        if diff_data.diff_content:
            compressed_diff = self._compress_bytes(diff_data.diff_content)
            gzip_size = f"{len(compressed_diff) / 1024:.1f} KB"

            if len(compressed_diff) <= GZIP_THRESHOLD:
                diff_patch = compressed_diff
                storage_location = "db"
            elif self.s3_client:
                key = f"{diff_data.repository}/{diff_data.commit_sha}.patch.gz"
                diff_url = await self.s3_client.upload_diff(key, compressed_diff)
                storage_location = "s3"
            else:
                diff_patch = compressed_diff
                storage_location = "db_large"
                logger.warning(
                    "Large diff stored in DB (S3 not configured): %d bytes",
                    len(compressed_diff)
                )
        else:
            storage_location = "none"

        # Prepare the event data
        event_data = self._build_event_data(payload, headers, diff_data, diff_patch, diff_url)

        # Save to the database. get_async_session() is a coroutine function:
        # it must be awaited first, because a coroutine object is not an
        # async context manager.
        async with await get_async_session() as session:
            await self._save_event(session, event_data)

        logger.info(
            "Stored event %s/%s: gzip_size=%s stored_in=%s added=%s deleted=%s files=%s",
            diff_data.repository,
            diff_data.commit_sha,
            gzip_size,
            storage_location,
            diff_data.added_lines or 0,
            diff_data.deleted_lines or 0,
            diff_data.files_changed or 0,
        )

    async def _save_event(self, session: AsyncSession, event_data: Dict[str, Any]) -> None:
        """Insert the event row and commit; roll back and re-raise on failure."""
        event = Event(**event_data)
        session.add(event)

        try:
            await session.commit()
        except Exception:
            await session.rollback()
            raise

    def _compress_bytes(self, data: bytes) -> bytes:
        """Gzip-compress ``data`` and return the compressed bytes."""
        return gzip.compress(data)

    def _s3_configured(self) -> bool:
        """Return True when every required AWS environment variable is set and non-empty."""
        required_vars = ["AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY", "AWS_S3_BUCKET"]
        return all(os.environ.get(var) for var in required_vars)
|
490
|
+
|
491
|
+
|
492
|
+
# 편의 함수들
|
493
|
+
def create_data_storage_manager(db_session: Optional[Session] = None) -> DataStorageManager:
    """Factory function for ``DataStorageManager``.

    Args:
        db_session: Optional database session handed to the manager.

    Returns:
        A new ``DataStorageManager`` instance.
    """
    manager = DataStorageManager(db_session)
    return manager
|
496
|
+
|
497
|
+
|
498
|
+
def store_commit_data(
    commit_data: CommitData,
    diff_data: List[DiffData]
) -> StorageResult:
    """Convenience function: store one commit with a freshly created manager.

    Args:
        commit_data: Commit information.
        diff_data: List of per-file diff information.

    Returns:
        The ``StorageResult`` produced by ``DataStorageManager.store_commit``.
    """
    return create_data_storage_manager().store_commit(commit_data, diff_data)
|
@@ -0,0 +1,39 @@
|
|
1
|
+
Metadata-Version: 2.4
|
2
|
+
Name: yeonjae-universal-data-storage
|
3
|
+
Version: 1.0.1
|
4
|
+
Summary: Universal data storage module for persisting development data
|
5
|
+
Author-email: Yeonjae <dev@example.com>
|
6
|
+
License: MIT
|
7
|
+
Project-URL: Homepage, https://github.com/yeonjae/universal-modules
|
8
|
+
Project-URL: Repository, https://github.com/yeonjae/universal-modules
|
9
|
+
Project-URL: Issues, https://github.com/yeonjae/universal-modules/issues
|
10
|
+
Classifier: Development Status :: 4 - Beta
|
11
|
+
Classifier: Intended Audience :: Developers
|
12
|
+
Classifier: License :: OSI Approved :: MIT License
|
13
|
+
Classifier: Programming Language :: Python :: 3
|
14
|
+
Classifier: Programming Language :: Python :: 3.9
|
15
|
+
Classifier: Programming Language :: Python :: 3.10
|
16
|
+
Classifier: Programming Language :: Python :: 3.11
|
17
|
+
Classifier: Programming Language :: Python :: 3.12
|
18
|
+
Requires-Python: >=3.9
|
19
|
+
Description-Content-Type: text/markdown
|
20
|
+
Requires-Dist: pydantic>=2.0.0
|
21
|
+
Requires-Dist: sqlalchemy>=2.0.0
|
22
|
+
Provides-Extra: dev
|
23
|
+
Requires-Dist: pytest>=7.0.0; extra == "dev"
|
24
|
+
Requires-Dist: pytest-cov>=4.0.0; extra == "dev"
|
25
|
+
Requires-Dist: pytest-asyncio>=0.21.0; extra == "dev"
|
26
|
+
Requires-Dist: black>=23.0.0; extra == "dev"
|
27
|
+
Requires-Dist: isort>=5.12.0; extra == "dev"
|
28
|
+
Requires-Dist: flake8>=6.0.0; extra == "dev"
|
29
|
+
Requires-Dist: mypy>=1.0.0; extra == "dev"
|
30
|
+
|
31
|
+
# Universal data storage
|
32
|
+
|
33
|
+
범용 data storage 모듈
|
34
|
+
|
35
|
+
## 설치
|
36
|
+
|
37
|
+
```bash
|
38
|
+
pip install git+https://github.com/yeonjae-work/universal-modules.git#subdirectory=packages/universal-data-storage
|
39
|
+
```
|
@@ -0,0 +1,8 @@
|
|
1
|
+
universal_data_storage/__init__.py,sha256=UsZXoMuNKys3sVZ9sd3GVOldeKh3XT-DVOLZ6AlRp0w,153
|
2
|
+
universal_data_storage/exceptions.py,sha256=G4NwCx4bnXqLlIdMPPl0uYlSMZNKWg9M2Nh4ouUujMY,2921
|
3
|
+
universal_data_storage/models.py,sha256=01YUrSmihBJI60lQYjV7wQRKZA_UlvH6FXNT5xuHAhc,7342
|
4
|
+
universal_data_storage/service.py,sha256=6a7HWrERw1aOis6eckWBCD3K98YuBTqR0sZwGYYHJf4,18186
|
5
|
+
yeonjae_universal_data_storage-1.0.1.dist-info/METADATA,sha256=gphuhxd38NrEWBWT4y4-N5qqHixsq2mjGUwvM3jHdNk,1464
|
6
|
+
yeonjae_universal_data_storage-1.0.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
7
|
+
yeonjae_universal_data_storage-1.0.1.dist-info/top_level.txt,sha256=hFqUnzMmajixJKMqk83wkf8FTh9ofHDMRpxpXgJYKpY,23
|
8
|
+
yeonjae_universal_data_storage-1.0.1.dist-info/RECORD,,
|
@@ -0,0 +1 @@
|
|
1
|
+
universal_data_storage
|