yeonjae-universal-data-storage 1.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,3 @@
1
+ """Universal Data Storage - 커밋 데이터와 집계 결과를 데이터베이스에 저장하고 관리하는 범용 모듈"""
2
+
3
# Keep in sync with the distribution version declared in the package metadata.
__version__ = "1.0.1"
@@ -0,0 +1,92 @@
1
+ """
2
+ Universal Data Storage 예외 클래스
3
+
4
+ 데이터 저장과 관련된 예외들을 정의합니다.
5
+ """
6
+
7
+ from typing import Dict, Any, Optional
8
+
9
+
10
class DataStorageException(Exception):
    """Base class for the data storage exceptions defined in this module.

    Attributes:
        message: Human-readable description of the failure.
        details: Structured context for the failure; an empty dict when the
            caller passes nothing (or a falsy value).
    """

    def __init__(self, message: str, details: Optional[Dict[str, Any]] = None):
        # Passing the message to Exception makes str(exc) return it.
        super().__init__(message)
        self.message = message
        self.details = details if details else {}
17
+
18
+
19
class DatabaseConnectionException(DataStorageException):
    """Signals that a database connection failed.

    ``details`` records the class name (``error_type``) and text
    (``error_message``) of the error that triggered this exception.
    """

    def __init__(self, original_error: Exception):
        super().__init__(
            "Database connection failed",
            dict(
                error_type=type(original_error).__name__,
                error_message=str(original_error),
            ),
        )
29
+
30
+
31
class DuplicateDataException(DataStorageException):
    """Signals that commit data for a given hash already exists.

    ``details`` carries the offending ``commit_hash`` and the
    ``existing_data`` supplied by the caller.
    """

    def __init__(self, commit_hash: str, existing_data: Dict[str, Any]):
        context = dict(commit_hash=commit_hash, existing_data=existing_data)
        super().__init__(
            f"Duplicate commit data found for hash: {commit_hash}",
            context,
        )
41
+
42
+
43
class StorageValidationException(DataStorageException):
    """Signals that a value failed validation before being stored.

    ``details`` carries the ``field`` name, the rejected ``value`` and the
    ``reason`` it was rejected.
    """

    def __init__(self, field: str, value: Any, reason: str):
        context = dict(field=field, value=value, reason=reason)
        super().__init__(
            f"Storage validation failed for field '{field}': {reason}",
            context,
        )
54
+
55
+
56
class StorageOperationException(DataStorageException):
    """Signals that a named storage operation failed.

    ``details`` carries the ``operation`` name plus the class name
    (``error_type``) and text (``error_message``) of the underlying error.
    """

    def __init__(self, operation: str, original_error: Exception):
        context = dict(
            operation=operation,
            error_type=type(original_error).__name__,
            error_message=str(original_error),
        )
        super().__init__(f"Storage operation '{operation}' failed", context)
67
+
68
+
69
class BatchStorageException(DataStorageException):
    """Signals that some items of a batch storage run failed.

    ``details`` carries ``failed_count``, ``total_count`` and the
    caller-supplied ``errors`` list.
    """

    def __init__(self, failed_count: int, total_count: int, errors: list):
        context = dict(
            failed_count=failed_count,
            total_count=total_count,
            errors=errors,
        )
        super().__init__(
            f"Batch storage failed: {failed_count}/{total_count} items failed",
            context,
        )
80
+
81
+
82
class CompressionException(DataStorageException):
    """Signals that compressing data of a given type failed.

    ``details`` carries the ``data_type`` plus the class name
    (``error_type``) and text (``error_message``) of the underlying error.
    """

    def __init__(self, data_type: str, original_error: Exception):
        context = dict(
            data_type=data_type,
            error_type=type(original_error).__name__,
            error_message=str(original_error),
        )
        super().__init__(f"Data compression failed for {data_type}", context)
@@ -0,0 +1,248 @@
1
+ """Data storage models for webhook events - MVP Version based on design spec."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from datetime import datetime
6
+ from typing import Optional, Dict, Any, List
7
+ from enum import Enum
8
+
9
+ from sqlalchemy import (
10
+ Column, String, DateTime, LargeBinary, Integer,
11
+ UniqueConstraint, ForeignKey, Text, Index, create_engine
12
+ )
13
+ from sqlalchemy.sql import func
14
+ from sqlalchemy.orm import relationship, declarative_base
15
+ from pydantic import BaseModel, ConfigDict
16
+
17
+ # SQLAlchemy Base 독립적으로 생성
18
+ Base = declarative_base()
19
+
20
+
21
class StorageStatus(str, Enum):
    """Status of a storage attempt.

    Members are also ``str`` instances, so each compares equal to its
    lowercase value.
    """

    SUCCESS = "success"
    FAILED = "failed"
    DUPLICATE = "duplicate"
    PENDING = "pending"
27
+
28
+
29
+ # SQLAlchemy Models (Database Tables)
30
class CommitRecord(Base):
    """ORM model for the ``commits`` table (MVP version).

    Each row describes one commit. ``hash`` is declared unique, and the
    related ``DiffRecord`` rows are reachable through ``diffs``.
    """

    __tablename__ = "commits"

    id = Column(Integer, primary_key=True, autoincrement=True)
    hash = Column(String(40), unique=True, nullable=False, index=True)
    message = Column(Text, nullable=False)
    author = Column(String(255), nullable=False, index=True)
    author_email = Column(String(255), nullable=True)
    timestamp = Column(DateTime, nullable=False, index=True)
    repository = Column(String(255), nullable=False, index=True)
    branch = Column(String(255), nullable=False)
    pusher = Column(String(255), nullable=True)
    commit_count = Column(Integer, nullable=False, default=1)
    created_at = Column(DateTime, nullable=False, default=func.now())

    # Relationship to the per-file diffs of this commit.
    diffs = relationship("DiffRecord", back_populates="commit", cascade="all, delete-orphan")

    def __repr__(self) -> str:
        # ``hash`` can still be None on an instance whose attributes have not
        # been populated; slicing None would make repr() itself raise.
        short_hash = self.hash[:8] if self.hash is not None else None
        return f"<CommitRecord(id={self.id}, hash={short_hash}, repo={self.repository})>"
52
+
53
+
54
class DiffRecord(Base):
    """ORM model for the ``commit_diffs`` table (MVP version).

    Each row holds the change statistics for one file of a commit, plus
    either the diff bytes (``diff_patch``) or a URL to them (``diff_url``).
    """

    __tablename__ = "commit_diffs"

    # Secondary indexes on the owning commit and on the file path.
    __table_args__ = (
        Index("idx_diffs_commit_id", "commit_id"),
        Index("idx_diffs_file_path", "file_path"),
    )

    id = Column(Integer, primary_key=True, autoincrement=True)
    commit_id = Column(
        Integer,
        ForeignKey("commits.id", ondelete="CASCADE"),
        nullable=False,
    )
    file_path = Column(Text, nullable=False)
    additions = Column(Integer, nullable=False, default=0)
    deletions = Column(Integer, nullable=False, default=0)
    # Diff content (compressed form).
    changes = Column(Text, nullable=True)
    # Compressed diff data.
    diff_patch = Column(LargeBinary, nullable=True)
    # S3 URL (for large files).
    diff_url = Column(String, nullable=True)
    created_at = Column(DateTime, nullable=False, default=func.now())

    # Back-reference to the owning commit.
    commit = relationship("CommitRecord", back_populates="diffs")

    def __repr__(self) -> str:
        return (
            f"<DiffRecord(id={self.id}, commit_id={self.commit_id}, "
            f"file={self.file_path})>"
        )
80
+
81
+
82
+ # Legacy Event Model (기존 호환성 유지)
83
class Event(Base):
    """ORM model for the ``events`` table (legacy webhook event storage).

    A unique constraint on ``(repository, commit_sha)`` prevents the same
    event from being stored twice.
    """

    __tablename__ = "events"

    id = Column(Integer, primary_key=True, autoincrement=True)
    platform = Column(String, nullable=False, default="github")
    repository = Column(String, nullable=False, index=True)
    commit_sha = Column(String, nullable=False, index=True)
    author_name = Column(String, nullable=True)
    author_email = Column(String, nullable=True)
    timestamp_utc = Column(DateTime, nullable=True)
    ref = Column(String, nullable=True)
    pusher = Column(String, nullable=False, index=True)
    commit_count = Column(Integer, nullable=False, default=1)
    diff_patch = Column(LargeBinary, nullable=True)  # Compressed diff or None if stored in S3
    diff_url = Column(String, nullable=True)  # S3 URL if diff is too large
    added_lines = Column(Integer, nullable=True)
    deleted_lines = Column(Integer, nullable=True)
    files_changed = Column(Integer, nullable=True)
    payload = Column(String, nullable=False)  # JSON string
    created_at = Column(DateTime, nullable=False, default=func.now())

    # Prevent duplicate events
    __table_args__ = (
        UniqueConstraint('repository', 'commit_sha', name='uq_repo_commit'),
    )

    def __repr__(self) -> str:
        # ``commit_sha`` can still be None on an instance whose attributes
        # have not been populated; slicing None would make repr() raise.
        short_sha = self.commit_sha[:8] if self.commit_sha is not None else None
        return f"<Event(id={self.id}, repo={self.repository}, sha={short_sha})>"
113
+
114
+
115
+ # Pydantic Models (API Request/Response)
116
class CommitData(BaseModel):
    """Input model describing a commit to be stored."""

    # Datetimes are rendered in ISO 8601 form by the configured JSON encoder.
    model_config = ConfigDict(
        from_attributes=True,
        json_encoders={datetime: lambda value: value.isoformat()},
    )

    commit_hash: str
    message: str
    author: str
    author_email: Optional[str] = None
    timestamp: datetime
    repository: str
    branch: str
    pusher: Optional[str] = None
    commit_count: int = 1
133
+
134
+
135
class DiffData(BaseModel):
    """Input model describing the diff of one file."""

    model_config = ConfigDict(from_attributes=True, arbitrary_types_allowed=True)

    file_path: str
    additions: int = 0
    deletions: int = 0
    changes: Optional[str] = None
    # Original (raw) diff content.
    diff_content: Optional[bytes] = None
148
+
149
+
150
class StorageResult(BaseModel):
    """Response model reporting the result of a storage attempt."""

    model_config = ConfigDict(from_attributes=True)

    success: bool
    status: StorageStatus
    commit_id: Optional[int] = None
    message: str
    timestamp: datetime
    metadata: Dict[str, Any] = {}
161
+
162
+
163
class CommitSummary(BaseModel):
    """Summary of a stored commit, including aggregate diff statistics."""

    model_config = ConfigDict(from_attributes=True)

    id: int
    hash: str
    message: str
    author: str
    timestamp: datetime
    repository: str
    branch: str
    diff_count: int
    total_additions: int
    total_deletions: int
178
+
179
+
180
class DiffSummary(BaseModel):
    """Summary of a stored per-file diff."""

    model_config = ConfigDict(from_attributes=True)

    id: int
    file_path: str
    additions: int
    deletions: int
    has_content: bool
190
+
191
+
192
+ # Legacy Models (기존 호환성)
193
class EventCreate(BaseModel):
    """Legacy input model for creating a new event."""

    model_config = ConfigDict(from_attributes=True, arbitrary_types_allowed=True)

    repository: str
    commit_sha: str
    event_type: str = "push"
    payload: str
    diff_data: Optional[bytes] = None
    diff_s3_url: Optional[str] = None
207
+
208
+
209
class EventResponse(BaseModel):
    """Legacy response model describing a stored event."""

    model_config = ConfigDict(from_attributes=True)

    id: int
    repository: str
    commit_sha: str
    event_type: str
    payload: Dict[str, Any]
    diff_s3_url: Optional[str] = None
    created_at: datetime
221
+
222
+
223
+ # Aggregated Models
224
class CommitWithDiffs(BaseModel):
    """Aggregate model pairing a commit summary with its diff summaries."""

    model_config = ConfigDict(from_attributes=True)

    commit: CommitSummary
    diffs: List[DiffSummary]
231
+
232
+
233
class BatchStorageResult(BaseModel):
    """Result of a batch storage run."""

    model_config = ConfigDict(from_attributes=True)

    total_commits: int
    successful_commits: int
    failed_commits: int
    results: List[StorageResult]
    duration_seconds: float

    @property
    def success_rate(self) -> float:
        """Fraction of commits stored successfully; 0.0 for an empty batch."""
        total = self.total_commits
        return self.successful_commits / total if total != 0 else 0.0
@@ -0,0 +1,504 @@
1
+ """Data storage service - MVP Version based on design specification."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import gzip
6
+ import json
7
+ import logging
8
+ import os
9
+ from datetime import datetime
10
+ from io import BytesIO
11
+ from typing import Any, Dict, List, Optional
12
+
13
+ from sqlalchemy import func
14
+ from sqlalchemy.ext.asyncio import AsyncSession
15
+ from sqlalchemy.orm import Session, selectinload
16
+ from sqlalchemy.future import select
17
+
18
+ from modules.data_storage.models import (
19
+ Event, CommitRecord, DiffRecord,
20
+ CommitData, DiffData, StorageResult, StorageStatus,
21
+ CommitSummary, DiffSummary, CommitWithDiffs, BatchStorageResult
22
+ )
23
+ from universal_git_data_parser.models import DiffData as GitDiffData
24
+ # Database configuration - standalone implementation
25
+ from sqlalchemy import create_engine
26
+ from sqlalchemy.orm import sessionmaker, declarative_base
27
+ from sqlalchemy.ext.asyncio import create_async_engine, AsyncSession, async_sessionmaker
28
+ import os
29
+
30
+ # Create base for models
31
+ Base = declarative_base()
32
+
33
# Session factories keyed by database URL, so each URL gets a single engine
# per process instead of a brand-new engine on every call.
_SYNC_SESSION_FACTORIES: Dict[str, Any] = {}


def get_session():
    """Return a new database session (standalone implementation).

    The database URL is read from the ``DATABASE_URL`` environment variable
    on every call and defaults to ``sqlite:///./test.db``. The engine and
    session factory for a given URL are created once and reused.

    Returns:
        A new session bound to the engine for the current URL.
    """
    database_url = os.getenv("DATABASE_URL", "sqlite:///./test.db")
    factory = _SYNC_SESSION_FACTORIES.get(database_url)
    if factory is None:
        engine = create_engine(database_url)
        factory = sessionmaker(autocommit=False, autoflush=False, bind=engine)
        _SYNC_SESSION_FACTORIES[database_url] = factory
    return factory()
39
+
40
async def get_async_session():
    """Build and return a new async database session (standalone implementation).

    The URL comes from the ``ASYNC_DATABASE_URL`` environment variable and
    defaults to ``sqlite+aiosqlite:///./test.db``. A new engine and session
    factory are created on every call.
    """
    async_engine = create_async_engine(
        os.getenv("ASYNC_DATABASE_URL", "sqlite+aiosqlite:///./test.db")
    )
    make_session = async_sessionmaker(async_engine, class_=AsyncSession)
    return make_session()
46
+
47
+ # Simplified S3Client for standalone operation
48
class S3Client:
    """Simplified stand-in S3 client for standalone operation.

    Nothing is uploaded: ``upload_file`` only builds a URL string.
    """

    def __init__(self):
        self.configured = False

    def upload_file(self, key: str, data: bytes) -> str:
        """Return the ``s3://bucket/<key>`` URL for ``key``; ``data`` is unused."""
        return "s3://bucket/{}".format(key)
54
+
55
+ # Simple logging class
56
class ModuleIOLogger:
    """No-op input/output logger for a named module.

    Every ``log_*`` method accepts its arguments and does nothing. The
    signatures cover the positional ``(operation, data)`` form as well as
    the keyword forms (``data=``, ``result=``, ``metadata=``) and the
    ``log_error`` call used elsewhere in this file, so none of those calls
    raise ``TypeError`` or ``AttributeError``.
    """

    def __init__(self, module_name: str):
        self.module_name = module_name

    def log_input(
        self,
        operation: str,
        data: Optional[dict] = None,
        metadata: Optional[dict] = None,
    ):
        """Accept an input record for ``operation``; currently a no-op."""
        pass

    def log_output(
        self,
        operation: str,
        result: Optional[dict] = None,
        metadata: Optional[dict] = None,
    ):
        """Accept an output record for ``operation``; currently a no-op."""
        pass

    def log_error(
        self,
        operation: str,
        error: Exception,
        metadata: Optional[dict] = None,
    ):
        """Accept an error record for ``operation``; currently a no-op."""
        pass
65
+
66
+ logger = logging.getLogger(__name__)
67
+
68
+ GZIP_THRESHOLD = 256 * 1024 # 256 KiB
69
+
70
+
71
class DataStorageManager:
    """MVP data storage manager that persists commits and their per-file diffs.

    A database session may be injected (intended for tests). When none is
    injected, one is obtained from ``get_session`` on first use and kept on
    the instance.
    """

    def __init__(self, db_session: Optional[Session] = None):
        """Initialise the manager.

        Args:
            db_session: Database session to use (optional, for tests).
        """
        self.db_session = db_session
        self.s3_client = S3Client() if self._s3_configured() else None
        self.logger = logging.getLogger(__name__)

    def _ensure_session(self) -> Session:
        """Return the instance's session, creating one if none is set yet."""
        if self.db_session is None:
            self.db_session = get_session()
        return self.db_session

    def store_commit(
        self,
        commit_data: CommitData,
        diff_data: List[DiffData]
    ) -> StorageResult:
        """Store a commit together with its diffs.

        Args:
            commit_data: Commit information.
            diff_data: List of per-file diff information.

        Returns:
            StorageResult: The outcome. Any exception is logged and turned
            into a ``FAILED`` result instead of propagating.
        """
        start_time = datetime.now()

        try:
            if self.db_session is not None:
                return self._store_commit_sync(commit_data, diff_data, start_time)

            # No session yet: open one for this call. It stays assigned to
            # ``self.db_session`` afterwards, as it always has.
            with get_session() as session:
                self.db_session = session
                return self._store_commit_sync(commit_data, diff_data, start_time)

        except Exception as e:
            self.logger.error(f"Failed to store commit {commit_data.commit_hash}: {e}")
            return StorageResult(
                success=False,
                status=StorageStatus.FAILED,
                message=f"Storage failed: {str(e)}",
                timestamp=datetime.now()
            )

    def _store_commit_sync(
        self,
        commit_data: CommitData,
        diff_data: List[DiffData],
        start_time: datetime
    ) -> StorageResult:
        """Synchronous implementation of commit storage.

        Returns a ``DUPLICATE`` result when the hash is already stored;
        otherwise inserts the commit and its diffs and commits the
        transaction. On any exception the session is rolled back and the
        exception is re-raised.
        """
        session = self._ensure_session()

        try:
            # 1. Duplicate check.
            if self._is_duplicate_commit(commit_data.commit_hash):
                return StorageResult(
                    success=False,
                    status=StorageStatus.DUPLICATE,
                    message="Commit already exists",
                    timestamp=datetime.now(),
                    metadata={"commit_hash": commit_data.commit_hash}
                )

            # 2. Create the commit record.
            commit_record = CommitRecord(
                hash=commit_data.commit_hash,
                message=commit_data.message,
                author=commit_data.author,
                author_email=commit_data.author_email,
                timestamp=commit_data.timestamp,
                repository=commit_data.repository,
                branch=commit_data.branch,
                pusher=commit_data.pusher,
                commit_count=commit_data.commit_count
            )

            session.add(commit_record)
            session.flush()  # flush so the commit's ID is generated

            # 3. Create and add the diff records.
            total_additions = 0
            total_deletions = 0

            for diff in diff_data:
                compressed_diff, diff_url = self._prepare_diff_storage(commit_data, diff)

                session.add(DiffRecord(
                    commit_id=commit_record.id,
                    file_path=diff.file_path,
                    additions=diff.additions,
                    deletions=diff.deletions,
                    changes=diff.changes,
                    diff_patch=compressed_diff,
                    diff_url=diff_url
                ))
                total_additions += diff.additions
                total_deletions += diff.deletions

            # 4. Commit the transaction.
            session.commit()

            # 5. Log success and build the result.
            duration = (datetime.now() - start_time).total_seconds()
            self.logger.info(
                f"Stored commit {commit_data.commit_hash}: "
                f"files={len(diff_data)}, +{total_additions}/-{total_deletions}, "
                f"duration={duration:.2f}s"
            )

            return StorageResult(
                success=True,
                status=StorageStatus.SUCCESS,
                commit_id=commit_record.id,
                message="Commit stored successfully",
                timestamp=datetime.now(),
                metadata={
                    "commit_hash": commit_data.commit_hash,
                    "files_changed": len(diff_data),
                    "total_additions": total_additions,
                    "total_deletions": total_deletions,
                    "duration_seconds": duration
                }
            )

        except Exception:
            session.rollback()
            # Bare ``raise`` re-raises the active exception unchanged.
            raise

    def _prepare_diff_storage(self, commit_data: CommitData, diff: DiffData):
        """Decide where one diff's content goes.

        Returns:
            A ``(diff_patch, diff_url)`` pair: gzip bytes for the DB column,
            or a URL when the diff was handed to S3. Both are ``None`` when
            the diff has no content.
        """
        if not diff.diff_content:
            return None, None

        compressed_data = self._compress_bytes(diff.diff_content)

        if len(compressed_data) <= GZIP_THRESHOLD:
            return compressed_data, None

        if self.s3_client:
            key = f"{commit_data.repository}/{commit_data.commit_hash}/{diff.file_path}.patch.gz"
            diff_url = self._upload_to_s3_sync(key, compressed_data)
            if diff_url is not None:
                return None, diff_url
            # The upload helper signalled failure: keep the bytes in the DB
            # rather than storing a record with neither patch nor URL.
            self.logger.warning(
                f"S3 upload failed, large diff stored in DB: {len(compressed_data)} bytes"
            )
            return compressed_data, None

        # S3 is not configured: store in the DB and warn.
        self.logger.warning(
            f"Large diff stored in DB (S3 not configured): {len(compressed_data)} bytes"
        )
        return compressed_data, None

    def _is_duplicate_commit(self, commit_hash: str) -> bool:
        """Return True when a commit with ``commit_hash`` is already stored."""
        result = self._ensure_session().query(CommitRecord).filter(
            CommitRecord.hash == commit_hash
        ).first()
        return result is not None

    def get_commit_by_hash(self, commit_hash: str) -> Optional[CommitWithDiffs]:
        """Look up a commit and its diff summaries by hash.

        Returns:
            The commit with its diffs, or ``None`` when no commit has that hash.
        """
        session = self._ensure_session()

        commit = session.query(CommitRecord).filter(
            CommitRecord.hash == commit_hash
        ).first()

        if not commit:
            return None

        diffs = session.query(DiffRecord).filter(
            DiffRecord.commit_id == commit.id
        ).all()

        commit_summary = CommitSummary(
            id=commit.id,
            hash=commit.hash,
            message=commit.message,
            author=commit.author,
            timestamp=commit.timestamp,
            repository=commit.repository,
            branch=commit.branch,
            diff_count=len(diffs),
            total_additions=sum(d.additions for d in diffs),
            total_deletions=sum(d.deletions for d in diffs)
        )

        diff_summaries = [
            DiffSummary(
                id=diff.id,
                file_path=diff.file_path,
                additions=diff.additions,
                deletions=diff.deletions,
                has_content=bool(diff.diff_patch or diff.diff_url)
            )
            for diff in diffs
        ]

        return CommitWithDiffs(
            commit=commit_summary,
            diffs=diff_summaries
        )

    def get_recent_commits(
        self,
        repository: str,
        limit: int = 10
    ) -> List[CommitSummary]:
        """Return the most recent commits of ``repository``, newest first.

        Args:
            repository: Repository name to filter on.
            limit: Maximum number of commits to return.

        Returns:
            One ``CommitSummary`` per commit, including its diff statistics.
        """
        # A single outer-join query yields each commit together with its
        # diff statistics, instead of one extra statistics query per commit.
        rows = self._ensure_session().query(
            CommitRecord,
            func.count(DiffRecord.id),
            func.sum(DiffRecord.additions),
            func.sum(DiffRecord.deletions)
        ).outerjoin(
            DiffRecord, DiffRecord.commit_id == CommitRecord.id
        ).filter(
            CommitRecord.repository == repository
        ).group_by(
            CommitRecord.id
        ).order_by(
            CommitRecord.timestamp.desc()
        ).limit(limit).all()

        return [
            CommitSummary(
                id=commit.id,
                hash=commit.hash,
                message=commit.message,
                author=commit.author,
                timestamp=commit.timestamp,
                repository=commit.repository,
                branch=commit.branch,
                # Aggregates are NULL/0 for commits without diffs.
                diff_count=diff_count or 0,
                total_additions=total_additions or 0,
                total_deletions=total_deletions or 0
            )
            for commit, diff_count, total_additions, total_deletions in rows
        ]

    def _compress_bytes(self, data: bytes) -> bytes:
        """Return ``data`` compressed with gzip."""
        buf = BytesIO()
        with gzip.GzipFile(fileobj=buf, mode="wb") as gz:
            gz.write(data)
        return buf.getvalue()

    def _upload_to_s3_sync(self, key: str, data: bytes) -> Optional[str]:
        """Return the S3 URL for ``key``, or ``None`` on error.

        NOTE(review): no upload is performed here and ``data`` is not used;
        only the URL is built from ``AWS_S3_BUCKET`` (default
        ``codeping-diffs``). A diff routed through this method is therefore
        not persisted until a real synchronous upload is wired in.
        """
        try:
            return f"s3://{os.getenv('AWS_S3_BUCKET', 'codeping-diffs')}/{key}"
        except Exception as e:
            self.logger.error(f"Failed to upload to S3: {e}")
            return None

    def _s3_configured(self) -> bool:
        """Return True when all required AWS environment variables are non-empty."""
        required_vars = ["AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY", "AWS_S3_BUCKET"]
        return all(os.environ.get(var) for var in required_vars)
336
+
337
+
338
class LegacyDataStorageService:
    """Legacy event storage service, kept for backward compatibility."""

    def __init__(self):
        self.s3_client = S3Client() if self._s3_configured() else None

        # Input/output logger.
        self.io_logger = ModuleIOLogger("DataStorage")

    def store_event_with_diff(
        self,
        payload: Dict[str, Any],
        headers: Dict[str, str],
        diff_data: GitDiffData
    ) -> None:
        """Store a webhook event and its diff (legacy entry point).

        Runs the async implementation with ``asyncio.run``, so it must not
        be called from a thread that already has a running event loop.

        Args:
            payload: Webhook payload.
            headers: Webhook request headers.
            diff_data: Parsed diff information for the commit.

        Raises:
            Exception: Any storage error is logged and re-raised.
        """
        # Input logging. Everything goes through the positional
        # ``(operation, data)`` form, which is what ``ModuleIOLogger``
        # declares in this file.
        self.io_logger.log_input(
            "store_event_with_diff",
            {
                "payload": payload,
                "diff_data": diff_data,
                "metadata": {
                    "repository": diff_data.repository,
                    "commit_sha": diff_data.commit_sha,
                    "headers_count": len(headers),
                    "payload_size": len(str(payload)),
                    "diff_size": len(diff_data.diff_content) if diff_data.diff_content else 0,
                    "added_lines": diff_data.added_lines,
                    "deleted_lines": diff_data.deleted_lines,
                    "files_changed": diff_data.files_changed
                }
            }
        )

        try:
            import asyncio
            asyncio.run(self._store_event_async(payload, headers, diff_data))

            # Output logging (success).
            self.io_logger.log_output(
                "store_event_with_diff",
                {
                    "storage_success": True,
                    "repository": diff_data.repository,
                    "commit_sha": diff_data.commit_sha,
                    "storage_location": "determined_in_async"
                }
            )

        except Exception:
            # Error logging goes through the module logger, because the
            # ``ModuleIOLogger`` in this file declares no ``log_error``.
            logger.exception(
                "store_event_with_diff failed for %s/%s",
                diff_data.repository,
                diff_data.commit_sha
            )
            raise

    async def _store_event_async(
        self,
        payload: Dict[str, Any],
        headers: Dict[str, str],
        diff_data: GitDiffData
    ) -> None:
        """Async implementation of legacy event storage.

        Compresses the diff, decides whether it lives in the DB row or
        behind an S3 URL, builds the event row and saves it.
        """
        # Compression and storage-location decision.
        diff_patch = None
        diff_url = None
        gzip_size = "0 KB"

        if diff_data.diff_content:
            compressed_diff = self._compress_bytes(diff_data.diff_content)
            gzip_size = f"{len(compressed_diff) / 1024:.1f} KB"

            if len(compressed_diff) <= GZIP_THRESHOLD:
                diff_patch = compressed_diff
                storage_location = "db"
            elif self.s3_client:
                key = f"{diff_data.repository}/{diff_data.commit_sha}.patch.gz"
                # The S3Client in this file exposes a synchronous
                # ``upload_file``; it has no awaitable ``upload_diff``.
                diff_url = self.s3_client.upload_file(key, compressed_diff)
                storage_location = "s3"
            else:
                diff_patch = compressed_diff
                storage_location = "db_large"
                logger.warning(
                    "Large diff stored in DB (S3 not configured): %d bytes",
                    len(compressed_diff)
                )
        else:
            storage_location = "none"

        # Event data. Anything without an ``x-github-event`` header is
        # treated as GitLab.
        is_github = any(name.lower() == "x-github-event" for name in headers)
        platform = "github" if is_github else "gitlab"

        # ``pusher`` may be absent or None in the payload.
        pusher = payload.get("pusher") or {}

        event_data = {
            "platform": platform,
            "repository": diff_data.repository,
            "commit_sha": diff_data.commit_sha,
            "author_name": pusher.get("name"),
            "author_email": pusher.get("email"),
            "timestamp_utc": None,
            "ref": payload.get("ref"),
            "pusher": pusher.get("name", "unknown"),
            "commit_count": len(payload.get("commits", [])),
            "diff_patch": diff_patch,
            "diff_url": diff_url,
            "added_lines": diff_data.added_lines,
            "deleted_lines": diff_data.deleted_lines,
            "files_changed": diff_data.files_changed,
            "payload": json.dumps(payload),
        }

        # ``get_async_session`` is a coroutine function: await it to get the
        # session, then use the session itself as the async context manager.
        async with await get_async_session() as session:
            await self._save_event(session, event_data)

        logger.info(
            "Stored event %s/%s: gzip_size=%s stored_in=%s added=%s deleted=%s files=%s",
            diff_data.repository,
            diff_data.commit_sha,
            gzip_size,
            storage_location,
            diff_data.added_lines or 0,
            diff_data.deleted_lines or 0,
            diff_data.files_changed or 0,
        )

    async def _save_event(self, session: AsyncSession, event_data: Dict[str, Any]) -> None:
        """Insert one ``Event`` row and commit; roll back and re-raise on error."""
        event = Event(**event_data)
        session.add(event)

        try:
            await session.commit()
        except Exception:
            await session.rollback()
            raise

    def _compress_bytes(self, data: bytes) -> bytes:
        """Return ``data`` compressed with gzip."""
        buf = BytesIO()
        with gzip.GzipFile(fileobj=buf, mode="wb") as gz:
            gz.write(data)
        return buf.getvalue()

    def _s3_configured(self) -> bool:
        """Return True when all required AWS environment variables are non-empty."""
        required_vars = ["AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY", "AWS_S3_BUCKET"]
        return all(os.environ.get(var) for var in required_vars)
490
+
491
+
492
+ # 편의 함수들
493
def create_data_storage_manager(db_session: Optional[Session] = None) -> DataStorageManager:
    """Factory for ``DataStorageManager``.

    Args:
        db_session: Optional database session handed to the manager.

    Returns:
        A new ``DataStorageManager`` built with ``db_session``.
    """
    manager = DataStorageManager(db_session)
    return manager
496
+
497
+
498
def store_commit_data(
    commit_data: CommitData,
    diff_data: List[DiffData],
) -> StorageResult:
    """Convenience wrapper: store one commit with a freshly created manager.

    Args:
        commit_data: Commit information.
        diff_data: List of per-file diff information.

    Returns:
        The ``StorageResult`` from the manager's ``store_commit``.
    """
    return create_data_storage_manager().store_commit(commit_data, diff_data)
@@ -0,0 +1,39 @@
1
+ Metadata-Version: 2.4
2
+ Name: yeonjae-universal-data-storage
3
+ Version: 1.0.1
4
+ Summary: Universal data storage module for persisting development data
5
+ Author-email: Yeonjae <dev@example.com>
6
+ License: MIT
7
+ Project-URL: Homepage, https://github.com/yeonjae/universal-modules
8
+ Project-URL: Repository, https://github.com/yeonjae/universal-modules
9
+ Project-URL: Issues, https://github.com/yeonjae/universal-modules/issues
10
+ Classifier: Development Status :: 4 - Beta
11
+ Classifier: Intended Audience :: Developers
12
+ Classifier: License :: OSI Approved :: MIT License
13
+ Classifier: Programming Language :: Python :: 3
14
+ Classifier: Programming Language :: Python :: 3.9
15
+ Classifier: Programming Language :: Python :: 3.10
16
+ Classifier: Programming Language :: Python :: 3.11
17
+ Classifier: Programming Language :: Python :: 3.12
18
+ Requires-Python: >=3.9
19
+ Description-Content-Type: text/markdown
20
+ Requires-Dist: pydantic>=2.0.0
21
+ Requires-Dist: sqlalchemy>=2.0.0
22
+ Provides-Extra: dev
23
+ Requires-Dist: pytest>=7.0.0; extra == "dev"
24
+ Requires-Dist: pytest-cov>=4.0.0; extra == "dev"
25
+ Requires-Dist: pytest-asyncio>=0.21.0; extra == "dev"
26
+ Requires-Dist: black>=23.0.0; extra == "dev"
27
+ Requires-Dist: isort>=5.12.0; extra == "dev"
28
+ Requires-Dist: flake8>=6.0.0; extra == "dev"
29
+ Requires-Dist: mypy>=1.0.0; extra == "dev"
30
+
31
+ # Universal data storage
32
+
33
+ 범용 data storage 모듈
34
+
35
+ ## 설치
36
+
37
+ ```bash
38
+ pip install git+https://github.com/yeonjae-work/universal-modules.git#subdirectory=packages/universal-data-storage
39
+ ```
@@ -0,0 +1,8 @@
1
+ universal_data_storage/__init__.py,sha256=UsZXoMuNKys3sVZ9sd3GVOldeKh3XT-DVOLZ6AlRp0w,153
2
+ universal_data_storage/exceptions.py,sha256=G4NwCx4bnXqLlIdMPPl0uYlSMZNKWg9M2Nh4ouUujMY,2921
3
+ universal_data_storage/models.py,sha256=01YUrSmihBJI60lQYjV7wQRKZA_UlvH6FXNT5xuHAhc,7342
4
+ universal_data_storage/service.py,sha256=6a7HWrERw1aOis6eckWBCD3K98YuBTqR0sZwGYYHJf4,18186
5
+ yeonjae_universal_data_storage-1.0.1.dist-info/METADATA,sha256=gphuhxd38NrEWBWT4y4-N5qqHixsq2mjGUwvM3jHdNk,1464
6
+ yeonjae_universal_data_storage-1.0.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
7
+ yeonjae_universal_data_storage-1.0.1.dist-info/top_level.txt,sha256=hFqUnzMmajixJKMqk83wkf8FTh9ofHDMRpxpXgJYKpY,23
8
+ yeonjae_universal_data_storage-1.0.1.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (80.9.0)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1 @@
1
+ universal_data_storage