ytcollector 1.2.0__tar.gz → 1.2.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ytcollector-1.2.4/MANIFEST.in +2 -0
- {ytcollector-1.2.0 → ytcollector-1.2.4}/PKG-INFO +1 -1
- {ytcollector-1.2.0 → ytcollector-1.2.4}/pyproject.toml +4 -1
- {ytcollector-1.2.0 → ytcollector-1.2.4}/ytcollector/__init__.py +3 -2
- {ytcollector-1.2.0 → ytcollector-1.2.4}/ytcollector/analyzer.py +19 -32
- {ytcollector-1.2.0 → ytcollector-1.2.4}/ytcollector/config.py +19 -4
- ytcollector-1.2.4/ytcollector/models/license_plate_detector.pt +0 -0
- {ytcollector-1.2.0 → ytcollector-1.2.4}/ytcollector.egg-info/PKG-INFO +1 -1
- {ytcollector-1.2.0 → ytcollector-1.2.4}/ytcollector.egg-info/SOURCES.txt +3 -1
- {ytcollector-1.2.0 → ytcollector-1.2.4}/README.md +0 -0
- {ytcollector-1.2.0 → ytcollector-1.2.4}/setup.cfg +0 -0
- {ytcollector-1.2.0 → ytcollector-1.2.4}/ytcollector/cli.py +0 -0
- {ytcollector-1.2.0 → ytcollector-1.2.4}/ytcollector/dataset_builder.py +0 -0
- {ytcollector-1.2.0 → ytcollector-1.2.4}/ytcollector/downloader.py +0 -0
- {ytcollector-1.2.0 → ytcollector-1.2.4}/ytcollector/utils.py +0 -0
- {ytcollector-1.2.0 → ytcollector-1.2.4}/ytcollector.egg-info/dependency_links.txt +0 -0
- {ytcollector-1.2.0 → ytcollector-1.2.4}/ytcollector.egg-info/entry_points.txt +0 -0
- {ytcollector-1.2.0 → ytcollector-1.2.4}/ytcollector.egg-info/requires.txt +0 -0
- {ytcollector-1.2.0 → ytcollector-1.2.4}/ytcollector.egg-info/top_level.txt +0 -0
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "ytcollector"
|
|
7
|
-
version = "1.2.0"
|
|
7
|
+
version = "1.2.4"
|
|
8
8
|
description = "YouTube 콘텐츠 수집기 - 얼굴, 번호판, 타투, 텍스트 감지"
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
requires-python = ">=3.8"
|
|
@@ -59,6 +59,9 @@ Repository = "https://github.com/yourusername/ytcollector"
|
|
|
59
59
|
where = ["."]
|
|
60
60
|
include = ["ytcollector*"]
|
|
61
61
|
|
|
62
|
+
[tool.setuptools.package-data]
|
|
63
|
+
ytcollector = ["models/*.pt"]
|
|
64
|
+
|
|
62
65
|
[tool.black]
|
|
63
66
|
line-length = 100
|
|
64
67
|
target-version = ['py38']
|
|
@@ -17,12 +17,12 @@ CLI 사용 예시:
|
|
|
17
17
|
ytc -c face text --fast
|
|
18
18
|
"""
|
|
19
19
|
|
|
20
|
-
from .config import CATEGORY_NAMES, CATEGORY_QUERIES, USER_AGENTS, LICENSE_PLATE_PATTERNS
|
|
20
|
+
from .config import CATEGORY_NAMES, CATEGORY_QUERIES, USER_AGENTS, LICENSE_PLATE_PATTERNS, LICENSE_PLATE_MODEL_PATH
|
|
21
21
|
from .analyzer import VideoAnalyzer, check_dependencies
|
|
22
22
|
from .downloader import YouTubeDownloader
|
|
23
23
|
from .cli import run, main as cli_main
|
|
24
24
|
|
|
25
|
-
__version__ = "1.
|
|
25
|
+
__version__ = "1.2.4"
|
|
26
26
|
__all__ = [
|
|
27
27
|
# 주요 클래스
|
|
28
28
|
"VideoAnalyzer",
|
|
@@ -32,6 +32,7 @@ __all__ = [
|
|
|
32
32
|
"CATEGORY_QUERIES",
|
|
33
33
|
"USER_AGENTS",
|
|
34
34
|
"LICENSE_PLATE_PATTERNS",
|
|
35
|
+
"LICENSE_PLATE_MODEL_PATH",
|
|
35
36
|
# 유틸리티
|
|
36
37
|
"check_dependencies",
|
|
37
38
|
"run",
|
|
@@ -32,7 +32,7 @@ try:
|
|
|
32
32
|
except ImportError:
|
|
33
33
|
USE_GPU = False
|
|
34
34
|
|
|
35
|
-
from .config import LICENSE_PLATE_PATTERNS,
|
|
35
|
+
from .config import LICENSE_PLATE_PATTERNS, LICENSE_PLATE_MODEL_PATH, YOLO_CONFIDENCE, YOLO_HIGH_CONFIDENCE
|
|
36
36
|
|
|
37
37
|
|
|
38
38
|
class VideoAnalyzer:
|
|
@@ -65,7 +65,7 @@ class VideoAnalyzer:
|
|
|
65
65
|
if self.yolo_model is None:
|
|
66
66
|
device = "cuda" if USE_GPU else "cpu"
|
|
67
67
|
print(f" YOLO 모델 로딩 중... (Device: {device})")
|
|
68
|
-
self.yolo_model = YOLO(
|
|
68
|
+
self.yolo_model = YOLO(str(LICENSE_PLATE_MODEL_PATH))
|
|
69
69
|
self.yolo_model.to(device)
|
|
70
70
|
|
|
71
71
|
def extract_frames(self, video_path, num_frames=10):
|
|
@@ -167,9 +167,9 @@ class VideoAnalyzer:
|
|
|
167
167
|
|
|
168
168
|
def detect_license_plate(self, frame, texts=None):
|
|
169
169
|
"""
|
|
170
|
-
ROI 기반 번호판 감지
|
|
170
|
+
ROI 기반 번호판 감지
|
|
171
171
|
1. YOLO로 번호판 영역(ROI)을 먼저 찾음
|
|
172
|
-
2.
|
|
172
|
+
2. 고신뢰도(>=0.6)면 바로 인정, 저신뢰도면 OCR 패턴 매칭 필요
|
|
173
173
|
"""
|
|
174
174
|
if not YOLO_AVAILABLE or frame is None:
|
|
175
175
|
return False
|
|
@@ -177,59 +177,46 @@ class VideoAnalyzer:
|
|
|
177
177
|
try:
|
|
178
178
|
self._init_yolo()
|
|
179
179
|
results = self.yolo_model(frame, verbose=False, conf=YOLO_CONFIDENCE)
|
|
180
|
-
|
|
181
|
-
yolo_detected = False
|
|
182
|
-
roi_ocr_matched = False
|
|
183
180
|
|
|
184
181
|
for r in results:
|
|
185
|
-
# 0: license plate
|
|
186
182
|
for box in r.boxes:
|
|
187
183
|
if box.cls == 0:
|
|
188
|
-
|
|
184
|
+
conf = float(box.conf[0])
|
|
189
185
|
|
|
190
|
-
# ROI 크기 필터링
|
|
186
|
+
# ROI 크기 필터링
|
|
191
187
|
x1, y1, x2, y2 = map(int, box.xyxy[0])
|
|
192
188
|
roi_w, roi_h = x2 - x1, y2 - y1
|
|
193
189
|
frame_h, frame_w = frame.shape[:2]
|
|
194
190
|
roi_area_ratio = (roi_w * roi_h) / (frame_w * frame_h)
|
|
195
191
|
|
|
196
|
-
# ROI가 전체 화면의 0.1% ~ 20% 사이여야 함
|
|
197
192
|
if roi_area_ratio < 0.001 or roi_area_ratio > 0.2:
|
|
198
193
|
continue
|
|
199
194
|
|
|
200
|
-
#
|
|
195
|
+
# 고신뢰도면 바로 인정
|
|
196
|
+
if conf >= YOLO_HIGH_CONFIDENCE:
|
|
197
|
+
return True
|
|
198
|
+
|
|
199
|
+
# 저신뢰도면 OCR 패턴 매칭 필요
|
|
201
200
|
h, w = frame.shape[:2]
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
201
|
+
pad_w = int((x2 - x1) * 0.15)
|
|
202
|
+
pad_h = int((y2 - y1) * 0.15)
|
|
203
|
+
|
|
206
204
|
crop_x1 = max(0, x1 - pad_w)
|
|
207
205
|
crop_y1 = max(0, y1 - pad_h)
|
|
208
206
|
crop_x2 = min(w, x2 + pad_w)
|
|
209
207
|
crop_y2 = min(h, y2 + pad_h)
|
|
210
|
-
|
|
208
|
+
|
|
211
209
|
roi = frame[crop_y1:crop_y2, crop_x1:crop_x2]
|
|
212
210
|
if roi.size > 0:
|
|
213
|
-
# ROI에 대해서만 OCR 실행
|
|
214
211
|
roi_texts = self.detect_text(roi)
|
|
215
212
|
if roi_texts:
|
|
216
|
-
|
|
213
|
+
combined = "".join([re.sub(r'[^0-9가-힣A-Za-z]', '', t) for t in roi_texts])
|
|
217
214
|
for pattern in LICENSE_PLATE_PATTERNS:
|
|
218
|
-
# 개별 텍스트 및 결합 텍스트 확인
|
|
219
215
|
if any(re.search(pattern, re.sub(r'[^0-9가-힣]', '', t)) for t in roi_texts) or \
|
|
220
|
-
re.search(pattern,
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
if roi_ocr_matched: break
|
|
225
|
-
if roi_ocr_matched: break
|
|
226
|
-
|
|
227
|
-
# 최종 판정: YOLO 감지 + OCR 패턴 매칭 둘 다 만족해야 함 (오탐지 방지)
|
|
228
|
-
if yolo_detected and roi_ocr_matched:
|
|
229
|
-
return True
|
|
230
|
-
|
|
216
|
+
re.search(pattern, combined):
|
|
217
|
+
return True
|
|
218
|
+
|
|
231
219
|
except Exception as e:
|
|
232
|
-
# print(f" ⚠ 번호판 ROI 분석 에러: {e}")
|
|
233
220
|
pass
|
|
234
221
|
|
|
235
222
|
return False
|
|
@@ -1,4 +1,9 @@
|
|
|
1
1
|
# 설정 상수
|
|
2
|
+
from pathlib import Path
|
|
3
|
+
|
|
4
|
+
# 모델 경로 (패키지 내부)
|
|
5
|
+
MODEL_DIR = Path(__file__).parent / "models"
|
|
6
|
+
LICENSE_PLATE_MODEL_PATH = MODEL_DIR / "license_plate_detector.pt"
|
|
2
7
|
|
|
3
8
|
USER_AGENTS = [
|
|
4
9
|
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
|
|
@@ -57,8 +62,18 @@ CATEGORY_NAMES = {
|
|
|
57
62
|
# 카테고리별 제외 키워드 (제목에 포함 시 스킵)
|
|
58
63
|
BLACKLIST_KEYWORDS = {
|
|
59
64
|
'tattoo': [
|
|
60
|
-
|
|
61
|
-
"
|
|
65
|
+
# 반영구 화장 (Semi-permanent makeup)
|
|
66
|
+
"눈썹", "입술", "두피", "헤어라인", "아이라인", "구렛나룻",
|
|
67
|
+
"반영구", "영구화장", "립타투", "헤어타투", "SMP",
|
|
68
|
+
"마이크로블레이딩", "microblading", "엠보", "콤보", "PMU",
|
|
69
|
+
# 임시/가짜 타투
|
|
70
|
+
"헤나", "henna", "스티커", "페이크", "fake", "임시", "붙이는",
|
|
71
|
+
# 타투 제거
|
|
72
|
+
"제거", "레이저", "지우기", "removal",
|
|
73
|
+
# 도안/디자인만 (실제 시술 아님)
|
|
74
|
+
"도안", "디자인", "스케치", "드로잉", "그리기",
|
|
75
|
+
# 의료/미용 시술
|
|
76
|
+
"유두", "유륜", "흉터", "점",
|
|
62
77
|
],
|
|
63
78
|
'face': [],
|
|
64
79
|
'license_plate': [],
|
|
@@ -66,8 +81,8 @@ BLACKLIST_KEYWORDS = {
|
|
|
66
81
|
}
|
|
67
82
|
|
|
68
83
|
# YOLO 설정
|
|
69
|
-
|
|
70
|
-
|
|
84
|
+
YOLO_CONFIDENCE = 0.3 # 감지율 향상을 위해 낮춤
|
|
85
|
+
YOLO_HIGH_CONFIDENCE = 0.6 # 고신뢰도 (OCR 없이 바로 인정)
|
|
71
86
|
|
|
72
87
|
# 번호판 정규식 패턴 (한국 자동차 번호판 중심)
|
|
73
88
|
LICENSE_PLATE_PATTERNS = [
|
|
Binary file
|
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
MANIFEST.in
|
|
1
2
|
README.md
|
|
2
3
|
pyproject.toml
|
|
3
4
|
ytcollector/__init__.py
|
|
@@ -12,4 +13,5 @@ ytcollector.egg-info/SOURCES.txt
|
|
|
12
13
|
ytcollector.egg-info/dependency_links.txt
|
|
13
14
|
ytcollector.egg-info/entry_points.txt
|
|
14
15
|
ytcollector.egg-info/requires.txt
|
|
15
|
-
ytcollector.egg-info/top_level.txt
|
|
16
|
+
ytcollector.egg-info/top_level.txt
|
|
17
|
+
ytcollector/models/license_plate_detector.pt
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|