PyPI - ytcollector - Versions diffs - 1.2.0__tar.gz → 1.2.4__tar.gz - Mend

ytcollector 1.2.0.tar.gz → 1.2.4.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (19) hide show

ytcollector-1.2.4/MANIFEST.in ADDED Viewed

	@@ -0,0 +1,2 @@
1	+ include ytcollector/models/*.pt
2	+ recursive-include ytcollector *.pt

{ytcollector-1.2.0 → ytcollector-1.2.4}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: ytcollector
-Version: 1.2.0
+Version: 1.2.4
 Summary: YouTube 콘텐츠 수집기 - 얼굴, 번호판, 타투, 텍스트 감지
 Author: YTCollector Team
 License: MIT

{ytcollector-1.2.0 → ytcollector-1.2.4}/pyproject.toml RENAMED Viewed

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 [project]
 name = "ytcollector"
-version = "1.2.0"
+version = "1.2.4"
 description = "YouTube 콘텐츠 수집기 - 얼굴, 번호판, 타투, 텍스트 감지"
 readme = "README.md"
 requires-python = ">=3.8"
@@ -59,6 +59,9 @@ Repository = "https://github.com/yourusername/ytcollector"
 where = ["."]
 include = ["ytcollector*"]
+[tool.setuptools.package-data]
+ytcollector = ["models/*.pt"]
 [tool.black]
 line-length = 100
 target-version = ['py38']

{ytcollector-1.2.0 → ytcollector-1.2.4}/ytcollector/__init__.py RENAMED Viewed

@@ -17,12 +17,12 @@ CLI 사용 예시:
     ytc -c face text --fast
 """
-from .config import CATEGORY_NAMES, CATEGORY_QUERIES, USER_AGENTS, LICENSE_PLATE_PATTERNS
+from .config import CATEGORY_NAMES, CATEGORY_QUERIES, USER_AGENTS, LICENSE_PLATE_PATTERNS, LICENSE_PLATE_MODEL_PATH
 from .analyzer import VideoAnalyzer, check_dependencies
 from .downloader import YouTubeDownloader
 from .cli import run, main as cli_main
-__version__ = "1.1.8"
+__version__ = "1.2.4"
 __all__ = [
     # 주요 클래스
     "VideoAnalyzer",
@@ -32,6 +32,7 @@ __all__ = [
     "CATEGORY_QUERIES",
     "USER_AGENTS",
     "LICENSE_PLATE_PATTERNS",
+    "LICENSE_PLATE_MODEL_PATH",
     # 유틸리티
     "check_dependencies",
     "run",

{ytcollector-1.2.0 → ytcollector-1.2.4}/ytcollector/analyzer.py RENAMED Viewed

@@ -32,7 +32,7 @@ try:
 except ImportError:
     USE_GPU = False
-from .config import LICENSE_PLATE_PATTERNS, YOLO_MODEL_NAME, YOLO_CONFIDENCE
+from .config import LICENSE_PLATE_PATTERNS, LICENSE_PLATE_MODEL_PATH, YOLO_CONFIDENCE, YOLO_HIGH_CONFIDENCE
 class VideoAnalyzer:
@@ -65,7 +65,7 @@ class VideoAnalyzer:
                 if self.yolo_model is None:
                     device = "cuda" if USE_GPU else "cpu"
                     print(f"  YOLO 모델 로딩 중... (Device: {device})")
-                    self.yolo_model = YOLO(YOLO_MODEL_NAME)
+                    self.yolo_model = YOLO(str(LICENSE_PLATE_MODEL_PATH))
                     self.yolo_model.to(device)
     def extract_frames(self, video_path, num_frames=10):
@@ -167,9 +167,9 @@ class VideoAnalyzer:
     def detect_license_plate(self, frame, texts=None):
         """
-        ROI 기반 번호판 감지 (최적화 버전)
+        ROI 기반 번호판 감지
         1. YOLO로 번호판 영역(ROI)을 먼저 찾음
-        2. 찾은 영역만 잘라서 OCR 수행 (속도 및 정확도 향상)
+        2. 고신뢰도(>=0.6)면 바로 인정, 저신뢰도면 OCR 패턴 매칭 필요
         """
         if not YOLO_AVAILABLE or frame is None:
             return False
@@ -177,59 +177,46 @@ class VideoAnalyzer:
         try:
             self._init_yolo()
             results = self.yolo_model(frame, verbose=False, conf=YOLO_CONFIDENCE)
-            yolo_detected = False
-            roi_ocr_matched = False
             for r in results:
-                # 0: license plate
                 for box in r.boxes:
                     if box.cls == 0:
-                        yolo_detected = True
+                        conf = float(box.conf[0])
-                        # ROI 크기 필터링 (너무 작거나 큰 영역 제외)
+                        # ROI 크기 필터링
                         x1, y1, x2, y2 = map(int, box.xyxy[0])
                         roi_w, roi_h = x2 - x1, y2 - y1
                         frame_h, frame_w = frame.shape[:2]
                         roi_area_ratio = (roi_w * roi_h) / (frame_w * frame_h)
-                        # ROI가 전체 화면의 0.1% ~ 20% 사이여야 함
                         if roi_area_ratio < 0.001 or roi_area_ratio > 0.2:
                             continue
-                        # ROI 크로핑 및 타겟 OCR
+                        # 고신뢰도면 바로 인정
+                        if conf >= YOLO_HIGH_CONFIDENCE:
+                            return True
+                        # 저신뢰도면 OCR 패턴 매칭 필요
                         h, w = frame.shape[:2]
-                        # 패딩 10% 추가
-                        pad_w = int((x2 - x1) * 0.1)
-                        pad_h = int((y2 - y1) * 0.1)
+                        pad_w = int((x2 - x1) * 0.15)
+                        pad_h = int((y2 - y1) * 0.15)
                         crop_x1 = max(0, x1 - pad_w)
                         crop_y1 = max(0, y1 - pad_h)
                         crop_x2 = min(w, x2 + pad_w)
                         crop_y2 = min(h, y2 + pad_h)
                         roi = frame[crop_y1:crop_y2, crop_x1:crop_x2]
                         if roi.size > 0:
-                            # ROI에 대해서만 OCR 실행
                             roi_texts = self.detect_text(roi)
                             if roi_texts:
-                                combined_roi = "".join([re.sub(r'[^0-9가-힣]', '', t) for t in roi_texts])
+                                combined = "".join([re.sub(r'[^0-9가-힣A-Za-z]', '', t) for t in roi_texts])
                                 for pattern in LICENSE_PLATE_PATTERNS:
-                                    # 개별 텍스트 및 결합 텍스트 확인
                                     if any(re.search(pattern, re.sub(r'[^0-9가-힣]', '', t)) for t in roi_texts) or \
-                                       re.search(pattern, combined_roi):
-                                        roi_ocr_matched = True
-                                        break
-                        if roi_ocr_matched: break
-                if roi_ocr_matched: break
-            # 최종 판정: YOLO 감지 + OCR 패턴 매칭 둘 다 만족해야 함 (오탐지 방지)
-            if yolo_detected and roi_ocr_matched:
-                return True
+                                       re.search(pattern, combined):
+                                        return True
         except Exception as e:
-            # print(f"  ⚠ 번호판 ROI 분석 에러: {e}")
             pass
         return False

{ytcollector-1.2.0 → ytcollector-1.2.4}/ytcollector/config.py RENAMED Viewed

@@ -1,4 +1,9 @@
 # 설정 상수
+from pathlib import Path
+# 모델 경로 (패키지 내부)
+MODEL_DIR = Path(__file__).parent / "models"
+LICENSE_PLATE_MODEL_PATH = MODEL_DIR / "license_plate_detector.pt"
 USER_AGENTS = [
     'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
@@ -57,8 +62,18 @@ CATEGORY_NAMES = {
 # 카테고리별 제외 키워드 (제목에 포함 시 스킵)
 BLACKLIST_KEYWORDS = {
     'tattoo': [
-        "눈썹", "입술", "두피", "영구",  # 반영구 시술 제외
-        "립타투", "헤어타투", "SMP"
+        # 반영구 화장 (Semi-permanent makeup)
+        "눈썹", "입술", "두피", "헤어라인", "아이라인", "구렛나룻",
+        "반영구", "영구화장", "립타투", "헤어타투", "SMP",
+        "마이크로블레이딩", "microblading", "엠보", "콤보", "PMU",
+        # 임시/가짜 타투
+        "헤나", "henna", "스티커", "페이크", "fake", "임시", "붙이는",
+        # 타투 제거
+        "제거", "레이저", "지우기", "removal",
+        # 도안/디자인만 (실제 시술 아님)
+        "도안", "디자인", "스케치", "드로잉", "그리기",
+        # 의료/미용 시술
+        "유두", "유륜", "흉터", "점",
     ],
     'face': [],
     'license_plate': [],
@@ -66,8 +81,8 @@ BLACKLIST_KEYWORDS = {
 }
 # YOLO 설정
-YOLO_MODEL_NAME = 'license_plate_detector.pt'  # 번호판 전용 학습 모델
-YOLO_CONFIDENCE = 0.5                          # 오탐지 방지를 위해 신뢰도 상향
+YOLO_CONFIDENCE = 0.3  # 감지율 향상을 위해 낮춤
+YOLO_HIGH_CONFIDENCE = 0.6  # 고신뢰도 (OCR 없이 바로 인정)
 # 번호판 정규식 패턴 (한국 자동차 번호판 중심)
 LICENSE_PLATE_PATTERNS = [

ytcollector-1.2.4/ytcollector/models/license_plate_detector.pt ADDED Viewed

Binary file

{ytcollector-1.2.0 → ytcollector-1.2.4}/ytcollector.egg-info/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: ytcollector
-Version: 1.2.0
+Version: 1.2.4
 Summary: YouTube 콘텐츠 수집기 - 얼굴, 번호판, 타투, 텍스트 감지
 Author: YTCollector Team
 License: MIT

{ytcollector-1.2.0 → ytcollector-1.2.4}/ytcollector.egg-info/SOURCES.txt RENAMED Viewed

@@ -1,3 +1,4 @@
+MANIFEST.in
 README.md
 pyproject.toml
 ytcollector/__init__.py
@@ -12,4 +13,5 @@ ytcollector.egg-info/SOURCES.txt
 ytcollector.egg-info/dependency_links.txt
 ytcollector.egg-info/entry_points.txt
 ytcollector.egg-info/requires.txt
-ytcollector.egg-info/top_level.txt
+ytcollector.egg-info/top_level.txt
+ytcollector/models/license_plate_detector.pt