PyPI - ytcollector - Versions diffs - 1.1.7__py3-none-any.whl → 1.1.9__py3-none-any.whl - Mend

ytcollector 1.1.7__py3-none-any.whl → 1.1.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9) hide show

ytcollector/__init__.py +1 -1
ytcollector/analyzer.py +62 -25
ytcollector/config.py +23 -21
{ytcollector-1.1.7.dist-info → ytcollector-1.1.9.dist-info}/METADATA +12 -12
ytcollector-1.1.9.dist-info/RECORD +12 -0
ytcollector-1.1.7.dist-info/RECORD +0 -12
{ytcollector-1.1.7.dist-info → ytcollector-1.1.9.dist-info}/WHEEL +0 -0
{ytcollector-1.1.7.dist-info → ytcollector-1.1.9.dist-info}/entry_points.txt +0 -0
{ytcollector-1.1.7.dist-info → ytcollector-1.1.9.dist-info}/top_level.txt +0 -0

ytcollector/__init__.py CHANGED Viewed

@@ -22,7 +22,7 @@ from .analyzer import VideoAnalyzer, check_dependencies
 from .downloader import YouTubeDownloader
 from .cli import run, main as cli_main
-__version__ = "1.1.7"
+__version__ = "1.1.8"
 __all__ = [
     # 주요 클래스
     "VideoAnalyzer",

ytcollector/analyzer.py CHANGED Viewed

@@ -21,7 +21,7 @@ except ImportError:
     EASYOCR_AVAILABLE = False
 try:
-    from ultralytics import YOLOWorld
+    from ultralytics import YOLO
     YOLO_AVAILABLE = True
 except ImportError:
     YOLO_AVAILABLE = False
@@ -32,7 +32,7 @@ try:
 except ImportError:
     USE_GPU = False
-from .config import LICENSE_PLATE_PATTERNS, YOLO_MODEL_NAME, YOLO_CONFIDENCE, YOLO_PROMPTS
+from .config import LICENSE_PLATE_PATTERNS, YOLO_MODEL_NAME, YOLO_CONFIDENCE
 class VideoAnalyzer:
@@ -59,16 +59,14 @@ class VideoAnalyzer:
                     self.ocr_reader = easyocr.Reader(['ko', 'en'], gpu=USE_GPU, verbose=False)
     def _init_yolo(self):
-        """YOLO-World 모델 초기화 (필요할 때만, 스레드 안전, GPU 가속 체크)"""
+        """YOLO 모델 초기화 (필요할 때만, 스레드 안전, GPU 가속 체크)"""
         if YOLO_AVAILABLE and self.yolo_model is None:
             with self._ocr_lock:
                 if self.yolo_model is None:
                     device = "cuda" if USE_GPU else "cpu"
-                    print(f"  YOLO-World 모델 로딩 중... (Device: {device})")
-                    self.yolo_model = YOLOWorld(YOLO_MODEL_NAME)
+                    print(f"  YOLO 모델 로딩 중... (Device: {device})")
+                    self.yolo_model = YOLO(YOLO_MODEL_NAME)
                     self.yolo_model.to(device)
-                    # 감지할 클래스(프롬프트) 설정
-                    self.yolo_model.set_classes(YOLO_PROMPTS)
     def extract_frames(self, video_path, num_frames=10):
         """영상에서 균등 간격으로 프레임 추출"""
@@ -106,7 +104,7 @@ class VideoAnalyzer:
             gray, scaleFactor=1.1, minNeighbors=5, minSize=(30, 30)
         )
-    def detect_text(self, frame):
+    def detect_text(self, frame, return_positions=False):
         """EasyOCR로 텍스트 감지 (스레드 안전)"""
         if not EASYOCR_AVAILABLE:
             return []
@@ -114,7 +112,8 @@ class VideoAnalyzer:
         self._init_ocr()
         try:
             h, w = frame.shape[:2]
+            scale = 1.0
             # 가독성 개선을 위해 1080p 수준으로 리사이즈 (너무 작으면 인식률 저하)
             if w > 1280:
                 scale = 1280 / w
@@ -124,22 +123,48 @@ class VideoAnalyzer:
                 scale = 960 / w
                 frame = cv2.resize(frame, (960, int(h * scale)), interpolation=cv2.INTER_CUBIC)
+            resized_h, resized_w = frame.shape[:2]
             # 전처리: 그레이스케일 및 대비 강화 (옵션)
             gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
             clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8,8))
             processed = clahe.apply(gray)
             with self._ocr_lock:
-                # 원본(컬러)과 전처리(그레이) 중 선택 가능하나 보통 EasyOCR은 컬러에서 잘 작동함
-                # 대비 강화된 그레이스케일을 사용해봄
                 results = self.ocr_reader.readtext(processed)
+            if return_positions:
+                # 위치 정보와 함께 반환 (bbox, text, conf, y_ratio)
+                texts_with_pos = []
+                for r in results:
+                    if r[2] > 0.25:
+                        bbox = r[0]
+                        # bbox의 y 중심점 계산 (0~1 비율)
+                        y_center = (bbox[0][1] + bbox[2][1]) / 2
+                        y_ratio = y_center / resized_h
+                        texts_with_pos.append({
+                            'text': r[1],
+                            'conf': r[2],
+                            'y_ratio': y_ratio
+                        })
+                return texts_with_pos
             # 신뢰도 임계값 0.25로 약간 하향 조정 (기존 0.3)
             return [r[1] for r in results if r[2] > 0.25]
         except Exception as e:
             print(f"  ⚠ OCR 에러: {e}")
             return []
+    def detect_subtitle(self, frame):
+        """자막 감지 - 화면 하단 60%~95% 영역의 텍스트만 필터링"""
+        texts_with_pos = self.detect_text(frame, return_positions=True)
+        if not texts_with_pos:
+            return []
+        # 자막은 주로 화면 하단에 위치 (y_ratio 0.6 ~ 0.95)
+        subtitles = [t['text'] for t in texts_with_pos if 0.6 <= t['y_ratio'] <= 0.95]
+        return subtitles
     def detect_license_plate(self, frame, texts=None):
         """
         ROI 기반 번호판 감지 (최적화 버전)
@@ -153,19 +178,26 @@ class VideoAnalyzer:
             self._init_yolo()
             results = self.yolo_model(frame, verbose=False, conf=YOLO_CONFIDENCE)
-            yolo_high_conf = False
+            yolo_detected = False
             roi_ocr_matched = False
             for r in results:
                 # 0: license plate
                 for box in r.boxes:
                     if box.cls == 0:
-                        conf = float(box.conf)
-                        if conf > 0.8:
-                            yolo_high_conf = True
-                        # ROI 크로핑 및 타겟 OCR
+                        yolo_detected = True
+                        # ROI 크기 필터링 (너무 작거나 큰 영역 제외)
                         x1, y1, x2, y2 = map(int, box.xyxy[0])
+                        roi_w, roi_h = x2 - x1, y2 - y1
+                        frame_h, frame_w = frame.shape[:2]
+                        roi_area_ratio = (roi_w * roi_h) / (frame_w * frame_h)
+                        # ROI가 전체 화면의 0.1% ~ 20% 사이여야 함
+                        if roi_area_ratio < 0.001 or roi_area_ratio > 0.2:
+                            continue
+                        # ROI 크로핑 및 타겟 OCR
                         h, w = frame.shape[:2]
                         # 패딩 10% 추가
                         pad_w = int((x2 - x1) * 0.1)
@@ -192,8 +224,8 @@ class VideoAnalyzer:
                         if roi_ocr_matched: break
                 if roi_ocr_matched: break
-            # 최종 판정
-            if roi_ocr_matched or yolo_high_conf:
+            # 최종 판정: YOLO 감지 + OCR 패턴 매칭 둘 다 만족해야 함 (오탐지 방지)
+            if yolo_detected and roi_ocr_matched:
                 return True
         except Exception as e:
@@ -305,14 +337,19 @@ class VideoAnalyzer:
                     results['license_plate'] = True
                     detected_now = True
-            # 텍스트 감지 (일반 텍스트 카테고리이거나 번호판 수집 중에도 텍스트 로그 기록을 위해 실행)
-            # 번호판 감지가 필요 없는 경우 전체 OCR을 건너뛰어 속도 향상 가능
-            if target_category == 'text' or (detected_now and target_category != 'license_plate'):
-                texts = self.detect_text(frame)
+            # 텍스트/자막 감지
+            if target_category == 'text':
+                # 자막 영역 필터링 (화면 하단 60%~95%)
+                texts = self.detect_subtitle(frame)
                 if texts:
                     results['text'] = True
                     all_texts.extend(texts)
                     detected_now = True
+            elif detected_now and target_category != 'license_plate':
+                texts = self.detect_text(frame)
+                if texts:
+                    results['text'] = True
+                    all_texts.extend(texts)
             elif target_category == 'license_plate' and not results['license_plate']:
                 # 번호판을 못 찾은 경우에만 전체 화면 OCR 한 번 더 시도 (보수적 접근)
                 texts = self.detect_text(frame)

ytcollector/config.py CHANGED Viewed

@@ -11,21 +11,21 @@ USER_AGENTS = [
 CATEGORY_QUERIES = {
     'face': [
         "SBS 인터뷰 클립",
-        "런닝맨 멤버 인터뷰",
-        "SBS 뉴스 인터뷰",
-        "미운우리새끼 인터뷰",
-        "SBS 스페셜 인물",
-        "집사부일체 인터뷰",
-        "그것이알고싶다 인터뷰",
-        "SBS 연예대상 소감",
+        "MBC 뉴스 인터뷰",
+        "tvN 유퀴즈 일반인 인터뷰",
+        "JTBC 뉴스룸 초대석",
+        "기자회견 현장",
+        "연예대상 레드카펫 직캠",
+        "길거리 인터뷰 브이로그",
+        "무대인사 직캠 얼굴",
     ],
     'license_plate': [
-        "중고차 매물 소개",
-        "자동차 세차 영상",
-        "신차 출고 브이로그",
-        "자동차 튜닝 작업",
-        "엔카 허위매물",
-        "주차장 만차",
+        "SBS 드라마 자동차 추격 장면",
+        "SBS 드라마 주차장 씬",
+        "SBS 드라마 차량 씬",
+        "SBS 드라마 운전 장면",
+        "SBS 드라마 택시 장면",
+        "SBS 드라마 교통사고 장면",
     ],
     'tattoo': [
         "타투 시술 영상",
@@ -36,11 +36,14 @@ CATEGORY_QUERIES = {
         "tattoo session",
     ],
     'text': [
-        "SBS 런닝맨 레전드",
-        "SBS 예능 쇼츠",
-        "재미있는 자막 영상 쇼츠",
-        "SBS 파워FM 보이는 라디오",
-        "SBS 연예대상 소감",
+        "SBS 런닝맨 자막 레전드",
+        "SBS 미우새 자막 모음",
+        "tvN 놀라운토요일 자막",
+        "JTBC 아는형님 자막 레전드",
+        "MBC 나혼자산다 자막",
+        "유튜버 예능 자막 편집",
+        "브이로그 자막 효과",
+        "먹방 자막 편집",
     ],
 }
@@ -69,9 +72,8 @@ BLACKLIST_KEYWORDS = {
 }
 # YOLO 설정
-YOLO_MODEL_NAME = 'yolov8s-world.pt' # YOLO-World 모델 (Open Vocabulary)
-YOLO_CONFIDENCE = 0.3                # YOLO-World는 임계값을 약간 낮게 설정 가능
-YOLO_PROMPTS = ["license plate"]
+YOLO_MODEL_NAME = 'license_plate_detector.pt'  # 번호판 전용 학습 모델
+YOLO_CONFIDENCE = 0.5                          # 오탐지 방지를 위해 신뢰도 상향
 # 번호판 정규식 패턴 (한국 자동차 번호판 중심)
 LICENSE_PLATE_PATTERNS = [

{ytcollector-1.1.7.dist-info → ytcollector-1.1.9.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: ytcollector
-Version: 1.1.7
+Version: 1.1.9
 Summary: YouTube 콘텐츠 수집기 - 얼굴, 번호판, 타투, 텍스트 감지
 Author: YTCollector Team
 License: MIT
@@ -44,14 +44,12 @@ Requires-Dist: ytcollector[analysis,dev]; extra == "all"
 pip install yt-dlp
 ```
-### 분석 기능용 패키지 (권장 - v1.1.6+)
+### 분석 기능용 패키지 (권장 - v1.1.9+)
 분석 기능을 원활하게 사용하려면 아래 패키지들이 필요합니다. GPU(CUDA)가 설치된 경우 자동으로 가속이 활성화됩니다.
 ```bash
 pip install opencv-python easyocr numpy ultralytics
-# YOLO-World 기능을 사용하려면 아래 CLIP 라이브러리 수동 설치가 필요합니다.
-pip install "git+https://github.com/ultralytics/CLIP.git"
 ```
 ## 사용법
@@ -80,10 +78,10 @@ ytcollector
 | 카테고리 | 설명 | 검색 소스 |
 |----------|------|-----------|
-| `face` | 얼굴/인물 | SBS 인터뷰, 런닝맨, 미운우리새끼 등 |
-| `license_plate` | 자동차 번호판 | 중고차 매물, 세차 영상, 신차 출고 등 |
+| `face` | 얼굴/인물 | SBS/MBC/tvN/JTBC 인터뷰, 기자회견 등 |
+| `license_plate` | 자동차 번호판 | SBS 드라마 자동차/추격/주차장 씬 등 |
 | `tattoo` | 타투/문신 | 타투 시술, 타투이스트 작업 영상 |
-| `text` | 텍스트/자막 | SBS 예능 (런닝맨, 골목식당 등) |
+| `text` | 텍스트/자막 | SBS/tvN/JTBC/MBC 예능, 유튜버 자막 편집 등 |
 ## 예시
@@ -190,14 +188,16 @@ https://www.youtube.com/watch?v=aqz-KE-bpKQ, 00:10, sample_task
 | 감지 항목 | 사용 기술 | 설명 |
 |-----------|-----------|------|
 | 얼굴 | OpenCV Haar Cascade | 정면 얼굴 감지 |
-| 텍스트 | EasyOCR | 한국어/영어 문자 인식 (분석 품질 및 프레임 수 개선) |
-| 번호판 | YOLO-World + ROI OCR | v1.1.6: YOLO로 감지 후 해당 영역만 OCR (속도 2x, 정확도 향상) |
+| 텍스트 | EasyOCR | 한국어/영어 문자 인식 |
+| 번호판 | YOLOv8 전용 모델 + OCR | v1.1.9: 번호판 전용 학습 모델 + 한국 번호판 패턴 매칭 |
 | 타투 | OpenCV HSV 분석 | 피부 영역 내 잉크 패턴 감지 |
-### 주요 최적화 (v1.1.5~1.1.6)
-- **ROI 기반 감지**: 전체 화면이 아닌 YOLO가 지정한 영역만 OCR하여 속도와 정확도 대폭 향상
+### 주요 최적화 (v1.1.5~1.1.9)
+- **번호판 전용 모델** (v1.1.9): YOLO-World → 번호판 전용 학습 모델로 교체 (감지율 대폭 향상, 모델 크기 27MB→6MB)
+- **오탐지 방지** (v1.1.9): YOLO 감지 + OCR 패턴 매칭 둘 다 만족해야 번호판 판정, ROI 크기 필터링 추가
+- **ROI 기반 감지**: 전체 화면이 아닌 YOLO가 지정한 영역만 OCR하여 속도와 정확도 향상
 - **GPU 가속 지원**: CUDA 사용 가능 시 YOLO 및 OCR 자동 가속
-- **로그 기반 중복 방지**: 로컬 파일이 없어도 `youtube_url_*.txt` 기록을 참조하여 중복 분석 방지
+- **로그 기반 중복 방지**: `youtube_url_*.txt` 기록을 참조하여 중복 분석 방지
 ## 주의사항

ytcollector-1.1.9.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,12 @@
+ytcollector/__init__.py,sha256=EaxBP_0Fv0LEFdg067uZxBrQHwOKGX8u08Y4b5uF1-Q,1094
+ytcollector/analyzer.py,sha256=7VJt4chc25HsEz8OwBDZhTz_8LnkpgSBM6mJKQpIUls,14391
+ytcollector/cli.py,sha256=aHF4EuQRPLKh65lnkI_dZ0ResztlVjpHlS5iHfzmpig,5577
+ytcollector/config.py,sha256=HmcFqMV1Z3kClnGKYi0q9cRZIXMRPAxIEI8cggatfrU,3199
+ytcollector/dataset_builder.py,sha256=nfArEwszoCln48n3T0Eff_4OOaYv8FF0YH8cARBGMWQ,2608
+ytcollector/downloader.py,sha256=TeC6agUmSPHZSZ9jdoc42i8i_NobzTEkoRtAIgW80kI,14544
+ytcollector/utils.py,sha256=6XDif-e3GbMHmUvTsBT0YblxNxYnS-2I8HnmjMBZs-M,4254
+ytcollector-1.1.9.dist-info/METADATA,sha256=WQIup61B02yB9ddtOaFGtlDYoV4nJVRD-vvVI_sVjc0,7208
+ytcollector-1.1.9.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
+ytcollector-1.1.9.dist-info/entry_points.txt,sha256=waiVuSJJYt-6_DAal-T4JkHgejo7wKYLdKrEI7tZ-ms,127
+ytcollector-1.1.9.dist-info/top_level.txt,sha256=wozNyCUm0eMOm-9U81yTql6oGaM2O5rWVBXDb93zzyQ,12
+ytcollector-1.1.9.dist-info/RECORD,,

ytcollector-1.1.7.dist-info/RECORD DELETED Viewed

@@ -1,12 +0,0 @@
-ytcollector/__init__.py,sha256=uXLmOZ3da_7GwVej0hDAuiUJsw5XweBBUrUs4sJo7J4,1094
-ytcollector/analyzer.py,sha256=raw7tcERbNG8cDTnKDr95VA3Nju9We4jW7XGheUUPWE,13097
-ytcollector/cli.py,sha256=aHF4EuQRPLKh65lnkI_dZ0ResztlVjpHlS5iHfzmpig,5577
-ytcollector/config.py,sha256=ZjyDWQg4haJPwUlP-eW0hXa_I2g9wyNaI8y5mxEU0vc,3040
-ytcollector/dataset_builder.py,sha256=nfArEwszoCln48n3T0Eff_4OOaYv8FF0YH8cARBGMWQ,2608
-ytcollector/downloader.py,sha256=TeC6agUmSPHZSZ9jdoc42i8i_NobzTEkoRtAIgW80kI,14544
-ytcollector/utils.py,sha256=6XDif-e3GbMHmUvTsBT0YblxNxYnS-2I8HnmjMBZs-M,4254
-ytcollector-1.1.7.dist-info/METADATA,sha256=Qyop1AVyK-BmWgrjsGUMiYbE55kmOZSwMKYWEADQzRk,7156
-ytcollector-1.1.7.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
-ytcollector-1.1.7.dist-info/entry_points.txt,sha256=waiVuSJJYt-6_DAal-T4JkHgejo7wKYLdKrEI7tZ-ms,127
-ytcollector-1.1.7.dist-info/top_level.txt,sha256=wozNyCUm0eMOm-9U81yTql6oGaM2O5rWVBXDb93zzyQ,12
-ytcollector-1.1.7.dist-info/RECORD,,

{ytcollector-1.1.7.dist-info → ytcollector-1.1.9.dist-info}/WHEEL RENAMED Viewed

File without changes

{ytcollector-1.1.7.dist-info → ytcollector-1.1.9.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{ytcollector-1.1.7.dist-info → ytcollector-1.1.9.dist-info}/top_level.txt RENAMED Viewed

File without changes

ytcollector 1.1.7__py3-none-any.whl → 1.1.9__py3-none-any.whl

ytcollector 1.1.7__py3-none-any.whl → 1.1.9__py3-none-any.whl