yt-metrics-cli 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
src/__init__.py ADDED
@@ -0,0 +1,3 @@
1
"""YT Metrics CLI - Channel analyzer and transcript downloader."""

# Must match the version of the published distribution (this release ships as 0.2.0).
__version__ = "0.2.0"
src/analyzer.py ADDED
@@ -0,0 +1,314 @@
1
+ """YouTube channel analysis and API interaction."""
2
+
3
+ import logging
4
+ import time
5
+ from typing import Any
6
+
7
+ import googleapiclient.discovery
8
+ from googleapiclient.errors import HttpError
9
+
10
+ from .config import get_settings
11
+ from .metrics import calculate_engagement_metrics
12
+
13
+ logger = logging.getLogger(__name__)
14
+
15
+ MAX_RETRIES = 3
16
+ BASE_DELAY = 1.0
17
+
18
+
19
+ def _execute_with_retry(request: Any) -> dict[str, Any]:
20
+ """Execute an API request with exponential backoff on quota/rate errors."""
21
+ for attempt in range(MAX_RETRIES + 1):
22
+ try:
23
+ result: dict[str, Any] = request.execute()
24
+ return result
25
+ except HttpError as e:
26
+ if e.resp.status in (403, 429) and attempt < MAX_RETRIES:
27
+ delay = BASE_DELAY * (2 ** attempt)
28
+ logger.warning(
29
+ "API rate limit (HTTP %d), retry %d/%d in %.1fs...",
30
+ e.resp.status, attempt + 1, MAX_RETRIES, delay,
31
+ )
32
+ time.sleep(delay)
33
+ else:
34
+ raise
35
+ return {} # unreachable but satisfies mypy
36
+
37
+
38
class YouTubeChannelAnalyzer:
    """Resolve YouTube channels and collect per-video statistics via the Data API v3."""

    # Largest number of video IDs sent in a single videos().list call. This is
    # the per-request limit documented for the Data API and also the default of
    # ``settings.api_batch_size``.
    _MAX_IDS_PER_REQUEST = 50

    def __init__(self, api_key: str | None = None) -> None:
        """Build the API client.

        Args:
            api_key: Developer key; ``settings.youtube_api_key`` is used when omitted.

        Raises:
            ValueError: If neither the argument nor the settings provide a key.
        """
        settings = get_settings()
        self.api_key = api_key or settings.youtube_api_key
        if not self.api_key:
            msg = (
                "YouTube API key required. "
                "Set YOUTUBE_API_KEY environment variable or pass it directly."
            )
            raise ValueError(msg)

        self.youtube = googleapiclient.discovery.build(
            "youtube", "v3", developerKey=self.api_key
        )

    def validate_api_key(self) -> bool:
        """Validate the YouTube API key by making a minimal API call.

        Returns:
            True when the call succeeds.

        Raises:
            ValueError: For HTTP 400/403 responses (invalid key, quota, API disabled).
            RuntimeError: For any other HTTP status or unexpected failure.
        """
        try:
            request = self.youtube.i18nRegions().list(part="snippet")
            request.execute()
            return True
        except HttpError as e:
            error_content = e.content.decode("utf-8") if e.content else str(e)
            if e.resp.status == 400:
                self._handle_400_error(error_content, e)
            elif e.resp.status == 403:
                self._handle_403_error(error_content, e)
            else:
                msg = f"API validation failed with status {e.resp.status}: {error_content}"
                raise RuntimeError(msg) from e
        except (ValueError, RuntimeError):
            raise
        except Exception as e:
            msg = f"Unexpected error validating API key: {e}"
            raise RuntimeError(msg) from e
        return False  # unreachable but satisfies mypy

    def get_channel_id_from_username(self, username: str) -> str:
        """Resolve channel ID from legacy username.

        Raises:
            ValueError: If the API returns no channel for ``username``.
        """
        request = self.youtube.channels().list(part="id", forUsername=username)
        response = _execute_with_retry(request)

        items = response.get("items")
        if items:
            return str(items[0]["id"])
        msg = f"No channel found with username: {username}"
        raise ValueError(msg)

    def get_channel_id_from_custom_url(self, custom_url: str) -> str:
        """Resolve channel ID from custom URL (@handle).

        A single leading ``@`` is stripped and the remainder is used as a channel
        search query; the first hit is returned.

        Raises:
            ValueError: If the search returns no channel.
        """
        custom_url = custom_url.removeprefix("@")

        request = self.youtube.search().list(
            part="snippet", q=custom_url, type="channel", maxResults=1
        )
        response = _execute_with_retry(request)

        items = response.get("items")
        if items:
            return str(items[0]["snippet"]["channelId"])
        msg = f"No channel found with custom URL: @{custom_url}"
        raise ValueError(msg)

    def get_channel_info(self, channel_id: str) -> dict[str, Any]:
        """Get channel metadata and statistics.

        Returns:
            Dict with ``id``, ``title``, ``description``, ``subscriber_count``,
            ``video_count``, ``view_count``, ``thumbnail`` and ``url``. The three
            counters are ``None`` when the API response omits them.

        Raises:
            ValueError: If no channel matches ``channel_id``.
        """
        request = self.youtube.channels().list(part="snippet,statistics", id=channel_id)
        response = _execute_with_retry(request)

        items = response.get("items")
        if not items:
            msg = f"No channel found with ID: {channel_id}"
            raise ValueError(msg)

        channel_item = items[0]
        snippet = channel_item["snippet"]
        statistics = channel_item["statistics"]
        return {
            "id": channel_id,
            "title": snippet["title"],
            "description": snippet["description"],
            "subscriber_count": statistics.get("subscriberCount"),
            "video_count": statistics.get("videoCount"),
            "view_count": statistics.get("viewCount"),
            "thumbnail": snippet["thumbnails"]["default"]["url"],
            "url": f"https://www.youtube.com/channel/{channel_id}",
        }

    def get_channel_videos(
        self,
        channel_id: str | None = None,
        username: str | None = None,
        custom_url: str | None = None,
        max_results: int = 50,
    ) -> tuple[dict[str, Any], list[dict[str, Any]]]:
        """Get videos from a channel with statistics.

        The channel is identified by ``channel_id`` if given, otherwise resolved
        from ``username``, otherwise from ``custom_url``. Empty strings count as
        "not provided".

        Returns:
            ``(channel_info, videos)`` where each video carries its statistics.

        Raises:
            ValueError: If no usable identifier is supplied or the channel is not found.
        """
        if not channel_id:
            if username:
                channel_id = self.get_channel_id_from_username(username)
            elif custom_url:
                channel_id = self.get_channel_id_from_custom_url(custom_url)
            else:
                # Explicit error instead of an assert: asserts vanish under -O and
                # an empty identifier would otherwise reach the API as None.
                msg = "Must provide channel_id, username, or custom_url"
                raise ValueError(msg)

        channel_info = self.get_channel_info(channel_id)
        uploads_playlist_id = self._get_uploads_playlist(channel_id)
        videos = self._fetch_playlist_videos(uploads_playlist_id, max_results)
        videos_with_stats = self._get_videos_statistics(videos)

        return channel_info, videos_with_stats

    def get_multiple_channels_videos(
        self,
        channel_list: list[dict[str, Any]],
        max_results_per_channel: int = 20,
    ) -> list[dict[str, Any]]:
        """Get videos from multiple channels with engagement metrics.

        Each entry of ``channel_list`` may carry ``channel_id``, ``username`` or
        ``custom_url``. A channel that fails is logged and skipped so one bad
        entry does not abort the whole run.

        Returns:
            One ``{"channel": ..., "videos": ...}`` dict per channel that succeeded.
        """
        all_channels_data: list[dict[str, Any]] = []
        failed_channels: list[dict[str, Any]] = []

        for channel in channel_list:
            try:
                channel_info, videos = self.get_channel_videos(
                    channel_id=channel.get("channel_id"),
                    username=channel.get("username"),
                    custom_url=channel.get("custom_url"),
                    max_results=max_results_per_channel,
                )

                # subscriber_count may be None or a numeric string; treat missing as 0.
                subscriber_count = int(channel_info.get("subscriber_count", 0) or 0)
                videos_with_metrics = calculate_engagement_metrics(videos, subscriber_count)

                all_channels_data.append({"channel": channel_info, "videos": videos_with_metrics})
                logger.info("Retrieved %d videos from %s", len(videos), channel_info["title"])

            except Exception as e:
                # Deliberate best-effort boundary: record the failure and continue.
                failed_channels.append({"channel": channel, "error": str(e)})
                logger.warning("Error retrieving channel %s: %s", channel, e)

        if failed_channels:
            logger.warning("Failed to retrieve %d channels", len(failed_channels))

        return all_channels_data

    # --- Private helpers ---

    def _get_uploads_playlist(self, channel_id: str) -> str:
        """Get the uploads playlist ID for a channel.

        Raises:
            ValueError: If no channel matches ``channel_id``.
        """
        request = self.youtube.channels().list(part="contentDetails", id=channel_id)
        response = _execute_with_retry(request)

        items = response.get("items")
        if not items:
            msg = f"No channel found with ID: {channel_id}"
            raise ValueError(msg)

        return str(items[0]["contentDetails"]["relatedPlaylists"]["uploads"])

    def _fetch_playlist_videos(
        self, playlist_id: str, max_results: int
    ) -> list[dict[str, Any]]:
        """Fetch video IDs and metadata from a playlist.

        Pages through the playlist, at most 50 items per request, until
        ``max_results`` videos are collected or the API stops returning a page token.

        Returns:
            Dicts with ``id``, ``title``, ``published_at`` and ``url``.
        """
        videos: list[dict[str, Any]] = []
        next_page_token: str | None = None

        while len(videos) < max_results:
            request = self.youtube.playlistItems().list(
                part="snippet,contentDetails",
                playlistId=playlist_id,
                maxResults=min(50, max_results - len(videos)),
                pageToken=next_page_token,
            )
            response = _execute_with_retry(request)

            # A response without "items" is treated as an empty page.
            for item in response.get("items", []):
                video_id = item["contentDetails"]["videoId"]
                videos.append(
                    {
                        "id": video_id,
                        "title": item["snippet"]["title"],
                        "published_at": item["snippet"]["publishedAt"],
                        "url": f"https://www.youtube.com/watch?v={video_id}",
                    }
                )

            next_page_token = response.get("nextPageToken")
            if not next_page_token:
                break

        return videos

    def _get_videos_statistics(
        self, videos: list[dict[str, Any]]
    ) -> list[dict[str, Any]]:
        """Fetch detailed statistics for videos in batches.

        Each dict in ``videos`` is updated in place with ``view_count``,
        ``like_count``, ``comment_count`` and ``duration``; videos missing from
        the API response get zero counts and ``"PT0S"``.

        Returns:
            The same video dicts, in their original order.
        """
        if not videos:
            return videos

        # Clamp the configured batch size: 0 or a negative value would make
        # range() fail, and anything above the per-request ID limit is refused
        # by the API.
        batch_size = max(1, min(get_settings().api_batch_size, self._MAX_IDS_PER_REQUEST))
        videos_with_stats: list[dict[str, Any]] = []

        for start in range(0, len(videos), batch_size):
            batch = videos[start : start + batch_size]

            request = self.youtube.videos().list(
                part="statistics,contentDetails",
                id=",".join(video["id"] for video in batch),
            )
            response = _execute_with_retry(request)

            stats_map: dict[str, dict[str, Any]] = {
                item["id"]: {
                    "statistics": item.get("statistics", {}),
                    "contentDetails": item.get("contentDetails", {}),
                }
                for item in response.get("items", [])
            }

            for video in batch:
                found = stats_map.get(video["id"])
                if found is not None:
                    stats = found["statistics"]
                    content = found["contentDetails"]
                    video.update(
                        {
                            "view_count": int(stats.get("viewCount", 0)),
                            "like_count": int(stats.get("likeCount", 0)),
                            "comment_count": int(stats.get("commentCount", 0)),
                            "duration": content.get("duration", "PT0S"),
                        }
                    )
                else:
                    video.update(
                        {
                            "view_count": 0,
                            "like_count": 0,
                            "comment_count": 0,
                            "duration": "PT0S",
                        }
                    )
                videos_with_stats.append(video)

        return videos_with_stats

    @staticmethod
    def _handle_400_error(error_content: str, original: HttpError) -> None:
        """Handle HTTP 400 errors from the YouTube API.

        Always raises ``ValueError`` chained to ``original``; the message depends
        on what ``error_content`` reports.
        """
        # Check expiry first: the generic "invalid" test below also matches the
        # body of an expired-key response and would otherwise hide this message.
        if "API key expired" in error_content:
            msg = (
                "YouTube API key has expired. Generate a new key at:\n"
                "https://console.cloud.google.com/apis/credentials"
            )
            raise ValueError(msg) from original
        if "API key not valid" in error_content or "invalid" in error_content.lower():
            msg = (
                "Invalid YouTube API key. Check your YOUTUBE_API_KEY in .env file.\n"
                "Get a valid key: https://console.cloud.google.com/apis/credentials"
            )
            raise ValueError(msg) from original
        msg = f"API key validation failed: {error_content}"
        raise ValueError(msg) from original

    @staticmethod
    def _handle_403_error(error_content: str, original: HttpError) -> None:
        """Handle HTTP 403 errors from the YouTube API.

        Always raises ``ValueError`` chained to ``original``; the message depends
        on what ``error_content`` reports.
        """
        if "quotaExceeded" in error_content:
            msg = (
                "YouTube API quota exceeded. Daily limit reached.\n"
                "Quota resets at midnight Pacific Time. Try again later."
            )
            raise ValueError(msg) from original
        if "accessNotConfigured" in error_content:
            msg = (
                "YouTube Data API v3 is not enabled for this key.\n"
                "Enable: https://console.cloud.google.com/apis/library/youtube.googleapis.com"
            )
            raise ValueError(msg) from original
        msg = f"API access forbidden: {error_content}"
        raise ValueError(msg) from original
src/config.py ADDED
@@ -0,0 +1,62 @@
1
+ """Configuration via environment variables."""
2
+
3
+ from pathlib import Path
4
+
5
+ from pydantic_settings import BaseSettings
6
+
7
+
8
class Settings(BaseSettings):
    """Application settings loaded from environment variables.

    Every field below has a default, so the class can be instantiated with no
    environment configured.
    """

    # --- API credentials ---
    # An empty string means "not set"; commands validate the key themselves.
    youtube_api_key: str = ""

    # --- API settings ---
    max_results_per_channel: int = 50
    api_batch_size: int = 50

    # --- Output ---
    output_dir: Path = Path("./output")

    # --- Transcript ---
    video_id: str = "tLkRAqmAEtE"
    youtube_transcript_fixtures_dir: str = ""
    transcript_languages: list[str] = ["es", "en"]  # noqa: B006

    # --- Channels config ---
    channels_file: Path = Path("channels.yml")

    # --- Metric thresholds (for performance classification) ---
    high_performance_multiplier: float = 1.5
    low_performance_multiplier: float = 0.5

    # --- Duration thresholds, in seconds ---
    short_video_max: int = 5 * 60
    long_video_min: int = 15 * 60
37
+
38
_settings: Settings | None = None


def get_settings() -> Settings:
    """Return the module-wide Settings instance, creating it on first use."""
    global _settings  # noqa: PLW0603
    cached = _settings
    if cached is None:
        cached = Settings()
        _settings = cached
    return cached


def reset_settings() -> None:
    """Discard the cached settings so the next get_settings() call rebuilds them.

    Useful in tests.
    """
    global _settings  # noqa: PLW0603
    _settings = None
53
+
54
+
55
def format_number(value: object) -> str:
    """Render *value* as an integer with thousand separators.

    The value is converted through ``str`` and then ``int``; ``None`` and
    anything that does not parse as an integer yield ``"N/A"``.
    """
    if value is None:
        return "N/A"

    try:
        number = int(str(value))
    except (ValueError, TypeError):
        return "N/A"

    return format(number, ",")
@@ -0,0 +1,15 @@
1
+ """Report generators for yt-metrics-cli."""
2
+
3
+ from src.exporters.csv_exporter import export_to_csv
4
+ from src.exporters.readme_exporter import export_output_readme
5
+ from src.exporters.text_exporter import export_channel_stats, export_engagement_trends_report
6
+ from src.exporters.url_exporter import export_best_videos_report, export_latest_videos_report
7
+
8
+ __all__ = [
9
+ "export_best_videos_report",
10
+ "export_channel_stats",
11
+ "export_engagement_trends_report",
12
+ "export_latest_videos_report",
13
+ "export_output_readme",
14
+ "export_to_csv",
15
+ ]
@@ -0,0 +1,55 @@
1
+ """CSV export for channel and video data."""
2
+
3
+ import csv
4
+ import logging
5
+ from pathlib import Path
6
+ from typing import Any
7
+
8
+ logger = logging.getLogger(__name__)
9
+
10
+
11
+ def export_to_csv(channels_data: list[dict[str, Any]], filename: str | Path) -> None:
12
+ """Export channel and video data to CSV."""
13
+ if not channels_data:
14
+ logger.warning("No data to export.")
15
+ return
16
+
17
+ with open(filename, "w", newline="", encoding="utf-8") as csvfile:
18
+ writer = csv.writer(csvfile)
19
+
20
+ writer.writerow([
21
+ "Channel", "Subscribers", "Video Title", "Published Date", "Video URL",
22
+ "Views", "Likes", "Comments", "Duration (seconds)",
23
+ "Engagement Rate (Views %)", "Engagement Rate (Subscribers %)",
24
+ "View Rate (%)", "Like Rate (%)", "Comment Rate (%)", "Views per Minute",
25
+ ])
26
+
27
+ for channel_data in channels_data:
28
+ channel_info = channel_data["channel"]
29
+ channel_name = channel_info["title"]
30
+ subscriber_count = channel_info.get("subscriber_count", "N/A")
31
+
32
+ for video in channel_data["videos"]:
33
+ writer.writerow([
34
+ channel_name,
35
+ subscriber_count,
36
+ video["title"],
37
+ video["published_at"],
38
+ video["url"],
39
+ video.get("view_count", 0),
40
+ video.get("like_count", 0),
41
+ video.get("comment_count", 0),
42
+ video.get("duration_seconds", 0),
43
+ video.get("engagement_rate_views", 0),
44
+ video.get("engagement_rate_subscribers", 0),
45
+ video.get("view_rate", 0),
46
+ video.get("like_rate", 0),
47
+ video.get("comment_rate", 0),
48
+ video.get("views_per_minute", 0),
49
+ ])
50
+
51
+ total_videos = sum(len(cd["videos"]) for cd in channels_data)
52
+ logger.info(
53
+ "Exported %d videos from %d channels to %s",
54
+ total_videos, len(channels_data), filename,
55
+ )
@@ -0,0 +1,115 @@
1
+ """README generator for output directories."""
2
+
3
+ import logging
4
+ from datetime import datetime
5
+ from pathlib import Path
6
+ from typing import Any
7
+
8
+ from src.config import format_number
9
+
10
+ logger = logging.getLogger(__name__)
11
+
12
+
13
def export_output_readme(
    output_dir: Path, timestamp: str, channels_data: list[dict[str, Any]]
) -> None:
    """Write ``README.md`` into *output_dir* describing the generated report files.

    The document holds a title with *timestamp*, the generation time, a short
    summary (channel count, video count, average videos per channel), the file
    descriptions, the metrics table and the list of analyzed channels.
    """
    channel_total = len(channels_data)
    video_total = sum(len(entry["videos"]) for entry in channels_data)
    # Guard the division so an empty run reports an average of 0.
    videos_per_channel = video_total / channel_total if channel_total > 0 else 0
    readme_path = output_dir / "README.md"

    with open(readme_path, "w", encoding="utf-8") as f:
        generated_on = datetime.now().strftime("%Y-%m-%d at %H:%M:%S")
        header_lines = (
            f"# YouTube Analysis Report - {timestamp}\n\n",
            f"Generated on: {generated_on}\n\n",
            "## Analysis Summary\n\n",
            f"- **Channels Analyzed:** {channel_total}\n",
            f"- **Total Videos:** {video_total}\n",
            f"- **Average Videos per Channel:** {videos_per_channel:.1f}\n\n",
        )
        for line in header_lines:
            f.write(line)

        _write_file_descriptions(f, timestamp)
        _write_metrics_table(f)
        _write_channels_list(f, channels_data)

        for line in ("\n---\n\n", "*Generated by YT Metrics CLI*\n"):
            f.write(line)

    logger.info("Generated README.md in output directory: %s", readme_path)
39
+
40
+
41
def _write_file_descriptions(f: Any, timestamp: str) -> None:
    """Write the "Generated Files" section: one subsection per report file.

    Each subsection gives the file name (which embeds *timestamp*), its format,
    a description and a bullet list of use cases, followed by a horizontal rule.
    """
    catalogue = (
        {
            "name": f"youtube_channels_videos_{timestamp}.csv",
            "format": "CSV (Comma-Separated Values)",
            "description": "Raw data export with complete metrics for all videos.",
            "uses": ("Import into Excel/Google Sheets", "Data visualization with BI tools"),
        },
        {
            "name": f"youtube_channel_stats_{timestamp}.txt",
            "format": "Plain text report",
            "description": "Per-channel statistics, top videos, and performance distribution.",
            "uses": ("Quick overview of channel performance", "Identify content patterns"),
        },
        {
            "name": f"youtube_engagement_trends_{timestamp}.txt",
            "format": "Plain text report",
            "description": "Cross-channel comparison, rankings, and trend analysis.",
            "uses": ("Compare channels", "Discover viral content patterns"),
        },
        {
            "name": f"youtube_best_videos_{timestamp}.txt",
            "format": "Plain text (URL list)",
            "description": "Top 15 videos with highest engagement rate from each channel.",
            "uses": ("Quick access to best content", "Create playlists"),
        },
        {
            "name": f"youtube_latest_videos_{timestamp}.txt",
            "format": "Plain text (URL list)",
            "description": "15 most recent videos from each channel.",
            "uses": ("Track recent content", "Monitor channel activity"),
        },
    )

    f.write("## Generated Files\n\n")
    for entry in catalogue:
        bullets = "".join(f"- {use}\n" for use in entry["uses"])
        f.write(f"### `{entry['name']}`\n")
        f.write(f"**Format:** {entry['format']}\n\n")
        f.write(f"**Description:** {entry['description']}\n\n")
        f.write("**Use Cases:**\n" + bullets)
        f.write("\n---\n\n")
85
+
86
+
87
def _write_metrics_table(f: Any) -> None:
    """Write the Markdown table that explains each engagement metric."""
    rows = (
        ("Engagement Rate (Views)", "(likes + comments) / views x 100", "Audience interaction"),
        (
            "Engagement Rate (Subs)",
            "(likes + comments) / subscribers x 100",
            "Relative to size",
        ),
        ("View Rate", "views / subscribers x 100", ">100% = viral potential"),
        ("Like Rate", "likes / views x 100", "Viewer satisfaction"),
        ("Comment Rate", "comments / views x 100", "Discussion level"),
        ("Views per Minute", "views / (duration / 60)", "Content efficiency"),
    )

    f.write("## Engagement Metrics Explained\n\n")
    f.write("| Metric | Formula | Interpretation |\n")
    f.write("|--------|---------|----------------|\n")
    last = len(rows) - 1
    for index, (metric, formula, meaning) in enumerate(rows):
        # The final row carries an extra newline to close the section.
        terminator = "\n\n" if index == last else "\n"
        f.write(f"| **{metric}** | `{formula}` | {meaning} |{terminator}")
106
+
107
+
108
def _write_channels_list(f: Any, channels_data: list[dict[str, Any]]) -> None:
    """Write the "Channels Analyzed" section as a numbered list.

    Each line shows the channel title, its formatted subscriber count and the
    number of videos collected for it.
    """
    f.write("## Channels Analyzed\n\n")
    position = 0
    for entry in channels_data:
        position += 1
        channel = entry["channel"]
        subscribers = format_number(channel.get("subscriber_count"))
        analyzed = len(entry["videos"])
        line = (
            f"{position}. **{channel['title']}** - {subscribers} subscribers "
            f"({analyzed} videos analyzed)\n"
        )
        f.write(line)