yt-metrics-cli 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- src/__init__.py +3 -0
- src/analyzer.py +314 -0
- src/config.py +62 -0
- src/exporters/__init__.py +15 -0
- src/exporters/csv_exporter.py +55 -0
- src/exporters/readme_exporter.py +115 -0
- src/exporters/text_exporter.py +325 -0
- src/exporters/url_exporter.py +49 -0
- src/main.py +213 -0
- src/metrics.py +75 -0
- src/transcript.py +83 -0
- yt_metrics_cli-0.2.0.dist-info/METADATA +144 -0
- yt_metrics_cli-0.2.0.dist-info/RECORD +16 -0
- yt_metrics_cli-0.2.0.dist-info/WHEEL +4 -0
- yt_metrics_cli-0.2.0.dist-info/entry_points.txt +2 -0
- yt_metrics_cli-0.2.0.dist-info/licenses/LICENSE +21 -0
src/__init__.py
ADDED
src/analyzer.py
ADDED
|
@@ -0,0 +1,314 @@
|
|
|
1
|
+
"""YouTube channel analysis and API interaction."""
|
|
2
|
+
|
|
3
|
+
import logging
|
|
4
|
+
import time
|
|
5
|
+
from typing import Any
|
|
6
|
+
|
|
7
|
+
import googleapiclient.discovery
|
|
8
|
+
from googleapiclient.errors import HttpError
|
|
9
|
+
|
|
10
|
+
from .config import get_settings
|
|
11
|
+
from .metrics import calculate_engagement_metrics
|
|
12
|
+
|
|
13
|
+
logger = logging.getLogger(__name__)
|
|
14
|
+
|
|
15
|
+
MAX_RETRIES = 3
|
|
16
|
+
BASE_DELAY = 1.0
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def _execute_with_retry(request: Any) -> dict[str, Any]:
|
|
20
|
+
"""Execute an API request with exponential backoff on quota/rate errors."""
|
|
21
|
+
for attempt in range(MAX_RETRIES + 1):
|
|
22
|
+
try:
|
|
23
|
+
result: dict[str, Any] = request.execute()
|
|
24
|
+
return result
|
|
25
|
+
except HttpError as e:
|
|
26
|
+
if e.resp.status in (403, 429) and attempt < MAX_RETRIES:
|
|
27
|
+
delay = BASE_DELAY * (2 ** attempt)
|
|
28
|
+
logger.warning(
|
|
29
|
+
"API rate limit (HTTP %d), retry %d/%d in %.1fs...",
|
|
30
|
+
e.resp.status, attempt + 1, MAX_RETRIES, delay,
|
|
31
|
+
)
|
|
32
|
+
time.sleep(delay)
|
|
33
|
+
else:
|
|
34
|
+
raise
|
|
35
|
+
return {} # unreachable but satisfies mypy
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
class YouTubeChannelAnalyzer:
|
|
39
|
+
def __init__(self, api_key: str | None = None) -> None:
|
|
40
|
+
settings = get_settings()
|
|
41
|
+
self.api_key = api_key or settings.youtube_api_key
|
|
42
|
+
if not self.api_key:
|
|
43
|
+
msg = (
|
|
44
|
+
"YouTube API key required. "
|
|
45
|
+
"Set YOUTUBE_API_KEY environment variable or pass it directly."
|
|
46
|
+
)
|
|
47
|
+
raise ValueError(msg)
|
|
48
|
+
|
|
49
|
+
self.youtube = googleapiclient.discovery.build(
|
|
50
|
+
"youtube", "v3", developerKey=self.api_key
|
|
51
|
+
)
|
|
52
|
+
|
|
53
|
+
def validate_api_key(self) -> bool:
|
|
54
|
+
"""Validate the YouTube API key by making a minimal API call."""
|
|
55
|
+
try:
|
|
56
|
+
request = self.youtube.i18nRegions().list(part="snippet")
|
|
57
|
+
request.execute()
|
|
58
|
+
return True
|
|
59
|
+
except HttpError as e:
|
|
60
|
+
error_content = e.content.decode("utf-8") if e.content else str(e)
|
|
61
|
+
if e.resp.status == 400:
|
|
62
|
+
self._handle_400_error(error_content, e)
|
|
63
|
+
elif e.resp.status == 403:
|
|
64
|
+
self._handle_403_error(error_content, e)
|
|
65
|
+
else:
|
|
66
|
+
msg = f"API validation failed with status {e.resp.status}: {error_content}"
|
|
67
|
+
raise RuntimeError(msg) from e
|
|
68
|
+
except (ValueError, RuntimeError):
|
|
69
|
+
raise
|
|
70
|
+
except Exception as e:
|
|
71
|
+
msg = f"Unexpected error validating API key: {e}"
|
|
72
|
+
raise RuntimeError(msg) from e
|
|
73
|
+
return False # unreachable but satisfies mypy
|
|
74
|
+
|
|
75
|
+
def get_channel_id_from_username(self, username: str) -> str:
|
|
76
|
+
"""Resolve channel ID from legacy username."""
|
|
77
|
+
request = self.youtube.channels().list(part="id", forUsername=username)
|
|
78
|
+
response = _execute_with_retry(request)
|
|
79
|
+
|
|
80
|
+
if "items" in response and len(response["items"]) > 0:
|
|
81
|
+
return str(response["items"][0]["id"])
|
|
82
|
+
msg = f"No channel found with username: {username}"
|
|
83
|
+
raise ValueError(msg)
|
|
84
|
+
|
|
85
|
+
def get_channel_id_from_custom_url(self, custom_url: str) -> str:
|
|
86
|
+
"""Resolve channel ID from custom URL (@handle)."""
|
|
87
|
+
if custom_url.startswith("@"):
|
|
88
|
+
custom_url = custom_url[1:]
|
|
89
|
+
|
|
90
|
+
request = self.youtube.search().list(
|
|
91
|
+
part="snippet", q=custom_url, type="channel", maxResults=1
|
|
92
|
+
)
|
|
93
|
+
response = _execute_with_retry(request)
|
|
94
|
+
|
|
95
|
+
if "items" in response and len(response["items"]) > 0:
|
|
96
|
+
return str(response["items"][0]["snippet"]["channelId"])
|
|
97
|
+
msg = f"No channel found with custom URL: @{custom_url}"
|
|
98
|
+
raise ValueError(msg)
|
|
99
|
+
|
|
100
|
+
def get_channel_info(self, channel_id: str) -> dict[str, Any]:
|
|
101
|
+
"""Get channel metadata and statistics."""
|
|
102
|
+
request = self.youtube.channels().list(part="snippet,statistics", id=channel_id)
|
|
103
|
+
response = _execute_with_retry(request)
|
|
104
|
+
|
|
105
|
+
if "items" not in response or len(response["items"]) == 0:
|
|
106
|
+
msg = f"No channel found with ID: {channel_id}"
|
|
107
|
+
raise ValueError(msg)
|
|
108
|
+
|
|
109
|
+
channel_item = response["items"][0]
|
|
110
|
+
return {
|
|
111
|
+
"id": channel_id,
|
|
112
|
+
"title": channel_item["snippet"]["title"],
|
|
113
|
+
"description": channel_item["snippet"]["description"],
|
|
114
|
+
"subscriber_count": channel_item["statistics"].get("subscriberCount"),
|
|
115
|
+
"video_count": channel_item["statistics"].get("videoCount"),
|
|
116
|
+
"view_count": channel_item["statistics"].get("viewCount"),
|
|
117
|
+
"thumbnail": channel_item["snippet"]["thumbnails"]["default"]["url"],
|
|
118
|
+
"url": f"https://www.youtube.com/channel/{channel_id}",
|
|
119
|
+
}
|
|
120
|
+
|
|
121
|
+
def get_channel_videos(
|
|
122
|
+
self,
|
|
123
|
+
channel_id: str | None = None,
|
|
124
|
+
username: str | None = None,
|
|
125
|
+
custom_url: str | None = None,
|
|
126
|
+
max_results: int = 50,
|
|
127
|
+
) -> tuple[dict[str, Any], list[dict[str, Any]]]:
|
|
128
|
+
"""Get videos from a channel with statistics."""
|
|
129
|
+
if channel_id is None and username is None and custom_url is None:
|
|
130
|
+
msg = "Must provide channel_id, username, or custom_url"
|
|
131
|
+
raise ValueError(msg)
|
|
132
|
+
|
|
133
|
+
if channel_id is None:
|
|
134
|
+
if username:
|
|
135
|
+
channel_id = self.get_channel_id_from_username(username)
|
|
136
|
+
elif custom_url:
|
|
137
|
+
channel_id = self.get_channel_id_from_custom_url(custom_url)
|
|
138
|
+
|
|
139
|
+
assert channel_id is not None # guaranteed by logic above
|
|
140
|
+
channel_info = self.get_channel_info(channel_id)
|
|
141
|
+
uploads_playlist_id = self._get_uploads_playlist(channel_id)
|
|
142
|
+
videos = self._fetch_playlist_videos(uploads_playlist_id, max_results)
|
|
143
|
+
videos_with_stats = self._get_videos_statistics(videos)
|
|
144
|
+
|
|
145
|
+
return channel_info, videos_with_stats
|
|
146
|
+
|
|
147
|
+
def get_multiple_channels_videos(
|
|
148
|
+
self,
|
|
149
|
+
channel_list: list[dict[str, Any]],
|
|
150
|
+
max_results_per_channel: int = 20,
|
|
151
|
+
) -> list[dict[str, Any]]:
|
|
152
|
+
"""Get videos from multiple channels with engagement metrics."""
|
|
153
|
+
all_channels_data: list[dict[str, Any]] = []
|
|
154
|
+
failed_channels: list[dict[str, Any]] = []
|
|
155
|
+
|
|
156
|
+
for channel in channel_list:
|
|
157
|
+
try:
|
|
158
|
+
channel_info, videos = self.get_channel_videos(
|
|
159
|
+
channel_id=channel.get("channel_id"),
|
|
160
|
+
username=channel.get("username"),
|
|
161
|
+
custom_url=channel.get("custom_url"),
|
|
162
|
+
max_results=max_results_per_channel,
|
|
163
|
+
)
|
|
164
|
+
|
|
165
|
+
subscriber_count = int(channel_info.get("subscriber_count", 0) or 0)
|
|
166
|
+
videos_with_metrics = calculate_engagement_metrics(videos, subscriber_count)
|
|
167
|
+
|
|
168
|
+
all_channels_data.append({"channel": channel_info, "videos": videos_with_metrics})
|
|
169
|
+
logger.info("Retrieved %d videos from %s", len(videos), channel_info["title"])
|
|
170
|
+
|
|
171
|
+
except Exception as e:
|
|
172
|
+
failed_channels.append({"channel": channel, "error": str(e)})
|
|
173
|
+
logger.warning("Error retrieving channel %s: %s", channel, e)
|
|
174
|
+
|
|
175
|
+
if failed_channels:
|
|
176
|
+
logger.warning("Failed to retrieve %d channels", len(failed_channels))
|
|
177
|
+
|
|
178
|
+
return all_channels_data
|
|
179
|
+
|
|
180
|
+
# --- Private helpers ---
|
|
181
|
+
|
|
182
|
+
def _get_uploads_playlist(self, channel_id: str) -> str:
|
|
183
|
+
"""Get the uploads playlist ID for a channel."""
|
|
184
|
+
request = self.youtube.channels().list(part="contentDetails", id=channel_id)
|
|
185
|
+
response = _execute_with_retry(request)
|
|
186
|
+
|
|
187
|
+
if "items" not in response or len(response["items"]) == 0:
|
|
188
|
+
msg = f"No channel found with ID: {channel_id}"
|
|
189
|
+
raise ValueError(msg)
|
|
190
|
+
|
|
191
|
+
return str(
|
|
192
|
+
response["items"][0]["contentDetails"]["relatedPlaylists"]["uploads"]
|
|
193
|
+
)
|
|
194
|
+
|
|
195
|
+
def _fetch_playlist_videos(
|
|
196
|
+
self, playlist_id: str, max_results: int
|
|
197
|
+
) -> list[dict[str, Any]]:
|
|
198
|
+
"""Fetch video IDs and metadata from a playlist."""
|
|
199
|
+
videos: list[dict[str, Any]] = []
|
|
200
|
+
next_page_token: str | None = None
|
|
201
|
+
|
|
202
|
+
while len(videos) < max_results:
|
|
203
|
+
request = self.youtube.playlistItems().list(
|
|
204
|
+
part="snippet,contentDetails",
|
|
205
|
+
playlistId=playlist_id,
|
|
206
|
+
maxResults=min(50, max_results - len(videos)),
|
|
207
|
+
pageToken=next_page_token,
|
|
208
|
+
)
|
|
209
|
+
response = _execute_with_retry(request)
|
|
210
|
+
|
|
211
|
+
for item in response["items"]:
|
|
212
|
+
video_id = item["contentDetails"]["videoId"]
|
|
213
|
+
videos.append(
|
|
214
|
+
{
|
|
215
|
+
"id": video_id,
|
|
216
|
+
"title": item["snippet"]["title"],
|
|
217
|
+
"published_at": item["snippet"]["publishedAt"],
|
|
218
|
+
"url": f"https://www.youtube.com/watch?v={video_id}",
|
|
219
|
+
}
|
|
220
|
+
)
|
|
221
|
+
|
|
222
|
+
next_page_token = response.get("nextPageToken")
|
|
223
|
+
if not next_page_token or len(videos) >= max_results:
|
|
224
|
+
break
|
|
225
|
+
|
|
226
|
+
return videos
|
|
227
|
+
|
|
228
|
+
def _get_videos_statistics(
|
|
229
|
+
self, videos: list[dict[str, Any]]
|
|
230
|
+
) -> list[dict[str, Any]]:
|
|
231
|
+
"""Fetch detailed statistics for videos in batches."""
|
|
232
|
+
if not videos:
|
|
233
|
+
return videos
|
|
234
|
+
|
|
235
|
+
settings = get_settings()
|
|
236
|
+
videos_with_stats: list[dict[str, Any]] = []
|
|
237
|
+
|
|
238
|
+
for i in range(0, len(videos), settings.api_batch_size):
|
|
239
|
+
batch = videos[i : i + settings.api_batch_size]
|
|
240
|
+
video_ids = [video["id"] for video in batch]
|
|
241
|
+
|
|
242
|
+
request = self.youtube.videos().list(
|
|
243
|
+
part="statistics,contentDetails", id=",".join(video_ids)
|
|
244
|
+
)
|
|
245
|
+
response = _execute_with_retry(request)
|
|
246
|
+
|
|
247
|
+
stats_map: dict[str, dict[str, Any]] = {}
|
|
248
|
+
for item in response.get("items", []):
|
|
249
|
+
stats_map[item["id"]] = {
|
|
250
|
+
"statistics": item.get("statistics", {}),
|
|
251
|
+
"contentDetails": item.get("contentDetails", {}),
|
|
252
|
+
}
|
|
253
|
+
|
|
254
|
+
for video in batch:
|
|
255
|
+
vid = video["id"]
|
|
256
|
+
if vid in stats_map:
|
|
257
|
+
stats = stats_map[vid]["statistics"]
|
|
258
|
+
content = stats_map[vid]["contentDetails"]
|
|
259
|
+
video.update(
|
|
260
|
+
{
|
|
261
|
+
"view_count": int(stats.get("viewCount", 0)),
|
|
262
|
+
"like_count": int(stats.get("likeCount", 0)),
|
|
263
|
+
"comment_count": int(stats.get("commentCount", 0)),
|
|
264
|
+
"duration": content.get("duration", "PT0S"),
|
|
265
|
+
}
|
|
266
|
+
)
|
|
267
|
+
else:
|
|
268
|
+
video.update(
|
|
269
|
+
{
|
|
270
|
+
"view_count": 0,
|
|
271
|
+
"like_count": 0,
|
|
272
|
+
"comment_count": 0,
|
|
273
|
+
"duration": "PT0S",
|
|
274
|
+
}
|
|
275
|
+
)
|
|
276
|
+
videos_with_stats.append(video)
|
|
277
|
+
|
|
278
|
+
return videos_with_stats
|
|
279
|
+
|
|
280
|
+
@staticmethod
|
|
281
|
+
def _handle_400_error(error_content: str, original: HttpError) -> None:
|
|
282
|
+
"""Handle HTTP 400 errors from the YouTube API."""
|
|
283
|
+
if "API key not valid" in error_content or "invalid" in error_content.lower():
|
|
284
|
+
msg = (
|
|
285
|
+
"Invalid YouTube API key. Check your YOUTUBE_API_KEY in .env file.\n"
|
|
286
|
+
"Get a valid key: https://console.cloud.google.com/apis/credentials"
|
|
287
|
+
)
|
|
288
|
+
raise ValueError(msg) from original
|
|
289
|
+
if "API key expired" in error_content:
|
|
290
|
+
msg = (
|
|
291
|
+
"YouTube API key has expired. Generate a new key at:\n"
|
|
292
|
+
"https://console.cloud.google.com/apis/credentials"
|
|
293
|
+
)
|
|
294
|
+
raise ValueError(msg) from original
|
|
295
|
+
msg = f"API key validation failed: {error_content}"
|
|
296
|
+
raise ValueError(msg) from original
|
|
297
|
+
|
|
298
|
+
@staticmethod
|
|
299
|
+
def _handle_403_error(error_content: str, original: HttpError) -> None:
|
|
300
|
+
"""Handle HTTP 403 errors from the YouTube API."""
|
|
301
|
+
if "quotaExceeded" in error_content:
|
|
302
|
+
msg = (
|
|
303
|
+
"YouTube API quota exceeded. Daily limit reached.\n"
|
|
304
|
+
"Quota resets at midnight Pacific Time. Try again later."
|
|
305
|
+
)
|
|
306
|
+
raise ValueError(msg) from original
|
|
307
|
+
if "accessNotConfigured" in error_content:
|
|
308
|
+
msg = (
|
|
309
|
+
"YouTube Data API v3 is not enabled for this key.\n"
|
|
310
|
+
"Enable: https://console.cloud.google.com/apis/library/youtube.googleapis.com"
|
|
311
|
+
)
|
|
312
|
+
raise ValueError(msg) from original
|
|
313
|
+
msg = f"API access forbidden: {error_content}"
|
|
314
|
+
raise ValueError(msg) from original
|
src/config.py
ADDED
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
"""Configuration via environment variables."""
|
|
2
|
+
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
|
|
5
|
+
from pydantic_settings import BaseSettings
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class Settings(BaseSettings):
    """Application settings loaded from environment variables.

    Every field carries a default, so the class can be instantiated with no
    environment configured at all.
    """

    # API credentials (empty = not set, validated per command)
    youtube_api_key: str = ""

    # API settings
    max_results_per_channel: int = 50
    api_batch_size: int = 50

    # Output
    output_dir: Path = Path("./output")

    # Transcript
    video_id: str = "tLkRAqmAEtE"
    youtube_transcript_fixtures_dir: str = ""
    transcript_languages: list[str] = ["es", "en"]  # noqa: B006

    # Channels config
    channels_file: Path = Path("channels.yml")

    # Metric thresholds (for performance classification)
    high_performance_multiplier: float = 1.5
    low_performance_multiplier: float = 0.5

    # Duration thresholds (seconds)
    short_video_max: int = 300
    long_video_min: int = 900
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
# Module-level cache: filled by get_settings(), cleared by reset_settings().
_settings: Settings | None = None


def get_settings() -> Settings:
    """Load and cache settings from environment.

    Returns:
        The cached ``Settings`` instance; it is created on the first call and
        reused on every later call until the cache is cleared.
    """
    global _settings  # noqa: PLW0603
    if _settings is None:
        _settings = Settings()
    return _settings
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def reset_settings() -> None:
    """Reset cached settings (useful for testing).

    Sets the module-level ``_settings`` cache back to ``None``.
    """
    global _settings  # noqa: PLW0603
    _settings = None
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def format_number(value: object) -> str:
    """Format number with thousand separators or return 'N/A'.

    Args:
        value: Anything whose ``str()`` form parses as an integer, for example
            ``1234`` or ``"1234"``.

    Returns:
        The integer rendered with ``,`` separators, or ``"N/A"`` when
        ``value`` is ``None`` or its text is not a valid integer literal
        (this includes floats such as ``12.5``).
    """
    if value is None:
        return "N/A"
    try:
        number = int(str(value))
    except (ValueError, TypeError):
        return "N/A"
    return f"{number:,}"
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
"""Report generators for yt-metrics-cli."""
|
|
2
|
+
|
|
3
|
+
from src.exporters.csv_exporter import export_to_csv
|
|
4
|
+
from src.exporters.readme_exporter import export_output_readme
|
|
5
|
+
from src.exporters.text_exporter import export_channel_stats, export_engagement_trends_report
|
|
6
|
+
from src.exporters.url_exporter import export_best_videos_report, export_latest_videos_report
|
|
7
|
+
|
|
8
|
+
__all__ = [
|
|
9
|
+
"export_best_videos_report",
|
|
10
|
+
"export_channel_stats",
|
|
11
|
+
"export_engagement_trends_report",
|
|
12
|
+
"export_latest_videos_report",
|
|
13
|
+
"export_output_readme",
|
|
14
|
+
"export_to_csv",
|
|
15
|
+
]
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
"""CSV export for channel and video data."""
|
|
2
|
+
|
|
3
|
+
import csv
|
|
4
|
+
import logging
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from typing import Any
|
|
7
|
+
|
|
8
|
+
logger = logging.getLogger(__name__)
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def export_to_csv(channels_data: list[dict[str, Any]], filename: str | Path) -> None:
|
|
12
|
+
"""Export channel and video data to CSV."""
|
|
13
|
+
if not channels_data:
|
|
14
|
+
logger.warning("No data to export.")
|
|
15
|
+
return
|
|
16
|
+
|
|
17
|
+
with open(filename, "w", newline="", encoding="utf-8") as csvfile:
|
|
18
|
+
writer = csv.writer(csvfile)
|
|
19
|
+
|
|
20
|
+
writer.writerow([
|
|
21
|
+
"Channel", "Subscribers", "Video Title", "Published Date", "Video URL",
|
|
22
|
+
"Views", "Likes", "Comments", "Duration (seconds)",
|
|
23
|
+
"Engagement Rate (Views %)", "Engagement Rate (Subscribers %)",
|
|
24
|
+
"View Rate (%)", "Like Rate (%)", "Comment Rate (%)", "Views per Minute",
|
|
25
|
+
])
|
|
26
|
+
|
|
27
|
+
for channel_data in channels_data:
|
|
28
|
+
channel_info = channel_data["channel"]
|
|
29
|
+
channel_name = channel_info["title"]
|
|
30
|
+
subscriber_count = channel_info.get("subscriber_count", "N/A")
|
|
31
|
+
|
|
32
|
+
for video in channel_data["videos"]:
|
|
33
|
+
writer.writerow([
|
|
34
|
+
channel_name,
|
|
35
|
+
subscriber_count,
|
|
36
|
+
video["title"],
|
|
37
|
+
video["published_at"],
|
|
38
|
+
video["url"],
|
|
39
|
+
video.get("view_count", 0),
|
|
40
|
+
video.get("like_count", 0),
|
|
41
|
+
video.get("comment_count", 0),
|
|
42
|
+
video.get("duration_seconds", 0),
|
|
43
|
+
video.get("engagement_rate_views", 0),
|
|
44
|
+
video.get("engagement_rate_subscribers", 0),
|
|
45
|
+
video.get("view_rate", 0),
|
|
46
|
+
video.get("like_rate", 0),
|
|
47
|
+
video.get("comment_rate", 0),
|
|
48
|
+
video.get("views_per_minute", 0),
|
|
49
|
+
])
|
|
50
|
+
|
|
51
|
+
total_videos = sum(len(cd["videos"]) for cd in channels_data)
|
|
52
|
+
logger.info(
|
|
53
|
+
"Exported %d videos from %d channels to %s",
|
|
54
|
+
total_videos, len(channels_data), filename,
|
|
55
|
+
)
|
|
@@ -0,0 +1,115 @@
|
|
|
1
|
+
"""README generator for output directories."""
|
|
2
|
+
|
|
3
|
+
import logging
|
|
4
|
+
from datetime import datetime
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from typing import Any
|
|
7
|
+
|
|
8
|
+
from src.config import format_number
|
|
9
|
+
|
|
10
|
+
logger = logging.getLogger(__name__)


def export_output_readme(
    output_dir: Path, timestamp: str, channels_data: list[dict[str, Any]]
) -> None:
    """Generate README.md in output directory explaining all generated files.

    Args:
        output_dir: Directory that receives ``README.md``.
        timestamp: Text placed in the report title and in the listed report
            file names.
        channels_data: Dicts with a ``"channel"`` dict and a ``"videos"``
            list, used for the summary counts and the channel list.
    """
    readme_path = output_dir / "README.md"
    total_channels = len(channels_data)
    total_videos = sum(len(cd["videos"]) for cd in channels_data)
    # Guard the average against an empty channel list.
    avg_videos = total_videos / total_channels if total_channels > 0 else 0

    with open(readme_path, "w", encoding="utf-8") as f:
        f.write(f"# YouTube Analysis Report - {timestamp}\n\n")
        f.write(f"Generated on: {datetime.now().strftime('%Y-%m-%d at %H:%M:%S')}\n\n")

        f.write("## Analysis Summary\n\n")
        f.write(f"- **Channels Analyzed:** {total_channels}\n")
        f.write(f"- **Total Videos:** {total_videos}\n")
        f.write(f"- **Average Videos per Channel:** {avg_videos:.1f}\n\n")

        _write_file_descriptions(f, timestamp)
        _write_metrics_table(f)
        _write_channels_list(f, channels_data)

        f.write("\n---\n\n")
        f.write("*Generated by YT Metrics CLI*\n")

    logger.info("Generated README.md in output directory: %s", readme_path)
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def _write_file_descriptions(f: Any, timestamp: str) -> None:
|
|
42
|
+
"""Write descriptions for each generated file."""
|
|
43
|
+
files = [
|
|
44
|
+
(
|
|
45
|
+
f"youtube_channels_videos_{timestamp}.csv",
|
|
46
|
+
"CSV (Comma-Separated Values)",
|
|
47
|
+
"Raw data export with complete metrics for all videos.",
|
|
48
|
+
["Import into Excel/Google Sheets", "Data visualization with BI tools"],
|
|
49
|
+
),
|
|
50
|
+
(
|
|
51
|
+
f"youtube_channel_stats_{timestamp}.txt",
|
|
52
|
+
"Plain text report",
|
|
53
|
+
"Per-channel statistics, top videos, and performance distribution.",
|
|
54
|
+
["Quick overview of channel performance", "Identify content patterns"],
|
|
55
|
+
),
|
|
56
|
+
(
|
|
57
|
+
f"youtube_engagement_trends_{timestamp}.txt",
|
|
58
|
+
"Plain text report",
|
|
59
|
+
"Cross-channel comparison, rankings, and trend analysis.",
|
|
60
|
+
["Compare channels", "Discover viral content patterns"],
|
|
61
|
+
),
|
|
62
|
+
(
|
|
63
|
+
f"youtube_best_videos_{timestamp}.txt",
|
|
64
|
+
"Plain text (URL list)",
|
|
65
|
+
"Top 15 videos with highest engagement rate from each channel.",
|
|
66
|
+
["Quick access to best content", "Create playlists"],
|
|
67
|
+
),
|
|
68
|
+
(
|
|
69
|
+
f"youtube_latest_videos_{timestamp}.txt",
|
|
70
|
+
"Plain text (URL list)",
|
|
71
|
+
"15 most recent videos from each channel.",
|
|
72
|
+
["Track recent content", "Monitor channel activity"],
|
|
73
|
+
),
|
|
74
|
+
]
|
|
75
|
+
|
|
76
|
+
f.write("## Generated Files\n\n")
|
|
77
|
+
for name, fmt, desc, uses in files:
|
|
78
|
+
f.write(f"### `{name}`\n")
|
|
79
|
+
f.write(f"**Format:** {fmt}\n\n")
|
|
80
|
+
f.write(f"**Description:** {desc}\n\n")
|
|
81
|
+
f.write("**Use Cases:**\n")
|
|
82
|
+
for use in uses:
|
|
83
|
+
f.write(f"- {use}\n")
|
|
84
|
+
f.write("\n---\n\n")
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
def _write_metrics_table(f: Any) -> None:
|
|
88
|
+
"""Write engagement metrics explanation table."""
|
|
89
|
+
f.write("## Engagement Metrics Explained\n\n")
|
|
90
|
+
f.write("| Metric | Formula | Interpretation |\n")
|
|
91
|
+
f.write("|--------|---------|----------------|\n")
|
|
92
|
+
f.write(
|
|
93
|
+
"| **Engagement Rate (Views)** "
|
|
94
|
+
"| `(likes + comments) / views x 100` "
|
|
95
|
+
"| Audience interaction |\n"
|
|
96
|
+
)
|
|
97
|
+
f.write(
|
|
98
|
+
"| **Engagement Rate (Subs)** "
|
|
99
|
+
"| `(likes + comments) / subscribers x 100` "
|
|
100
|
+
"| Relative to size |\n"
|
|
101
|
+
)
|
|
102
|
+
f.write("| **View Rate** | `views / subscribers x 100` | >100% = viral potential |\n")
|
|
103
|
+
f.write("| **Like Rate** | `likes / views x 100` | Viewer satisfaction |\n")
|
|
104
|
+
f.write("| **Comment Rate** | `comments / views x 100` | Discussion level |\n")
|
|
105
|
+
f.write("| **Views per Minute** | `views / (duration / 60)` | Content efficiency |\n\n")
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
def _write_channels_list(f: Any, channels_data: list[dict[str, Any]]) -> None:
    """Write list of analyzed channels.

    Emits a ``## Channels Analyzed`` heading followed by one numbered line per
    channel with its title, formatted subscriber count and number of videos.

    Args:
        f: Writable text stream (anything with a ``write`` method).
        channels_data: Dicts with a ``"channel"`` dict (``title`` required,
            ``subscriber_count`` optional) and a ``"videos"`` list.
    """
    f.write("## Channels Analyzed\n\n")
    for i, cd in enumerate(channels_data, 1):
        info = cd["channel"]
        subs = format_number(info.get("subscriber_count"))
        count = len(cd["videos"])
        f.write(f"{i}. **{info['title']}** - {subs} subscribers ({count} videos analyzed)\n")
|