supadata 1.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,60 @@
1
+ # Python
2
+ **/__pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+ *.so
6
+ .Python
7
+ **/build/
8
+ develop-eggs/
9
+ **/dist/
10
+ downloads/
11
+ eggs/
12
+ .eggs/
13
+ lib/
14
+ lib64/
15
+ parts/
16
+ sdist/
17
+ var/
18
+ wheels/
19
+ **/*.egg-info/
20
+ .installed.cfg
21
+ *.egg
22
+ MANIFEST
23
+
24
+ # Virtual Environment
25
+ .env
26
+ **/.venv/
27
+ **/env/
28
+ **/venv/
29
+ ENV/
30
+ env.bak/
31
+ venv.bak/
32
+
33
+ # IDE
34
+ **/.idea/
35
+ **/.vscode/
36
+ *.swp
37
+ *.swo
38
+ .DS_Store
39
+
40
+ # Testing
41
+ .coverage
42
+ **/.pytest_cache/
43
+ **/htmlcov/
44
+ **/.tox/
45
+ **/.nox/
46
+ coverage.xml
47
+ *.cover
48
+ *.py,cover
49
+ **/.hypothesis/
50
+
51
+ # Distribution / packaging
52
+ .Python
53
+ *.manifest
54
+ *.spec
55
+
56
+ # Jupyter Notebook
57
+ **/.ipynb_checkpoints
58
+
59
+ # Logs
60
+ *.log
supadata-1.0.0/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2024 Supadata
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,97 @@
1
+ Metadata-Version: 2.4
2
+ Name: supadata
3
+ Version: 1.0.0
4
+ Summary: The official Python SDK for Supadata - scrape web content and YouTube transcripts with ease
5
+ Project-URL: homepage, https://supadata.ai
6
+ Project-URL: repository, https://github.com/supadata/supadata-py
7
+ Project-URL: documentation, https://supadata.ai/documentation
8
+ Author-email: Supadata <support@supadata.ai>
9
+ License-Expression: MIT
10
+ License-File: LICENSE
11
+ Keywords: ai,api,llm,supadata,transcripts,web-scraping,youtube
12
+ Classifier: License :: OSI Approved :: MIT License
13
+ Classifier: Operating System :: OS Independent
14
+ Classifier: Programming Language :: Python :: 3
15
+ Requires-Python: >=3.7
16
+ Requires-Dist: requests>=2.28.1
17
+ Provides-Extra: test
18
+ Requires-Dist: pytest>=7.0.0; extra == 'test'
19
+ Requires-Dist: requests-mock>=1.11.0; extra == 'test'
20
+ Description-Content-Type: text/markdown
21
+
22
+ # Supadata Python SDK
23
+
24
+ [![PyPI version](https://badge.fury.io/py/supadata.svg)](https://badge.fury.io/py/supadata)
25
+ [![MIT license](https://img.shields.io/badge/license-MIT-brightgreen.svg?style=flat)](http://opensource.org/licenses/MIT)
26
+
27
+ The official Python SDK for Supadata.
28
+
29
+ Get your free API key at [supadata.ai](https://supadata.ai) and start scraping data in minutes.
30
+
31
+ ## Installation
32
+
33
+ ```bash
34
+ pip install supadata
35
+ ```
36
+
37
+ ## Usage
38
+
39
+ ```python
40
+ from supadata import Supadata
41
+
42
+ # Initialize the client
43
+ client = Supadata(api_key="YOUR_API_KEY")
44
+
45
+ # Get YouTube transcript
46
+ transcript = client.get_transcript(video_id="VIDEO_ID")
47
+ print(f"Got transcript in {transcript.lang}")
48
+
49
+ # Translate YouTube transcript to Spanish
50
+ translated = client.translate_transcript(
51
+ video_id="VIDEO_ID",
52
+ lang="es"
53
+ )
54
+ print(f"Got translated transcript in {translated.lang}")
55
+
56
+ # Get plain text transcript
57
+ text_transcript = client.get_transcript(
58
+ video_id="VIDEO_ID",
59
+ text=True
60
+ )
61
+ print(text_transcript.content)
62
+
63
+ # Scrape web content
64
+ web_content = client.scrape("https://supadata.ai")
65
+ print(f"Page title: {web_content.name}")
66
+ print(f"Content length: {web_content.count_characters} characters")
67
+
68
+ # Map website URLs
69
+ site_map = client.map("https://supadata.ai")
70
+ print(f"Found {len(site_map.urls)} URLs")
71
+ ```
72
+
73
+ ## Error Handling
74
+
75
+ The SDK uses the standard `requests` library and will raise `requests.exceptions.RequestException` for API-related errors:
76
+
77
+ ```python
78
+ from requests.exceptions import RequestException
79
+
80
+ try:
81
+ transcript = client.get_transcript(video_id="INVALID_ID")
82
+ except RequestException as error:
83
+ print(f"API request failed: {error}")
84
+ if error.response is not None:
85
+ error_data = error.response.json()
86
+ print(f"Error code: {error_data.get('code')}")
87
+ print(f"Error title: {error_data.get('title')}")
88
+ print(f"Error description: {error_data.get('description')}")
89
+ ```
90
+
91
+ ## API Reference
92
+
93
+ See the [Documentation](https://supadata.ai/documentation) for more details on all possible parameters and options.
94
+
95
+ ## License
96
+
97
+ MIT
@@ -0,0 +1,76 @@
1
+ # Supadata Python SDK
2
+
3
+ [![PyPI version](https://badge.fury.io/py/supadata.svg)](https://badge.fury.io/py/supadata)
4
+ [![MIT license](https://img.shields.io/badge/license-MIT-brightgreen.svg?style=flat)](http://opensource.org/licenses/MIT)
5
+
6
+ The official Python SDK for Supadata.
7
+
8
+ Get your free API key at [supadata.ai](https://supadata.ai) and start scraping data in minutes.
9
+
10
+ ## Installation
11
+
12
+ ```bash
13
+ pip install supadata
14
+ ```
15
+
16
+ ## Usage
17
+
18
+ ```python
19
+ from supadata import Supadata
20
+
21
+ # Initialize the client
22
+ client = Supadata(api_key="YOUR_API_KEY")
23
+
24
+ # Get YouTube transcript
25
+ transcript = client.get_transcript(video_id="VIDEO_ID")
26
+ print(f"Got transcript in {transcript.lang}")
27
+
28
+ # Translate YouTube transcript to Spanish
29
+ translated = client.translate_transcript(
30
+ video_id="VIDEO_ID",
31
+ lang="es"
32
+ )
33
+ print(f"Got translated transcript in {translated.lang}")
34
+
35
+ # Get plain text transcript
36
+ text_transcript = client.get_transcript(
37
+ video_id="VIDEO_ID",
38
+ text=True
39
+ )
40
+ print(text_transcript.content)
41
+
42
+ # Scrape web content
43
+ web_content = client.scrape("https://supadata.ai")
44
+ print(f"Page title: {web_content.name}")
45
+ print(f"Content length: {web_content.count_characters} characters")
46
+
47
+ # Map website URLs
48
+ site_map = client.map("https://supadata.ai")
49
+ print(f"Found {len(site_map.urls)} URLs")
50
+ ```
51
+
52
+ ## Error Handling
53
+
54
+ The SDK uses the standard `requests` library and will raise `requests.exceptions.RequestException` for API-related errors:
55
+
56
+ ```python
57
+ from requests.exceptions import RequestException
58
+
59
+ try:
60
+ transcript = client.get_transcript(video_id="INVALID_ID")
61
+ except RequestException as error:
62
+ print(f"API request failed: {error}")
63
+ if error.response is not None:
64
+ error_data = error.response.json()
65
+ print(f"Error code: {error_data.get('code')}")
66
+ print(f"Error title: {error_data.get('title')}")
67
+ print(f"Error description: {error_data.get('description')}")
68
+ ```
69
+
70
+ ## API Reference
71
+
72
+ See the [Documentation](https://supadata.ai/documentation) for more details on all possible parameters and options.
73
+
74
+ ## License
75
+
76
+ MIT
@@ -0,0 +1,30 @@
1
+ [build-system]
2
+ requires = ["hatchling"]
3
+ build-backend = "hatchling.build"
4
+
5
+ [project]
6
+ name = "supadata"
7
+ version = "1.0.0"
8
+ authors = [{ name = "Supadata", email = "support@supadata.ai" }]
9
+ dependencies = ["requests >= 2.28.1"]
10
+ description = "The official Python SDK for Supadata - scrape web content and YouTube transcripts with ease"
11
+ readme = "README.md"
12
+ requires-python = ">=3.7"
13
+ license = "MIT"
14
+ keywords = ["supadata", "web-scraping", "youtube", "transcripts", "api", "llm", "ai"]
15
+ classifiers = [
16
+ "Programming Language :: Python :: 3",
17
+ "License :: OSI Approved :: MIT License",
18
+ "Operating System :: OS Independent",
19
+ ]
20
+
21
+ [project.urls]
22
+ homepage = "https://supadata.ai"
23
+ repository = "https://github.com/supadata/supadata-py"
24
+ documentation = "https://supadata.ai/documentation"
25
+
26
+ [project.optional-dependencies]
27
+ test = [
28
+ "pytest >= 7.0.0",
29
+ "requests-mock >= 1.11.0",
30
+ ]
@@ -0,0 +1,26 @@
1
+ """
2
+ Supadata Python SDK
3
+
4
+ The official Python SDK for Supadata - scrape web and YouTube content with ease.
5
+ """
6
try:
    from importlib.metadata import PackageNotFoundError, version
except ImportError:  # Python 3.7: importlib.metadata only exists from 3.8 on
    version = None

from supadata.client import Supadata
from supadata.types import (
    Transcript,
    TranslatedTranscript,
    TranscriptChunk,
    Scrape,
    Map,
    Error,
)

# Resolve the installed distribution's version. Importing the package must
# not fail when the metadata is unavailable (Python 3.7, or running from a
# source checkout that was never installed), so fall back to a placeholder.
if version is None:
    __version__ = "unknown"
else:
    try:
        __version__ = version("supadata")
    except PackageNotFoundError:
        __version__ = "unknown"

__all__ = [
    "Supadata",
    "Transcript",
    "TranslatedTranscript",
    "TranscriptChunk",
    "Scrape",
    "Map",
    "Error",
]
@@ -0,0 +1,164 @@
1
+ """Main Supadata client implementation."""
2
+
3
+ from typing import Dict, Any
4
+ import requests
5
+ from dataclasses import asdict
6
+
7
+ from .types import (
8
+ Transcript,
9
+ TranslatedTranscript,
10
+ TranscriptChunk,
11
+ Scrape,
12
+ Map,
13
+ Error,
14
+ )
15
+
16
+
17
class Supadata:
    """Main Supadata client.

    Holds a ``requests.Session`` preconfigured with the API key header and
    exposes one method per endpoint. JSON keys in every response are
    converted from camelCase to snake_case before being passed to the
    dataclasses imported from ``.types``.
    """

    def __init__(self, api_key: str, base_url: str = "https://api.supadata.ai/v1"):
        """Initialize Supadata client.

        Args:
            api_key: Your Supadata API key (sent as the ``x-api-key`` header)
            base_url: Optional custom API base URL; endpoint paths are
                appended to it verbatim
        """
        self.base_url = base_url
        self.session = requests.Session()
        self.session.headers.update({
            "x-api-key": api_key,
            "Accept": "application/json",
        })

    def get_transcript(self, video_id: str, text: bool = False) -> Transcript:
        """Get transcript for a YouTube video.

        Args:
            video_id: YouTube video ID
            text: Whether to return plain text instead of segments

        Returns:
            Transcript object containing content, language and available languages

        Raises:
            requests.exceptions.RequestException: If the API request fails
        """
        # NOTE(review): requests serializes a bool query value with str(),
        # i.e. "True"/"False" - confirm the API accepts that casing.
        response = self._request("GET", "/youtube/transcript", params={
            "videoId": video_id,
            "text": text,
        })
        return Transcript(**self._convert_chunks(response, text))

    def translate_transcript(
        self,
        video_id: str,
        lang: str,
        text: bool = False
    ) -> TranslatedTranscript:
        """Get translated transcript for a YouTube video.

        Args:
            video_id: YouTube video ID
            lang: Target language code (e.g., 'es' for Spanish)
            text: Whether to return plain text instead of segments

        Returns:
            TranslatedTranscript object containing translated content

        Raises:
            requests.exceptions.RequestException: If the API request fails
        """
        response = self._request("GET", "/youtube/transcript/translate", params={
            "videoId": video_id,
            "lang": lang,
            "text": text,
        })
        return TranslatedTranscript(**self._convert_chunks(response, text))

    def scrape(self, url: str) -> Scrape:
        """Scrape content from a web page.

        Args:
            url: URL to scrape

        Returns:
            Scrape object containing the extracted content

        Raises:
            requests.exceptions.RequestException: If the API request fails
        """
        response = self._request("GET", "/web/scrape", params={"url": url})
        return Scrape(**response)

    def map(self, url: str) -> Map:
        """Generate a site map for a website.

        Args:
            url: Base URL to map

        Returns:
            Map object containing discovered URLs

        Raises:
            requests.exceptions.RequestException: If the API request fails
        """
        response = self._request("GET", "/web/map", params={"url": url})
        return Map(**response)

    @staticmethod
    def _convert_chunks(response: Dict[str, Any], text: bool) -> Dict[str, Any]:
        """Replace a list-valued ``content`` with TranscriptChunk objects, in place.

        Nothing is converted when plain text was requested or when
        ``content`` is not a list. The same dict is returned for chaining.
        """
        if not text and isinstance(response["content"], list):
            response["content"] = [
                TranscriptChunk(**chunk) for chunk in response["content"]
            ]
        return response

    def _camel_to_snake(self, d: Any) -> Any:
        """Recursively convert dictionary keys from camelCase to snake_case.

        Dicts get their keys converted and their values processed
        recursively, lists are processed element-wise, and any other value
        is returned unchanged.
        """
        import re

        def convert(name: str) -> str:
            # First split before a capitalized word, then before any
            # remaining capital that follows a lowercase letter or digit.
            name = re.sub('(.)([A-Z][a-z]+)', r'\1_\2', name)
            return re.sub('([a-z0-9])([A-Z])', r'\1_\2', name).lower()

        if isinstance(d, dict):
            return {convert(k): self._camel_to_snake(v) for k, v in d.items()}
        if isinstance(d, list):
            return [self._camel_to_snake(i) for i in d]
        return d

    def _request(self, method: str, path: str, **kwargs: Any) -> Dict[str, Any]:
        """Make an HTTP request to the Supadata API.

        Args:
            method: HTTP method
            path: API endpoint path
            **kwargs: Additional arguments to pass to requests

        Returns:
            dict: Parsed JSON response with snake_case keys

        Raises:
            requests.exceptions.RequestException: If the API request fails.
                When the error body parses into an ``Error``, the raised
                ``HTTPError`` carries it as ``args[0]`` and keeps the
                original response on ``.response``.
        """
        url = f"{self.base_url}{path}"
        response = self.session.request(method, url, **kwargs)

        try:
            response.raise_for_status()
        except requests.exceptions.HTTPError as e:
            error = None
            if e.response is not None:
                try:
                    error = Error(**self._camel_to_snake(e.response.json()))
                except (ValueError, TypeError):
                    # Body is not JSON or does not match the Error schema;
                    # fall through and re-raise the original exception.
                    error = None
            if error is None:
                raise
            # Keep the response attached so callers can still inspect
            # error.response (status code, headers, raw body).
            raise requests.exceptions.HTTPError(error, response=e.response) from e

        return self._camel_to_snake(response.json())
@@ -0,0 +1,31 @@
1
+ """Custom exceptions for Supadata SDK."""
2
+
3
+ from typing import Optional
4
+ from dataclasses import dataclass
5
+
6
+
7
@dataclass
class SupadataError(Exception):
    """Base exception for all Supadata errors.

    Attributes:
        code: Error code identifying the type of error (e.g., 'video-not-found')
        title: Human readable error title
        description: Detailed error description
        documentation_url: URL to error documentation
    """
    code: str
    title: str
    description: str
    documentation_url: Optional[str] = None

    def __post_init__(self) -> None:
        """Populate ``Exception.args`` from the dataclass fields.

        The generated ``__init__`` never calls ``Exception.__init__``, so an
        instance built with keyword arguments would otherwise have empty
        ``args`` and could not be rebuilt by pickle or copy.
        """
        super().__init__(
            self.code, self.title, self.description, self.documentation_url
        )

    def __hash__(self) -> int:
        """Hash on the same fields the generated ``__eq__`` compares.

        Without an explicit ``__hash__`` the dataclass decorator sets it to
        ``None``, which makes the exception unhashable.
        """
        return hash(
            (self.code, self.title, self.description, self.documentation_url)
        )

    def __str__(self) -> str:
        """Return string representation of the error."""
        parts = [self.description]
        if self.code:
            parts.append(f"Code: {self.code}")
        if self.title:
            parts.append(f"Title: {self.title}")
        if self.documentation_url:
            parts.append(f"Documentation: {self.documentation_url}")
        return " | ".join(parts)
@@ -0,0 +1,95 @@
1
+ """Type definitions for Supadata API responses."""
2
+
3
+ from typing import List, Optional, Union
4
+ from dataclasses import dataclass
5
+
6
+
7
+ @dataclass
8
+ class TranscriptChunk:
9
+ """A chunk of a video transcript.
10
+
11
+ Attributes:
12
+ text: Transcript segment text
13
+ offset: Start time in milliseconds
14
+ duration: Duration in milliseconds
15
+ lang: ISO 639-1 language code of chunk
16
+ """
17
+ text: str
18
+ offset: int
19
+ duration: int
20
+ lang: str
21
+
22
+
23
+ @dataclass
24
+ class Transcript:
25
+ """A complete video transcript.
26
+
27
+ Attributes:
28
+ content: List of transcript chunks or plain text when text=true
29
+ lang: ISO 639-1 language code of transcript
30
+ available_langs: List of available language codes
31
+ """
32
+ content: Union[List[TranscriptChunk], str]
33
+ lang: str
34
+ available_langs: List[str]
35
+
36
+
37
+ @dataclass
38
+ class TranslatedTranscript:
39
+ """A translated video transcript.
40
+
41
+ Attributes:
42
+ content: List of transcript chunks or plain text when text=true
43
+ lang: ISO 639-1 language code of translation
44
+ """
45
+ content: Union[List[TranscriptChunk], str]
46
+ lang: str
47
+
48
+
49
+ @dataclass
50
+ class Scrape:
51
+ """Scraped web content.
52
+
53
+ Attributes:
54
+ url: The URL that was scraped
55
+ content: The Markdown content extracted from the URL
56
+ name: The name of the webpage
57
+ description: A description of the webpage
58
+ og_url: Open Graph URL for the webpage
59
+ count_characters: The number of characters in the content
60
+ urls: List of URLs found on the webpage
61
+ """
62
+ url: str
63
+ content: str
64
+ name: str
65
+ description: str
66
+ og_url: Optional[str]
67
+ count_characters: int
68
+ urls: List[str]
69
+
70
+
71
+ @dataclass
72
+ class Map:
73
+ """A site map containing URLs.
74
+
75
+ Attributes:
76
+ urls: List of URLs found on the webpage
77
+ """
78
+ urls: List[str]
79
+
80
+
81
+ @dataclass
82
+ class Error:
83
+ """Standard error response format.
84
+
85
+ Attributes:
86
+ code: Error code identifying the type of error
87
+ title: Human readable error title
88
+ description: Detailed error description
89
+ documentation_url: URL to error documentation
90
+ """
91
+ code: str
92
+ title: str
93
+ description: str
94
+ documentation_url: Optional[str]
95
+
@@ -0,0 +1 @@
1
+ """Tests for the Supadata package."""
@@ -0,0 +1,180 @@
1
+ """Tests for the Supadata client."""
2
+
3
+ import pytest
4
+ import requests
5
+ from requests import Response
6
+
7
+ from supadata import (
8
+ Supadata,
9
+ Transcript,
10
+ TranslatedTranscript,
11
+ TranscriptChunk,
12
+ Scrape,
13
+ Map,
14
+ Error,
15
+ )
16
+
17
+
18
+ @pytest.fixture
19
+ def api_key() -> str:
20
+ """Return a dummy API key for testing."""
21
+ return "test_api_key"
22
+
23
+
24
+ @pytest.fixture
25
+ def base_url() -> str:
26
+ """Return a dummy base URL for testing."""
27
+ return "https://api.test.com/v1"
28
+
29
+
30
+ @pytest.fixture
31
+ def client(api_key: str, base_url: str) -> Supadata:
32
+ """Return a configured Supadata client."""
33
+ return Supadata(api_key=api_key, base_url=base_url)
34
+
35
+
36
+ def test_client_initialization(api_key: str, base_url: str) -> None:
37
+ """Test client initialization."""
38
+ client = Supadata(api_key=api_key, base_url=base_url)
39
+ assert client.base_url == base_url
40
+ assert client.session.headers["x-api-key"] == api_key
41
+ assert client.session.headers["Accept"] == "application/json"
42
+
43
+
44
+ def test_get_transcript_chunks(client: Supadata, requests_mock) -> None:
45
+ """Test getting YouTube transcript with chunks."""
46
+ video_id = "test123"
47
+ mock_response = {
48
+ "content": [
49
+ {
50
+ "text": "Hello",
51
+ "offset": 0,
52
+ "duration": 1000,
53
+ "lang": "en"
54
+ }
55
+ ],
56
+ "lang": "en",
57
+ "availableLangs": ["en", "es"]
58
+ }
59
+ requests_mock.get(
60
+ f"{client.base_url}/youtube/transcript",
61
+ json=mock_response
62
+ )
63
+
64
+ transcript = client.get_transcript(video_id=video_id)
65
+ assert isinstance(transcript, Transcript)
66
+ assert isinstance(transcript.content[0], TranscriptChunk)
67
+ assert transcript.content[0].text == "Hello"
68
+ assert transcript.lang == "en"
69
+ assert transcript.available_langs == ["en", "es"]
70
+
71
+
72
+ def test_get_transcript_text(client: Supadata, requests_mock) -> None:
73
+ """Test getting YouTube transcript as plain text."""
74
+ video_id = "test123"
75
+ mock_response = {
76
+ "content": "Hello, this is a test transcript",
77
+ "lang": "en",
78
+ "availableLangs": ["en", "es"]
79
+ }
80
+ requests_mock.get(
81
+ f"{client.base_url}/youtube/transcript",
82
+ json=mock_response
83
+ )
84
+
85
+ transcript = client.get_transcript(video_id=video_id, text=True)
86
+ assert isinstance(transcript, Transcript)
87
+ assert isinstance(transcript.content, str)
88
+ assert transcript.content == "Hello, this is a test transcript"
89
+
90
+
91
+ def test_translate_transcript(client: Supadata, requests_mock) -> None:
92
+ """Test translating YouTube transcript."""
93
+ video_id = "test123"
94
+ mock_response = {
95
+ "content": "Hola, esto es una prueba",
96
+ "lang": "es"
97
+ }
98
+ requests_mock.get(
99
+ f"{client.base_url}/youtube/transcript/translate",
100
+ json=mock_response
101
+ )
102
+
103
+ transcript = client.translate_transcript(
104
+ video_id=video_id,
105
+ lang="es",
106
+ text=True
107
+ )
108
+ assert isinstance(transcript, TranslatedTranscript)
109
+ assert transcript.content == "Hola, esto es una prueba"
110
+ assert transcript.lang == "es"
111
+
112
+
113
+ def test_scrape(client: Supadata, requests_mock) -> None:
114
+ """Test web scraping."""
115
+ url = "https://test.com"
116
+ mock_response = {
117
+ "url": url,
118
+ "content": "# Test\nThis is a test page",
119
+ "name": "Test Page",
120
+ "description": "A test page",
121
+ "ogUrl": "https://test.com/og.png",
122
+ "countCharacters": 100,
123
+ "urls": ["https://test.com/about"]
124
+ }
125
+ requests_mock.get(
126
+ f"{client.base_url}/web/scrape",
127
+ json=mock_response
128
+ )
129
+
130
+ content = client.scrape(url=url)
131
+ assert isinstance(content, Scrape)
132
+ assert content.url == url
133
+ assert content.name == "Test Page"
134
+ assert content.og_url == "https://test.com/og.png"
135
+ assert content.count_characters == 100
136
+
137
+
138
+ def test_map(client: Supadata, requests_mock) -> None:
139
+ """Test site mapping."""
140
+ url = "https://test.com"
141
+ mock_response = {
142
+ "urls": [
143
+ "https://test.com",
144
+ "https://test.com/about"
145
+ ]
146
+ }
147
+ requests_mock.get(
148
+ f"{client.base_url}/web/map",
149
+ json=mock_response
150
+ )
151
+
152
+ site_map = client.map(url=url)
153
+ assert isinstance(site_map, Map)
154
+ assert len(site_map.urls) == 2
155
+
156
+
157
+ def test_error_handling(client: Supadata, requests_mock) -> None:
158
+ """Test error handling."""
159
+ video_id = "invalid"
160
+ error_response = {
161
+ "code": "video-not-found",
162
+ "title": "Video Not Found",
163
+ "description": "The specified video was not found",
164
+ "documentationUrl": "https://docs.test.com/errors#video-not-found"
165
+ }
166
+ requests_mock.get(
167
+ f"{client.base_url}/youtube/transcript",
168
+ status_code=404,
169
+ json=error_response
170
+ )
171
+
172
+ with pytest.raises(requests.exceptions.HTTPError) as exc_info:
173
+ client.get_transcript(video_id=video_id)
174
+
175
+ error = exc_info.value.args[0]
176
+ assert isinstance(error, Error)
177
+ assert error.code == error_response["code"]
178
+ assert error.title == error_response["title"]
179
+ assert error.description == error_response["description"]
180
+ assert error.documentation_url == error_response["documentationUrl"]