web_queue2-0.1.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
web_queue/VERSION ADDED
@@ -0,0 +1 @@
+ 0.1.0
web_queue/__init__.py ADDED
@@ -0,0 +1,3 @@
+ import pathlib
+
+ __version__ = pathlib.Path(__file__).parent.joinpath("VERSION").read_text().strip()
web_queue/app.py ADDED
@@ -0,0 +1,63 @@
+ import asyncio
+ import logging
+ import typing
+
+ import huey
+ import logfire
+ import logging_bullet_train as lbt
+ from huey.api import Task
+
+ import web_queue.config
+
+ if typing.TYPE_CHECKING:
+     from web_queue.client import WebQueueClient
+     from web_queue.types.fetch_html_message import FetchHTMLMessage
+     from web_queue.types.html_content import HTMLContent
+
+ lbt.set_logger("web_queue")
+
+ logfire.configure()
+ logfire.instrument_openai()
+
+ logger = logging.getLogger(__name__)
+
+ logger.info("Web queue app starting...")
+
+ web_queue_settings = web_queue.config.Settings()
+ logger.info(f"Web queue connecting to redis: {web_queue_settings.web_queue_safe_url}")
+
+ huey_app = huey.RedisExpireHuey(
+     web_queue_settings.WEB_QUEUE_NAME,
+     url=web_queue_settings.WEB_QUEUE_URL.get_secret_value(),
+     expire_time=24 * 60 * 60,  # 24 hours
+ )
+
+
+ @huey_app.task(
+     retries=1,
+     retry_delay=8,
+     expires=24 * 60 * 60,
+     context=True,
+ )
+ def fetch_html(
+     message: typing.Union["FetchHTMLMessage", str, bytes], task: Task
+ ) -> str:
+     from web_queue.types.fetch_html_message import FetchHTMLMessage
+
+     message = FetchHTMLMessage.from_any(message)
+     message.id = task.id
+
+     logger.info(f"Fetching HTML from {message.data.url}")
+
+     loop = asyncio.new_event_loop()
+     asyncio.set_event_loop(loop)
+
+     try:
+         wq_client: "WebQueueClient" = web_queue_settings.web_queue_client
+         html_content: "HTMLContent" = loop.run_until_complete(
+             wq_client.fetch(**message.data.model_dump())
+         )
+         return html_content.model_dump_json()
+
+     finally:
+         loop.close()
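
Not part of the released wheel: a minimal producer-side sketch of how the fetch_html task above could be enqueued and its result read back. It assumes WEB_QUEUE_URL is set in the environment, a huey worker is running, and uses the FetchHTMLMessage types added later in this diff.

# Hypothetical producer script; not included in the package.
from web_queue.app import fetch_html
from web_queue.types.fetch_html_message import FetchHTMLMessage, FetchHTMLMessageRequest

message = FetchHTMLMessage(data=FetchHTMLMessageRequest(url="https://example.com"))
result = fetch_html(message.model_dump_json())  # enqueues; returns a huey Result handle
html_content_json = result.get(blocking=True, timeout=60)  # HTMLContent as a JSON string
print(html_content_json)
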
web_queue/client/__init__.py ADDED
@@ -0,0 +1,3 @@
+ from web_queue.client._client import WebQueueClient
+
+ __all__ = ["WebQueueClient"]
web_queue/client/_client.py ADDED
@@ -0,0 +1,95 @@
+ import functools
+ import typing
+
+ import httpx
+ import yarl
+
+ if typing.TYPE_CHECKING:
+     from web_queue.client.ai import AI
+     from web_queue.client.clean import Clean
+     from web_queue.client.config import Settings
+     from web_queue.client.web import Web
+     from web_queue.types.html_content import HTMLContent
+
+
+ class WebQueueClient:
+     def __init__(self, settings: typing.Optional["Settings"] = None):
+         from web_queue.client.config import Settings
+
+         self.settings = settings or Settings()
+
+     @functools.cached_property
+     def web(self) -> "Web":
+         from web_queue.client.web import Web
+
+         return Web(self)
+
+     @functools.cached_property
+     def clean(self) -> "Clean":
+         from web_queue.client.clean import Clean
+
+         return Clean(self)
+
+     @functools.cached_property
+     def ai(self) -> "AI":
+         from web_queue.client.ai import AI
+
+         return AI(self)
+
+     async def fetch(
+         self,
+         url: yarl.URL | httpx.URL | str,
+         *,
+         headless: bool = False,
+         goto_timeout: int = 4000,  # 4 seconds
+         circling_times: int = 2,
+         scrolling_times: int = 3,
+         human_delay_base_delay: float = 1.2,
+         dynamic_content_loading_delay: float = 2.0,
+     ) -> "HTMLContent":
+         from web_queue.types.html_content import HTMLContent
+         from web_queue.utils.html_to_str import htmls_to_str
+
+         # Fetch HTML
+         html = await self.web.fetch(
+             url,
+             headless=headless,
+             goto_timeout=goto_timeout,
+             circling_times=circling_times,
+             scrolling_times=scrolling_times,
+             human_delay_base_delay=human_delay_base_delay,
+             dynamic_content_loading_delay=dynamic_content_loading_delay,
+         )
+
+         # Clean HTML
+         html = self.clean.as_main_content(html)
+
+         # Extract content metadata
+         html_metadata = await self.ai.as_html_metadata(html)
+
+         if not html_metadata:
+             raise ValueError(f"Failed to retrieve content metadata for url: {url}")
+
+         # Extract content body
+         content_body_htmls = html.select(html_metadata.content_body_css_selector)
+         if not content_body_htmls:
+             raise ValueError(
+                 "Failed to retrieve content body by css selector "
+                 + f"'{html_metadata.content_body_css_selector}' "
+                 + f"for url: '{url}'"
+             )
+
+         content_body_text = htmls_to_str(content_body_htmls)
+
+         html_content = HTMLContent(
+             title=html_metadata.title,
+             author=html_metadata.author,
+             chapter_id=html_metadata.chapter_id,
+             chapter_number=html_metadata.chapter_number,
+             content=content_body_text,
+             created_date=html_metadata.created_date,
+             updated_date=html_metadata.updated_date,
+         )
+
+         html_content._html = str(html)
+         return html_content
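
A short usage sketch (not part of the wheel) for calling WebQueueClient.fetch directly from asyncio code, without the queue. The URL is a placeholder; it assumes Playwright browsers are installed and OPENAI_API_KEY is configured for the metadata-extraction step.

# Hypothetical direct usage; not included in the package.
import asyncio

from web_queue.client import WebQueueClient


async def main() -> None:
    client = WebQueueClient()
    content = await client.fetch("https://example.com/chapter-1", headless=True)
    print(content.title, content.author)
    print(content.content[:200])


asyncio.run(main())
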
web_queue/client/ai/__init__.py ADDED
@@ -0,0 +1,3 @@
+ from web_queue.client.ai._ai import AI
+
+ __all__ = ["AI"]
web_queue/client/ai/_ai.py ADDED
@@ -0,0 +1,140 @@
+ import asyncio
+ import datetime
+ import hashlib
+ import logging
+ import textwrap
+ import typing
+ import zoneinfo
+
+ import logfire
+ from rich.pretty import pretty_repr
+
+ from web_queue.client import WebQueueClient
+ from web_queue.types.html_metadata_response import HTMLMetadataResponse
+ from web_queue.utils.compression import compress, decompress
+
+ if typing.TYPE_CHECKING:
+     import bs4
+
+ logger = logging.getLogger(__name__)
+
+
+ class AI:
+     def __init__(self, client: WebQueueClient):
+         self.client = client
+
+     @logfire.instrument
+     async def as_html_metadata(
+         self, html: typing.Union["bs4.BeautifulSoup", typing.Text]
+     ) -> typing.Optional[HTMLMetadataResponse]:
+         """Extract content metadata and a CSS selector from HTML.
+
+         Analyzes HTML to find the content body selector and extract metadata values.
+         """
+         openai_client = self.client.settings.openai_client
+         model_name = self.client.settings.OPENAI_MODEL
+
+         html = str(html)
+
+         logger.info(f"AI is extracting content metadata from HTML: {html}")
+
+         cache_key = (
+             "retrieve_html_content_metadata:"
+             + f"{hashlib.md5(html.encode('utf-8')).hexdigest()}"
+         )
+
+         might_cached_data: typing.Text | None = await asyncio.to_thread(
+             self.client.settings.compressed_base64_cache.get, cache_key
+         )
+         if might_cached_data is not None:
+             logger.debug(
+                 "Hit cache 'as_html_content_metadata': "
+                 + f"{pretty_repr(html, max_string=32)}"
+             )
+             return HTMLMetadataResponse.model_validate_json(
+                 decompress(might_cached_data)
+             )
+
+         # Get current time in Asia/Taipei timezone for relative date parsing
+         current_time = datetime.datetime.now(zoneinfo.ZoneInfo("Asia/Taipei"))
+         current_time_iso = current_time.isoformat()
+
+         system_prompt = textwrap.dedent(
+             f"""
+             You are an HTML structure analysis expert. Task: From the provided HTML, extract content metadata and identify CSS selectors.
+
+             Current time (Asia/Taipei timezone): {current_time_iso}
+
+             Instructions:
+             1. **content_body_css_selector**: Find the CSS selector for the main content body element containing ONLY the article text.
+             - Look for semantic tags like <article>, <main>, or <div> with classes/IDs like 'body', 'content', 'text', 'novel-body'.
+             - EXCLUDE elements containing metadata (title, author, dates, navigation, footer, ads, comments).
+             - Example: 'div.article-body', 'div#novel-content', 'div.p-novel__text'.
+             - Return empty string if not found.
+
+             2. **title**: Extract the actual title text (chapter title, article title).
+             - Look in <h1>, <h2>, or elements with class/id containing 'title', 'heading'.
+             - Return the text content, not the CSS selector.
+             - Return empty string if not found.
+
+             3. **author**: Extract the actual author name or username.
+             - Look in elements with class/id containing 'author', 'writer', 'username'.
+             - Return the text content.
+             - Return empty string if not found.
+
+             4. **chapter_id**: Extract the actual chapter identifier (e.g., '12345', 'ch-001').
+             - Look in data attributes, URLs, or element IDs.
+             - Return empty string if not found.
+
+             5. **chapter_number**: Extract the actual chapter number (e.g., '1', '42', 'Chapter 5').
+             - Return empty string if not found.
+
+             6. **created_date** and **updated_date**: Parse dates to ISO 8601 format with +08:00 timezone.
+             - For absolute dates: Convert to 'YYYY-MM-DDTHH:MM:SS+08:00' format.
+             - For relative dates ('2 days ago', '3 hours ago'): Calculate from current_time and format.
+             - Return empty string if not found.
+
+             Rules:
+             - If any field is not found or unclear, return empty string "".
+             - Do not guess or make up information.
+             - Focus on precision and accuracy.
+
+             Now, analyze the provided HTML and extract all available metadata.
+             """  # noqa: E501
+         ).strip()
+
+         try:
+             parsed_cmpl = await openai_client.chat.completions.parse(
+                 messages=[
+                     {"role": "system", "content": system_prompt},
+                     {"role": "user", "content": html},
+                 ],
+                 model=model_name,
+                 response_format=HTMLMetadataResponse,
+             )
+             response_msg = parsed_cmpl.choices[0].message
+             if response_msg.refusal:
+                 logger.error(f"LLM refusal: {response_msg.refusal}")
+                 return None
+
+             elif response_msg.parsed:
+                 output: HTMLMetadataResponse = response_msg.parsed
+                 output._html = html
+                 logger.info(f"LLM response: {output}")
+
+                 # Cache the response
+                 await asyncio.to_thread(
+                     self.client.settings.compressed_base64_cache.set,
+                     cache_key,
+                     compress(output.model_dump_json()),
+                 )
+
+                 return output
+
+             else:
+                 logger.error(f"LLM Error for message: {response_msg}")
+                 return None
+
+         except Exception as e:
+             logger.error(f"Parsing failed: {e}")
+             return None
web_queue/client/clean/__init__.py ADDED
@@ -0,0 +1,3 @@
+ from web_queue.client.clean._clean import Clean
+
+ __all__ = ["Clean"]
web_queue/client/clean/_clean.py ADDED
@@ -0,0 +1,25 @@
+ import logging
+ import typing
+
+ import bs4
+
+ from web_queue.client import WebQueueClient
+ from web_queue.utils.html_cleaner import HTMLCleaner
+
+ logger = logging.getLogger(__name__)
+
+
+ class Clean:
+     def __init__(self, client: WebQueueClient):
+         self.client = client
+
+     def as_main_content(self, html: bs4.BeautifulSoup | str) -> bs4.BeautifulSoup:
+         html = (
+             bs4.BeautifulSoup(html, "html.parser")
+             if isinstance(html, typing.Text)
+             else html
+         )
+
+         logger.info(f"Cleaning HTML: {html}")
+         cleaned_html = HTMLCleaner.clean_as_main_content_html_str(html)
+         return bs4.BeautifulSoup(cleaned_html, "html.parser")
web_queue/client/config.py ADDED
@@ -0,0 +1,61 @@
+ import functools
+ import pathlib
+ import typing
+
+ import cachetic
+ import openai
+ import pydantic
+ import pydantic_settings
+
+
+ class Settings(pydantic_settings.BaseSettings):
+     OPENAI_MODEL: str = pydantic.Field(default="gpt-4.1-nano")
+     OPENAI_API_KEY: pydantic.SecretStr = pydantic.SecretStr("")
+
+     # Cache
+     WEB_CACHE_PATH: typing.Text = pydantic.Field(default="./.cache/web.cache")
+     WEB_CACHE_EXPIRE_SECONDS: int = pydantic.Field(default=60 * 60 * 24)  # 1 day
+     WEB_SCREENSHOT_PATH: typing.Text = pydantic.Field(default="./data/screenshots")
+     WEB_PDF_PATH: typing.Text = pydantic.Field(default="./data/pdfs")
+     COMPRESSED_BASE64_CACHE_PATH: typing.Text = pydantic.Field(
+         default="./.cache/compressed_base64.cache"
+     )
+     COMPRESSED_BASE64_CACHE_EXPIRE_SECONDS: int = pydantic.Field(
+         default=60 * 60 * 24
+     )  # 1 day
+
+     @functools.cached_property
+     def openai_client(self) -> openai.AsyncOpenAI:
+         return openai.AsyncOpenAI(api_key=self.OPENAI_API_KEY.get_secret_value())
+
+     @functools.cached_property
+     def web_cache(self) -> "cachetic.Cachetic[typing.Text]":
+         import cachetic
+
+         return cachetic.Cachetic(
+             object_type=pydantic.TypeAdapter(typing.Text),
+             cache_url=pathlib.Path(self.WEB_CACHE_PATH),
+             default_ttl=self.WEB_CACHE_EXPIRE_SECONDS,
+         )
+
+     @functools.cached_property
+     def compressed_base64_cache(self) -> "cachetic.Cachetic[typing.Text]":
+         import cachetic
+
+         return cachetic.Cachetic(
+             object_type=pydantic.TypeAdapter(typing.Text),
+             cache_url=pathlib.Path(self.COMPRESSED_BASE64_CACHE_PATH),
+             default_ttl=self.COMPRESSED_BASE64_CACHE_EXPIRE_SECONDS,
+         )
+
+     @property
+     def web_screenshot_path(self) -> pathlib.Path:
+         _path = pathlib.Path(self.WEB_SCREENSHOT_PATH)
+         _path.mkdir(parents=True, exist_ok=True)
+         return _path
+
+     @property
+     def web_pdf_path(self) -> pathlib.Path:
+         _path = pathlib.Path(self.WEB_PDF_PATH)
+         _path.mkdir(parents=True, exist_ok=True)
+         return _path
web_queue/client/web/__init__.py ADDED
@@ -0,0 +1,3 @@
+ from web_queue.client.web._web import Web
+
+ __all__ = ["Web"]
web_queue/client/web/_web.py ADDED
@@ -0,0 +1,175 @@
+ import asyncio
+ import logging
+ import secrets
+ import time
+ import typing
+
+ import bs4
+ import fastapi
+ import httpx
+ import yarl
+ from playwright._impl._api_structures import ViewportSize
+ from playwright.async_api import TimeoutError as PlaywrightTimeoutError
+ from playwright.async_api import async_playwright
+ from str_or_none import str_or_none
+
+ from web_queue.client import WebQueueClient
+ from web_queue.utils.compression import compress, decompress
+ from web_queue.utils.human_delay import human_delay
+ from web_queue.utils.page_with_init_script import page_with_init_script
+ from web_queue.utils.simulate_mouse_circling import simulate_mouse_circling
+ from web_queue.utils.simulate_scrolling import simulate_scrolling
+
+ logger = logging.getLogger(__name__)
+
+
+ class Web:
+     USER_AGENTS: typing.ClassVar[typing.Tuple[typing.Text, ...]] = (
+         "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",  # noqa: E501
+         "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",  # noqa: E501
+         "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",  # noqa: E501
+     )
+     VIEWPORT_SIZES: typing.ClassVar[typing.Tuple[typing.Tuple[int, int], ...]] = (
+         (1920, 1080),
+         (1366, 768),
+         (1440, 900),
+     )
+
+     def __init__(self, client: WebQueueClient):
+         self.client = client
+
+     async def fetch(
+         self,
+         url: typing.Text | yarl.URL | httpx.URL,
+         *,
+         headless: bool = True,
+         goto_timeout: int = 4000,  # 4 seconds
+         circling_times: int = 3,
+         scrolling_times: int = 3,
+         human_delay_base_delay: float = 1.2,
+         dynamic_content_loading_delay: float = 2.0,
+     ) -> bs4.BeautifulSoup:
+         _url = str_or_none(str(url))
+         if not _url:
+             raise fastapi.exceptions.HTTPException(status_code=400, detail="Empty URL")
+
+         html_content: typing.Text | None = None
+         h_delay = human_delay_base_delay
+         d_delay = dynamic_content_loading_delay
+
+         logger.info(f"Browser is fetching {_url}")
+         maybe_html_content = self.client.settings.web_cache.get(_url)
+         if maybe_html_content:
+             logger.debug(f"Hit web cache for {_url}")
+             html_content = await asyncio.to_thread(
+                 decompress, maybe_html_content, format="zstd"
+             )
+             return bs4.BeautifulSoup(html_content, "html.parser")
+
+         async with async_playwright() as p:
+             browser = await p.chromium.launch(
+                 headless=headless,
+                 args=[
+                     "--no-sandbox",
+                     "--disable-blink-features=AutomationControlled",
+                     "--disable-dev-shm-usage",
+                     "--disable-web-security",
+                     "--disable-features=VizDisplayCompositor",
+                 ],
+             )
+
+             # Create context
+             _viewport_size = secrets.choice(self.VIEWPORT_SIZES)
+             _viewport = ViewportSize(width=_viewport_size[0], height=_viewport_size[1])
+             context = await browser.new_context(
+                 user_agent=secrets.choice(self.USER_AGENTS),
+                 viewport=_viewport,
+                 locale="en-US",
+                 timezone_id="Asia/Tokyo",
+                 permissions=["geolocation"],
+                 extra_http_headers={
+                     "Accept-Language": "en-US,en;q=0.9,ja;q=0.8",
+                     "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",  # noqa: E501
+                     "Accept-Encoding": "gzip, deflate, br",
+                     "Accept-Charset": "utf-8",
+                 },
+             )
+
+             # Create new page
+             page = await context.new_page()
+
+             # Inject script to hide automation features
+             page = await page_with_init_script(page)
+
+             try:
+                 # Navigate to URL
+                 logger.debug(f"Navigating (timeout: {goto_timeout}ms) to {_url}")
+                 try:
+                     await page.goto(
+                         _url, wait_until="domcontentloaded", timeout=goto_timeout
+                     )  # Wait for the DOM to be ready
+                 except PlaywrightTimeoutError:
+                     logger.info(f"Timeout for goto '{_url}', continuing...")
+                 await human_delay(h_delay)  # Initial delay
+
+                 # Wait for full page load (additional checks)
+                 logger.debug(f"Waiting {h_delay}s for full page load")
+                 await page.wait_for_load_state("domcontentloaded")
+                 await human_delay(h_delay)
+
+                 # Simulate smooth mouse circling
+                 start_position = None
+                 for i in range(circling_times):
+                     logger.debug(f"Simulating mouse circling {i + 1} of {circling_times}")
+                     start_position = await simulate_mouse_circling(
+                         page, _viewport, start_position=start_position
+                     )
+                     await human_delay(h_delay)
+
+                 # Simulate scrolling
+                 for i in range(scrolling_times):
+                     logger.debug(f"Simulating scrolling {i + 1} of {scrolling_times}")
+                     await simulate_scrolling(page, scroll_direction="down")
+                     await human_delay(h_delay)
+
+                 # Extra delay for dynamic content loading
+                 logger.debug(f"Delaying {d_delay}s for dynamic content loading")
+                 await human_delay(d_delay)
+
+                 # Get full HTML content
+                 html_content = await page.content()
+                 html_content = str_or_none(html_content)
+                 html_content_size = len(html_content or " ")
+
+                 logger.info(
+                     f"Fetched HTML content size: {html_content_size} for {_url}"
+                 )
+
+                 # Screenshot and PDF
+                 snapshot_filename = f"{int(time.time() * 1e3)}_{secrets.token_hex(2)}"
+                 screenshot_path = self.client.settings.web_screenshot_path.joinpath(
+                     f"{snapshot_filename}.png"
+                 )
+                 screenshot_path.write_bytes(await page.screenshot())
+                 logger.info(f"Screenshot saved to {screenshot_path}")
+                 pdf_path = self.client.settings.web_pdf_path.joinpath(
+                     f"{snapshot_filename}.pdf"
+                 )
+                 await page.pdf(path=pdf_path, print_background=True)
+                 logger.info(f"PDF saved to {pdf_path}")
+
+             finally:
+                 await browser.close()
+
+         if not html_content:
+             raise fastapi.exceptions.HTTPException(
+                 status_code=500, detail="Failed to fetch content"
+             )
+
+         await asyncio.to_thread(
+             self.client.settings.web_cache.set,
+             _url,
+             compress(html_content, format="zstd"),
+         )
+
+         return bs4.BeautifulSoup(html_content, "html.parser")
web_queue/config.py ADDED
@@ -0,0 +1,33 @@
+ import functools
+ import typing
+
+ import pydantic
+ import pydantic_settings
+ import yarl
+ from str_or_none import str_or_none
+
+ if typing.TYPE_CHECKING:
+     from web_queue.client import WebQueueClient
+
+
+ class Settings(pydantic_settings.BaseSettings):
+     WEB_QUEUE_NAME: str = pydantic.Field(default="web-queue")
+     WEB_QUEUE_URL: pydantic.SecretStr = pydantic.SecretStr("")
+
+     @pydantic.model_validator(mode="after")
+     def validate_values(self) -> typing.Self:
+         if str_or_none(self.WEB_QUEUE_NAME) is None:
+             raise ValueError("WEB_QUEUE_NAME is required")
+         if str_or_none(self.WEB_QUEUE_URL.get_secret_value()) is None:
+             raise ValueError("WEB_QUEUE_URL is required")
+         return self
+
+     @functools.cached_property
+     def web_queue_client(self) -> "WebQueueClient":
+         from web_queue.client import WebQueueClient
+
+         return WebQueueClient()
+
+     @property
+     def web_queue_safe_url(self) -> str:
+         return str(yarl.URL(self.WEB_QUEUE_URL.get_secret_value()).with_password("***"))
web_queue/types/__init__.py ADDED (empty file)
web_queue/types/fetch_html_message.py ADDED
@@ -0,0 +1,26 @@
+ import typing
+
+ import pydantic
+ from str_or_none import str_or_none
+
+ from web_queue.types.message import Message
+
+
+ class FetchHTMLMessageRequest(pydantic.BaseModel):
+     url: str
+     headless: bool = False
+     goto_timeout: int = 4000
+     circling_times: int = 2
+     scrolling_times: int = 3
+     human_delay_base_delay: float = 1.2
+     dynamic_content_loading_delay: float = 2
+
+     @pydantic.model_validator(mode="after")
+     def validate_url(self) -> typing.Self:
+         if not str_or_none(self.url):
+             raise ValueError("URL is required")
+         return self
+
+
+ class FetchHTMLMessage(Message):
+     data: FetchHTMLMessageRequest
web_queue/types/html_content.py ADDED
@@ -0,0 +1,18 @@
+ import logging
+
+ import pydantic
+
+ logger = logging.getLogger(__name__)
+
+
+ class HTMLContent(pydantic.BaseModel):
+     title: str = pydantic.Field(default="")
+     author: str = pydantic.Field(default="")
+     chapter_id: str = pydantic.Field(default="")
+     chapter_number: str = pydantic.Field(default="")
+     content: str = pydantic.Field(default="")
+     created_date: str = pydantic.Field(default="")
+     updated_date: str = pydantic.Field(default="")
+
+     # Private attributes
+     _html: str = pydantic.PrivateAttr(default="")
web_queue/types/html_metadata_response.py ADDED
@@ -0,0 +1,82 @@
+ import logging
+
+ import pydantic
+
+ logger = logging.getLogger(__name__)
+
+
+ class HTMLMetadataResponse(pydantic.BaseModel):
+     """Structured response for HTML content metadata and element locators.
+
+     Extracts the content body CSS selector and metadata values.
+     """
+
+     title: str = pydantic.Field(
+         default="",
+         description=(
+             "The actual title text of the content "
+             "(e.g., chapter title, article title). "
+             "Return empty string if not found."
+         ),
+     )
+
+     author: str = pydantic.Field(
+         default="",
+         description=(
+             "The actual author name or username. Return empty string if not found."
+         ),
+     )
+
+     chapter_id: str = pydantic.Field(
+         default="",
+         description=(
+             "The actual chapter ID or identifier (e.g., '12345', 'ch-001'). "
+             "Return empty string if not found."
+         ),
+     )
+
+     chapter_number: str = pydantic.Field(
+         default="",
+         description=(
+             "The actual chapter number (e.g., '1', '42', 'Chapter 5'). "
+             "Return empty string if not found."
+         ),
+     )
+
+     content_body_css_selector: str = pydantic.Field(
+         default="",
+         description=(
+             "CSS selector for the main content body element "
+             "containing article text only. "
+             "Exclude metadata like title, author, dates. "
+             "Example: 'div.article-body', 'div#novel-content'. "
+             "Use standard CSS syntax. Return empty string if not found."
+         ),
+     )
+
+     created_date: str = pydantic.Field(
+         default="",
+         description=(
+             "The content creation date in ISO 8601 format "
+             "with Asia/Taipei timezone "
+             "(e.g., '2025-10-12T14:30:00+08:00'). "
+             "Parse relative dates like '2 days ago' "
+             "using the current_time provided in the system prompt. "
+             "Return empty string if not found."
+         ),
+     )
+
+     updated_date: str = pydantic.Field(
+         default="",
+         description=(
+             "The content last update date in ISO 8601 format "
+             "with Asia/Taipei timezone "
+             "(e.g., '2025-10-12T14:30:00+08:00'). "
+             "Parse relative dates like '2 days ago' "
+             "using the current_time provided in the system prompt. "
+             "Return empty string if not found."
+         ),
+     )
+
+     # Private attributes
+     _html: str = pydantic.PrivateAttr(default="")
web_queue/types/message.py ADDED
@@ -0,0 +1,34 @@
+ import enum
+ import json
+ import typing
+
+ import pydantic
+
+
+ class MessageStatus(enum.StrEnum):
+     PENDING = "pending"
+     RUNNING = "running"
+     COMPLETED = "completed"
+     FAILED = "failed"
+
+
+ class Message(pydantic.BaseModel):
+     id: str | None = None
+     data: typing.Any
+     status: MessageStatus = pydantic.Field(default=MessageStatus.PENDING)
+     total_steps: int = pydantic.Field(default=100)
+     completed_steps: int = pydantic.Field(default=0)
+     error: typing.Optional[str] = pydantic.Field(default=None)
+
+     @classmethod
+     def from_any(cls, any: typing.Union[pydantic.BaseModel, typing.Dict, str, bytes]):
+         if isinstance(any, pydantic.BaseModel):
+             return cls.model_validate_json(any.model_dump_json())
+         elif isinstance(any, typing.Dict):
+             return cls.model_validate_json(json.dumps(any))
+         elif isinstance(any, str):
+             return cls.model_validate_json(any)
+         elif isinstance(any, bytes):
+             return cls.model_validate_json(any)
+         else:
+             raise ValueError(f"Invalid type: {type(any)}")
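
Illustrative only (not part of the wheel): from_any accepts a pydantic model, a dict, a JSON string, or bytes, so queue payloads can arrive in any of those forms. A small sketch using the FetchHTMLMessage subclass defined earlier in this diff:

# Hypothetical example; not included in the package.
from web_queue.types.fetch_html_message import FetchHTMLMessage

as_dict = FetchHTMLMessage.from_any({"data": {"url": "https://example.com"}})
as_json = FetchHTMLMessage.from_any('{"data": {"url": "https://example.com"}}')
assert as_dict.data.url == as_json.data.url == "https://example.com"
assert as_dict.status.value == "pending"  # default MessageStatus.PENDING
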
web_queue/utils/__init__.py ADDED (empty file)
web_queue/utils/compression.py ADDED
@@ -0,0 +1,29 @@
+ import base64
+ import typing
+
+ import fastapi
+ import zstandard
+
+
+ def compress(
+     data: str, *, level: int = 9, format: typing.Literal["zstd"] = "zstd"
+ ) -> str:
+     if format == "zstd":
+         return base64.b64encode(
+             zstandard.compress(data.encode("utf-8"), level=level)
+         ).decode("utf-8")
+     else:
+         raise fastapi.exceptions.HTTPException(
+             status_code=400, detail=f"Invalid format: {format}"
+         )
+
+
+ def decompress(data: str, *, format: typing.Literal["zstd"] = "zstd") -> str:
+     if format == "zstd":
+         return zstandard.decompress(base64.b64decode(data.encode("utf-8"))).decode(
+             "utf-8"
+         )
+     else:
+         raise fastapi.exceptions.HTTPException(
+             status_code=400, detail=f"Invalid format: {format}"
+         )
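
The helpers above zstd-compress a string and base64-encode the result so it can be stored in the text-only caches. A round trip looks like this (sketch, not part of the wheel):

# Hypothetical round-trip check; not included in the package.
from web_queue.utils.compression import compress, decompress

original = "<html><body><p>hello</p></body></html>" * 100
packed = compress(original, level=9, format="zstd")  # base64 text, safe for the cache
assert decompress(packed, format="zstd") == original
print(f"compressed {len(original)} chars down to {len(packed)} chars")
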
web_queue/utils/html_cleaner.py ADDED
@@ -0,0 +1,145 @@
+ import re
+ import typing
+
+ import bs4
+
+ DEFAULT_KEEP_TAGS: typing.Tuple[typing.Text, ...] = (
+     "a",
+     "article",
+     "body",
+     "br",
+     "div",
+     "h1",
+     "h2",
+     "h3",
+     "h4",
+     "h5",
+     "h6",
+     "hr",
+     "html",
+     "li",
+     "main",
+     "ol",
+     "p",
+     "section",
+     "table",
+     "tbody",
+     "td",
+     "th",
+     "tr",
+     "ul",
+ )
+ DEFAULT_KEEP_ATTRIBUTES: typing.Tuple[typing.Text, ...] = ("id", "class")
+ DEFAULT_DROP_TAGS: typing.Tuple[typing.Text, ...] = ("script", "style", "iframe")
+
+
+ class HTMLCleaner:
+     @staticmethod
+     def clean_as_main_content_html(
+         html: typing.Text | bs4.BeautifulSoup,
+     ) -> bs4.BeautifulSoup:
+         html = (
+             bs4.BeautifulSoup(html, "html.parser")
+             if isinstance(html, typing.Text)
+             else html
+         )
+         html = HTMLCleaner.clean_all_comments(html)
+         html = HTMLCleaner.keep_only_tags(html)
+         html = HTMLCleaner.clean_tags(html)
+         html = HTMLCleaner.clean_attributes(html)
+         html = HTMLCleaner.keep_first_class_name(html)
+         return html
+
+     @staticmethod
+     def clean_as_main_content_html_str(
+         html: typing.Text | bs4.BeautifulSoup,
+     ) -> str:
+         html = HTMLCleaner.clean_as_main_content_html(html)
+         return re.sub(r">\s+<", "><", str(html))
+
+     @staticmethod
+     def keep_only_tags(
+         html: typing.Text | bs4.BeautifulSoup,
+         tags: typing.List[typing.Text] | None = None,
+     ) -> bs4.BeautifulSoup:
+         html = (
+             bs4.BeautifulSoup(html, "html.parser")
+             if isinstance(html, typing.Text)
+             else html
+         )
+         tags = tags or list(DEFAULT_KEEP_TAGS)
+
+         # Find all tags that are not in the keep list and decompose them
+         for tag in html.find_all():
+             if tag.name not in tags:
+                 tag.decompose()
+
+         return html
+
+     @staticmethod
+     def keep_first_class_name(
+         html: typing.Text | bs4.BeautifulSoup,
+     ) -> bs4.BeautifulSoup:
+         html = (
+             bs4.BeautifulSoup(html, "html.parser")
+             if isinstance(html, typing.Text)
+             else html
+         )
+
+         # Keep only the first class name for elements with multiple classes
+         for tag in html.find_all(attrs={"class": True}):
+             class_attr = tag.get("class")
+             if isinstance(class_attr, list) and len(class_attr) > 1:
+                 tag["class"] = class_attr[0]
+             elif isinstance(class_attr, str):
+                 classes = class_attr.split()
+                 if len(classes) > 1:
+                     tag["class"] = classes[0]
+
+         return html
+
+     @staticmethod
+     def clean_attributes(
+         html: typing.Text | bs4.BeautifulSoup,
+         attributes: typing.List[typing.Text] | None = None,
+     ) -> bs4.BeautifulSoup:
+         html = (
+             bs4.BeautifulSoup(html, "html.parser")
+             if isinstance(html, typing.Text)
+             else html
+         )
+         attributes = attributes or list(DEFAULT_KEEP_ATTRIBUTES)
+         for tag in html.find_all():
+             for attribute in list(tag.attrs):
+                 if attribute not in attributes:
+                     tag.attrs.pop(attribute, None)
+
+         return html
+
+     @staticmethod
+     def clean_all_comments(html: typing.Text | bs4.BeautifulSoup) -> bs4.BeautifulSoup:
+         html = (
+             bs4.BeautifulSoup(html, "html.parser")
+             if isinstance(html, typing.Text)
+             else html
+         )
+         # Comment nodes are NavigableStrings and have no decompose(); extract() removes them
+         for comment in html.find_all(string=lambda text: isinstance(text, bs4.Comment)):
+             comment.extract()
+         return html
+
+     @staticmethod
+     def clean_tags(
+         html: typing.Text | bs4.BeautifulSoup,
+         tags: typing.List[typing.Text] | None = None,
+     ) -> bs4.BeautifulSoup:
+         html = (
+             bs4.BeautifulSoup(html, "html.parser")
+             if isinstance(html, typing.Text)
+             else html
+         )
+         tags = tags or list(DEFAULT_DROP_TAGS)
+
+         for tag in html.find_all(tags):
+             tag.decompose()
+
+         return html
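
A quick sketch (not part of the wheel) of what clean_as_main_content_html_str does to a noisy page: comments, scripts, unknown tags, and extra attributes are dropped, and only the first class name per element is kept.

# Hypothetical example; not included in the package.
from web_queue.utils.html_cleaner import HTMLCleaner

raw = (
    "<html><head><script>track()</script></head>"
    "<body><!-- ad slot -->"
    "<div class='article-body wide dark' data-id='42'><p>Chapter text.</p></div>"
    "<span>sidebar</span></body></html>"
)
cleaned = HTMLCleaner.clean_as_main_content_html_str(raw)
# Expected shape: <html><body><div class="article-body"><p>Chapter text.</p></div></body></html>
print(cleaned)
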
web_queue/utils/html_to_str.py ADDED
@@ -0,0 +1,21 @@
+ import typing
+
+ import bs4
+
+
+ def html_to_str(html: bs4.BeautifulSoup | bs4.Tag | str) -> str:
+     html = bs4.BeautifulSoup(html, "html.parser") if isinstance(html, str) else html
+
+     full_text = ""
+     for p in html.find_all("p"):
+         content = p.get_text(separator="\n", strip=True)
+         full_text += content
+         full_text += "\n"
+
+     return full_text.strip()
+
+
+ def htmls_to_str(
+     htmls: typing.List[bs4.BeautifulSoup | bs4.Tag | str] | bs4.ResultSet[bs4.Tag],
+ ) -> str:
+     return "\n\n".join(html_to_str(h) for h in htmls)
web_queue/utils/human_delay.py ADDED
@@ -0,0 +1,11 @@
+ import asyncio
+ import random
+
+
+ async def human_delay(
+     base_delay: float = 1.2, *, jitter_ratio: tuple[float, float] = (0.5, 1.5)
+ ) -> None:
+     jitter = random.uniform(jitter_ratio[0], jitter_ratio[1])
+     total_delay = base_delay * jitter
+     await asyncio.sleep(total_delay)
+     return None
web_queue/utils/page_with_init_script.py ADDED
@@ -0,0 +1,15 @@
+ import playwright.async_api
+
+
+ async def page_with_init_script(
+     page: playwright.async_api.Page,
+ ) -> playwright.async_api.Page:
+     await page.add_init_script(
+         """
+         Object.defineProperty(navigator, 'webdriver', {get: () => undefined});
+         Object.defineProperty(navigator, 'plugins', {get: () => [1, 2, 3, 4, 5]});
+         Object.defineProperty(navigator, 'languages', {get: () => ['en-US', 'en', 'ja']});
+         window.chrome = {runtime: {}};
+         """
+     )
+     return page
web_queue/utils/simulate_mouse_circling.py ADDED
@@ -0,0 +1,49 @@
+ import asyncio
+ import math
+ import random
+ import typing
+
+ import playwright.async_api
+ from playwright._impl._api_structures import ViewportSize
+
+ Number: typing.TypeAlias = float | int
+
+
+ async def simulate_mouse_circling(
+     page: playwright.async_api.Page,
+     default_viewport_size: ViewportSize | None = None,
+     *,
+     start_position: tuple[Number, Number] | None = None,
+ ) -> tuple[Number, Number]:
+     _viewport_size = (
+         page.viewport_size
+         or default_viewport_size
+         or ViewportSize(width=1920, height=1080)
+     )
+
+     # Random starting position
+     if start_position:
+         start_x = start_position[0]
+         start_y = start_position[1]
+     else:
+         start_x = random.randint(100, _viewport_size["width"] - 100)
+         start_y = random.randint(100, _viewport_size["height"] - 100)
+     center_x = start_x + 100
+     center_y = start_y + 100
+     radius = 50
+     x = center_x
+     y = center_y
+
+     # Simulate a smooth circle: move through multiple points
+     for angle in range(0, 360, 60):  # A point every 60 degrees
+         rad = (angle * math.pi) / 180
+         x = center_x + radius * random.uniform(0.8, 1.2) * random.choice([-1, 1]) * abs(
+             math.cos(rad)
+         )
+         y = center_y + radius * random.uniform(0.8, 1.2) * random.choice([-1, 1]) * abs(
+             math.sin(rad)
+         )
+         await page.mouse.move(x, y, steps=random.randint(10, 20))  # Smooth movement
+         await asyncio.sleep(random.uniform(0.01, 0.05))  # Tiny delay
+
+     return (x, y)
web_queue/utils/simulate_scrolling.py ADDED
@@ -0,0 +1,18 @@
+ import random
+
+ import playwright.async_api
+ from typing_extensions import Literal
+
+
+ async def simulate_scrolling(
+     page: playwright.async_api.Page,
+     scroll_direction: Literal["down", "up"] | None = None,
+     scroll_distance: int | None = None,
+ ) -> None:
+     scroll_direction = scroll_direction or random.choice(["down", "up"])
+     scroll_distance = scroll_distance or random.randint(200, 800)
+     if scroll_direction == "down":
+         await page.mouse.wheel(0, scroll_distance)
+     else:
+         await page.mouse.wheel(0, -scroll_distance)
+     return None
web_queue2-0.1.0.dist-info/METADATA ADDED
@@ -0,0 +1,48 @@
+ Metadata-Version: 2.4
+ Name: web-queue2
+ Version: 0.1.0
+ Summary: Get web content from queue.
+ License: MIT
+ License-File: LICENSE
+ Author: Allen Chou
+ Author-email: f1470891079@gmail.com
+ Requires-Python: >=3.11,<4
+ Classifier: License :: OSI Approved :: MIT License
+ Classifier: Programming Language :: Python :: 3
+ Classifier: Programming Language :: Python :: 3.11
+ Classifier: Programming Language :: Python :: 3.12
+ Classifier: Programming Language :: Python :: 3.13
+ Classifier: Programming Language :: Python :: 3.14
+ Requires-Dist: bs4
+ Requires-Dist: cachetic
+ Requires-Dist: dictpress
+ Requires-Dist: fastapi
+ Requires-Dist: httpx
+ Requires-Dist: huey
+ Requires-Dist: logfire
+ Requires-Dist: logging_bullet_train
+ Requires-Dist: openai (>=1,<2)
+ Requires-Dist: openai-agents (>=0.1.0,<1.0.0)
+ Requires-Dist: playwright
+ Requires-Dist: pydantic (>=2)
+ Requires-Dist: pydantic-settings
+ Requires-Dist: str-or-none
+ Requires-Dist: tiktoken
+ Requires-Dist: yarl
+ Project-URL: Homepage, https://github.com/allen2c/web-queue
+ Project-URL: PyPI, https://pypi.org/project/web-queue/
+ Project-URL: Repository, https://github.com/allen2c/web-queue
+ Description-Content-Type: text/markdown
+
+ # Web-Queue
+
+ [![PyPI version](https://img.shields.io/pypi/v/web-queue.svg)](https://pypi.org/project/web-queue/)
+ [![Python Version](https://img.shields.io/pypi/pyversions/web-queue.svg)](https://pypi.org/project/web-queue/)
+ [![License](https://img.shields.io/pypi/l/web-queue.svg)](https://opensource.org/licenses/MIT)
+
+ A web content pipeline library.
+
+ ## License
+
+ MIT License
+
web_queue2-0.1.0.dist-info/RECORD ADDED
@@ -0,0 +1,30 @@
+ web_queue/VERSION,sha256=atlhOkVXmNbZLl9fOQq0uqcFlryGntaxf1zdKyhjXwY,5
+ web_queue/__init__.py,sha256=jimNuUC5G2j-0IoOnWIrsgBMQjz0ho4wtODWhwOrD80,100
+ web_queue/app.py,sha256=v-yJTPOUrUzEVmzL38UT03k82hig_bc5ZX_XhA-H6Q8,1591
+ web_queue/client/__init__.py,sha256=sueZYgb5SH38mj7ILPMIjliJCSn5oTI9XymEvko3Qt4,82
+ web_queue/client/_client.py,sha256=jz7kUGhf-ib0Ct-Dx3kd2El-fI2ifmF7p-L3m05jKgs,2914
+ web_queue/client/ai/__init__.py,sha256=CXVesgQWLKYrzWbExEBzGLlfE-thqCUyGnlxVkK7XeM,57
+ web_queue/client/ai/_ai.py,sha256=1fayO48efLkzQQRbM2BNsA1dIUsoRYhjZVKI3j4SaWw,5534
+ web_queue/client/clean/__init__.py,sha256=a1hGcIExYmvLX4GAQ4STNLr90dcAHSsRYYQzhweqZS0,69
+ web_queue/client/clean/_clean.py,sha256=iG60ynaiaLPJZBKUe7O3irngMY2nSfbwwE4aflxT01s,679
+ web_queue/client/config.py,sha256=O49rjnj08_VBN7OU1w88i6PM5AnNZ-VS0fScff_noh8,2107
+ web_queue/client/web/__init__.py,sha256=8HMOpuqCGT_c-locTbEJiltWm9BoPaVX5B23MNZzrqQ,61
+ web_queue/client/web/_web.py,sha256=_MmqMow0hZlYALsA0L5ALo6USDp1kbvmgBVJ2TwFAxk,7122
+ web_queue/config.py,sha256=6Jxp9-Eqfs3IRdUcbe6yK2e_KsYspvy1Kti8JxuyWRY,1036
+ web_queue/types/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ web_queue/types/fetch_html_message.py,sha256=TKIAWSHoaaylkQiVFin_DVxgCoBSswMMqtI0n_iV1Uk,639
+ web_queue/types/html_content.py,sha256=Ljtx7SHT1WupvFstwipC6d19Jt-U9CgGt1lhIXFv23k,526
+ web_queue/types/html_metadata_response.py,sha256=GsI_jy0i0E8awBdLpfdZ9DkXkyesetHBaZ_M2V7ydtE,2482
+ web_queue/types/message.py,sha256=awqy-rWRvqjrtJrlWAnz9Mh5lSbIN7bF4OKtG7PjoSc,1078
+ web_queue/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ web_queue/utils/compression.py,sha256=ab0afJMfqQypW_wOnpHF2FRbGqPhLtpjpmTHDbyS5xI,794
+ web_queue/utils/html_cleaner.py,sha256=DFRn2Z-Tz9dcjyTz3AzJoaBC3r_i3D5KrdOfep3VdKA,4055
+ web_queue/utils/html_to_str.py,sha256=-mFETVfJbse6DGTfxlz1-BgNmCUonHOUndGfWnbZhYs,541
+ web_queue/utils/human_delay.py,sha256=cMO9Z4cYfWtK0m4a3gwTBWdDEtTo_IKIGeAv0kirew8,297
+ web_queue/utils/page_with_init_script.py,sha256=OzCZPivBI_pi2mLHnCniQks0L-CuDlmEa-xCqbXFkUw,512
+ web_queue/utils/simulate_mouse_circling.py,sha256=tOyWt7jdfq3F7Skw17LrtRXnxfe7MxrhHu6gXCjpYY8,1506
+ web_queue/utils/simulate_scrolling.py,sha256=lwyMddDblXr4QqwBI7Kgc3yTQOj3NNH3_JMW2xyr4s4,563
+ web_queue2-0.1.0.dist-info/METADATA,sha256=gtweOrlbqenc6tfQbehAVZevC9suojbIVTC0NB6DJuw,1535
+ web_queue2-0.1.0.dist-info/WHEEL,sha256=zp0Cn7JsFoX2ATtOhtaFYIiE2rmFAD4OcMhtUki8W3U,88
+ web_queue2-0.1.0.dist-info/licenses/LICENSE,sha256=GFTbuEFnRb9rtRPB-9Ak9M6HFneyyWpy-pHfCM1o1Rk,1066
+ web_queue2-0.1.0.dist-info/RECORD,,
web_queue2-0.1.0.dist-info/WHEEL ADDED
@@ -0,0 +1,4 @@
+ Wheel-Version: 1.0
+ Generator: poetry-core 2.2.1
+ Root-Is-Purelib: true
+ Tag: py3-none-any
web_queue2-0.1.0.dist-info/licenses/LICENSE ADDED
@@ -0,0 +1,21 @@
+ MIT License
+
+ Copyright (c) 2025 AllenChou
+
+ Permission is hereby granted, free of charge, to any person obtaining a copy
+ of this software and associated documentation files (the "Software"), to deal
+ in the Software without restriction, including without limitation the rights
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ copies of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in all
+ copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ SOFTWARE.