staticweb 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
staticweb/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
# staticweb package
|
staticweb/cli.py
ADDED
|
@@ -0,0 +1,205 @@
|
|
|
1
|
+
import argparse
|
|
2
|
+
import requests
|
|
3
|
+
from bs4 import BeautifulSoup
|
|
4
|
+
import sys
|
|
5
|
+
import json
|
|
6
|
+
import time
|
|
7
|
+
from concurrent.futures import ThreadPoolExecutor, as_completed
|
|
8
|
+
|
|
9
|
+
from rich.progress import Progress, BarColumn, TimeRemainingColumn, SpinnerColumn
|
|
10
|
+
from rich.console import Console
|
|
11
|
+
from rich.table import Table
|
|
12
|
+
from rich.panel import Panel
|
|
13
|
+
from rich.syntax import Syntax
|
|
14
|
+
from rich.markdown import Markdown
|
|
15
|
+
|
|
16
|
+
console = Console()
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def fetch_soup(url, insecure=False):
    """Download *url* and parse it into a BeautifulSoup document.

    On any failure (network error, timeout, non-2xx status) returns an
    "[ERROR] ..." string instead of raising — callers distinguish the two
    outcomes with isinstance(result, str).

    insecure=True disables TLS certificate verification.
    """
    try:
        response = requests.get(
            url,
            headers={"User-Agent": "Mozilla/5.0"},
            verify=not insecure,
            timeout=10,
        )
        response.raise_for_status()
        return BeautifulSoup(response.text, "html.parser")
    except Exception as exc:
        return f"[ERROR] {url} -> {exc}"
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def extract_links(soup):
    """Return every anchor in the document as a {"text", "href"} dict."""
    links = []
    for anchor in soup.find_all("a"):
        links.append({"text": anchor.get_text(strip=True), "href": anchor.get("href")})
    return links
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def extract_paragraphs(soup):
    """Return the whitespace-stripped text of every <p> element."""
    texts = []
    for paragraph in soup.find_all("p"):
        texts.append(paragraph.get_text(strip=True))
    return texts
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def extract_all_text(soup):
    """Return the page's full visible text, one text node per line."""
    full_text = soup.get_text(separator="\n", strip=True)
    return full_text
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def extract_tag(soup, tag):
    """Return the whitespace-stripped text of every element matching *tag*."""
    collected = []
    for element in soup.find_all(tag):
        collected.append(element.get_text(strip=True))
    return collected
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def process_url(url, args):
    """Fetch *url* and run the extraction selected by the CLI flags.

    Returns the extracted data on success, or an "[ERROR] ..." string
    when the fetch failed or no extraction flag was supplied.
    """
    soup = fetch_soup(url, args.insecure)

    # fetch_soup signals failure by returning an error string.
    if isinstance(soup, str):
        return soup

    # Flag priority mirrors the original elif chain: link > para > all > tag.
    if args.link:
        return extract_links(soup)
    if args.para:
        return extract_paragraphs(soup)
    if args.all:
        return extract_all_text(soup)
    if args.tag:
        return extract_tag(soup, args.tag)
    return "[ERROR] No valid option"
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def show_help():
    """Render the CLI usage guide to the terminal as rich Markdown.

    Used instead of argparse's built-in help (the parser is created with
    add_help=False so --help routes here).
    """
    # NOTE: this text is rendered at runtime; keep it in sync with the
    # flags registered in main().
    help_text = """
# ⚡ StaticWeb CLI

A fast, multi-threaded static web scraper.

## Usage
python staticweb.py <urls> [options]

## Options
- --link Extract all links
- --para Extract paragraphs
- --all Extract full text
- --tag <tag> Extract specific HTML tag

## Output
- --json Output in JSON format
- --save <file> Save output to file

## Performance
- --threads N Number of threads (default: 1)
- --insecure Disable SSL verification

## Dev Tools
- --codeofit Show source code of this tool

## Example
python staticweb.py https://example.com --link --threads 5
"""
    console.print(Markdown(help_text))
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
def show_code():
    """Pretty-print this module's own source with syntax highlighting."""
    try:
        with open(__file__, "r", encoding="utf-8") as source_file:
            source_text = source_file.read()
        console.print(
            Syntax(source_text, "python", theme="monokai", line_numbers=True)
        )
    except Exception as exc:
        console.print(f"[red][ERROR] Cannot read source: {exc}[/red]")
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
def output_result(results, args):
    """Render per-URL scrape results to the console; optionally save as JSON.

    *results* maps url -> extracted data: a list (from --link/--para/--tag),
    a plain string (full page text from --all), or an "[ERROR] ..." string.
    """
    console.print("\n[bold cyan]=== RESULTS ===[/bold cyan]\n")

    for url, result in results.items():
        console.print(Panel(f"[bold yellow]{url}[/bold yellow]", expand=False))

        # BUG FIX: --all legitimately returns a plain string, which the old
        # `isinstance(result, str)` check conflated with error strings —
        # full-page text was printed in red and skipped. Error strings in
        # this module always carry the "[ERROR]" sentinel prefix.
        if isinstance(result, str) and result.startswith("[ERROR]"):
            console.print(f"[red]{result}[/red]")
            continue

        if args.json:
            console.print_json(json.dumps(result))
            continue

        if isinstance(result, str):
            # Full-page text (--all): print verbatim, not character-by-character.
            console.print(result)
        elif args.link:
            table = Table(title="Links", show_lines=True)
            table.add_column("Text", style="cyan", overflow="fold")
            table.add_column("Href", style="green", overflow="fold")

            for item in result:
                # href may be None for anchors without one; stringify for rich.
                table.add_row(item["text"], str(item["href"]))

            console.print(table)
        else:
            for i, item in enumerate(result, 1):
                console.print(f"[green]{i}.[/green] {item}")

    if args.save:
        try:
            with open(args.save, "w", encoding="utf-8") as f:
                json.dump(results, f, indent=2)
            console.print(f"\n[bold green][+] Saved to {args.save}[/bold green]")
        except Exception as e:
            console.print(f"[red][ERROR] Save failed: {e}[/red]")
|
|
139
|
+
|
|
140
|
+
|
|
141
|
+
def main():
    """CLI entry point: parse arguments, scrape every URL concurrently,
    then hand the collected results to output_result()."""
    # add_help=False so --help is handled by our Markdown help screen.
    parser = argparse.ArgumentParser(add_help=False)

    parser.add_argument("urls", nargs="*", help="One or more URLs")

    # Extraction modes (priority resolved in process_url).
    parser.add_argument("--link", action="store_true")
    parser.add_argument("--para", action="store_true")
    parser.add_argument("--all", action="store_true")
    parser.add_argument("--tag", type=str)

    # Output options.
    parser.add_argument("--json", action="store_true")
    parser.add_argument("--save", type=str)

    # Performance / transport.
    parser.add_argument("--threads", type=int, default=1)
    parser.add_argument("--insecure", action="store_true")

    # Dev / help flags.
    parser.add_argument("--help", action="store_true")
    parser.add_argument("--codeofit", action="store_true")

    args = parser.parse_args()

    if args.help:
        show_help()
        sys.exit(0)

    if args.codeofit:
        show_code()
        sys.exit(0)

    if not args.urls:
        console.print("[red]No URL provided. Use --help[/red]")
        sys.exit(1)

    console.print(Panel("[bold magenta]⚡ StaticWeb Scraper Starting...[/bold magenta]"))

    results = {}

    progress_columns = (
        SpinnerColumn(style="bold magenta"),
        "[progress.description]{task.description}",
        BarColumn(bar_width=40),
        "[progress.percentage]{task.percentage:>3.0f}%",
        TimeRemainingColumn(),
    )

    with Progress(*progress_columns, console=console) as progress:
        task = progress.add_task("[cyan]Scraping URLs...", total=len(args.urls))

        with ThreadPoolExecutor(max_workers=args.threads) as executor:
            pending = {
                executor.submit(process_url, url, args): url for url in args.urls
            }

            for future in as_completed(pending):
                results[pending[future]] = future.result()
                progress.update(task, advance=1)
                # Brief pause so the progress animation stays visible.
                time.sleep(0.02)

    output_result(results, args)
|
|
202
|
+
|
|
203
|
+
|
|
204
|
+
def run():
    # Console-script entry point registered in the wheel's entry_points.txt;
    # simply delegates to main().
    main()
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
staticweb/__init__.py,sha256=KvrRCfAMKwZz7BDtebzzc-_1VyM-inYE-OFc7DIX0WU,19
|
|
2
|
+
staticweb/cli.py,sha256=r888efjVNEmZnESPgJkbgPZOuJgETqRwbgncU8FsGl0,5637
|
|
3
|
+
staticweb-0.1.0.dist-info/METADATA,sha256=TXuUVzylEcxzxehn6AmmCR3ZUg737N74k0Wz9V8jBc8,187
|
|
4
|
+
staticweb-0.1.0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
|
|
5
|
+
staticweb-0.1.0.dist-info/entry_points.txt,sha256=Lzz08NMLX3-TrUwgLhEaHDG02sRRZXHgrOcc-WXtvIM,41
|
|
6
|
+
staticweb-0.1.0.dist-info/top_level.txt,sha256=W0nceJfb8idUeZDMitfOrJe1aAppQA-_vIIEAq5DVdg,10
|
|
7
|
+
staticweb-0.1.0.dist-info/RECORD,,
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
staticweb
|