staticweb 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
staticweb/__init__.py ADDED
@@ -0,0 +1 @@
1
+ # staticweb package
staticweb/cli.py ADDED
@@ -0,0 +1,205 @@
1
+ import argparse
2
+ import requests
3
+ from bs4 import BeautifulSoup
4
+ import sys
5
+ import json
6
+ import time
7
+ from concurrent.futures import ThreadPoolExecutor, as_completed
8
+
9
+ from rich.progress import Progress, BarColumn, TimeRemainingColumn, SpinnerColumn
10
+ from rich.console import Console
11
+ from rich.table import Table
12
+ from rich.panel import Panel
13
+ from rich.syntax import Syntax
14
+ from rich.markdown import Markdown
15
+
16
+ console = Console()
17
+
18
+
19
def fetch_soup(url, insecure=False):
    """Download *url* and parse it into a BeautifulSoup tree.

    On any failure (network error, bad status, parse error) returns an
    error string of the form ``[ERROR] <url> -> <exc>`` instead of raising,
    so callers can detect failure with ``isinstance(result, str)``.
    """
    try:
        response = requests.get(
            url,
            headers={"User-Agent": "Mozilla/5.0"},
            verify=not insecure,  # --insecure disables TLS verification
            timeout=10,
        )
        response.raise_for_status()
        return BeautifulSoup(response.text, "html.parser")
    except Exception as exc:  # deliberate best-effort: report, never raise
        return f"[ERROR] {url} -> {exc}"
27
+
28
+
29
def extract_links(soup):
    """Collect every anchor in *soup* as ``{"text": ..., "href": ...}`` dicts."""
    links = []
    for anchor in soup.find_all("a"):
        links.append({"text": anchor.get_text(strip=True), "href": anchor.get("href")})
    return links
31
+
32
+
33
def extract_paragraphs(soup):
    """Return the stripped text of every ``<p>`` element in *soup*."""
    texts = []
    for paragraph in soup.find_all("p"):
        texts.append(paragraph.get_text(strip=True))
    return texts
35
+
36
+
37
def extract_all_text(soup):
    """Return all visible text in *soup*, one text node per line."""
    return soup.get_text(strip=True, separator="\n")
39
+
40
+
41
def extract_tag(soup, tag):
    """Return the stripped text of every *tag* element found in *soup*."""
    matches = soup.find_all(tag)
    return [node.get_text(strip=True) for node in matches]
43
+
44
+
45
def process_url(url, args):
    """Fetch *url* and apply the extraction mode selected by CLI *args*.

    Returns the extracted data, or an error string when the fetch failed
    (``fetch_soup`` reports failures as plain strings) or when no
    extraction flag was supplied.
    """
    soup = fetch_soup(url, args.insecure)
    if isinstance(soup, str):  # fetch failed; propagate the error message
        return soup

    # First matching mode wins, in this priority order.
    if args.link:
        return extract_links(soup)
    if args.para:
        return extract_paragraphs(soup)
    if args.all:
        return extract_all_text(soup)
    if args.tag:
        return extract_tag(soup, args.tag)
    return "[ERROR] No valid option"
61
+
62
+
63
def show_help():
    """Render the CLI help text to the console as rich Markdown.

    The usage examples use the installed console script ``sw`` (declared in
    the package entry points); the previous text referenced a nonexistent
    ``staticweb.py`` script.
    """
    help_text = """
# ⚡ StaticWeb CLI

A fast, multi-threaded static web scraper.

## Usage
sw <urls> [options]

## Options
- --link Extract all links
- --para Extract paragraphs
- --all Extract full text
- --tag <tag> Extract specific HTML tag

## Output
- --json Output in JSON format
- --save <file> Save output to file

## Performance
- --threads N Number of threads (default: 1)
- --insecure Disable SSL verification

## Dev Tools
- --codeofit Show source code of this tool

## Example
sw https://example.com --link --threads 5
"""
    console.print(Markdown(help_text))
93
+
94
+
95
def show_code():
    """Pretty-print this module's own source with syntax highlighting."""
    try:
        with open(__file__, "r", encoding="utf-8") as src:
            highlighted = Syntax(src.read(), "python", theme="monokai", line_numbers=True)
        console.print(highlighted)
    except Exception as exc:
        console.print(f"[red][ERROR] Cannot read source: {exc}[/red]")
103
+
104
+
105
def output_result(results, args):
    """Render per-URL results to the console and optionally save them as JSON.

    *results* maps each URL to either an error string (shown in red and
    skipping normal formatting) or the extracted data. When ``--save`` is
    given, the full mapping is written to that file as indented JSON.
    """
    console.print("\n[bold cyan]=== RESULTS ===[/bold cyan]\n")

    for url, payload in results.items():
        console.print(Panel(f"[bold yellow]{url}[/bold yellow]", expand=False))

        # A plain string means the fetch/extract step failed for this URL.
        if isinstance(payload, str):
            console.print(f"[red]{payload}[/red]")
        elif args.json:
            console.print_json(json.dumps(payload))
        elif args.link:
            table = Table(title="Links", show_lines=True)
            table.add_column("Text", style="cyan", overflow="fold")
            table.add_column("Href", style="green", overflow="fold")
            for link in payload:
                table.add_row(link["text"], str(link["href"]))
            console.print(table)
        else:
            for index, entry in enumerate(payload, 1):
                console.print(f"[green]{index}.[/green] {entry}")

    if args.save:
        try:
            with open(args.save, "w", encoding="utf-8") as out:
                json.dump(results, out, indent=2)
            console.print(f"\n[bold green][+] Saved to {args.save}[/bold green]")
        except Exception as exc:
            console.print(f"[red][ERROR] Save failed: {exc}[/red]")
139
+
140
+
141
def main():
    """CLI entry point: parse arguments, scrape URLs concurrently, show results."""
    parser = argparse.ArgumentParser(add_help=False)

    parser.add_argument("urls", nargs="*", help="One or more URLs")

    # Extraction modes (process_url applies the first matching flag).
    parser.add_argument("--link", action="store_true")
    parser.add_argument("--para", action="store_true")
    parser.add_argument("--all", action="store_true")
    parser.add_argument("--tag", type=str)

    # Output options.
    parser.add_argument("--json", action="store_true")
    parser.add_argument("--save", type=str)

    # Performance options.
    parser.add_argument("--threads", type=int, default=1)
    parser.add_argument("--insecure", action="store_true")

    # Meta options (add_help=False above so --help can render rich Markdown).
    parser.add_argument("--help", action="store_true")
    parser.add_argument("--codeofit", action="store_true")

    args = parser.parse_args()

    if args.help:
        show_help()
        sys.exit(0)

    if args.codeofit:
        show_code()
        sys.exit(0)

    if not args.urls:
        console.print("[red]No URL provided. Use --help[/red]")
        sys.exit(1)

    # Guard against `--threads 0` or negatives, which would make
    # ThreadPoolExecutor raise ValueError with an unhelpful traceback.
    workers = max(1, args.threads)

    console.print(Panel("[bold magenta]⚡ StaticWeb Scraper Starting...[/bold magenta]"))

    results = {}

    with Progress(
        SpinnerColumn(style="bold magenta"),
        "[progress.description]{task.description}",
        BarColumn(bar_width=40),
        "[progress.percentage]{task.percentage:>3.0f}%",
        TimeRemainingColumn(),
        console=console,
    ) as progress:

        task = progress.add_task("[cyan]Scraping URLs...", total=len(args.urls))

        with ThreadPoolExecutor(max_workers=workers) as executor:
            futures = {executor.submit(process_url, url, args): url for url in args.urls}

            for future in as_completed(futures):
                url = futures[future]
                try:
                    results[url] = future.result()
                except Exception as e:
                    # An unexpected crash in one worker must not abort the
                    # whole run; record it in the tool's error-string style.
                    results[url] = f"[ERROR] {url} -> {e}"

                progress.update(task, advance=1)
                time.sleep(0.02)  # brief pause so the progress bar is visible

    output_result(results, args)
202
+
203
+
204
def run():
    """Console-script entry point; simply delegates to :func:`main`."""
    main()
@@ -0,0 +1,8 @@
1
+ Metadata-Version: 2.4
2
+ Name: staticweb
3
+ Version: 0.1.0
4
+ Summary: A fast CLI static web scraper
5
+ Author: Shiwam Kumar
6
+ Requires-Dist: requests
7
+ Requires-Dist: beautifulsoup4
8
+ Requires-Dist: rich
@@ -0,0 +1,7 @@
1
+ staticweb/__init__.py,sha256=KvrRCfAMKwZz7BDtebzzc-_1VyM-inYE-OFc7DIX0WU,19
2
+ staticweb/cli.py,sha256=r888efjVNEmZnESPgJkbgPZOuJgETqRwbgncU8FsGl0,5637
3
+ staticweb-0.1.0.dist-info/METADATA,sha256=TXuUVzylEcxzxehn6AmmCR3ZUg737N74k0Wz9V8jBc8,187
4
+ staticweb-0.1.0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
5
+ staticweb-0.1.0.dist-info/entry_points.txt,sha256=Lzz08NMLX3-TrUwgLhEaHDG02sRRZXHgrOcc-WXtvIM,41
6
+ staticweb-0.1.0.dist-info/top_level.txt,sha256=W0nceJfb8idUeZDMitfOrJe1aAppQA-_vIIEAq5DVdg,10
7
+ staticweb-0.1.0.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (82.0.1)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ sw = staticweb.cli:run
@@ -0,0 +1 @@
1
+ staticweb