toolslm 0.1.3__py3-none-any.whl → 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- toolslm/__init__.py +1 -1
- toolslm/download.py +5 -7
- {toolslm-0.1.3.dist-info → toolslm-0.2.0.dist-info}/METADATA +1 -4
- toolslm-0.2.0.dist-info/RECORD +13 -0
- toolslm-0.1.3.dist-info/RECORD +0 -13
- {toolslm-0.1.3.dist-info → toolslm-0.2.0.dist-info}/WHEEL +0 -0
- {toolslm-0.1.3.dist-info → toolslm-0.2.0.dist-info}/entry_points.txt +0 -0
- {toolslm-0.1.3.dist-info → toolslm-0.2.0.dist-info}/licenses/LICENSE +0 -0
- {toolslm-0.1.3.dist-info → toolslm-0.2.0.dist-info}/top_level.txt +0 -0
toolslm/__init__.py
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
__version__ = "0.1.3"
|
|
1
|
+
__version__ = "0.2.0"
|
toolslm/download.py
CHANGED
|
@@ -7,10 +7,6 @@ __all__ = ['clean_md', 'read_md', 'html2md', 'read_html', 'get_llmstxt', 'split_
|
|
|
7
7
|
from fastcore.utils import *
|
|
8
8
|
from httpx import get
|
|
9
9
|
from fastcore.meta import delegates
|
|
10
|
-
from llms_txt import *
|
|
11
|
-
|
|
12
|
-
from html2text import HTML2Text
|
|
13
|
-
from bs4 import BeautifulSoup
|
|
14
10
|
from urllib.parse import urlparse, urljoin
|
|
15
11
|
|
|
16
12
|
# %% ../03_download.ipynb 4
|
|
@@ -29,7 +25,8 @@ def read_md(url, rm_comments=True, rm_details=True, **kwargs):
|
|
|
29
25
|
# %% ../03_download.ipynb 7
|
|
30
26
|
def html2md(s:str, ignore_links=True):
|
|
31
27
|
"Convert `s` from HTML to markdown"
|
|
32
|
-
|
|
28
|
+
import html2text
|
|
29
|
+
o = html2text.HTML2Text(bodywidth=5000)
|
|
33
30
|
o.ignore_links = ignore_links
|
|
34
31
|
o.mark_code = True
|
|
35
32
|
o.ignore_images = True
|
|
@@ -47,6 +44,7 @@ def read_html(url, # URL to read
|
|
|
47
44
|
"Get `url`, optionally selecting CSS selector `sel`, and convert to clean markdown"
|
|
48
45
|
page = get(url).text
|
|
49
46
|
if sel:
|
|
47
|
+
from bs4 import BeautifulSoup
|
|
50
48
|
soup = BeautifulSoup(page, 'html.parser')
|
|
51
49
|
if multi:
|
|
52
50
|
page = [str(el) for el in soup.select(sel)]
|
|
@@ -56,14 +54,14 @@ def read_html(url, # URL to read
|
|
|
56
54
|
if wrap_tag: return '\n'.join([f"\n<{wrap_tag}>\n{o}</{wrap_tag}>\n" for o in mds])
|
|
57
55
|
else: return'\n'.join(mds)
|
|
58
56
|
|
|
59
|
-
|
|
60
57
|
# %% ../03_download.ipynb 13
|
|
61
58
|
def get_llmstxt(url, optional=False, n_workers=None):
|
|
62
59
|
"Get llms.txt file from and expand it with `llms_txt.create_ctx()`"
|
|
63
60
|
if not url.endswith('llms.txt'): return None
|
|
61
|
+
import llms_txt
|
|
64
62
|
resp = get(url)
|
|
65
63
|
if resp.status_code!=200: return None
|
|
66
|
-
return create_ctx(resp.text, optional=optional, n_workers=n_workers)
|
|
64
|
+
return llms_txt.create_ctx(resp.text, optional=optional, n_workers=n_workers)
|
|
67
65
|
|
|
68
66
|
# %% ../03_download.ipynb 15
|
|
69
67
|
def split_url(url):
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: toolslm
|
|
3
|
-
Version: 0.1.3
|
|
3
|
+
Version: 0.2.0
|
|
4
4
|
Summary: Tools to make language models a bit easier to use
|
|
5
5
|
Home-page: https://github.com/AnswerDotAI/toolslm
|
|
6
6
|
Author: Jeremy Howard
|
|
@@ -17,10 +17,7 @@ Requires-Python: >=3.9
|
|
|
17
17
|
Description-Content-Type: text/markdown
|
|
18
18
|
License-File: LICENSE
|
|
19
19
|
Requires-Dist: fastcore>=1.5.47
|
|
20
|
-
Requires-Dist: beautifulsoup4
|
|
21
|
-
Requires-Dist: html2text
|
|
22
20
|
Requires-Dist: httpx
|
|
23
|
-
Requires-Dist: llms_txt
|
|
24
21
|
Provides-Extra: dev
|
|
25
22
|
Dynamic: author
|
|
26
23
|
Dynamic: author-email
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
toolslm/__init__.py,sha256=Zn1KFblwuFHiDRdRAiRnDBRkbPttWh44jKa5zG2ov0E,22
|
|
2
|
+
toolslm/_modidx.py,sha256=-D-B5o30VGs11gBKf96lpADVXnZhdiVEshJpLzmUnDs,4378
|
|
3
|
+
toolslm/download.py,sha256=Ak7xzzCplU4RFW2cvUiuvhnL0agQqwF4lVH2hQhNNmc,4476
|
|
4
|
+
toolslm/funccall.py,sha256=bzVlcTpgQuuBIG-I1HRidnXV5mp0FfGV10_Mk1DSQuc,8460
|
|
5
|
+
toolslm/md_hier.py,sha256=4uC12443tPBduYJgIZZIcEat2VG0x7JYC8-SwDdS2JY,6360
|
|
6
|
+
toolslm/shell.py,sha256=GVqfL74NHw66zzZ7jvGVLjE55ZNJGBPvEb8kLz4aoYc,1576
|
|
7
|
+
toolslm/xml.py,sha256=QNwUavoMkFK84D7dMwnBjqlYJwN-pJ7u3BxOeDuNAmk,4088
|
|
8
|
+
toolslm-0.2.0.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
|
9
|
+
toolslm-0.2.0.dist-info/METADATA,sha256=L2bwDiXXiMvKYeR8SSApCVBF7YrwwOE4Nbry4DJfMOk,2404
|
|
10
|
+
toolslm-0.2.0.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
|
|
11
|
+
toolslm-0.2.0.dist-info/entry_points.txt,sha256=xFz0Eymlo5X7BGpaO6DI9gMxvN5A7faebzrlr8ctp5I,95
|
|
12
|
+
toolslm-0.2.0.dist-info/top_level.txt,sha256=4hRTrFWayz_Kz5221XjvlpCwVFrW3WPi1P0fllkTq9s,8
|
|
13
|
+
toolslm-0.2.0.dist-info/RECORD,,
|
toolslm-0.1.3.dist-info/RECORD
DELETED
|
@@ -1,13 +0,0 @@
|
|
|
1
|
-
toolslm/__init__.py,sha256=XEqb2aiIn8fzGE68Mph4ck1FtQqsR_am0wRWvrYPffQ,22
|
|
2
|
-
toolslm/_modidx.py,sha256=-D-B5o30VGs11gBKf96lpADVXnZhdiVEshJpLzmUnDs,4378
|
|
3
|
-
toolslm/download.py,sha256=d84O8PCX7uAbLg0HTbNNfCdANPcnhXFu4qzOtcfJfHU,4465
|
|
4
|
-
toolslm/funccall.py,sha256=bzVlcTpgQuuBIG-I1HRidnXV5mp0FfGV10_Mk1DSQuc,8460
|
|
5
|
-
toolslm/md_hier.py,sha256=4uC12443tPBduYJgIZZIcEat2VG0x7JYC8-SwDdS2JY,6360
|
|
6
|
-
toolslm/shell.py,sha256=GVqfL74NHw66zzZ7jvGVLjE55ZNJGBPvEb8kLz4aoYc,1576
|
|
7
|
-
toolslm/xml.py,sha256=QNwUavoMkFK84D7dMwnBjqlYJwN-pJ7u3BxOeDuNAmk,4088
|
|
8
|
-
toolslm-0.1.3.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
|
9
|
-
toolslm-0.1.3.dist-info/METADATA,sha256=8MXaP61NyBqYuRyFe3IC49I0TYcZdZHniN17MJWHxB0,2483
|
|
10
|
-
toolslm-0.1.3.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
|
|
11
|
-
toolslm-0.1.3.dist-info/entry_points.txt,sha256=xFz0Eymlo5X7BGpaO6DI9gMxvN5A7faebzrlr8ctp5I,95
|
|
12
|
-
toolslm-0.1.3.dist-info/top_level.txt,sha256=4hRTrFWayz_Kz5221XjvlpCwVFrW3WPi1P0fllkTq9s,8
|
|
13
|
-
toolslm-0.1.3.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|