urlicon 0.1.0__py3-none-any.whl → 0.2.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- urlicon/urlicon.py +14 -5
- {urlicon-0.1.0.dist-info → urlicon-0.2.1.dist-info}/METADATA +8 -7
- urlicon-0.2.1.dist-info/RECORD +6 -0
- urlicon/string_cache.py +0 -116
- urlicon-0.1.0.dist-info/RECORD +0 -7
- {urlicon-0.1.0.dist-info → urlicon-0.2.1.dist-info}/WHEEL +0 -0
- {urlicon-0.1.0.dist-info → urlicon-0.2.1.dist-info}/top_level.txt +0 -0
urlicon/urlicon.py
CHANGED
|
@@ -5,14 +5,14 @@ import urllib
|
|
|
5
5
|
import requests
|
|
6
6
|
from bs4 import BeautifulSoup
|
|
7
7
|
from dotenv import load_dotenv
|
|
8
|
+
from unforgettable import unforgettable
|
|
8
9
|
|
|
9
10
|
from urlicon import urls
|
|
10
|
-
from urlicon.string_cache import string_cache
|
|
11
11
|
|
|
12
12
|
load_dotenv()
|
|
13
13
|
|
|
14
|
-
|
|
15
|
-
cache =
|
|
14
|
+
SIMPLE_CACHE_ROOT_DIR = os.getenv("SIMPLE_CACHE_ROOT_DIR", None)
|
|
15
|
+
cache = unforgettable(cache_folder=SIMPLE_CACHE_ROOT_DIR)
|
|
16
16
|
|
|
17
17
|
|
|
18
18
|
def get_url_icon(url):
|
|
@@ -152,6 +152,15 @@ def requests_get(url):
|
|
|
152
152
|
if req.status_code != 200:
|
|
153
153
|
return None
|
|
154
154
|
|
|
155
|
-
code = req.
|
|
156
|
-
cache.set(
|
|
155
|
+
code = req.content
|
|
156
|
+
cache.set(content=code, cache_id=cache_prefix + url)
|
|
157
157
|
return code
|
|
158
|
+
|
|
159
|
+
|
|
160
|
+
def is_file_binary(file_path: str) -> bool:
|
|
161
|
+
try:
|
|
162
|
+
with open(file_path, "r") as fp:
|
|
163
|
+
fp.read(16)
|
|
164
|
+
return False
|
|
165
|
+
except UnicodeDecodeError:
|
|
166
|
+
return True
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: urlicon
|
|
3
|
-
Version: 0.1
|
|
3
|
+
Version: 0.2.1
|
|
4
4
|
Summary: `URLicon` helps you to discover a possible icon from a URL.
|
|
5
5
|
Author-email: Cesar Cardoso <hello@cesarcardoso.cc>
|
|
6
6
|
License-Expression: MIT
|
|
@@ -9,8 +9,9 @@ Description-Content-Type: text/markdown
|
|
|
9
9
|
Requires-Dist: bs4>=0.0.2
|
|
10
10
|
Requires-Dist: dotenv>=0.9.9
|
|
11
11
|
Requires-Dist: requests>=2.32.5
|
|
12
|
+
Requires-Dist: unforgettable
|
|
12
13
|
|
|
13
|
-
# URLicon - v0.1
|
|
14
|
+
# URLicon - v0.2.1
|
|
14
15
|
|
|
15
16
|
`URLicon` helps you to discover a possible icon from a URL.
|
|
16
17
|
|
|
@@ -42,19 +43,19 @@ print("icon:", icon_url)
|
|
|
42
43
|
|
|
43
44
|
### Caching
|
|
44
45
|
|
|
45
|
-
`URLicon` use
|
|
46
|
+
`URLicon` uses the [unforgettable "cache"](https://github.com/bouli/unforgettable) to avoid unnecessary URL requests.
|
|
46
47
|
It uses a [temp dir](https://docs.python.org/3/library/tempfile.html) for each
|
|
47
48
|
execution. But you can define your own directory and use the cache as much as
|
|
48
|
-
you want setting `
|
|
49
|
+
you want by setting the `SIMPLE_CACHE_ROOT_DIR` env var.
|
|
49
50
|
|
|
50
51
|
```python
|
|
51
|
-
|
|
52
|
-
cache =
|
|
52
|
+
SIMPLE_CACHE_ROOT_DIR = os.getenv("SIMPLE_CACHE_ROOT_DIR", None)
|
|
53
|
+
cache = unforgettable(cache_folder=SIMPLE_CACHE_ROOT_DIR)
|
|
53
54
|
```
|
|
54
55
|
|
|
55
56
|
And you can clean the cache with:
|
|
56
57
|
```python
|
|
57
|
-
urlicon.
|
|
58
|
+
urlicon.unforgettable.clean()
|
|
58
59
|
```
|
|
59
60
|
|
|
60
61
|
## See Also
|
|
@@ -0,0 +1,6 @@
|
|
|
1
|
+
urlicon/urlicon.py,sha256=unEZeMvzLNxU4b4PIeQsLhuZEE_GRKruJbblvHZrd08,4263
|
|
2
|
+
urlicon/urls.py,sha256=ErdlgRva3bxs6HuBX2iHng21PqpWGIF1etZFToJq4jc,2235
|
|
3
|
+
urlicon-0.2.1.dist-info/METADATA,sha256=2ubWrjiZun1aJmfwGVMm_JekwEOWhw3dEYxafCEPSec,1788
|
|
4
|
+
urlicon-0.2.1.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
|
|
5
|
+
urlicon-0.2.1.dist-info/top_level.txt,sha256=Jts8QbeWp-6xANJ9KVj3CHk6L-8D950AQ_ZKXmF4YGI,8
|
|
6
|
+
urlicon-0.2.1.dist-info/RECORD,,
|
urlicon/string_cache.py
DELETED
|
@@ -1,116 +0,0 @@
|
|
|
1
|
-
import os
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
class string_cache:
|
|
5
|
-
cache_folder: str | None = None
|
|
6
|
-
cache_files_extension: str = "html"
|
|
7
|
-
|
|
8
|
-
def __init__(self, cache_folder: str | None = None):
|
|
9
|
-
if cache_folder is not None:
|
|
10
|
-
self.cache_folder = cache_folder
|
|
11
|
-
else:
|
|
12
|
-
self.cache_folder = self.get_cache_folder()
|
|
13
|
-
pass
|
|
14
|
-
|
|
15
|
-
def safe_cache_id(func):
|
|
16
|
-
def filter_cache_id(cache_id):
|
|
17
|
-
cache_id = cache_id.replace('"', "").replace("\\", "")
|
|
18
|
-
cache_id = f'"{cache_id}"'
|
|
19
|
-
return cache_id
|
|
20
|
-
|
|
21
|
-
def _filter_cache_id_func(*args, **kwargs):
|
|
22
|
-
if "cache_id" in args:
|
|
23
|
-
args["cache_id"] = filter_cache_id(cache_id=args["cache_id"])
|
|
24
|
-
|
|
25
|
-
if "cache_id" in kwargs:
|
|
26
|
-
kwargs["cache_id"] = filter_cache_id(cache_id=kwargs["cache_id"])
|
|
27
|
-
|
|
28
|
-
return func(*args, **kwargs)
|
|
29
|
-
|
|
30
|
-
return _filter_cache_id_func
|
|
31
|
-
|
|
32
|
-
@safe_cache_id
|
|
33
|
-
def set(self, text: str, cache_id: str):
|
|
34
|
-
cache_folder = self.get_cache_folder()
|
|
35
|
-
cache_index_file_path = self.get_cache_index_path()
|
|
36
|
-
|
|
37
|
-
cache_folder_files = os.listdir(cache_folder)
|
|
38
|
-
cached_file_index = self.get_index_from_file_index(_safe_cache_id=cache_id)
|
|
39
|
-
if cached_file_index is not None:
|
|
40
|
-
new_file_index = cached_file_index
|
|
41
|
-
else:
|
|
42
|
-
new_file_index = len(cache_folder_files)
|
|
43
|
-
with open(cache_index_file_path, "a") as cache_index_file_writer:
|
|
44
|
-
cache_index_file_writer.write(f"\n{new_file_index}: {cache_id}")
|
|
45
|
-
|
|
46
|
-
new_file_name = f"{new_file_index}.{self.cache_files_extension}"
|
|
47
|
-
new_file_path = os.path.join(cache_folder, new_file_name)
|
|
48
|
-
with open(new_file_path, "w+") as new_file_writer:
|
|
49
|
-
new_file_writer.write(text)
|
|
50
|
-
|
|
51
|
-
@safe_cache_id
|
|
52
|
-
def get(self, cache_id: str) -> str:
|
|
53
|
-
cached_file_index = self.get_index_from_file_index(_safe_cache_id=cache_id)
|
|
54
|
-
code = self.get_cached_file_by_index(cached_file_index=cached_file_index)
|
|
55
|
-
return code
|
|
56
|
-
|
|
57
|
-
def get_index_from_file_index(self, _safe_cache_id):
|
|
58
|
-
cache_index_file = self.get_cache_index_file()
|
|
59
|
-
if cache_index_file.find(_safe_cache_id) < 1:
|
|
60
|
-
return None
|
|
61
|
-
cache_index_file = cache_index_file[: cache_index_file.find(_safe_cache_id) - 2]
|
|
62
|
-
cached_file_index = int(cache_index_file.split("\n")[-1].strip())
|
|
63
|
-
return cached_file_index
|
|
64
|
-
|
|
65
|
-
def get_cache_index_path(
|
|
66
|
-
self,
|
|
67
|
-
) -> str:
|
|
68
|
-
cache_index_file_name = "cache_index.yaml"
|
|
69
|
-
cache_folder = self.get_cache_folder()
|
|
70
|
-
cache_index_file_path = os.path.join(cache_folder, cache_index_file_name)
|
|
71
|
-
|
|
72
|
-
if not os.path.exists(cache_index_file_path):
|
|
73
|
-
with open(cache_index_file_path, "w+") as cache_index_file_writer:
|
|
74
|
-
cache_index_file_writer.write(f"0: {cache_index_file_name}")
|
|
75
|
-
|
|
76
|
-
return cache_index_file_path
|
|
77
|
-
|
|
78
|
-
def get_cache_index_file(
|
|
79
|
-
self,
|
|
80
|
-
) -> str:
|
|
81
|
-
with open(self.get_cache_index_path(), "r") as f:
|
|
82
|
-
cache_index_file_content = f.read()
|
|
83
|
-
|
|
84
|
-
return cache_index_file_content
|
|
85
|
-
|
|
86
|
-
def get_cached_file_by_index(self, cached_file_index: int) -> str:
|
|
87
|
-
code = None
|
|
88
|
-
cache_folder = self.get_cache_folder()
|
|
89
|
-
cached_file_name = f"{cached_file_index}.{self.cache_files_extension}"
|
|
90
|
-
cached_file_path = os.path.join(cache_folder, cached_file_name)
|
|
91
|
-
if not os.path.exists(cached_file_path):
|
|
92
|
-
return None
|
|
93
|
-
with open(cached_file_path, "r") as cached_file_reader:
|
|
94
|
-
code = cached_file_reader.read()
|
|
95
|
-
return code
|
|
96
|
-
|
|
97
|
-
def clean(
|
|
98
|
-
self,
|
|
99
|
-
):
|
|
100
|
-
cache_folder = self.get_cache_folder()
|
|
101
|
-
cache_folder_files = os.listdir(cache_folder)
|
|
102
|
-
for file in cache_folder_files:
|
|
103
|
-
file_to_clean = os.path.join(cache_folder, file)
|
|
104
|
-
if os.path.exists(file_to_clean):
|
|
105
|
-
os.remove(file_to_clean)
|
|
106
|
-
|
|
107
|
-
def get_cache_folder(
|
|
108
|
-
self,
|
|
109
|
-
):
|
|
110
|
-
import tempfile
|
|
111
|
-
|
|
112
|
-
if self.cache_folder is not None:
|
|
113
|
-
return self.cache_folder
|
|
114
|
-
|
|
115
|
-
tmpdirname = tempfile.mkdtemp()
|
|
116
|
-
return tmpdirname
|
urlicon-0.1.0.dist-info/RECORD
DELETED
|
@@ -1,7 +0,0 @@
|
|
|
1
|
-
urlicon/string_cache.py,sha256=fY4ZtAdZC3PDfdotuHZmuSAcQxQLA4WyJJ6pmOUZZMc,4130
|
|
2
|
-
urlicon/urlicon.py,sha256=zcbevt29l2xbr14z9jFxz64TrvyS8O-fk29mCSQIoqI,4066
|
|
3
|
-
urlicon/urls.py,sha256=ErdlgRva3bxs6HuBX2iHng21PqpWGIF1etZFToJq4jc,2235
|
|
4
|
-
urlicon-0.1.0.dist-info/METADATA,sha256=1SEq8BOcKYjoGQhcFFrgQFscuJoe0Sv0wDcXyq3YBLw,1713
|
|
5
|
-
urlicon-0.1.0.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
|
|
6
|
-
urlicon-0.1.0.dist-info/top_level.txt,sha256=Jts8QbeWp-6xANJ9KVj3CHk6L-8D950AQ_ZKXmF4YGI,8
|
|
7
|
-
urlicon-0.1.0.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|