ultralytics-actions 0.0.45__py3-none-any.whl → 0.0.47__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- actions/__init__.py +1 -1
- actions/utils/common_utils.py +40 -34
- {ultralytics_actions-0.0.45.dist-info → ultralytics_actions-0.0.47.dist-info}/METADATA +1 -1
- {ultralytics_actions-0.0.45.dist-info → ultralytics_actions-0.0.47.dist-info}/RECORD +8 -8
- {ultralytics_actions-0.0.45.dist-info → ultralytics_actions-0.0.47.dist-info}/LICENSE +0 -0
- {ultralytics_actions-0.0.45.dist-info → ultralytics_actions-0.0.47.dist-info}/WHEEL +0 -0
- {ultralytics_actions-0.0.45.dist-info → ultralytics_actions-0.0.47.dist-info}/entry_points.txt +0 -0
- {ultralytics_actions-0.0.45.dist-info → ultralytics_actions-0.0.47.dist-info}/top_level.txt +0 -0
actions/__init__.py
CHANGED
actions/utils/common_utils.py
CHANGED
@@ -23,22 +23,8 @@ REQUESTS_HEADERS = {
|
|
23
23
|
"Origin": "https://www.google.com/",
|
24
24
|
}
|
25
25
|
|
26
|
-
|
27
|
-
|
28
|
-
"""Removes HTML comments from a string using regex pattern matching."""
|
29
|
-
return re.sub(r"<!--.*?-->", "", body, flags=re.DOTALL).strip()
|
30
|
-
|
31
|
-
|
32
|
-
def clean_url(url):
|
33
|
-
"""Remove extra characters from URL strings."""
|
34
|
-
for _ in range(3):
|
35
|
-
url = str(url).strip('"').strip("'").rstrip(".,:;!?`\\").replace(".git@main", "").replace("git+", "")
|
36
|
-
return url
|
37
|
-
|
38
|
-
|
39
|
-
def is_url(url, session=None, check=True, max_attempts=3, timeout=2):
|
40
|
-
"""Check if string is URL and optionally verify it exists."""
|
41
|
-
allow_list = (
|
26
|
+
URL_IGNORE_LIST = frozenset(
|
27
|
+
{
|
42
28
|
"localhost",
|
43
29
|
"127.0.0",
|
44
30
|
":5000",
|
@@ -56,10 +42,39 @@ def is_url(url, session=None, check=True, max_attempts=3, timeout=2):
|
|
56
42
|
"twitter.com",
|
57
43
|
"x.com",
|
58
44
|
"storage.googleapis.com", # private GCS buckets
|
59
|
-
|
45
|
+
}
|
46
|
+
)
|
47
|
+
|
48
|
+
URL_PATTERN = re.compile(
|
49
|
+
r"\[([^]]+)]\(([^)]+)\)" # Matches Markdown links [text](url)
|
50
|
+
r"|"
|
51
|
+
r"(" # Start capturing group for plaintext URLs
|
52
|
+
r"(?:https?://)?" # Optional http:// or https://
|
53
|
+
r"(?:www\.)?" # Optional www.
|
54
|
+
r"(?:[\w.-]+)?" # Optional domain name and subdomains
|
55
|
+
r"\.[a-zA-Z]{2,}" # TLD
|
56
|
+
r"(?:/[^\s\"')\]]*)?" # Optional path
|
57
|
+
r")"
|
58
|
+
)
|
59
|
+
|
60
|
+
|
61
|
+
def remove_html_comments(body: str) -> str:
|
62
|
+
"""Removes HTML comments from a string using regex pattern matching."""
|
63
|
+
return re.sub(r"<!--.*?-->", "", body, flags=re.DOTALL).strip()
|
64
|
+
|
65
|
+
|
66
|
+
def clean_url(url):
|
67
|
+
"""Remove extra characters from URL strings."""
|
68
|
+
for _ in range(3):
|
69
|
+
url = str(url).strip('"').strip("'").rstrip(".,:;!?`\\").replace(".git@main", "").replace("git+", "")
|
70
|
+
return url
|
71
|
+
|
72
|
+
|
73
|
+
def is_url(url, session=None, check=True, max_attempts=3, timeout=2):
|
74
|
+
"""Check if string is URL and optionally verify it exists."""
|
60
75
|
try:
|
61
76
|
# Check allow list
|
62
|
-
if any(x in url for x in allow_list):
|
77
|
+
if any(x in url for x in URL_IGNORE_LIST):
|
63
78
|
return True
|
64
79
|
|
65
80
|
# Check structure
|
@@ -69,7 +84,7 @@ def is_url(url, session=None, check=True, max_attempts=3, timeout=2):
|
|
69
84
|
return False
|
70
85
|
|
71
86
|
if check:
|
72
|
-
requester = session
|
87
|
+
requester = session or requests
|
73
88
|
bad_codes = {404, 410, 500, 502, 503, 504}
|
74
89
|
kwargs = {"timeout": timeout, "allow_redirects": True}
|
75
90
|
if not session:
|
@@ -94,26 +109,13 @@ def is_url(url, session=None, check=True, max_attempts=3, timeout=2):
|
|
94
109
|
|
95
110
|
def check_links_in_string(text, verbose=True, return_bad=False):
|
96
111
|
"""Process a given text, find unique URLs within it, and check for any 404 errors."""
|
97
|
-
pattern = (
|
98
|
-
r"\[([^\]]+)\]\(([^)]+)\)" # Matches Markdown links [text](url)
|
99
|
-
r"|"
|
100
|
-
r"(" # Start capturing group for plaintext URLs
|
101
|
-
r"(?:https?://)?" # Optional http:// or https://
|
102
|
-
r"(?:www\.)?" # Optional www.
|
103
|
-
r"(?:[\w.-]+)?" # Optional domain name and subdomains
|
104
|
-
r"\.[a-zA-Z]{2,}" # TLD
|
105
|
-
r"(?:/[^\s\"')\]]*)?" # Optional path
|
106
|
-
r")"
|
107
|
-
)
|
108
|
-
# all_urls.extend([url for url in match if url and parse.urlparse(url).scheme])
|
109
112
|
all_urls = []
|
110
|
-
for md_text, md_url, plain_url in re.findall(pattern, text):
|
113
|
+
for md_text, md_url, plain_url in URL_PATTERN.findall(text):
|
111
114
|
url = md_url or plain_url
|
112
115
|
if url and parse.urlparse(url).scheme:
|
113
116
|
all_urls.append(url)
|
114
117
|
|
115
118
|
urls = set(map(clean_url, all_urls)) # remove extra characters and make unique
|
116
|
-
# bad_urls = [x for x in urls if not is_url(x, check=True)] # single-thread
|
117
119
|
with requests.Session() as session, ThreadPoolExecutor(max_workers=16) as executor:
|
118
120
|
session.headers.update(REQUESTS_HEADERS)
|
119
121
|
bad_urls = [url for url, valid in zip(urls, executor.map(lambda x: not is_url(x, session), urls)) if valid]
|
@@ -126,4 +128,8 @@ def check_links_in_string(text, verbose=True, return_bad=False):
|
|
126
128
|
|
127
129
|
|
128
130
|
if __name__ == "__main__":
|
129
|
-
|
131
|
+
url = "https://ultralytics.com/images/bus.jpg"
|
132
|
+
string = f"This is a string with a [Markdown link]({url}) inside it."
|
133
|
+
|
134
|
+
print(f"is_url(): {is_url(url)}")
|
135
|
+
print(f"check_links_in_string(): {check_links_in_string(string)}")
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.2
|
2
2
|
Name: ultralytics-actions
|
3
|
-
Version: 0.0.45
|
3
|
+
Version: 0.0.47
|
4
4
|
Summary: Ultralytics Actions for GitHub automation and PR management.
|
5
5
|
Author-email: Glenn Jocher <glenn.jocher@ultralytics.com>
|
6
6
|
Maintainer-email: Ultralytics <hello@ultralytics.com>
|
@@ -1,15 +1,15 @@
|
|
1
|
-
actions/__init__.py,sha256=
|
1
|
+
actions/__init__.py,sha256=A3KLreATd7sSdPwF5hJy-yAjrcE0rtjVdaXz_g0_Yg0,742
|
2
2
|
actions/first_interaction.py,sha256=Oosw3GlXObF4RxYJG8haIjKXSV4MAx_uPXfMdRKY5qA,17851
|
3
3
|
actions/summarize_pr.py,sha256=Pfcm6ArfZfcMme48FcCGNmMwrFFlsG0czifhXHDKAlY,11166
|
4
4
|
actions/summarize_release.py,sha256=2D1IIeS4xrQNEJER3HItm5u9XvTL8hwGX_jWD2S8q1Y,8455
|
5
5
|
actions/update_markdown_code_blocks.py,sha256=DN6rrDw2VHXUTetrrg1SHlYDco9iaGOfQBKFORqbCBI,6362
|
6
6
|
actions/utils/__init__.py,sha256=WStdEAYROVnF0nubEOmrFLrejkRiMXIefA5O1ckfcFs,476
|
7
|
-
actions/utils/common_utils.py,sha256=
|
7
|
+
actions/utils/common_utils.py,sha256=8Au56sY6toCv76WJ-8jbmW94BgE_2VXevQbt3XKDyVc,4826
|
8
8
|
actions/utils/github_utils.py,sha256=0h0Hz2tgUta61Ymn9YggRXBZ7aZdF5krKnX7Tj9jqRU,7068
|
9
9
|
actions/utils/openai_utils.py,sha256=U7DjxTdFGdhmWSE4_KIg1yPQdtrfG4GkbNZCgMw4_1Q,1822
|
10
|
-
ultralytics_actions-0.0.45.dist-info/LICENSE,sha256=hIahDEOTzuHCU5J2nd07LWwkLW7Hko4UFO__ffsvB-8,34523
|
11
|
-
ultralytics_actions-0.0.
|
12
|
-
ultralytics_actions-0.0.45.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
|
13
|
-
ultralytics_actions-0.0.45.dist-info/entry_points.txt,sha256=GowvOFplj0C7JmsjbKcbpgLpdf2r921pcaOQkAHWZRA,378
|
14
|
-
ultralytics_actions-0.0.45.dist-info/top_level.txt,sha256=5apM5x80QlJcGbACn1v3fkmIuL1-XQCKcItJre7w7Tw,8
|
15
|
-
ultralytics_actions-0.0.45.dist-info/RECORD,,
|
10
|
+
ultralytics_actions-0.0.47.dist-info/LICENSE,sha256=hIahDEOTzuHCU5J2nd07LWwkLW7Hko4UFO__ffsvB-8,34523
|
11
|
+
ultralytics_actions-0.0.47.dist-info/METADATA,sha256=xcYxNklkUgrpYsvP5HkQRl21H_vIiQsQWmLW_YxRjeo,10561
|
12
|
+
ultralytics_actions-0.0.47.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
|
13
|
+
ultralytics_actions-0.0.47.dist-info/entry_points.txt,sha256=GowvOFplj0C7JmsjbKcbpgLpdf2r921pcaOQkAHWZRA,378
|
14
|
+
ultralytics_actions-0.0.47.dist-info/top_level.txt,sha256=5apM5x80QlJcGbACn1v3fkmIuL1-XQCKcItJre7w7Tw,8
|
15
|
+
ultralytics_actions-0.0.47.dist-info/RECORD,,
|
File without changes
|
File without changes
|
{ultralytics_actions-0.0.45.dist-info → ultralytics_actions-0.0.47.dist-info}/entry_points.txt
RENAMED
File without changes
|
File without changes
|