ultralytics-actions 0.0.45__tar.gz → 0.0.46__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {ultralytics_actions-0.0.45 → ultralytics_actions-0.0.46}/PKG-INFO +1 -1
- {ultralytics_actions-0.0.45 → ultralytics_actions-0.0.46}/actions/__init__.py +1 -1
- {ultralytics_actions-0.0.45 → ultralytics_actions-0.0.46}/actions/utils/common_utils.py +40 -34
- {ultralytics_actions-0.0.45 → ultralytics_actions-0.0.46}/tests/test_urls.py +16 -0
- {ultralytics_actions-0.0.45 → ultralytics_actions-0.0.46}/ultralytics_actions.egg-info/PKG-INFO +1 -1
- {ultralytics_actions-0.0.45 → ultralytics_actions-0.0.46}/LICENSE +0 -0
- {ultralytics_actions-0.0.45 → ultralytics_actions-0.0.46}/README.md +0 -0
- {ultralytics_actions-0.0.45 → ultralytics_actions-0.0.46}/actions/first_interaction.py +0 -0
- {ultralytics_actions-0.0.45 → ultralytics_actions-0.0.46}/actions/summarize_pr.py +0 -0
- {ultralytics_actions-0.0.45 → ultralytics_actions-0.0.46}/actions/summarize_release.py +0 -0
- {ultralytics_actions-0.0.45 → ultralytics_actions-0.0.46}/actions/update_markdown_code_blocks.py +0 -0
- {ultralytics_actions-0.0.45 → ultralytics_actions-0.0.46}/actions/utils/__init__.py +0 -0
- {ultralytics_actions-0.0.45 → ultralytics_actions-0.0.46}/actions/utils/github_utils.py +0 -0
- {ultralytics_actions-0.0.45 → ultralytics_actions-0.0.46}/actions/utils/openai_utils.py +0 -0
- {ultralytics_actions-0.0.45 → ultralytics_actions-0.0.46}/pyproject.toml +0 -0
- {ultralytics_actions-0.0.45 → ultralytics_actions-0.0.46}/setup.cfg +0 -0
- {ultralytics_actions-0.0.45 → ultralytics_actions-0.0.46}/ultralytics_actions.egg-info/SOURCES.txt +0 -0
- {ultralytics_actions-0.0.45 → ultralytics_actions-0.0.46}/ultralytics_actions.egg-info/dependency_links.txt +0 -0
- {ultralytics_actions-0.0.45 → ultralytics_actions-0.0.46}/ultralytics_actions.egg-info/entry_points.txt +0 -0
- {ultralytics_actions-0.0.45 → ultralytics_actions-0.0.46}/ultralytics_actions.egg-info/requires.txt +0 -0
- {ultralytics_actions-0.0.45 → ultralytics_actions-0.0.46}/ultralytics_actions.egg-info/top_level.txt +0 -0
{ultralytics_actions-0.0.45 → ultralytics_actions-0.0.46}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: ultralytics-actions
-Version: 0.0.45
+Version: 0.0.46
 Summary: Ultralytics Actions for GitHub automation and PR management.
 Author-email: Glenn Jocher <glenn.jocher@ultralytics.com>
 Maintainer-email: Ultralytics <hello@ultralytics.com>
{ultralytics_actions-0.0.45 → ultralytics_actions-0.0.46}/actions/utils/common_utils.py
@@ -23,22 +23,8 @@ REQUESTS_HEADERS = {
     "Origin": "https://www.google.com/",
 }
 
-
-def remove_html_comments(body: str) -> str:
-    """Removes HTML comments from a string using regex pattern matching."""
-    return re.sub(r"<!--.*?-->", "", body, flags=re.DOTALL).strip()
-
-
-def clean_url(url):
-    """Remove extra characters from URL strings."""
-    for _ in range(3):
-        url = str(url).strip('"').strip("'").rstrip(".,:;!?`\\").replace(".git@main", "").replace("git+", "")
-    return url
-
-
-def is_url(url, session=None, check=True, max_attempts=3, timeout=2):
-    """Check if string is URL and optionally verify it exists."""
-    allow_list = (
+URL_IGNORE_LIST = frozenset(
+    {
         "localhost",
         "127.0.0",
         ":5000",
@@ -56,10 +42,39 @@ def is_url(url, session=None, check=True, max_attempts=3, timeout=2):
         "twitter.com",
         "x.com",
         "storage.googleapis.com",  # private GCS buckets
-    )
+    }
+)
+
+URL_PATTERN = re.compile(
+    r"\[([^]]+)]\((.*?)(?=\)(?:\s|$))\)"  # Markdown links with lookahead for space/end
+    r"|"
+    r"("  # Start capturing group for plaintext URLs
+    r"(?:https?://)?"  # Optional http:// or https://
+    r"(?:www\.)?"  # Optional www.
+    r"(?:[\w.-]+)?"  # Optional domain name and subdomains
+    r"\.[a-zA-Z]{2,}"  # TLD
+    r"(?:/[^\s\"'\]]*)?"  # Optional path
+    r")"
+)
+
+
+def remove_html_comments(body: str) -> str:
+    """Removes HTML comments from a string using regex pattern matching."""
+    return re.sub(r"<!--.*?-->", "", body, flags=re.DOTALL).strip()
+
+
+def clean_url(url):
+    """Remove extra characters from URL strings."""
+    for _ in range(3):
+        url = str(url).strip('"').strip("'").rstrip(".,:;!?`\\").replace(".git@main", "").replace("git+", "")
+    return url
+
+
+def is_url(url, session=None, check=True, max_attempts=3, timeout=2):
+    """Check if string is URL and optionally verify it exists."""
     try:
         # Check allow list
-        if any(x in url for x in allow_list):
+        if any(x in url for x in URL_IGNORE_LIST):
             return True
 
         # Check structure
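Note: a minimal sketch of how the new module-level URL_PATTERN behaves once this hunk is applied (the import assumes the installed 0.0.46 package; the sample text is illustrative). findall() yields (md_text, md_url, plain_url) tuples, and the lookahead on the Markdown branch is what keeps trailing parentheses in URLs intact, as exercised by the Wikipedia link added to the tests further down:

from actions.utils.common_utils import URL_PATTERN  # module-level pattern added in this diff

text = (
    "See [docs](https://en.wikipedia.org/wiki/Active_learning_(machine_learning)) "
    "and https://ultralytics.com/images/bus.jpg"
)
for md_text, md_url, plain_url in URL_PATTERN.findall(text):
    print(md_url or plain_url)
# -> https://en.wikipedia.org/wiki/Active_learning_(machine_learning)
# -> https://ultralytics.com/images/bus.jpg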
@@ -69,7 +84,7 @@ def is_url(url, session=None, check=True, max_attempts=3, timeout=2):
             return False
 
         if check:
-            requester = session
+            requester = session or requests
             bad_codes = {404, 410, 500, 502, 503, 504}
             kwargs = {"timeout": timeout, "allow_redirects": True}
             if not session:
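Note: the `session or requests` fallback means the check still works when no shared Session is supplied, since the requests module exposes the same head()/get() calls. A small illustration of the idiom (the head() helper here is hypothetical, not part of the package):

import requests

def head(url, session=None, timeout=2):
    # Same fallback idiom as the hunk above: use the Session if given, else the requests module.
    requester = session or requests
    return requester.head(url, timeout=timeout, allow_redirects=True)

print(head("https://ultralytics.com").status_code)  # standalone call, no Session
with requests.Session() as s:
    print(head("https://ultralytics.com", session=s).status_code)  # pooled connections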
@@ -94,26 +109,13 @@ def is_url(url, session=None, check=True, max_attempts=3, timeout=2):
 
 def check_links_in_string(text, verbose=True, return_bad=False):
     """Process a given text, find unique URLs within it, and check for any 404 errors."""
-    pattern = (
-        r"\[([^\]]+)\]\(([^)]+)\)"  # Matches Markdown links [text](url)
-        r"|"
-        r"("  # Start capturing group for plaintext URLs
-        r"(?:https?://)?"  # Optional http:// or https://
-        r"(?:www\.)?"  # Optional www.
-        r"(?:[\w.-]+)?"  # Optional domain name and subdomains
-        r"\.[a-zA-Z]{2,}"  # TLD
-        r"(?:/[^\s\"')\]]*)?"  # Optional path
-        r")"
-    )
-    # all_urls.extend([url for url in match if url and parse.urlparse(url).scheme])
     all_urls = []
-    for md_text, md_url, plain_url in re.findall(pattern, text):
+    for md_text, md_url, plain_url in URL_PATTERN.findall(text):
        url = md_url or plain_url
         if url and parse.urlparse(url).scheme:
             all_urls.append(url)
 
     urls = set(map(clean_url, all_urls))  # remove extra characters and make unique
-    # bad_urls = [x for x in urls if not is_url(x, check=True)]  # single-thread
     with requests.Session() as session, ThreadPoolExecutor(max_workers=16) as executor:
         session.headers.update(REQUESTS_HEADERS)
         bad_urls = [url for url, valid in zip(urls, executor.map(lambda x: not is_url(x, session), urls)) if valid]
@@ -126,4 +128,8 @@ def check_links_in_string(text, verbose=True, return_bad=False):
 
 
 if __name__ == "__main__":
-
+    url = "https://ultralytics.com/images/bus.jpg"
+    string = f"This is a string with a [Markdown link]({url}) inside it."
+
+    print(f"is_url(): {is_url(url)}")
+    print(f"check_links_in_string(): {check_links_in_string(string)}")
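Note: the context lines above show the existing Session + ThreadPoolExecutor fan-out that the URL_PATTERN refactor plugs into. A minimal standalone sketch of that pattern, with illustrative URLs and the real code's worker count (the ok() helper stands in for is_url):

from concurrent.futures import ThreadPoolExecutor

import requests

urls = {"https://ultralytics.com", "https://github.com/ultralytics"}  # illustrative set

def ok(url, session):
    try:
        return session.head(url, timeout=2, allow_redirects=True).status_code < 400
    except requests.RequestException:
        return False

with requests.Session() as session, ThreadPoolExecutor(max_workers=16) as executor:
    bad_urls = [u for u, good in zip(urls, executor.map(lambda u: ok(u, session), urls)) if not good]
print(bad_urls)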
{ultralytics_actions-0.0.45 → ultralytics_actions-0.0.46}/tests/test_urls.py
@@ -23,6 +23,7 @@ URLS = [
     "https://www.reddit.com/r/Ultralytics/comments/1fw3605/release_megathread/",
     "https://www.kaggle.com/models/ultralytics/yolo11",
     "https://apps.apple.com/xk/app/ultralytics/id1583935240",
+    "https://en.wikipedia.org/wiki/Active_learning_(machine_learning)",  # parentheses in link
 ]
 
 
@@ -38,6 +39,21 @@ def test_is_url():
         assert is_url(url), f"URL check failed: {url}"
 
 
+def test_links_in_string_func():
+    """Test URLs in strings function."""
+    assert check_links_in_string(" abc ".join(url for url in URLS))
+
+
+def test_markdown_links_in_string_func():
+    """Test Markdown links in strings function."""
+    assert check_links_in_string(" abc ".join(f"[text]({url})" for url in URLS))
+
+
+def test_html_links_in_string_func():
+    """Test HTML links in strings function."""
+    assert check_links_in_string(" abc ".join(f'<a href="{url}">text</a>' for url in URLS))
+
+
 def test_html_links(verbose):
     """Tests the validity of URLs within HTML anchor tags and returns any invalid URLs found."""
     text = "Visit <a href='https://err.com'>our site</a> or <a href=\"http://test.org\">test site</a>"
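Note: for clarity, the " abc ".join(...) construction in the new tests builds one space-separated string of links for check_links_in_string to scan. With two of the URLS entries above (list truncated here for illustration), the Markdown variant produces:

URLS = [
    "https://www.kaggle.com/models/ultralytics/yolo11",
    "https://en.wikipedia.org/wiki/Active_learning_(machine_learning)",
]
print(" abc ".join(f"[text]({url})" for url in URLS))
# [text](https://www.kaggle.com/models/ultralytics/yolo11) abc [text](https://en.wikipedia.org/wiki/Active_learning_(machine_learning))

The second entry is the new test case for parentheses inside a link, which relies on the lookahead in URL_PATTERN shown earlier.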
{ultralytics_actions-0.0.45 → ultralytics_actions-0.0.46}/ultralytics_actions.egg-info/PKG-INFO
RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: ultralytics-actions
-Version: 0.0.45
+Version: 0.0.46
 Summary: Ultralytics Actions for GitHub automation and PR management.
 Author-email: Glenn Jocher <glenn.jocher@ultralytics.com>
 Maintainer-email: Ultralytics <hello@ultralytics.com>
{ultralytics_actions-0.0.45 → ultralytics_actions-0.0.46}/LICENSE
RENAMED
File without changes

{ultralytics_actions-0.0.45 → ultralytics_actions-0.0.46}/README.md
RENAMED
File without changes

{ultralytics_actions-0.0.45 → ultralytics_actions-0.0.46}/actions/first_interaction.py
RENAMED
File without changes

{ultralytics_actions-0.0.45 → ultralytics_actions-0.0.46}/actions/summarize_pr.py
RENAMED
File without changes

{ultralytics_actions-0.0.45 → ultralytics_actions-0.0.46}/actions/summarize_release.py
RENAMED
File without changes

{ultralytics_actions-0.0.45 → ultralytics_actions-0.0.46}/actions/update_markdown_code_blocks.py
RENAMED
File without changes

{ultralytics_actions-0.0.45 → ultralytics_actions-0.0.46}/actions/utils/__init__.py
RENAMED
File without changes

{ultralytics_actions-0.0.45 → ultralytics_actions-0.0.46}/actions/utils/github_utils.py
RENAMED
File without changes

{ultralytics_actions-0.0.45 → ultralytics_actions-0.0.46}/actions/utils/openai_utils.py
RENAMED
File without changes

{ultralytics_actions-0.0.45 → ultralytics_actions-0.0.46}/pyproject.toml
RENAMED
File without changes

{ultralytics_actions-0.0.45 → ultralytics_actions-0.0.46}/setup.cfg
RENAMED
File without changes

{ultralytics_actions-0.0.45 → ultralytics_actions-0.0.46}/ultralytics_actions.egg-info/SOURCES.txt
RENAMED
File without changes

{ultralytics_actions-0.0.45 → ultralytics_actions-0.0.46}/ultralytics_actions.egg-info/dependency_links.txt
RENAMED
File without changes

{ultralytics_actions-0.0.45 → ultralytics_actions-0.0.46}/ultralytics_actions.egg-info/entry_points.txt
RENAMED
File without changes

{ultralytics_actions-0.0.45 → ultralytics_actions-0.0.46}/ultralytics_actions.egg-info/requires.txt
RENAMED
File without changes

{ultralytics_actions-0.0.45 → ultralytics_actions-0.0.46}/ultralytics_actions.egg-info/top_level.txt
RENAMED
File without changes