spiderforce4ai 1.6__py3-none-any.whl → 1.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- spiderforce4ai/__init__.py +43 -14
- {spiderforce4ai-1.6.dist-info → spiderforce4ai-1.7.dist-info}/METADATA +1 -1
- spiderforce4ai-1.7.dist-info/RECORD +5 -0
- spiderforce4ai-1.6.dist-info/RECORD +0 -5
- {spiderforce4ai-1.6.dist-info → spiderforce4ai-1.7.dist-info}/WHEEL +0 -0
- {spiderforce4ai-1.6.dist-info → spiderforce4ai-1.7.dist-info}/top_level.txt +0 -0
spiderforce4ai/__init__.py
CHANGED
@@ -23,26 +23,55 @@ from multiprocessing import Pool
|
|
23
23
|
console = Console()
|
24
24
|
|
25
25
|
def extract_metadata_headers(markdown: str) -> str:
|
26
|
-
"""Extract metadata and headers from markdown content."""
|
26
|
+
"""Extract metadata and headers from markdown content with SEO formatting."""
|
27
27
|
lines = markdown.split('\n')
|
28
28
|
extracted = []
|
29
29
|
in_metadata = False
|
30
|
+
metadata = {
|
31
|
+
'title': '',
|
32
|
+
'description': '',
|
33
|
+
'canonical_url': '',
|
34
|
+
'language': ''
|
35
|
+
}
|
30
36
|
|
37
|
+
# First pass - collect metadata
|
38
|
+
for line in lines:
|
39
|
+
if line.strip().startswith('title:'):
|
40
|
+
metadata['title'] = line.split(':', 1)[1].strip()
|
41
|
+
elif line.strip().startswith('description:'):
|
42
|
+
metadata['description'] = line.split(':', 1)[1].strip()
|
43
|
+
elif line.strip().startswith('canonical_url:'):
|
44
|
+
metadata['canonical_url'] = line.split(':', 1)[1].strip()
|
45
|
+
elif line.strip().startswith('language:'):
|
46
|
+
metadata['language'] = line.split(':', 1)[1].strip()
|
47
|
+
|
48
|
+
# Add formatted metadata section
|
49
|
+
extracted.append(f"Title: {metadata['title']}")
|
50
|
+
extracted.append(f"Description: {metadata['description']}")
|
51
|
+
extracted.append(f"CanonicalUrl: {metadata['canonical_url']}")
|
52
|
+
extracted.append(f"Language: {metadata['language']}")
|
53
|
+
extracted.append("") # Empty line after metadata
|
54
|
+
|
55
|
+
# Second pass - process headers
|
31
56
|
for line in lines:
|
32
|
-
# Check for metadata block
|
33
|
-
if line.strip() == '---':
|
34
|
-
in_metadata = not in_metadata
|
35
|
-
extracted.append(line)
|
36
|
-
continue
|
37
|
-
|
38
|
-
# Include metadata
|
39
|
-
if in_metadata:
|
40
|
-
extracted.append(line)
|
41
|
-
continue
|
42
|
-
|
43
|
-
# Include headers (lines starting with #)
|
44
57
|
if line.strip().startswith('#'):
|
45
|
-
|
58
|
+
# Count the number of # symbols
|
59
|
+
level = len(line) - len(line.lstrip('#'))
|
60
|
+
text = line.lstrip('#').strip()
|
61
|
+
|
62
|
+
# Format header according to level
|
63
|
+
if level == 1:
|
64
|
+
extracted.append(f"H1: {text}")
|
65
|
+
elif level == 2:
|
66
|
+
extracted.append(f"H2: {text}")
|
67
|
+
elif level == 3:
|
68
|
+
extracted.append(f"H3: {text}")
|
69
|
+
elif level == 4:
|
70
|
+
extracted.append(f"H4: {text}")
|
71
|
+
elif level == 5:
|
72
|
+
extracted.append(f"H5: {text}")
|
73
|
+
elif level == 6:
|
74
|
+
extracted.append(f"H6: {text}")
|
46
75
|
|
47
76
|
return '\n'.join(extracted)
|
48
77
|
|
@@ -0,0 +1,5 @@
|
|
1
|
+
spiderforce4ai/__init__.py,sha256=7YpJdZfmy4z5wUFGTBsvi5VOxGGX594oVul3Q5Ngdko,34906
|
2
|
+
spiderforce4ai-1.7.dist-info/METADATA,sha256=ON-lQ4BARmNOrHwT2Xbl2oc1hoo8FyMQWxl6T0LbClA,7183
|
3
|
+
spiderforce4ai-1.7.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
|
4
|
+
spiderforce4ai-1.7.dist-info/top_level.txt,sha256=Kth7A21Js7DCp0j5XBBi-FE45SCLouZkeNZU__Yr9Yk,15
|
5
|
+
spiderforce4ai-1.7.dist-info/RECORD,,
|
@@ -1,5 +0,0 @@
|
|
1
|
-
spiderforce4ai/__init__.py,sha256=yNyBz8-HjENlAZ1NSy072Ir71T68xulTRj19Yxr1aEQ,33573
|
2
|
-
spiderforce4ai-1.6.dist-info/METADATA,sha256=pelYvJYMzC8W-P2ORQNWwP2Fyc5KshnzQ6edoYEYZQU,7183
|
3
|
-
spiderforce4ai-1.6.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
|
4
|
-
spiderforce4ai-1.6.dist-info/top_level.txt,sha256=Kth7A21Js7DCp0j5XBBi-FE45SCLouZkeNZU__Yr9Yk,15
|
5
|
-
spiderforce4ai-1.6.dist-info/RECORD,,
|
File without changes
|
File without changes
|