spiderforce4ai 1.6__py3-none-any.whl → 1.7__py3-none-any.whl
Sign up to get free protection for your applications and to get access to all the features.
- spiderforce4ai/__init__.py +43 -14
- {spiderforce4ai-1.6.dist-info → spiderforce4ai-1.7.dist-info}/METADATA +1 -1
- spiderforce4ai-1.7.dist-info/RECORD +5 -0
- spiderforce4ai-1.6.dist-info/RECORD +0 -5
- {spiderforce4ai-1.6.dist-info → spiderforce4ai-1.7.dist-info}/WHEEL +0 -0
- {spiderforce4ai-1.6.dist-info → spiderforce4ai-1.7.dist-info}/top_level.txt +0 -0
spiderforce4ai/__init__.py
CHANGED
@@ -23,26 +23,55 @@ from multiprocessing import Pool
|
|
23
23
|
console = Console()
|
24
24
|
|
25
25
|
def extract_metadata_headers(markdown: str) -> str:
|
26
|
-
"""Extract metadata and headers from markdown content."""
|
26
|
+
"""Extract metadata and headers from markdown content with SEO formatting."""
|
27
27
|
lines = markdown.split('\n')
|
28
28
|
extracted = []
|
29
29
|
in_metadata = False
|
30
|
+
metadata = {
|
31
|
+
'title': '',
|
32
|
+
'description': '',
|
33
|
+
'canonical_url': '',
|
34
|
+
'language': ''
|
35
|
+
}
|
30
36
|
|
37
|
+
# First pass - collect metadata
|
38
|
+
for line in lines:
|
39
|
+
if line.strip().startswith('title:'):
|
40
|
+
metadata['title'] = line.split(':', 1)[1].strip()
|
41
|
+
elif line.strip().startswith('description:'):
|
42
|
+
metadata['description'] = line.split(':', 1)[1].strip()
|
43
|
+
elif line.strip().startswith('canonical_url:'):
|
44
|
+
metadata['canonical_url'] = line.split(':', 1)[1].strip()
|
45
|
+
elif line.strip().startswith('language:'):
|
46
|
+
metadata['language'] = line.split(':', 1)[1].strip()
|
47
|
+
|
48
|
+
# Add formatted metadata section
|
49
|
+
extracted.append(f"Title: {metadata['title']}")
|
50
|
+
extracted.append(f"Description: {metadata['description']}")
|
51
|
+
extracted.append(f"CanonicalUrl: {metadata['canonical_url']}")
|
52
|
+
extracted.append(f"Language: {metadata['language']}")
|
53
|
+
extracted.append("") # Empty line after metadata
|
54
|
+
|
55
|
+
# Second pass - process headers
|
31
56
|
for line in lines:
|
32
|
-
# Check for metadata block
|
33
|
-
if line.strip() == '---':
|
34
|
-
in_metadata = not in_metadata
|
35
|
-
extracted.append(line)
|
36
|
-
continue
|
37
|
-
|
38
|
-
# Include metadata
|
39
|
-
if in_metadata:
|
40
|
-
extracted.append(line)
|
41
|
-
continue
|
42
|
-
|
43
|
-
# Include headers (lines starting with #)
|
44
57
|
if line.strip().startswith('#'):
|
45
|
-
|
58
|
+
# Count the number of # symbols
|
59
|
+
level = len(line) - len(line.lstrip('#'))
|
60
|
+
text = line.lstrip('#').strip()
|
61
|
+
|
62
|
+
# Format header according to level
|
63
|
+
if level == 1:
|
64
|
+
extracted.append(f"H1: {text}")
|
65
|
+
elif level == 2:
|
66
|
+
extracted.append(f"H2: {text}")
|
67
|
+
elif level == 3:
|
68
|
+
extracted.append(f"H3: {text}")
|
69
|
+
elif level == 4:
|
70
|
+
extracted.append(f"H4: {text}")
|
71
|
+
elif level == 5:
|
72
|
+
extracted.append(f"H5: {text}")
|
73
|
+
elif level == 6:
|
74
|
+
extracted.append(f"H6: {text}")
|
46
75
|
|
47
76
|
return '\n'.join(extracted)
|
48
77
|
|
@@ -0,0 +1,5 @@
|
|
1
|
+
spiderforce4ai/__init__.py,sha256=7YpJdZfmy4z5wUFGTBsvi5VOxGGX594oVul3Q5Ngdko,34906
|
2
|
+
spiderforce4ai-1.7.dist-info/METADATA,sha256=ON-lQ4BARmNOrHwT2Xbl2oc1hoo8FyMQWxl6T0LbClA,7183
|
3
|
+
spiderforce4ai-1.7.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
|
4
|
+
spiderforce4ai-1.7.dist-info/top_level.txt,sha256=Kth7A21Js7DCp0j5XBBi-FE45SCLouZkeNZU__Yr9Yk,15
|
5
|
+
spiderforce4ai-1.7.dist-info/RECORD,,
|
@@ -1,5 +0,0 @@
|
|
1
|
-
spiderforce4ai/__init__.py,sha256=yNyBz8-HjENlAZ1NSy072Ir71T68xulTRj19Yxr1aEQ,33573
|
2
|
-
spiderforce4ai-1.6.dist-info/METADATA,sha256=pelYvJYMzC8W-P2ORQNWwP2Fyc5KshnzQ6edoYEYZQU,7183
|
3
|
-
spiderforce4ai-1.6.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
|
4
|
-
spiderforce4ai-1.6.dist-info/top_level.txt,sha256=Kth7A21Js7DCp0j5XBBi-FE45SCLouZkeNZU__Yr9Yk,15
|
5
|
-
spiderforce4ai-1.6.dist-info/RECORD,,
|
File without changes
|
File without changes
|