spiderforce4ai-1.6-py3-none-any.whl → spiderforce4ai-1.7-py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
@@ -23,26 +23,55 @@ from multiprocessing import Pool
23
23
  console = Console()
24
24
 
25
25
  def extract_metadata_headers(markdown: str) -> str:
26
- """Extract metadata and headers from markdown content."""
26
+ """Extract metadata and headers from markdown content with SEO formatting."""
27
27
  lines = markdown.split('\n')
28
28
  extracted = []
29
29
  in_metadata = False
30
+ metadata = {
31
+ 'title': '',
32
+ 'description': '',
33
+ 'canonical_url': '',
34
+ 'language': ''
35
+ }
30
36
 
37
+ # First pass - collect metadata
38
+ for line in lines:
39
+ if line.strip().startswith('title:'):
40
+ metadata['title'] = line.split(':', 1)[1].strip()
41
+ elif line.strip().startswith('description:'):
42
+ metadata['description'] = line.split(':', 1)[1].strip()
43
+ elif line.strip().startswith('canonical_url:'):
44
+ metadata['canonical_url'] = line.split(':', 1)[1].strip()
45
+ elif line.strip().startswith('language:'):
46
+ metadata['language'] = line.split(':', 1)[1].strip()
47
+
48
+ # Add formatted metadata section
49
+ extracted.append(f"Title: {metadata['title']}")
50
+ extracted.append(f"Description: {metadata['description']}")
51
+ extracted.append(f"CanonicalUrl: {metadata['canonical_url']}")
52
+ extracted.append(f"Language: {metadata['language']}")
53
+ extracted.append("") # Empty line after metadata
54
+
55
+ # Second pass - process headers
31
56
  for line in lines:
32
- # Check for metadata block
33
- if line.strip() == '---':
34
- in_metadata = not in_metadata
35
- extracted.append(line)
36
- continue
37
-
38
- # Include metadata
39
- if in_metadata:
40
- extracted.append(line)
41
- continue
42
-
43
- # Include headers (lines starting with #)
44
57
  if line.strip().startswith('#'):
45
- extracted.append(line)
58
+ # Count the number of # symbols
59
+ level = len(line) - len(line.lstrip('#'))
60
+ text = line.lstrip('#').strip()
61
+
62
+ # Format header according to level
63
+ if level == 1:
64
+ extracted.append(f"H1: {text}")
65
+ elif level == 2:
66
+ extracted.append(f"H2: {text}")
67
+ elif level == 3:
68
+ extracted.append(f"H3: {text}")
69
+ elif level == 4:
70
+ extracted.append(f"H4: {text}")
71
+ elif level == 5:
72
+ extracted.append(f"H5: {text}")
73
+ elif level == 6:
74
+ extracted.append(f"H6: {text}")
46
75
 
47
76
  return '\n'.join(extracted)
48
77
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: spiderforce4ai
3
- Version: 1.6
3
+ Version: 1.7
4
4
  Summary: Python wrapper for SpiderForce4AI HTML-to-Markdown conversion service
5
5
  Home-page: https://petertam.pro
6
6
  Author: Piotr Tamulewicz
@@ -0,0 +1,5 @@
1
+ spiderforce4ai/__init__.py,sha256=7YpJdZfmy4z5wUFGTBsvi5VOxGGX594oVul3Q5Ngdko,34906
2
+ spiderforce4ai-1.7.dist-info/METADATA,sha256=ON-lQ4BARmNOrHwT2Xbl2oc1hoo8FyMQWxl6T0LbClA,7183
3
+ spiderforce4ai-1.7.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
4
+ spiderforce4ai-1.7.dist-info/top_level.txt,sha256=Kth7A21Js7DCp0j5XBBi-FE45SCLouZkeNZU__Yr9Yk,15
5
+ spiderforce4ai-1.7.dist-info/RECORD,,
@@ -1,5 +0,0 @@
1
- spiderforce4ai/__init__.py,sha256=yNyBz8-HjENlAZ1NSy072Ir71T68xulTRj19Yxr1aEQ,33573
2
- spiderforce4ai-1.6.dist-info/METADATA,sha256=pelYvJYMzC8W-P2ORQNWwP2Fyc5KshnzQ6edoYEYZQU,7183
3
- spiderforce4ai-1.6.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
4
- spiderforce4ai-1.6.dist-info/top_level.txt,sha256=Kth7A21Js7DCp0j5XBBi-FE45SCLouZkeNZU__Yr9Yk,15
5
- spiderforce4ai-1.6.dist-info/RECORD,,