spiderforce4ai-1.7-py3-none-any.whl → spiderforce4ai-1.8-py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
@@ -35,21 +35,38 @@ def extract_metadata_headers(markdown: str) -> str:
35
35
  }
36
36
 
37
37
  # First pass - collect metadata
38
- for line in lines:
39
- if line.strip().startswith('title:'):
40
- metadata['title'] = line.split(':', 1)[1].strip()
41
- elif line.strip().startswith('description:'):
42
- metadata['description'] = line.split(':', 1)[1].strip()
43
- elif line.strip().startswith('canonical_url:'):
44
- metadata['canonical_url'] = line.split(':', 1)[1].strip()
45
- elif line.strip().startswith('language:'):
46
- metadata['language'] = line.split(':', 1)[1].strip()
38
+ for i, line in enumerate(lines):
39
+ # Check for metadata block boundaries
40
+ if line.strip() == '---':
41
+ if not in_metadata:
42
+ in_metadata = True
43
+ continue
44
+ else:
45
+ in_metadata = False
46
+ break
47
+
48
+ # Extract metadata within the block
49
+ if in_metadata:
50
+ if ':' in line:
51
+ key, value = line.split(':', 1)
52
+ key = key.strip().lower()
53
+ value = value.strip()
54
+
55
+ if key == 'title':
56
+ metadata['title'] = value
57
+ elif key == 'description':
58
+ metadata['description'] = value
59
+ elif key == 'canonical_url':
60
+ metadata['canonical_url'] = value
61
+ elif key == 'language':
62
+ metadata['language'] = value
47
63
 
48
- # Add formatted metadata section
64
+ # Add formatted metadata section with URL first
65
+ extracted.append(f"URL: {metadata.get('url', '')}")
49
66
  extracted.append(f"Title: {metadata['title']}")
50
67
  extracted.append(f"Description: {metadata['description']}")
51
68
  extracted.append(f"CanonicalUrl: {metadata['canonical_url']}")
52
- extracted.append(f"Language: {metadata['language']}")
69
+ extracted.append(f"Language: {metadata['language'] or 'en'}") # Default to 'en' if not specified
53
70
  extracted.append("") # Empty line after metadata
54
71
 
55
72
  # Second pass - process headers
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: spiderforce4ai
3
- Version: 1.7
3
+ Version: 1.8
4
4
  Summary: Python wrapper for SpiderForce4AI HTML-to-Markdown conversion service
5
5
  Home-page: https://petertam.pro
6
6
  Author: Piotr Tamulewicz
@@ -0,0 +1,5 @@
1
+ spiderforce4ai/__init__.py,sha256=GaW2zVESi1pi13pD0Dky4g0Yuj9hEj7_4eP_eyoBnWM,35425
2
+ spiderforce4ai-1.8.dist-info/METADATA,sha256=T1K4wWbagvh0ZW_vsYNAAhSAqRH7bLDOF6lr7Yy1pfg,7183
3
+ spiderforce4ai-1.8.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
4
+ spiderforce4ai-1.8.dist-info/top_level.txt,sha256=Kth7A21Js7DCp0j5XBBi-FE45SCLouZkeNZU__Yr9Yk,15
5
+ spiderforce4ai-1.8.dist-info/RECORD,,
@@ -1,5 +0,0 @@
1
- spiderforce4ai/__init__.py,sha256=7YpJdZfmy4z5wUFGTBsvi5VOxGGX594oVul3Q5Ngdko,34906
2
- spiderforce4ai-1.7.dist-info/METADATA,sha256=ON-lQ4BARmNOrHwT2Xbl2oc1hoo8FyMQWxl6T0LbClA,7183
3
- spiderforce4ai-1.7.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
4
- spiderforce4ai-1.7.dist-info/top_level.txt,sha256=Kth7A21Js7DCp0j5XBBi-FE45SCLouZkeNZU__Yr9Yk,15
5
- spiderforce4ai-1.7.dist-info/RECORD,,