spiderforce4ai 1.7__py3-none-any.whl → 1.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -35,21 +35,38 @@ def extract_metadata_headers(markdown: str) -> str:
35
35
  }
36
36
 
37
37
  # First pass - collect metadata
38
- for line in lines:
39
- if line.strip().startswith('title:'):
40
- metadata['title'] = line.split(':', 1)[1].strip()
41
- elif line.strip().startswith('description:'):
42
- metadata['description'] = line.split(':', 1)[1].strip()
43
- elif line.strip().startswith('canonical_url:'):
44
- metadata['canonical_url'] = line.split(':', 1)[1].strip()
45
- elif line.strip().startswith('language:'):
46
- metadata['language'] = line.split(':', 1)[1].strip()
38
+ for i, line in enumerate(lines):
39
+ # Check for metadata block boundaries
40
+ if line.strip() == '---':
41
+ if not in_metadata:
42
+ in_metadata = True
43
+ continue
44
+ else:
45
+ in_metadata = False
46
+ break
47
+
48
+ # Extract metadata within the block
49
+ if in_metadata:
50
+ if ':' in line:
51
+ key, value = line.split(':', 1)
52
+ key = key.strip().lower()
53
+ value = value.strip()
54
+
55
+ if key == 'title':
56
+ metadata['title'] = value
57
+ elif key == 'description':
58
+ metadata['description'] = value
59
+ elif key == 'canonical_url':
60
+ metadata['canonical_url'] = value
61
+ elif key == 'language':
62
+ metadata['language'] = value
47
63
 
48
- # Add formatted metadata section
64
+ # Add formatted metadata section with URL first
65
+ extracted.append(f"URL: {metadata.get('url', '')}")
49
66
  extracted.append(f"Title: {metadata['title']}")
50
67
  extracted.append(f"Description: {metadata['description']}")
51
68
  extracted.append(f"CanonicalUrl: {metadata['canonical_url']}")
52
- extracted.append(f"Language: {metadata['language']}")
69
+ extracted.append(f"Language: {metadata['language'] or 'en'}") # Default to 'en' if not specified
53
70
  extracted.append("") # Empty line after metadata
54
71
 
55
72
  # Second pass - process headers
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: spiderforce4ai
3
- Version: 1.7
3
+ Version: 1.8
4
4
  Summary: Python wrapper for SpiderForce4AI HTML-to-Markdown conversion service
5
5
  Home-page: https://petertam.pro
6
6
  Author: Piotr Tamulewicz
@@ -0,0 +1,5 @@
1
+ spiderforce4ai/__init__.py,sha256=GaW2zVESi1pi13pD0Dky4g0Yuj9hEj7_4eP_eyoBnWM,35425
2
+ spiderforce4ai-1.8.dist-info/METADATA,sha256=T1K4wWbagvh0ZW_vsYNAAhSAqRH7bLDOF6lr7Yy1pfg,7183
3
+ spiderforce4ai-1.8.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
4
+ spiderforce4ai-1.8.dist-info/top_level.txt,sha256=Kth7A21Js7DCp0j5XBBi-FE45SCLouZkeNZU__Yr9Yk,15
5
+ spiderforce4ai-1.8.dist-info/RECORD,,
@@ -1,5 +0,0 @@
1
- spiderforce4ai/__init__.py,sha256=7YpJdZfmy4z5wUFGTBsvi5VOxGGX594oVul3Q5Ngdko,34906
2
- spiderforce4ai-1.7.dist-info/METADATA,sha256=ON-lQ4BARmNOrHwT2Xbl2oc1hoo8FyMQWxl6T0LbClA,7183
3
- spiderforce4ai-1.7.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
4
- spiderforce4ai-1.7.dist-info/top_level.txt,sha256=Kth7A21Js7DCp0j5XBBi-FE45SCLouZkeNZU__Yr9Yk,15
5
- spiderforce4ai-1.7.dist-info/RECORD,,