xhs-note-extractor 0.1.dev8__tar.gz → 0.1.dev10__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {xhs_note_extractor-0.1.dev8 → xhs_note_extractor-0.1.dev10}/PKG-INFO +1 -1
- {xhs_note_extractor-0.1.dev8 → xhs_note_extractor-0.1.dev10}/examples/basic_usage.py +1 -1
- {xhs_note_extractor-0.1.dev8 → xhs_note_extractor-0.1.dev10}/pyproject.toml +1 -1
- {xhs_note_extractor-0.1.dev8 → xhs_note_extractor-0.1.dev10}/uv.lock +1 -1
- {xhs_note_extractor-0.1.dev8 → xhs_note_extractor-0.1.dev10}/xhs_note_extractor/extractor.py +19 -3
- {xhs_note_extractor-0.1.dev8 → xhs_note_extractor-0.1.dev10}/xhs_note_extractor.egg-info/PKG-INFO +1 -1
- {xhs_note_extractor-0.1.dev8 → xhs_note_extractor-0.1.dev10}/.gitignore +0 -0
- {xhs_note_extractor-0.1.dev8 → xhs_note_extractor-0.1.dev10}/.joycode/prompt.json +0 -0
- {xhs_note_extractor-0.1.dev8 → xhs_note_extractor-0.1.dev10}/LICENSE +0 -0
- {xhs_note_extractor-0.1.dev8 → xhs_note_extractor-0.1.dev10}/MANIFEST.in +0 -0
- {xhs_note_extractor-0.1.dev8 → xhs_note_extractor-0.1.dev10}/QUICK_START.md +0 -0
- {xhs_note_extractor-0.1.dev8 → xhs_note_extractor-0.1.dev10}/README.md +0 -0
- {xhs_note_extractor-0.1.dev8 → xhs_note_extractor-0.1.dev10}/examples/advanced_usage.py +0 -0
- {xhs_note_extractor-0.1.dev8 → xhs_note_extractor-0.1.dev10}/scripts/build.sh +0 -0
- {xhs_note_extractor-0.1.dev8 → xhs_note_extractor-0.1.dev10}/scripts/publish.sh +0 -0
- {xhs_note_extractor-0.1.dev8 → xhs_note_extractor-0.1.dev10}/setup.cfg +0 -0
- {xhs_note_extractor-0.1.dev8 → xhs_note_extractor-0.1.dev10}/test_cli.py +0 -0
- {xhs_note_extractor-0.1.dev8 → xhs_note_extractor-0.1.dev10}/tests/simple_test.py +0 -0
- {xhs_note_extractor-0.1.dev8 → xhs_note_extractor-0.1.dev10}/tests/test_extractor.py +0 -0
- {xhs_note_extractor-0.1.dev8 → xhs_note_extractor-0.1.dev10}/xhs_note_extractor/DEVICE_RETRY_GUIDE.md +0 -0
- {xhs_note_extractor-0.1.dev8 → xhs_note_extractor-0.1.dev10}/xhs_note_extractor/__init__.py +0 -0
- {xhs_note_extractor-0.1.dev8 → xhs_note_extractor-0.1.dev10}/xhs_note_extractor/_version.py +0 -0
- {xhs_note_extractor-0.1.dev8 → xhs_note_extractor-0.1.dev10}/xhs_note_extractor/cli.py +0 -0
- {xhs_note_extractor-0.1.dev8 → xhs_note_extractor-0.1.dev10}/xhs_note_extractor/date_desc_utils.py +0 -0
- {xhs_note_extractor-0.1.dev8 → xhs_note_extractor-0.1.dev10}/xhs_note_extractor/number_utils.py +0 -0
- {xhs_note_extractor-0.1.dev8 → xhs_note_extractor-0.1.dev10}/xhs_note_extractor/test_device_retry.py +0 -0
- {xhs_note_extractor-0.1.dev8 → xhs_note_extractor-0.1.dev10}/xhs_note_extractor/test_initialization_fix.py +0 -0
- {xhs_note_extractor-0.1.dev8 → xhs_note_extractor-0.1.dev10}/xhs_note_extractor/utils.py +0 -0
- {xhs_note_extractor-0.1.dev8 → xhs_note_extractor-0.1.dev10}/xhs_note_extractor.egg-info/SOURCES.txt +0 -0
- {xhs_note_extractor-0.1.dev8 → xhs_note_extractor-0.1.dev10}/xhs_note_extractor.egg-info/dependency_links.txt +0 -0
- {xhs_note_extractor-0.1.dev8 → xhs_note_extractor-0.1.dev10}/xhs_note_extractor.egg-info/entry_points.txt +0 -0
- {xhs_note_extractor-0.1.dev8 → xhs_note_extractor-0.1.dev10}/xhs_note_extractor.egg-info/requires.txt +0 -0
- {xhs_note_extractor-0.1.dev8 → xhs_note_extractor-0.1.dev10}/xhs_note_extractor.egg-info/top_level.txt +0 -0
|
@@ -31,7 +31,7 @@ def main():
|
|
|
31
31
|
|
|
32
32
|
# 示例1: 提取单个笔记
|
|
33
33
|
print("\n=== 示例1: 提取单个笔记 ===")
|
|
34
|
-
note_url = "https://www.xiaohongshu.com/explore/
|
|
34
|
+
note_url = "https://www.xiaohongshu.com/explore/6960e673000000000a02857b?xsec_token=ABTv9s4ROv84q7M5ugj9bBctaS8LN3_BOhYsv1hlkAYoQ=&xsec_source=pc_search&source=unknown" # 替换为实际的笔记URL
|
|
35
35
|
|
|
36
36
|
try:
|
|
37
37
|
note_data = extractor.extract_note_data(note_url)
|
{xhs_note_extractor-0.1.dev8 → xhs_note_extractor-0.1.dev10}/xhs_note_extractor/extractor.py
RENAMED
|
@@ -464,11 +464,27 @@ class XHSNoteExtractor:
|
|
|
464
464
|
if not detail_loaded:
|
|
465
465
|
logger.warning("⚠ 警告:详情页特征未发现,提取可能不完整")
|
|
466
466
|
|
|
467
|
-
#
|
|
467
|
+
# 向下滚动直到看到评论区标志
|
|
468
468
|
try:
|
|
469
469
|
logger.info("📜 向下滚动以显示发布时间...")
|
|
470
|
-
|
|
471
|
-
|
|
470
|
+
max_scrolls = 20 # 最多滚动20次
|
|
471
|
+
comment_section_found = False
|
|
472
|
+
|
|
473
|
+
for scroll_attempt in range(max_scrolls):
|
|
474
|
+
# 检查是否已经看到评论区标志
|
|
475
|
+
xml_check = self.device.dump_hierarchy()
|
|
476
|
+
if re.search(r'共\s*\d+\s*条评论', xml_check) and re.search(r'说点什么', xml_check):
|
|
477
|
+
logger.info(f"✓ 找到评论区标志,停止滚动 (滚动{scroll_attempt}次)")
|
|
478
|
+
comment_section_found = True
|
|
479
|
+
break
|
|
480
|
+
|
|
481
|
+
# 继续滚动
|
|
482
|
+
self.device.swipe(540, 1500, 540, 1000, 0.3)
|
|
483
|
+
time.sleep(0.3)
|
|
484
|
+
|
|
485
|
+
if not comment_section_found:
|
|
486
|
+
logger.warning(f"⚠ 滚动{max_scrolls}次后仍未找到评论区标志")
|
|
487
|
+
|
|
472
488
|
except Exception as e:
|
|
473
489
|
logger.warning(f"滚动失败: {e}")
|
|
474
490
|
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{xhs_note_extractor-0.1.dev8 → xhs_note_extractor-0.1.dev10}/xhs_note_extractor/date_desc_utils.py
RENAMED
|
File without changes
|
{xhs_note_extractor-0.1.dev8 → xhs_note_extractor-0.1.dev10}/xhs_note_extractor/number_utils.py
RENAMED
|
File without changes
|
{xhs_note_extractor-0.1.dev8 → xhs_note_extractor-0.1.dev10}/xhs_note_extractor/test_device_retry.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{xhs_note_extractor-0.1.dev8 → xhs_note_extractor-0.1.dev10}/xhs_note_extractor.egg-info/SOURCES.txt
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|