xhs-note-extractor 0.1.dev8__tar.gz → 0.1.dev10__tar.gz

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
Files changed (33)
  1. {xhs_note_extractor-0.1.dev8 → xhs_note_extractor-0.1.dev10}/PKG-INFO +1 -1
  2. {xhs_note_extractor-0.1.dev8 → xhs_note_extractor-0.1.dev10}/examples/basic_usage.py +1 -1
  3. {xhs_note_extractor-0.1.dev8 → xhs_note_extractor-0.1.dev10}/pyproject.toml +1 -1
  4. {xhs_note_extractor-0.1.dev8 → xhs_note_extractor-0.1.dev10}/uv.lock +1 -1
  5. {xhs_note_extractor-0.1.dev8 → xhs_note_extractor-0.1.dev10}/xhs_note_extractor/extractor.py +19 -3
  6. {xhs_note_extractor-0.1.dev8 → xhs_note_extractor-0.1.dev10}/xhs_note_extractor.egg-info/PKG-INFO +1 -1
  7. {xhs_note_extractor-0.1.dev8 → xhs_note_extractor-0.1.dev10}/.gitignore +0 -0
  8. {xhs_note_extractor-0.1.dev8 → xhs_note_extractor-0.1.dev10}/.joycode/prompt.json +0 -0
  9. {xhs_note_extractor-0.1.dev8 → xhs_note_extractor-0.1.dev10}/LICENSE +0 -0
  10. {xhs_note_extractor-0.1.dev8 → xhs_note_extractor-0.1.dev10}/MANIFEST.in +0 -0
  11. {xhs_note_extractor-0.1.dev8 → xhs_note_extractor-0.1.dev10}/QUICK_START.md +0 -0
  12. {xhs_note_extractor-0.1.dev8 → xhs_note_extractor-0.1.dev10}/README.md +0 -0
  13. {xhs_note_extractor-0.1.dev8 → xhs_note_extractor-0.1.dev10}/examples/advanced_usage.py +0 -0
  14. {xhs_note_extractor-0.1.dev8 → xhs_note_extractor-0.1.dev10}/scripts/build.sh +0 -0
  15. {xhs_note_extractor-0.1.dev8 → xhs_note_extractor-0.1.dev10}/scripts/publish.sh +0 -0
  16. {xhs_note_extractor-0.1.dev8 → xhs_note_extractor-0.1.dev10}/setup.cfg +0 -0
  17. {xhs_note_extractor-0.1.dev8 → xhs_note_extractor-0.1.dev10}/test_cli.py +0 -0
  18. {xhs_note_extractor-0.1.dev8 → xhs_note_extractor-0.1.dev10}/tests/simple_test.py +0 -0
  19. {xhs_note_extractor-0.1.dev8 → xhs_note_extractor-0.1.dev10}/tests/test_extractor.py +0 -0
  20. {xhs_note_extractor-0.1.dev8 → xhs_note_extractor-0.1.dev10}/xhs_note_extractor/DEVICE_RETRY_GUIDE.md +0 -0
  21. {xhs_note_extractor-0.1.dev8 → xhs_note_extractor-0.1.dev10}/xhs_note_extractor/__init__.py +0 -0
  22. {xhs_note_extractor-0.1.dev8 → xhs_note_extractor-0.1.dev10}/xhs_note_extractor/_version.py +0 -0
  23. {xhs_note_extractor-0.1.dev8 → xhs_note_extractor-0.1.dev10}/xhs_note_extractor/cli.py +0 -0
  24. {xhs_note_extractor-0.1.dev8 → xhs_note_extractor-0.1.dev10}/xhs_note_extractor/date_desc_utils.py +0 -0
  25. {xhs_note_extractor-0.1.dev8 → xhs_note_extractor-0.1.dev10}/xhs_note_extractor/number_utils.py +0 -0
  26. {xhs_note_extractor-0.1.dev8 → xhs_note_extractor-0.1.dev10}/xhs_note_extractor/test_device_retry.py +0 -0
  27. {xhs_note_extractor-0.1.dev8 → xhs_note_extractor-0.1.dev10}/xhs_note_extractor/test_initialization_fix.py +0 -0
  28. {xhs_note_extractor-0.1.dev8 → xhs_note_extractor-0.1.dev10}/xhs_note_extractor/utils.py +0 -0
  29. {xhs_note_extractor-0.1.dev8 → xhs_note_extractor-0.1.dev10}/xhs_note_extractor.egg-info/SOURCES.txt +0 -0
  30. {xhs_note_extractor-0.1.dev8 → xhs_note_extractor-0.1.dev10}/xhs_note_extractor.egg-info/dependency_links.txt +0 -0
  31. {xhs_note_extractor-0.1.dev8 → xhs_note_extractor-0.1.dev10}/xhs_note_extractor.egg-info/entry_points.txt +0 -0
  32. {xhs_note_extractor-0.1.dev8 → xhs_note_extractor-0.1.dev10}/xhs_note_extractor.egg-info/requires.txt +0 -0
  33. {xhs_note_extractor-0.1.dev8 → xhs_note_extractor-0.1.dev10}/xhs_note_extractor.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: xhs-note-extractor
3
- Version: 0.1.dev8
3
+ Version: 0.1.dev10
4
4
  Summary: A Python package for extracting Xiaohongshu (Little Red Book) note data from URLs
5
5
  Author-email: JoyCode Agent <agent@joycode.com>
6
6
  License: MIT
@@ -31,7 +31,7 @@ def main():
31
31
 
32
32
  # 示例1: 提取单个笔记
33
33
  print("\n=== 示例1: 提取单个笔记 ===")
34
- note_url = "https://www.xiaohongshu.com/explore/6965d2be000000000d009811?xsec_token=ABSsTDqkPo7quKqkrecMaoJuId5a3mfvDLB3OpahVNmlU=&xsec_source=pc_search&source=unknown " # 替换为实际的笔记URL
34
+ note_url = "https://www.xiaohongshu.com/explore/6960e673000000000a02857b?xsec_token=ABTv9s4ROv84q7M5ugj9bBctaS8LN3_BOhYsv1hlkAYoQ=&xsec_source=pc_search&source=unknown" # 替换为实际的笔记URL
35
35
 
36
36
  try:
37
37
  note_data = extractor.extract_note_data(note_url)
@@ -31,7 +31,7 @@ dependencies = [
31
31
  "uiautomator2>=2.16.17",
32
32
  "requests>=2.25.0",
33
33
  ]
34
- version = "0.1.dev8"
34
+ version = "0.1.dev10"
35
35
 
36
36
  [project.optional-dependencies]
37
37
  dev = [
@@ -816,7 +816,7 @@ wheels = [
816
816
 
817
817
  [[package]]
818
818
  name = "xhs-note-extractor"
819
- version = "0.1.dev8"
819
+ version = "0.1.dev9"
820
820
  source = { editable = "." }
821
821
  dependencies = [
822
822
  { name = "requests" },
@@ -464,11 +464,27 @@ class XHSNoteExtractor:
464
464
  if not detail_loaded:
465
465
  logger.warning("⚠ 警告:详情页特征未发现,提取可能不完整")
466
466
 
467
- # 向下滚动一点,确保发布时间可见
467
+ # 向下滚动直到看到评论区标志
468
468
  try:
469
469
  logger.info("📜 向下滚动以显示发布时间...")
470
- self.device.swipe(540, 1500, 540, 1000, 0.3) # 从下往上滑动
471
- time.sleep(0.5) # 等待滚动完成
470
+ max_scrolls = 20 # 最多滚动5次
471
+ comment_section_found = False
472
+
473
+ for scroll_attempt in range(max_scrolls):
474
+ # 检查是否已经看到评论区标志
475
+ xml_check = self.device.dump_hierarchy()
476
+ if re.search(r'共\s*\d+\s*条评论', xml_check) and re.search(r'说点什么', xml_check):
477
+ logger.info(f"✓ 找到评论区标志,停止滚动 (滚动{scroll_attempt}次)")
478
+ comment_section_found = True
479
+ break
480
+
481
+ # 继续滚动
482
+ self.device.swipe(540, 1500, 540, 1000, 0.3)
483
+ time.sleep(0.3)
484
+
485
+ if not comment_section_found:
486
+ logger.warning(f"⚠ 滚动{max_scrolls}次后仍未找到评论区标志")
487
+
472
488
  except Exception as e:
473
489
  logger.warning(f"滚动失败: {e}")
474
490
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: xhs-note-extractor
3
- Version: 0.1.dev8
3
+ Version: 0.1.dev10
4
4
  Summary: A Python package for extracting Xiaohongshu (Little Red Book) note data from URLs
5
5
  Author-email: JoyCode Agent <agent@joycode.com>
6
6
  License: MIT