xhs-note-extractor 0.1.dev9__tar.gz → 0.1.dev10__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {xhs_note_extractor-0.1.dev9 → xhs_note_extractor-0.1.dev10}/PKG-INFO +1 -1
- {xhs_note_extractor-0.1.dev9 → xhs_note_extractor-0.1.dev10}/examples/basic_usage.py +1 -1
- {xhs_note_extractor-0.1.dev9 → xhs_note_extractor-0.1.dev10}/pyproject.toml +1 -1
- {xhs_note_extractor-0.1.dev9 → xhs_note_extractor-0.1.dev10}/uv.lock +1 -1
- {xhs_note_extractor-0.1.dev9 → xhs_note_extractor-0.1.dev10}/xhs_note_extractor/extractor.py +2 -2
- {xhs_note_extractor-0.1.dev9 → xhs_note_extractor-0.1.dev10}/xhs_note_extractor.egg-info/PKG-INFO +1 -1
- {xhs_note_extractor-0.1.dev9 → xhs_note_extractor-0.1.dev10}/.gitignore +0 -0
- {xhs_note_extractor-0.1.dev9 → xhs_note_extractor-0.1.dev10}/.joycode/prompt.json +0 -0
- {xhs_note_extractor-0.1.dev9 → xhs_note_extractor-0.1.dev10}/LICENSE +0 -0
- {xhs_note_extractor-0.1.dev9 → xhs_note_extractor-0.1.dev10}/MANIFEST.in +0 -0
- {xhs_note_extractor-0.1.dev9 → xhs_note_extractor-0.1.dev10}/QUICK_START.md +0 -0
- {xhs_note_extractor-0.1.dev9 → xhs_note_extractor-0.1.dev10}/README.md +0 -0
- {xhs_note_extractor-0.1.dev9 → xhs_note_extractor-0.1.dev10}/examples/advanced_usage.py +0 -0
- {xhs_note_extractor-0.1.dev9 → xhs_note_extractor-0.1.dev10}/scripts/build.sh +0 -0
- {xhs_note_extractor-0.1.dev9 → xhs_note_extractor-0.1.dev10}/scripts/publish.sh +0 -0
- {xhs_note_extractor-0.1.dev9 → xhs_note_extractor-0.1.dev10}/setup.cfg +0 -0
- {xhs_note_extractor-0.1.dev9 → xhs_note_extractor-0.1.dev10}/test_cli.py +0 -0
- {xhs_note_extractor-0.1.dev9 → xhs_note_extractor-0.1.dev10}/tests/simple_test.py +0 -0
- {xhs_note_extractor-0.1.dev9 → xhs_note_extractor-0.1.dev10}/tests/test_extractor.py +0 -0
- {xhs_note_extractor-0.1.dev9 → xhs_note_extractor-0.1.dev10}/xhs_note_extractor/DEVICE_RETRY_GUIDE.md +0 -0
- {xhs_note_extractor-0.1.dev9 → xhs_note_extractor-0.1.dev10}/xhs_note_extractor/__init__.py +0 -0
- {xhs_note_extractor-0.1.dev9 → xhs_note_extractor-0.1.dev10}/xhs_note_extractor/_version.py +0 -0
- {xhs_note_extractor-0.1.dev9 → xhs_note_extractor-0.1.dev10}/xhs_note_extractor/cli.py +0 -0
- {xhs_note_extractor-0.1.dev9 → xhs_note_extractor-0.1.dev10}/xhs_note_extractor/date_desc_utils.py +0 -0
- {xhs_note_extractor-0.1.dev9 → xhs_note_extractor-0.1.dev10}/xhs_note_extractor/number_utils.py +0 -0
- {xhs_note_extractor-0.1.dev9 → xhs_note_extractor-0.1.dev10}/xhs_note_extractor/test_device_retry.py +0 -0
- {xhs_note_extractor-0.1.dev9 → xhs_note_extractor-0.1.dev10}/xhs_note_extractor/test_initialization_fix.py +0 -0
- {xhs_note_extractor-0.1.dev9 → xhs_note_extractor-0.1.dev10}/xhs_note_extractor/utils.py +0 -0
- {xhs_note_extractor-0.1.dev9 → xhs_note_extractor-0.1.dev10}/xhs_note_extractor.egg-info/SOURCES.txt +0 -0
- {xhs_note_extractor-0.1.dev9 → xhs_note_extractor-0.1.dev10}/xhs_note_extractor.egg-info/dependency_links.txt +0 -0
- {xhs_note_extractor-0.1.dev9 → xhs_note_extractor-0.1.dev10}/xhs_note_extractor.egg-info/entry_points.txt +0 -0
- {xhs_note_extractor-0.1.dev9 → xhs_note_extractor-0.1.dev10}/xhs_note_extractor.egg-info/requires.txt +0 -0
- {xhs_note_extractor-0.1.dev9 → xhs_note_extractor-0.1.dev10}/xhs_note_extractor.egg-info/top_level.txt +0 -0
|
@@ -31,7 +31,7 @@ def main():
|
|
|
31
31
|
|
|
32
32
|
# 示例1: 提取单个笔记
|
|
33
33
|
print("\n=== 示例1: 提取单个笔记 ===")
|
|
34
|
-
note_url = "
|
|
34
|
+
note_url = "https://www.xiaohongshu.com/explore/6960e673000000000a02857b?xsec_token=ABTv9s4ROv84q7M5ugj9bBctaS8LN3_BOhYsv1hlkAYoQ=&xsec_source=pc_search&source=unknown" # 替换为实际的笔记URL
|
|
35
35
|
|
|
36
36
|
try:
|
|
37
37
|
note_data = extractor.extract_note_data(note_url)
|
{xhs_note_extractor-0.1.dev9 → xhs_note_extractor-0.1.dev10}/xhs_note_extractor/extractor.py
RENAMED
|
@@ -467,13 +467,13 @@ class XHSNoteExtractor:
|
|
|
467
467
|
# 向下滚动直到看到评论区标志
|
|
468
468
|
try:
|
|
469
469
|
logger.info("📜 向下滚动以显示发布时间...")
|
|
470
|
-
max_scrolls =
|
|
470
|
+
max_scrolls = 20 # 最多滚动20次
|
|
471
471
|
comment_section_found = False
|
|
472
472
|
|
|
473
473
|
for scroll_attempt in range(max_scrolls):
|
|
474
474
|
# 检查是否已经看到评论区标志
|
|
475
475
|
xml_check = self.device.dump_hierarchy()
|
|
476
|
-
if re.search(r'共\s*\d+\s*条评论', xml_check):
|
|
476
|
+
if re.search(r'共\s*\d+\s*条评论', xml_check) and re.search(r'说点什么', xml_check):
|
|
477
477
|
logger.info(f"✓ 找到评论区标志,停止滚动 (滚动{scroll_attempt}次)")
|
|
478
478
|
comment_section_found = True
|
|
479
479
|
break
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{xhs_note_extractor-0.1.dev9 → xhs_note_extractor-0.1.dev10}/xhs_note_extractor/date_desc_utils.py
RENAMED
|
File without changes
|
{xhs_note_extractor-0.1.dev9 → xhs_note_extractor-0.1.dev10}/xhs_note_extractor/number_utils.py
RENAMED
|
File without changes
|
{xhs_note_extractor-0.1.dev9 → xhs_note_extractor-0.1.dev10}/xhs_note_extractor/test_device_retry.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{xhs_note_extractor-0.1.dev9 → xhs_note_extractor-0.1.dev10}/xhs_note_extractor.egg-info/SOURCES.txt
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|