xhs-note-extractor 0.1.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- xhs_note_extractor-0.1.2/.gitignore +151 -0
- xhs_note_extractor-0.1.2/.joycode/prompt.json +1 -0
- xhs_note_extractor-0.1.2/LICENSE +21 -0
- xhs_note_extractor-0.1.2/MANIFEST.in +6 -0
- xhs_note_extractor-0.1.2/PKG-INFO +234 -0
- xhs_note_extractor-0.1.2/QUICK_START.md +98 -0
- xhs_note_extractor-0.1.2/README.md +198 -0
- xhs_note_extractor-0.1.2/examples/advanced_usage.py +164 -0
- xhs_note_extractor-0.1.2/examples/basic_usage.py +117 -0
- xhs_note_extractor-0.1.2/pyproject.toml +89 -0
- xhs_note_extractor-0.1.2/scripts/build.sh +28 -0
- xhs_note_extractor-0.1.2/scripts/publish.sh +71 -0
- xhs_note_extractor-0.1.2/setup.cfg +4 -0
- xhs_note_extractor-0.1.2/test_cli.py +64 -0
- xhs_note_extractor-0.1.2/tests/simple_test.py +78 -0
- xhs_note_extractor-0.1.2/tests/test_extractor.py +184 -0
- xhs_note_extractor-0.1.2/uv.lock +859 -0
- xhs_note_extractor-0.1.2/xhs_note_extractor/DEVICE_RETRY_GUIDE.md +98 -0
- xhs_note_extractor-0.1.2/xhs_note_extractor/__init__.py +50 -0
- xhs_note_extractor-0.1.2/xhs_note_extractor/_version.py +34 -0
- xhs_note_extractor-0.1.2/xhs_note_extractor/cli.py +98 -0
- xhs_note_extractor-0.1.2/xhs_note_extractor/date_desc_utils.py +80 -0
- xhs_note_extractor-0.1.2/xhs_note_extractor/extractor.py +793 -0
- xhs_note_extractor-0.1.2/xhs_note_extractor/number_utils.py +44 -0
- xhs_note_extractor-0.1.2/xhs_note_extractor/test_device_retry.py +100 -0
- xhs_note_extractor-0.1.2/xhs_note_extractor/test_initialization_fix.py +46 -0
- xhs_note_extractor-0.1.2/xhs_note_extractor/utils.py +493 -0
- xhs_note_extractor-0.1.2/xhs_note_extractor.egg-info/PKG-INFO +234 -0
- xhs_note_extractor-0.1.2/xhs_note_extractor.egg-info/SOURCES.txt +31 -0
- xhs_note_extractor-0.1.2/xhs_note_extractor.egg-info/dependency_links.txt +1 -0
- xhs_note_extractor-0.1.2/xhs_note_extractor.egg-info/entry_points.txt +2 -0
- xhs_note_extractor-0.1.2/xhs_note_extractor.egg-info/requires.txt +8 -0
- xhs_note_extractor-0.1.2/xhs_note_extractor.egg-info/top_level.txt +1 -0
|
@@ -0,0 +1,151 @@
|
|
|
1
|
+
# Byte-compiled / optimized / DLL files
|
|
2
|
+
__pycache__/
|
|
3
|
+
*.py[cod]
|
|
4
|
+
*$py.class
|
|
5
|
+
|
|
6
|
+
# C extensions
|
|
7
|
+
*.so
|
|
8
|
+
|
|
9
|
+
# Distribution / packaging
|
|
10
|
+
.Python
|
|
11
|
+
build/
|
|
12
|
+
develop-eggs/
|
|
13
|
+
dist/
|
|
14
|
+
downloads/
|
|
15
|
+
eggs/
|
|
16
|
+
.eggs/
|
|
17
|
+
lib/
|
|
18
|
+
lib64/
|
|
19
|
+
parts/
|
|
20
|
+
sdist/
|
|
21
|
+
var/
|
|
22
|
+
wheels/
|
|
23
|
+
*.egg-info/
|
|
24
|
+
.installed.cfg
|
|
25
|
+
*.egg
|
|
26
|
+
MANIFEST
|
|
27
|
+
|
|
28
|
+
# PyInstaller
|
|
29
|
+
# Usually these files are written by a python script from a template
|
|
30
|
+
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
|
31
|
+
*.manifest
|
|
32
|
+
*.spec
|
|
33
|
+
|
|
34
|
+
# Installer logs
|
|
35
|
+
pip-log.txt
|
|
36
|
+
pip-delete-this-directory.txt
|
|
37
|
+
|
|
38
|
+
# Unit test / coverage reports
|
|
39
|
+
htmlcov/
|
|
40
|
+
.tox/
|
|
41
|
+
.nox/
|
|
42
|
+
.coverage
|
|
43
|
+
.coverage.*
|
|
44
|
+
.cache
|
|
45
|
+
nosetests.xml
|
|
46
|
+
coverage.xml
|
|
47
|
+
*.cover
|
|
48
|
+
*.py,cover
|
|
49
|
+
.hypothesis/
|
|
50
|
+
.pytest_cache/
|
|
51
|
+
|
|
52
|
+
# Translations
|
|
53
|
+
*.mo
|
|
54
|
+
*.pot
|
|
55
|
+
|
|
56
|
+
# Django stuff:
|
|
57
|
+
*.log
|
|
58
|
+
local_settings.py
|
|
59
|
+
db.sqlite3
|
|
60
|
+
db.sqlite3-journal
|
|
61
|
+
|
|
62
|
+
# Flask stuff:
|
|
63
|
+
instance/
|
|
64
|
+
.webassets-cache
|
|
65
|
+
|
|
66
|
+
# Scrapy stuff:
|
|
67
|
+
.scrapy
|
|
68
|
+
|
|
69
|
+
# Sphinx documentation
|
|
70
|
+
docs/_build/
|
|
71
|
+
|
|
72
|
+
# PyBuilder
|
|
73
|
+
target/
|
|
74
|
+
|
|
75
|
+
# Jupyter Notebook
|
|
76
|
+
.ipynb_checkpoints
|
|
77
|
+
|
|
78
|
+
# IPython
|
|
79
|
+
profile_default/
|
|
80
|
+
ipython_config.py
|
|
81
|
+
|
|
82
|
+
# pyenv
|
|
83
|
+
.python-version
|
|
84
|
+
|
|
85
|
+
# pipenv
|
|
86
|
+
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
|
|
87
|
+
# However, in case of collaboration, if having platform-specific dependencies or dependencies
|
|
88
|
+
# having no cross-platform support, pipenv may install dependencies that don't work, or not
|
|
89
|
+
# install all needed dependencies.
|
|
90
|
+
#Pipfile.lock
|
|
91
|
+
|
|
92
|
+
# PEP 582; used by e.g. github.com/David-OConnor/pyflow
|
|
93
|
+
__pypackages__/
|
|
94
|
+
|
|
95
|
+
# Celery stuff
|
|
96
|
+
celerybeat-schedule
|
|
97
|
+
celerybeat.pid
|
|
98
|
+
|
|
99
|
+
# SageMath parsed files
|
|
100
|
+
*.sage.py
|
|
101
|
+
|
|
102
|
+
# Environments
|
|
103
|
+
.env
|
|
104
|
+
.venv
|
|
105
|
+
env/
|
|
106
|
+
venv/
|
|
107
|
+
ENV/
|
|
108
|
+
env.bak/
|
|
109
|
+
venv.bak/
|
|
110
|
+
|
|
111
|
+
# Spyder project settings
|
|
112
|
+
.spyderproject
|
|
113
|
+
.spyproject
|
|
114
|
+
|
|
115
|
+
# Rope project settings
|
|
116
|
+
.ropeproject
|
|
117
|
+
|
|
118
|
+
# mkdocs documentation
|
|
119
|
+
/site
|
|
120
|
+
|
|
121
|
+
# mypy
|
|
122
|
+
.mypy_cache/
|
|
123
|
+
.dmypy.json
|
|
124
|
+
dmypy.json
|
|
125
|
+
|
|
126
|
+
# Pyre type checker
|
|
127
|
+
.pyre/
|
|
128
|
+
|
|
129
|
+
# IDEs
|
|
130
|
+
.vscode/
|
|
131
|
+
.idea/
|
|
132
|
+
*.swp
|
|
133
|
+
*.swo
|
|
134
|
+
|
|
135
|
+
# OS
|
|
136
|
+
.DS_Store
|
|
137
|
+
.DS_Store?
|
|
138
|
+
._*
|
|
139
|
+
.Spotlight-V100
|
|
140
|
+
.Trashes
|
|
141
|
+
ehthumbs.db
|
|
142
|
+
Thumbs.db
|
|
143
|
+
|
|
144
|
+
# Logs
|
|
145
|
+
logs/
|
|
146
|
+
*.log
|
|
147
|
+
|
|
148
|
+
# Local development
|
|
149
|
+
local/
|
|
150
|
+
tmp/
|
|
151
|
+
temp/last_extracted_note.txt
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
[{"label":"一键安装环境","name":"Install","description":"专注于解决工作空间环境问题","prompt":"你是一位专门从事解决工作空间环境问题的全栈工程师和DevOps专家,你的主要任务是帮助用户诊断、修复和配置当前工作空间`/Users/yehao20/android_scrawl_v3/xhs-note-extractor/`的开发环境。\n\n## 核心职责\n\n### 1. 环境检测与诊断\n- 自动扫描工作空间中的项目文件(package.json, requirements.txt, pom.xml, Gemfile, go.mod等)\n- 识别项目所需的运行环境和依赖\n- 检测当前系统已安装的环境版本\n- 分析环境配置冲突和兼容性问题\n\n### 2. 主流环境支持\n**Node.js生态系统:**\n- 检测和安装Node.js(如果用户没要求推荐LTS版本)\n- 配置npm/yarn/pnpm包管理器\n- 处理node_modules依赖问题\n- 解决版本冲突和权限问题\n\n**Python生态系统:**\n- 安装Python(2.x/3.x版本管理)\n- 配置pip包管理器和虚拟环境(venv/conda)\n- 处理requirements.txt依赖安装\n- 解决Python路径和模块导入问题\n\n**Java生态系统:**\n- 安装和配置JDK/JRE(版本选择和JAVA_HOME设置)\n- 配置Maven/Gradle构建工具\n- 处理依赖下载和仓库配置\n- 解决类路径和编译问题\n\n**其他主流环境:**\n- Go语言环境配置\n- Ruby和Rails环境\n- PHP和Composer\n- .NET Core环境\n- Docker容器化环境\n\n### 3. 项目启动与运行\n- 分析项目启动脚本和配置文件\n- 提供标准化的启动命令\n- 配置开发服务器和热重载\n- 设置环境变量和配置文件\n- 处理端口冲突和服务依赖\n\n### 4. 问题解决策略\n- 提供跨平台解决方案(Windows/macOS/Linux)\n- 给出详细的安装步骤和命令\n- 提供多种安装方式选择(官方安装器/包管理器/容器化)\n- 预防常见错误和最佳实践建议\n- 提供环境验证和测试方法\n\n### 5. 交互方式\n- 首先询问用户的操作系统和项目类型\n- 逐步引导用户完成环境配置\n- 提供可复制的命令和脚本\n- 在每个步骤后确认执行结果\n- 遇到问题时提供多种备选方案\n\n## 工作流程\n1. **环境扫描**:分析工作空间文件结构,识别项目类型\n2. **需求评估**:确定所需的运行环境和版本要求\n3. **现状检查**:检测当前已安装的环境和工具\n4. **差距分析**:对比需求与现状,列出缺失项\n5. **安装指导**:提供详细的安装和配置步骤\n6. **验证测试**:确保环境配置正确可用\n7. **项目启动**:协助用户成功启动项目\n\n请始终保持耐心和专业,用通俗易懂的语言解释技术概念,并在每个关键步骤提供清晰的指导。现在请开始分析当前工作空间的环境需求。\n"},{"label":"一键生成JoyCode Rules","name":"generateRules","description":"专注于生成JoyCode规则","prompt":"\n # 你是一位资深的代码架构分析专家,专门负责分析项目结构并生成 JoyCode Rules 配置文件。\n\n## 任务目标\n基于当前工作空间`/Users/yehao20/android_scrawl_v3/xhs-note-extractor/`的代码结构,生成一份完整结构清晰、可操作性强的 `[object Promise].md` Rules文件,Rules文件存放在`/Users/yehao20/android_scrawl_v3/xhs-note-extractor/`工作空间下.joycode文件夹下rules文件夹中,用于指导 AI 助手更好地理解和协助开发工作。\n\n## 分析维度\n请按以下维度深入分析当前项目:\n\n### 1. 项目概览\n- 项目类型(前端/后端/全栈/移动端等)\n- 主要技术栈和框架\n- 项目规模和复杂度\n\n### 2. 调用关系分析\n- 模块间依赖关系\n- API 调用链路\n- 数据流向\n- 关键组件交互模式\n\n### 3. 仓库结构总结\n- 目录组织架构\n- 文件命名规范\n- 配置文件分布\n- 资源文件管理\n\n### 4. 
代码特征总结\n- 编程语言和版本\n- 代码风格和规范\n- 设计模式使用\n- 第三方库依赖\n\n## 输出要求\n生成的 `[object Promise].md` 文件必须包含:\n- 项目背景和技术栈说明\n- 代码风格和命名约定\n- 架构模式和最佳实践\n- 开发约束和注意事项\n- 常用命令和工作流程\n- 格式如下:\n`\n---\nglobs: *\nalwaysApply: true\n---\n\n# 项目规则要求如下\n\n规则(Rules)内容放在这里\n`\n\n## 约束条件\n- 仅分析当前工作空间内的文件\n- 不要包含敏感信息(如密钥、密码等)\n- 确保规则具体可执行,避免模糊描述\n- 生成的规则要便于 AI 助手理解和遵循\n\n\n请开始分析当前工作空间并生成对应的 JoyCode Rules 配置。\n"}]
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 JoyCode Agent
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,234 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: xhs-note-extractor
|
|
3
|
+
Version: 0.1.2
|
|
4
|
+
Summary: A Python package for extracting Xiaohongshu (Little Red Book) note data from URLs
|
|
5
|
+
Author-email: JoyCode Agent <agent@joycode.com>
|
|
6
|
+
License: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/yehao20/xhs-note-extractor
|
|
8
|
+
Project-URL: Repository, https://github.com/yehao20/xhs-note-extractor
|
|
9
|
+
Project-URL: Documentation, https://github.com/yehao20/xhs-note-extractor/blob/main/README.md
|
|
10
|
+
Project-URL: Issues, https://github.com/yehao20/xhs-note-extractor/issues
|
|
11
|
+
Keywords: xiaohongshu,little-red-book,web-scraping,automation,uiautomator
|
|
12
|
+
Classifier: Development Status :: 4 - Beta
|
|
13
|
+
Classifier: Intended Audience :: Developers
|
|
14
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
15
|
+
Classifier: Operating System :: OS Independent
|
|
16
|
+
Classifier: Programming Language :: Python :: 3
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.8
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
20
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
21
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
22
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
23
|
+
Classifier: Topic :: Internet :: WWW/HTTP
|
|
24
|
+
Classifier: Topic :: Utilities
|
|
25
|
+
Requires-Python: >=3.8
|
|
26
|
+
Description-Content-Type: text/markdown
|
|
27
|
+
License-File: LICENSE
|
|
28
|
+
Requires-Dist: uiautomator2>=2.16.17
|
|
29
|
+
Requires-Dist: requests>=2.25.0
|
|
30
|
+
Provides-Extra: dev
|
|
31
|
+
Requires-Dist: pytest>=6.0; extra == "dev"
|
|
32
|
+
Requires-Dist: pytest-cov>=2.0; extra == "dev"
|
|
33
|
+
Requires-Dist: black>=21.0; extra == "dev"
|
|
34
|
+
Requires-Dist: flake8>=3.8; extra == "dev"
|
|
35
|
+
Dynamic: license-file
|
|
36
|
+
|
|
37
|
+
# 小红书笔记提取器 (Xiaohongshu Note Extractor)
|
|
38
|
+
|
|
39
|
+
一个用于从小红书提取笔记数据的Python工具,支持命令行界面和编程接口。
|
|
40
|
+
|
|
41
|
+
## 功能特性
|
|
42
|
+
|
|
43
|
+
- 🔍 从小红书笔记URL提取详细数据
|
|
44
|
+
- 📊 支持JSON和CSV输出格式
|
|
45
|
+
- 🖥️ 命令行界面支持
|
|
46
|
+
- 🔧 可配置的设备连接选项
|
|
47
|
+
- 📱 Android设备集成(通过uiautomator2)
|
|
48
|
+
- 🛡️ 优雅的错误处理和设备状态检查
|
|
49
|
+
|
|
50
|
+
## 安装
|
|
51
|
+
|
|
52
|
+
### 从源码安装
|
|
53
|
+
|
|
54
|
+
```bash
|
|
55
|
+
# 克隆仓库
|
|
56
|
+
git clone <repository-url>
|
|
57
|
+
cd xhs-note-extractor
|
|
58
|
+
|
|
59
|
+
# 安装依赖
|
|
60
|
+
pip install -r requirements.txt
|
|
61
|
+
|
|
62
|
+
# 安装包(开发模式)
|
|
63
|
+
pip install -e .
|
|
64
|
+
```
|
|
65
|
+
|
|
66
|
+
### 依赖要求
|
|
67
|
+
|
|
68
|
+
- Python 3.8+
|
|
69
|
+
- Android设备(用于完整功能)
|
|
70
|
+
- ADB工具
|
|
71
|
+
|
|
72
|
+
## 使用方法
|
|
73
|
+
|
|
74
|
+
### 命令行界面(CLI)
|
|
75
|
+
|
|
76
|
+
安装完成后,可以直接使用 `xhs-extract` 命令:
|
|
77
|
+
|
|
78
|
+
```bash
|
|
79
|
+
# 提取笔记并输出到控制台(JSON格式)
|
|
80
|
+
xhs-extract https://www.xiaohongshu.com/explore/note_id
|
|
81
|
+
|
|
82
|
+
# 保存到文件
|
|
83
|
+
xhs-extract https://www.xiaohongshu.com/explore/note_id -o note_data.json
|
|
84
|
+
|
|
85
|
+
# 输出CSV格式
|
|
86
|
+
xhs-extract https://www.xiaohongshu.com/explore/note_id -f csv -o note_data.csv
|
|
87
|
+
|
|
88
|
+
# 启用详细输出模式
|
|
89
|
+
xhs-extract https://www.xiaohongshu.com/explore/note_id -v
|
|
90
|
+
|
|
91
|
+
# 查看帮助
|
|
92
|
+
xhs-extract --help
|
|
93
|
+
```
|
|
94
|
+
|
|
95
|
+
### 编程接口
|
|
96
|
+
|
|
97
|
+
```python
|
|
98
|
+
from xhs_note_extractor import XHSNoteExtractor
|
|
99
|
+
import json
|
|
100
|
+
|
|
101
|
+
# 创建提取器实例
|
|
102
|
+
extractor = XHSNoteExtractor()
|
|
103
|
+
|
|
104
|
+
# 检查设备连接状态
|
|
105
|
+
if extractor.is_device_connected():
|
|
106
|
+
# 提取笔记数据
|
|
107
|
+
note_data = extractor.extract_note_data("https://www.xiaohongshu.com/explore/note_id")
|
|
108
|
+
print(json.dumps(note_data, ensure_ascii=False, indent=2))
|
|
109
|
+
else:
|
|
110
|
+
print("请连接Android设备并启用USB调试")
|
|
111
|
+
```
|
|
112
|
+
|
|
113
|
+
## 输出数据结构
|
|
114
|
+
|
|
115
|
+
提取的数据包含以下字段:
|
|
116
|
+
|
|
117
|
+
```json
|
|
118
|
+
{
|
|
119
|
+
"title": "笔记标题",
|
|
120
|
+
"content": "笔记完整内容",
|
|
121
|
+
"author": {
|
|
122
|
+
"nickname": "作者昵称",
|
|
123
|
+
"user_id": "用户ID"
|
|
124
|
+
},
|
|
125
|
+
"likes": 100,
|
|
126
|
+
"collects": 50,
|
|
127
|
+
"comments": 25,
|
|
128
|
+
"shares": 10,
|
|
129
|
+
"image_urls": [
|
|
130
|
+
"图片URL1",
|
|
131
|
+
"图片URL2"
|
|
132
|
+
],
|
|
133
|
+
"video_url": "视频URL(如果有)",
|
|
134
|
+
"tags": ["标签1", "标签2"],
|
|
135
|
+
"publish_time": "发布时间",
|
|
136
|
+
"note_id": "笔记ID"
|
|
137
|
+
}
|
|
138
|
+
```
|
|
139
|
+
|
|
140
|
+
## 设备连接
|
|
141
|
+
|
|
142
|
+
### 连接Android设备
|
|
143
|
+
|
|
144
|
+
1. 在Android设备上启用**开发者选项**和**USB调试**
|
|
145
|
+
2. 通过USB连接设备到电脑
|
|
146
|
+
3. 授权USB调试权限(设备上会弹出提示)
|
|
147
|
+
|
|
148
|
+
### 检查设备状态
|
|
149
|
+
|
|
150
|
+
```bash
|
|
151
|
+
# 使用ADB检查设备
|
|
152
|
+
adb devices
|
|
153
|
+
|
|
154
|
+
# 使用CLI工具检查
|
|
155
|
+
xhs-extract --help # 会显示设备连接状态
|
|
156
|
+
```
|
|
157
|
+
|
|
158
|
+
## 故障排除
|
|
159
|
+
|
|
160
|
+
### 设备连接问题
|
|
161
|
+
|
|
162
|
+
如果CLI工具提示设备未连接:
|
|
163
|
+
|
|
164
|
+
1. 检查USB连接是否正常
|
|
165
|
+
2. 确认已在设备上启用USB调试
|
|
166
|
+
3. 确认已授权USB调试权限
|
|
167
|
+
4. 尝试重新插拔USB线缆
|
|
168
|
+
5. 重启ADB服务:
|
|
169
|
+
```bash
|
|
170
|
+
adb kill-server
|
|
171
|
+
adb start-server
|
|
172
|
+
```
|
|
173
|
+
|
|
174
|
+
### 权限问题
|
|
175
|
+
|
|
176
|
+
在Linux/Mac上,可能需要为ADB添加权限:
|
|
177
|
+
|
|
178
|
+
```bash
|
|
179
|
+
sudo adb kill-server
|
|
180
|
+
sudo adb start-server
|
|
181
|
+
```
|
|
182
|
+
|
|
183
|
+
## 示例
|
|
184
|
+
|
|
185
|
+
查看 `examples/basic_usage.py` 文件获取更多使用示例:
|
|
186
|
+
|
|
187
|
+
```bash
|
|
188
|
+
# 运行示例
|
|
189
|
+
python examples/basic_usage.py
|
|
190
|
+
```
|
|
191
|
+
|
|
192
|
+
## 开发
|
|
193
|
+
|
|
194
|
+
### 项目结构
|
|
195
|
+
|
|
196
|
+
```
|
|
197
|
+
xhs-note-extractor/
|
|
198
|
+
├── xhs_note_extractor/
|
|
199
|
+
│ ├── __init__.py
|
|
200
|
+
│ ├── cli.py # 命令行界面
|
|
201
|
+
│ ├── extractor.py # 核心提取器
|
|
202
|
+
│ └── utils.py # 工具函数
|
|
203
|
+
├── examples/
|
|
204
|
+
│ └── basic_usage.py # 使用示例
|
|
205
|
+
├── tests/
|
|
206
|
+
├── requirements.txt
|
|
207
|
+
├── setup.py
|
|
208
|
+
└── README.md
|
|
209
|
+
```
|
|
210
|
+
|
|
211
|
+
### 运行测试
|
|
212
|
+
|
|
213
|
+
```bash
|
|
214
|
+
# 运行示例
|
|
215
|
+
python examples/basic_usage.py
|
|
216
|
+
|
|
217
|
+
# 使用CLI工具
|
|
218
|
+
xhs-extract --help
|
|
219
|
+
```
|
|
220
|
+
|
|
221
|
+
## 注意事项
|
|
222
|
+
|
|
223
|
+
- 本工具仅供学习和研究使用
|
|
224
|
+
- 请遵守小红书的使用条款和API限制
|
|
225
|
+
- 过度频繁的请求可能导致IP被封禁
|
|
226
|
+
- 建议在合理范围内使用,避免对平台造成负担
|
|
227
|
+
|
|
228
|
+
## 许可证
|
|
229
|
+
|
|
230
|
+
MIT License
|
|
231
|
+
|
|
232
|
+
## 贡献
|
|
233
|
+
|
|
234
|
+
欢迎提交Issue和Pull Request!
|
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
# 小红书笔记提取器 - 快速开始指南
|
|
2
|
+
|
|
3
|
+
## 1. 安装
|
|
4
|
+
|
|
5
|
+
```bash
|
|
6
|
+
# 克隆仓库(如果需要)
|
|
7
|
+
# git clone <repository-url>
|
|
8
|
+
# cd xhs-note-extractor
|
|
9
|
+
|
|
10
|
+
# 安装依赖
|
|
11
|
+
pip install -r requirements.txt
|
|
12
|
+
|
|
13
|
+
# 安装包(开发模式)
|
|
14
|
+
pip install -e .
|
|
15
|
+
```
|
|
16
|
+
|
|
17
|
+
## 2. 连接Android设备
|
|
18
|
+
|
|
19
|
+
1. 在Android设备上启用**开发者选项**和**USB调试**
|
|
20
|
+
2. 通过USB连接设备到电脑
|
|
21
|
+
3. 在设备上授权USB调试权限
|
|
22
|
+
|
|
23
|
+
## 3. 使用CLI工具
|
|
24
|
+
|
|
25
|
+
### 基本用法
|
|
26
|
+
|
|
27
|
+
```bash
|
|
28
|
+
# 提取笔记并输出到控制台
|
|
29
|
+
xhs-extract https://www.xiaohongshu.com/explore/note_id
|
|
30
|
+
|
|
31
|
+
# 保存到文件
|
|
32
|
+
xhs-extract https://www.xiaohongshu.com/explore/note_id -o note_data.json
|
|
33
|
+
|
|
34
|
+
# 输出CSV格式
|
|
35
|
+
xhs-extract https://www.xiaohongshu.com/explore/note_id -f csv -o note_data.csv
|
|
36
|
+
|
|
37
|
+
# 启用详细输出
|
|
38
|
+
xhs-extract https://www.xiaohongshu.com/explore/note_id -v
|
|
39
|
+
```
|
|
40
|
+
|
|
41
|
+
### 查看帮助
|
|
42
|
+
|
|
43
|
+
```bash
|
|
44
|
+
xhs-extract --help
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
## 4. 编程接口使用
|
|
48
|
+
|
|
49
|
+
```python
|
|
50
|
+
from xhs_note_extractor import XHSNoteExtractor
|
|
51
|
+
import json
|
|
52
|
+
|
|
53
|
+
# 创建提取器实例
|
|
54
|
+
extractor = XHSNoteExtractor()
|
|
55
|
+
|
|
56
|
+
# 检查设备连接
|
|
57
|
+
if extractor.is_device_connected():
|
|
58
|
+
# 提取笔记数据
|
|
59
|
+
note_data = extractor.extract_note_data("https://www.xiaohongshu.com/explore/note_id")
|
|
60
|
+
print(json.dumps(note_data, ensure_ascii=False, indent=2))
|
|
61
|
+
else:
|
|
62
|
+
print("请连接Android设备")
|
|
63
|
+
```
|
|
64
|
+
|
|
65
|
+
## 5. 故障排除
|
|
66
|
+
|
|
67
|
+
### 设备未连接
|
|
68
|
+
|
|
69
|
+
如果看到"未检测到Android设备连接":
|
|
70
|
+
|
|
71
|
+
1. 检查USB连接
|
|
72
|
+
2. 确认已启用USB调试
|
|
73
|
+
3. 确认已授权USB调试权限
|
|
74
|
+
4. 运行 `adb devices` 检查设备是否被识别
|
|
75
|
+
|
|
76
|
+
### 无设备模式
|
|
77
|
+
|
|
78
|
+
CLI工具现在可以优雅处理无设备情况,会显示清晰的错误信息而不是抛出异常。
|
|
79
|
+
|
|
80
|
+
## 6. 测试
|
|
81
|
+
|
|
82
|
+
运行测试脚本验证安装:
|
|
83
|
+
|
|
84
|
+
```bash
|
|
85
|
+
python test_cli.py
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
## 7. 示例
|
|
89
|
+
|
|
90
|
+
运行示例脚本查看使用方法:
|
|
91
|
+
|
|
92
|
+
```bash
|
|
93
|
+
python examples/basic_usage.py
|
|
94
|
+
```
|
|
95
|
+
|
|
96
|
+
## 8. 完成!
|
|
97
|
+
|
|
98
|
+
现在您可以开始使用小红书笔记提取器了。CLI工具已完全可用,支持JSON和CSV输出格式,并能优雅处理设备连接错误。
|
|
@@ -0,0 +1,198 @@
|
|
|
1
|
+
# 小红书笔记提取器 (Xiaohongshu Note Extractor)
|
|
2
|
+
|
|
3
|
+
一个用于从小红书提取笔记数据的Python工具,支持命令行界面和编程接口。
|
|
4
|
+
|
|
5
|
+
## 功能特性
|
|
6
|
+
|
|
7
|
+
- 🔍 从小红书笔记URL提取详细数据
|
|
8
|
+
- 📊 支持JSON和CSV输出格式
|
|
9
|
+
- 🖥️ 命令行界面支持
|
|
10
|
+
- 🔧 可配置的设备连接选项
|
|
11
|
+
- 📱 Android设备集成(通过uiautomator2)
|
|
12
|
+
- 🛡️ 优雅的错误处理和设备状态检查
|
|
13
|
+
|
|
14
|
+
## 安装
|
|
15
|
+
|
|
16
|
+
### 从源码安装
|
|
17
|
+
|
|
18
|
+
```bash
|
|
19
|
+
# 克隆仓库
|
|
20
|
+
git clone <repository-url>
|
|
21
|
+
cd xhs-note-extractor
|
|
22
|
+
|
|
23
|
+
# 安装依赖
|
|
24
|
+
pip install -r requirements.txt
|
|
25
|
+
|
|
26
|
+
# 安装包(开发模式)
|
|
27
|
+
pip install -e .
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
### 依赖要求
|
|
31
|
+
|
|
32
|
+
- Python 3.8+
|
|
33
|
+
- Android设备(用于完整功能)
|
|
34
|
+
- ADB工具
|
|
35
|
+
|
|
36
|
+
## 使用方法
|
|
37
|
+
|
|
38
|
+
### 命令行界面(CLI)
|
|
39
|
+
|
|
40
|
+
安装完成后,可以直接使用 `xhs-extract` 命令:
|
|
41
|
+
|
|
42
|
+
```bash
|
|
43
|
+
# 提取笔记并输出到控制台(JSON格式)
|
|
44
|
+
xhs-extract https://www.xiaohongshu.com/explore/note_id
|
|
45
|
+
|
|
46
|
+
# 保存到文件
|
|
47
|
+
xhs-extract https://www.xiaohongshu.com/explore/note_id -o note_data.json
|
|
48
|
+
|
|
49
|
+
# 输出CSV格式
|
|
50
|
+
xhs-extract https://www.xiaohongshu.com/explore/note_id -f csv -o note_data.csv
|
|
51
|
+
|
|
52
|
+
# 启用详细输出模式
|
|
53
|
+
xhs-extract https://www.xiaohongshu.com/explore/note_id -v
|
|
54
|
+
|
|
55
|
+
# 查看帮助
|
|
56
|
+
xhs-extract --help
|
|
57
|
+
```
|
|
58
|
+
|
|
59
|
+
### 编程接口
|
|
60
|
+
|
|
61
|
+
```python
|
|
62
|
+
from xhs_note_extractor import XHSNoteExtractor
|
|
63
|
+
import json
|
|
64
|
+
|
|
65
|
+
# 创建提取器实例
|
|
66
|
+
extractor = XHSNoteExtractor()
|
|
67
|
+
|
|
68
|
+
# 检查设备连接状态
|
|
69
|
+
if extractor.is_device_connected():
|
|
70
|
+
# 提取笔记数据
|
|
71
|
+
note_data = extractor.extract_note_data("https://www.xiaohongshu.com/explore/note_id")
|
|
72
|
+
print(json.dumps(note_data, ensure_ascii=False, indent=2))
|
|
73
|
+
else:
|
|
74
|
+
print("请连接Android设备并启用USB调试")
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
## 输出数据结构
|
|
78
|
+
|
|
79
|
+
提取的数据包含以下字段:
|
|
80
|
+
|
|
81
|
+
```json
|
|
82
|
+
{
|
|
83
|
+
"title": "笔记标题",
|
|
84
|
+
"content": "笔记完整内容",
|
|
85
|
+
"author": {
|
|
86
|
+
"nickname": "作者昵称",
|
|
87
|
+
"user_id": "用户ID"
|
|
88
|
+
},
|
|
89
|
+
"likes": 100,
|
|
90
|
+
"collects": 50,
|
|
91
|
+
"comments": 25,
|
|
92
|
+
"shares": 10,
|
|
93
|
+
"image_urls": [
|
|
94
|
+
"图片URL1",
|
|
95
|
+
"图片URL2"
|
|
96
|
+
],
|
|
97
|
+
"video_url": "视频URL(如果有)",
|
|
98
|
+
"tags": ["标签1", "标签2"],
|
|
99
|
+
"publish_time": "发布时间",
|
|
100
|
+
"note_id": "笔记ID"
|
|
101
|
+
}
|
|
102
|
+
```
|
|
103
|
+
|
|
104
|
+
## 设备连接
|
|
105
|
+
|
|
106
|
+
### 连接Android设备
|
|
107
|
+
|
|
108
|
+
1. 在Android设备上启用**开发者选项**和**USB调试**
|
|
109
|
+
2. 通过USB连接设备到电脑
|
|
110
|
+
3. 授权USB调试权限(设备上会弹出提示)
|
|
111
|
+
|
|
112
|
+
### 检查设备状态
|
|
113
|
+
|
|
114
|
+
```bash
|
|
115
|
+
# 使用ADB检查设备
|
|
116
|
+
adb devices
|
|
117
|
+
|
|
118
|
+
# 使用CLI工具检查
|
|
119
|
+
xhs-extract --help # 会显示设备连接状态
|
|
120
|
+
```
|
|
121
|
+
|
|
122
|
+
## 故障排除
|
|
123
|
+
|
|
124
|
+
### 设备连接问题
|
|
125
|
+
|
|
126
|
+
如果CLI工具提示设备未连接:
|
|
127
|
+
|
|
128
|
+
1. 检查USB连接是否正常
|
|
129
|
+
2. 确认已在设备上启用USB调试
|
|
130
|
+
3. 确认已授权USB调试权限
|
|
131
|
+
4. 尝试重新插拔USB线缆
|
|
132
|
+
5. 重启ADB服务:
|
|
133
|
+
```bash
|
|
134
|
+
adb kill-server
|
|
135
|
+
adb start-server
|
|
136
|
+
```
|
|
137
|
+
|
|
138
|
+
### 权限问题
|
|
139
|
+
|
|
140
|
+
在Linux/Mac上,可能需要为ADB添加权限:
|
|
141
|
+
|
|
142
|
+
```bash
|
|
143
|
+
sudo adb kill-server
|
|
144
|
+
sudo adb start-server
|
|
145
|
+
```
|
|
146
|
+
|
|
147
|
+
## 示例
|
|
148
|
+
|
|
149
|
+
查看 `examples/basic_usage.py` 文件获取更多使用示例:
|
|
150
|
+
|
|
151
|
+
```bash
|
|
152
|
+
# 运行示例
|
|
153
|
+
python examples/basic_usage.py
|
|
154
|
+
```
|
|
155
|
+
|
|
156
|
+
## 开发
|
|
157
|
+
|
|
158
|
+
### 项目结构
|
|
159
|
+
|
|
160
|
+
```
|
|
161
|
+
xhs-note-extractor/
|
|
162
|
+
├── xhs_note_extractor/
|
|
163
|
+
│ ├── __init__.py
|
|
164
|
+
│ ├── cli.py # 命令行界面
|
|
165
|
+
│ ├── extractor.py # 核心提取器
|
|
166
|
+
│ └── utils.py # 工具函数
|
|
167
|
+
├── examples/
|
|
168
|
+
│ └── basic_usage.py # 使用示例
|
|
169
|
+
├── tests/
|
|
170
|
+
├── requirements.txt
|
|
171
|
+
├── setup.py
|
|
172
|
+
└── README.md
|
|
173
|
+
```
|
|
174
|
+
|
|
175
|
+
### 运行测试
|
|
176
|
+
|
|
177
|
+
```bash
|
|
178
|
+
# 运行示例
|
|
179
|
+
python examples/basic_usage.py
|
|
180
|
+
|
|
181
|
+
# 使用CLI工具
|
|
182
|
+
xhs-extract --help
|
|
183
|
+
```
|
|
184
|
+
|
|
185
|
+
## 注意事项
|
|
186
|
+
|
|
187
|
+
- 本工具仅供学习和研究使用
|
|
188
|
+
- 请遵守小红书的使用条款和API限制
|
|
189
|
+
- 过度频繁的请求可能导致IP被封禁
|
|
190
|
+
- 建议在合理范围内使用,避免对平台造成负担
|
|
191
|
+
|
|
192
|
+
## 许可证
|
|
193
|
+
|
|
194
|
+
MIT License
|
|
195
|
+
|
|
196
|
+
## 贡献
|
|
197
|
+
|
|
198
|
+
欢迎提交Issue和Pull Request!
|