xhs-note-extractor 0.1.dev6__py3-none-any.whl → 0.1.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,98 @@
1
+ # 小红书笔记提取器 - 设备重试机制使用指南
2
+
3
+ ## 功能概述
4
+
5
+ 小红书笔记提取器现在支持设备重试机制。当某个设备需要登录时,系统会自动尝试连接其他可用设备,直到找到无需登录的设备或所有设备都尝试过为止。
6
+
7
+ ## 使用方式
8
+
9
+ ### 1. 使用便捷函数(推荐)
10
+
11
+ ```python
12
+ from xhs_note_extractor import extract_note_from_url
13
+
14
+ # 直接提取笔记数据
15
+ result = extract_note_from_url("https://www.xiaohongshu.com/explore/你的笔记ID")
16
+
17
+ if result:
18
+ print("成功提取笔记数据")
19
+ print(f"作者: {result['author_name']}")
20
+ print(f"点赞数: {result['likes']}")
21
+ print(f"图片数: {len(result['image_urls'])}")
22
+ else:
23
+ print("所有设备都需要登录,提取失败")
24
+ ```
25
+
26
+ ### 2. 使用类实例
27
+
28
+ ```python
29
+ from xhs_note_extractor import XHSNoteExtractor
30
+
31
+ # 创建提取器实例
32
+ extractor = XHSNoteExtractor()
33
+
34
+ # 显示可用设备
35
+ print(f"可用设备: {extractor.available_devices}")
36
+
37
+ # 提取笔记数据
38
+ result = extractor.extract_note_data(url="https://www.xiaohongshu.com/explore/你的笔记ID")
39
+
40
+ if result:
41
+ print("成功提取笔记数据")
42
+ else:
43
+ print("所有设备都需要登录,提取失败")
44
+ ```
45
+
46
+ ### 3. 手动切换设备
47
+
48
+ ```python
49
+ from xhs_note_extractor import XHSNoteExtractor
50
+
51
+ extractor = XHSNoteExtractor()
52
+
53
+ # 查看当前设备
54
+ print(f"当前设备: {extractor.device.serial}")
55
+
56
+ # 手动切换到下一个设备
57
+ success = extractor.switch_to_next_device()
58
+ if success:
59
+ print(f"已切换到设备: {extractor.device.serial}")
60
+ ```
61
+
62
+ ## 工作原理
63
+
64
+ 1. **设备发现**: 初始化时自动检测所有通过ADB连接的Android设备
65
+ 2. **登录检测**: 在提取笔记时检测是否需要登录
66
+ 3. **自动重试**: 如果需要登录,自动尝试下一个可用设备
67
+ 4. **循环尝试**: 依次尝试所有可用设备
68
+ 5. **结果返回**: 成功则返回数据,失败则返回None
69
+
70
+ ## 注意事项
71
+
72
+ - 确保所有设备都已通过USB调试连接并授权
73
+ - 设备需要安装小红书APP
74
+ - 如果所有设备都需要登录,则返回None而不是抛出异常
75
+ - 设备切换时会自动重启小红书APP
76
+
77
+ ## 测试脚本
78
+
79
+ 运行测试脚本验证设备重试功能:
80
+
81
+ ```bash
82
+ python xhs_note_extractor/test_device_retry.py
83
+ ```
84
+
85
+ ## 故障排除
86
+
87
+ 1. **无法发现设备**:
88
+ - 检查USB连接
89
+ - 确保ADB调试已开启
90
+ - 运行 `adb devices` 验证设备连接
91
+
92
+ 2. **设备切换失败**:
93
+ - 检查设备是否仍然连接
94
+ - 确保小红书APP在设备上已安装
95
+
96
+ 3. **返回None**:
97
+ - 所有设备都需要登录
98
+ - 尝试手动登录某个设备后再试
@@ -0,0 +1,264 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Phone Agent Usage Examples / Phone Agent 使用示例
4
+
5
+ Demonstrates how to use Phone Agent for phone automation tasks via Python API.
6
+ 演示如何通过 Python API 使用 Phone Agent 进行手机自动化任务。
7
+ """
8
+ import json
9
+ from datetime import datetime
10
+
11
+ from phone_agent import PhoneAgent
12
+ from phone_agent.agent import AgentConfig
13
+ from phone_agent.config import get_messages
14
+ from phone_agent.model import ModelConfig
15
+
16
+ from xhs_note_extractor.login_propmt import phone_agent_protocol_v3_t1, phone_agent_protocol_v3_t3
17
+ from xhs_note_extractor.sms_verification import get_verification_code_sync
18
+
19
def _parse_agent_json(result):
    """Return the agent reply decoded as a JSON object, or None if it is not one."""
    try:
        parsed = json.loads(result)
    except (json.JSONDecodeError, TypeError):
        return None
    # A bare JSON scalar or list has no "status" field to inspect, so it is
    # handled like plain text by the caller.
    return parsed if isinstance(parsed, dict) else None


def do_login(device_id: str, lang: str = "cn", phone_number: str = "19163152334"):
    """Log a device in by SMS verification code, driven by a Phone Agent.

    The flow has three stages: run the first protocol prompt to request an
    SMS code for ``phone_number``, fetch that code through
    ``get_verification_code_sync``, then run the second protocol prompt with
    the phone number and the code.

    Args:
        device_id: Device identifier passed to ``AgentConfig``.
        lang: Language code passed to ``AgentConfig``.
        phone_number: Phone number formatted into both protocol prompts.

    Returns:
        True when the final agent reply reports success (JSON ``status`` equal
        to ``"success"``, or the legacy text marker ``"登录成功"``); False when
        requesting the code failed, no code was obtained, or login failed.
    """
    import os

    # NOTE(security): the literal below is a credential shipped inside the
    # package. It is kept only as a backward-compatible fallback; set
    # MODELSCOPE_API_KEY instead, then rotate and remove the embedded key.
    api_key = os.environ.get("MODELSCOPE_API_KEY") or "ms-ed9ed848-d630-4192-a688-37ebbf985246"

    # Configure model endpoint
    model_config = ModelConfig(
        model_name="ZhipuAI/AutoGLM-Phone-9B",
        temperature=0.1,
        api_key=api_key,
        base_url="https://api-inference.modelscope.cn/v1",
    )

    # Configure Agent behavior
    agent_config = AgentConfig(
        max_steps=50,
        verbose=True,
        lang=lang,
        device_id=device_id,
    )

    agent = PhoneAgent(
        model_config=model_config,
        agent_config=agent_config,
    )

    # Captured before the code is requested; handed to the SMS lookup as send_time.
    cur_date_time = datetime.now()
    print(f'phone_number:{phone_number}')

    # Stage 1: ask the agent to trigger the SMS verification code.
    prompt_task1 = phone_agent_protocol_v3_t1.format(phone_number)
    result = agent.run(prompt_task1)
    print(f"prompt_task1: {result}")

    result_json = _parse_agent_json(result)
    if result_json is not None:
        print(f"result_json: {result_json}")
        if result_json.get("status") != "success":
            print(f"验证码触发失败: {result_json.get('message')}")
            return False
    else:
        # Legacy plain-text reply: abort only when it mentions an image captcha.
        print(f"result error: {result}")
        image_captcha_keywords = ("图片验证码", "点击文字", "旋转图片", "滑块", "拼图", "拖拽", "文字验证")
        reply_text = str(result)
        if any(keyword in reply_text for keyword in image_captcha_keywords):
            print("检测到图片验证码,验证码触发失败")
            return False
        # NOTE(review): a plain-text reply without any success marker is still
        # treated as "code requested" - confirm that this is intended.

    # Stage 2: fetch the SMS code (3 attempts, 3 seconds apart).
    print(f"\n获取手机号 {phone_number} 的验证码(3次尝试,每次间隔3秒)...")
    code = get_verification_code_sync(
        phone_number,
        send_time=cur_date_time,
        max_retries=3,
        retry_interval=3,
    )
    print(f"手机号: {phone_number}, 验证码: {code}")
    if not code:
        # Presumably the lookup yields an empty value when every retry fails
        # (TODO confirm); never type a placeholder such as "None" as the code.
        return False

    # Stage 3: let the agent enter the code and finish the login.
    prompt_task3 = phone_agent_protocol_v3_t3.format(phone_number, code)
    result = agent.run(prompt_task3)
    print(f"prompt_task3: {result}")

    result_json = _parse_agent_json(result)
    if result_json is not None:
        return result_json.get("status") == "success"
    # Legacy plain-text reply.
    return "登录成功" in str(result)
94
+
95
def example_with_callbacks(lang: str = "cn"):
    """Run one task with interactive confirmation and manual-takeover hooks.

    Args:
        lang: Language code used for the UI messages and the agent config.
    """
    msgs = get_messages(lang)
    accepted_answers = ("yes", "y", "是")

    def confirm_action(message: str) -> bool:
        """Ask on stdin whether a sensitive operation may proceed."""
        print(f"\n[{msgs['confirmation_required']}] {message}")
        answer = input(f"{msgs['continue_prompt']}: ")
        return answer.lower() in accepted_answers

    def wait_for_manual_step(message: str) -> None:
        """Block until the user signals that a manual step is done."""
        print(f"\n[{msgs['manual_operation_required']}] {message}")
        print(msgs["manual_operation_hint"])
        input(f"{msgs['press_enter_when_done']}: ")

    # Build the agent with both hooks attached.
    agent = PhoneAgent(
        agent_config=AgentConfig(lang=lang),
        confirmation_callback=confirm_action,
        takeover_callback=wait_for_manual_step,
    )

    # This task may trigger the confirmation hook.
    outcome = agent.run("打开淘宝搜索无线耳机并加入购物车")
    print(f"{msgs['task_result']}: {outcome}")
122
+
123
+
124
def example_step_by_step(lang: str = "cn"):
    """Drive a task one agent step at a time, printing each step (for debugging).

    Args:
        lang: Language code used for the UI messages and the agent config.
    """
    labels = get_messages(lang)
    agent = PhoneAgent(agent_config=AgentConfig(lang=lang))

    # The first step carries the task description.
    current = agent.step("打开美团搜索附近的火锅店")
    print(f"{labels['step']} 1: {current.action}")

    # Keep stepping until the agent reports completion or the step count reaches 10.
    while True:
        if current.finished or agent.step_count >= 10:
            break
        current = agent.step()
        print(f"{labels['step']} {agent.step_count}: {current.action}")
        print(f" {labels['thinking']}: {current.thinking[:100]}...")

    print(f"\n{labels['final_result']}: {current.message}")
142
+
143
+
144
def example_multiple_tasks(lang: str = "cn"):
    """Run several tasks back to back on one agent, resetting it after each.

    Args:
        lang: Language code used for the UI messages and the agent config.
    """
    labels = get_messages(lang)
    agent = PhoneAgent(agent_config=AgentConfig(lang=lang))

    divider = "=" * 50
    queue = (
        "打开高德地图查看实时路况",
        "打开大众点评搜索附近的咖啡店",
        "打开bilibili搜索Python教程",
    )

    for instruction in queue:
        print(f"\n{divider}")
        print(f"{labels['task']}: {instruction}")
        print(divider)

        outcome = agent.run(instruction)
        print(f"{labels['result']}: {outcome}")

        # Clear the agent state before the next task.
        agent.reset()
167
+
168
+
169
def example_remote_device(lang: str = "cn", address: str = "192.168.1.100:5555"):
    """Connect to a remote ADB device, run one task on it, then disconnect.

    Args:
        lang: Language code used for the UI messages and the agent config.
        address: ``host:port`` of the remote device; also used as the agent's
            ``device_id``.

    Returns:
        None. Returns early, after printing the failure message, when the
        connection cannot be established.
    """
    from phone_agent.adb import ADBConnection

    msgs = get_messages(lang)

    # Create connection manager
    conn = ADBConnection()

    # Connect to remote device
    success, message = conn.connect(address)
    if not success:
        print(f"{msgs['connection_failed']}: {message}")
        return

    print(f"{msgs['connection_successful']}: {message}")

    try:
        # Create Agent with device specified
        agent_config = AgentConfig(
            device_id=address,
            verbose=True,
            lang=lang,
        )
        agent = PhoneAgent(agent_config=agent_config)

        # Execute task
        result = agent.run("打开微信查看消息")
        print(f"{msgs['task_result']}: {result}")
    finally:
        # Always release the connection, even if the agent raises mid-task.
        conn.disconnect(address)
201
+
202
+
203
+
204
def check_adb_devices():
    """Check whether at least one usable ADB device is connected.

    Runs ``adb devices`` and looks for a row whose second column is exactly
    ``device``. Rows in other states (for example ``unauthorized`` or
    ``offline``), the header line and any other output lines are not counted.

    Returns:
        True if such a row exists; False (after printing a bilingual error)
        when none exists or when the ``adb`` executable cannot be found.
    """
    import subprocess

    try:
        completed = subprocess.run(["adb", "devices"], capture_output=True, text=True)
    except FileNotFoundError:
        print("\nError: 'adb' command not found. Please install Android Platform Tools.")
        print("错误: 未找到 'adb' 命令。请安装 Android Platform Tools。")
        return False

    for line in completed.stdout.splitlines():
        fields = line.split()
        # A usable device row looks like "<serial>\tdevice".
        if len(fields) >= 2 and fields[1] == "device":
            return True

    print("\nError: No Android devices connected via ADB.")
    print("错误: 未通过 ADB 连接任何 Android 设备。")
    print("Please connect a device or start an emulator.")
    print("请连接设备或启动模拟器。")
    return False
222
+ #
223
+ # if __name__ == "__main__":
224
+ # if not check_adb_devices():
225
+ # exit(1)
226
+ #
227
+ # import argparse
228
+ #
229
+ # parser = argparse.ArgumentParser(description="Phone Agent Usage Examples")
230
+ # parser.add_argument(
231
+ # "--lang",
232
+ # type=str,
233
+ # default="cn",
234
+ # choices=["cn", "en"],
235
+ # help="Language for UI messages (cn=Chinese, en=English)",
236
+ # )
237
+ # args = parser.parse_args()
238
+ #
239
+ # msgs = get_messages(args.lang)
240
+ #
241
+ # print("Phone Agent Usage Examples")
242
+ # print("=" * 50)
243
+ #
244
+ # # Run basic example
245
+ # print(f"\n1. Basic Task Example")
246
+ # print("-" * 30)
247
+ # do_login(args.lang)
248
+ #
249
+ # # Uncomment to run other examples
250
+ # # print(f"\n2. Task Example with Callbacks")
251
+ # # print("-" * 30)
252
+ # # example_with_callbacks(args.lang)
253
+ #
254
+ # # print(f"\n3. Step-by-step Example")
255
+ # # print("-" * 30)
256
+ # # example_step_by_step(args.lang)
257
+ #
258
+ # # print(f"\n4. Batch Task Example")
259
+ # # print("-" * 30)
260
+ # # example_multiple_tasks(args.lang)
261
+ #
262
+ # # print(f"\n5. Remote Device Example")
263
+ # # print("-" * 30)
264
+ # # example_remote_device(args.lang)
@@ -0,0 +1,80 @@
1
+ import re
2
+ from datetime import datetime, timedelta
3
+
4
+ def parse_time_to_timestamp_ms(time_str: str, now: datetime | None = None) -> int:
5
+ if now is None:
6
+ now = datetime.now()
7
+
8
+ time_str = time_str.strip()
9
+ # Remove common prefixes
10
+ for prefix in ["编辑于", "发布于"]:
11
+ if time_str.startswith(prefix):
12
+ time_str = time_str[len(prefix):].strip()
13
+
14
+ # Remove location suffix (e.g., "昨天 15:09重庆" -> "昨天 15:09")
15
+ # Match common Chinese city/province names at the end
16
+ import re as re_module
17
+ time_str = re_module.sub(r'[\u4e00-\u9fa5]{2,4}$', '', time_str).strip()
18
+
19
+ # 刚刚
20
+ if time_str == "刚刚":
21
+ dt = now
22
+
23
+ # X分钟前
24
+ elif match := re.match(r"(\d+)分钟前", time_str):
25
+ dt = now - timedelta(minutes=int(match.group(1)))
26
+
27
+ # X小时前
28
+ elif match := re.match(r"(\d+)小时前", time_str):
29
+ dt = now - timedelta(hours=int(match.group(1)))
30
+
31
+ # X天前
32
+ elif match := re.match(r"(\d+)天前", time_str):
33
+ dt = now - timedelta(days=int(match.group(1)))
34
+
35
+ # 今天 HH:mm
36
+ elif match := re.match(r"今天\s*(\d{1,2}:\d{2})", time_str):
37
+ dt = datetime.strptime(
38
+ f"{now.date()} {match.group(1)}",
39
+ "%Y-%m-%d %H:%M"
40
+ )
41
+
42
+ # 昨天 HH:mm
43
+ elif match := re.match(r"昨天\s*(\d{1,2}:\d{2})", time_str):
44
+ dt = datetime.strptime(
45
+ f"{(now - timedelta(days=1)).date()} {match.group(1)}",
46
+ "%Y-%m-%d %H:%M"
47
+ )
48
+
49
+ # 前天 HH:mm
50
+ elif match := re.match(r"前天\s*(\d{1,2}:\d{2})", time_str):
51
+ dt = datetime.strptime(
52
+ f"{(now - timedelta(days=2)).date()} {match.group(1)}",
53
+ "%Y-%m-%d %H:%M"
54
+ )
55
+
56
+ # YYYY-MM-DD HH:mm
57
+ elif re.match(r"\d{4}-\d{2}-\d{2}\s+\d{1,2}:\d{2}", time_str):
58
+ dt = datetime.strptime(time_str, "%Y-%m-%d %H:%M")
59
+
60
+ # YYYY-MM-DD
61
+ elif re.match(r"\d{4}-\d{2}-\d{2}", time_str):
62
+ dt = datetime.strptime(time_str, "%Y-%m-%d")
63
+
64
+ # ✅ 新增:MM-DD(默认当前年份)
65
+ elif match := re.match(r"(\d{2})-(\d{2})", time_str):
66
+ year = now.year
67
+ month, day = map(int, match.groups())
68
+ dt = datetime(year, month, day)
69
+
70
+ # HH:mm(默认当天)
71
+ elif re.match(r"\d{1,2}:\d{2}", time_str):
72
+ dt = datetime.strptime(
73
+ f"{now.date()} {time_str}",
74
+ "%Y-%m-%d %H:%M"
75
+ )
76
+
77
+ else:
78
+ raise ValueError(f"无法解析的时间格式: {time_str}")
79
+
80
+ return int(dt.timestamp() * 1000)