xhs-note-extractor 0.1.dev2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,50 @@
1
+ """
2
+ 小红书笔记提取器包
3
+
4
+ 这是一个用于从小红书URL中提取笔记信息的Python包。
5
+ 支持URL解析、设备连接、页面跳转和笔记内容提取。
6
+
7
+ 主要功能:
8
+ - URL解析和转换(支持标准格式和xhsdiscover协议格式)
9
+ - 设备连接和自动化操作
10
+ - 笔记内容提取(正文、图片、点赞数等)
11
+ - 结构化数据返回
12
+
13
+ 示例:
14
+ >>> from xhs_note_extractor import XHSNoteExtractor
15
+ >>> extractor = XHSNoteExtractor()
16
+ >>> data = extractor.extract_note_data(url="https://www.xiaohongshu.com/explore/...")
17
+ >>> print(data['content'])
18
+ """
19
+
20
# Single source of truth for the version: setuptools-scm writes the real
# release version into ``_version.py`` at build time.  The previously
# hard-coded "1.0.0" disagreed with the version the package is published as.
try:
    from ._version import __version__
except ImportError:
    # Source checkout in which ``_version.py`` has not been generated yet.
    __version__ = "0.0.0+unknown"
__author__ = "JoyCode Agent"
__email__ = "agent@joycode.com"

from .extractor import XHSNoteExtractor
from .utils import (
    DeviceManager,
    ElementFinder,
    DataFormatter,
    NetworkUtils,
    FileManager,
    XHSUtils,
    connect_device,
    format_like_count,
    extract_image_urls_from_html,
    fetch_html,
)

# Public API of the package.
__all__ = [
    "XHSNoteExtractor",
    "DeviceManager",
    "ElementFinder",
    "DataFormatter",
    "NetworkUtils",
    "FileManager",
    "XHSUtils",
    "connect_device",
    "format_like_count",
    "extract_image_urls_from_html",
    "fetch_html",
]
@@ -0,0 +1,34 @@
1
+ # file generated by setuptools-scm
2
+ # don't change, don't track in version control
3
+
4
# Names exported by this module; every value is available under both a
# dunder name and a plain alias (see the chained assignments at the bottom).
__all__ = [
    "__version__",
    "__version_tuple__",
    "version",
    "version_tuple",
    "__commit_id__",
    "commit_id",
]

# TYPE_CHECKING is hard-coded to False, so the ``typing`` imports in the
# first branch are never executed at runtime.
# NOTE(review): presumably static type checkers still take the first branch
# because of the variable's name -- confirm against setuptools-scm docs.
TYPE_CHECKING = False
if TYPE_CHECKING:
    from typing import Tuple
    from typing import Union

    VERSION_TUPLE = Tuple[Union[int, str], ...]
    COMMIT_ID = Union[str, None]
else:
    # Runtime placeholders so the annotations below resolve without ``typing``.
    VERSION_TUPLE = object
    COMMIT_ID = object

# Bare annotations: declare the types of the names assigned below.
version: str
__version__: str
__version_tuple__: VERSION_TUPLE
version_tuple: VERSION_TUPLE
commit_id: COMMIT_ID
__commit_id__: COMMIT_ID

# Version string and its tuple form.
__version__ = version = '0.1.dev2'
__version_tuple__ = version_tuple = (0, 1, 'dev2')

# Identifier of the commit this version was generated from.
__commit_id__ = commit_id = 'g1aa72014c'
@@ -0,0 +1,98 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Command Line Interface for XHS Note Extractor
4
+ """
5
+
6
+ import argparse
7
+ import sys
8
+ from pathlib import Path
9
+
10
+ from .extractor import XHSNoteExtractor
11
+ from .utils import NetworkUtils
12
+
13
+
14
def main():
    """Main entry point for the CLI application.

    Parses the command line, validates the URL with
    ``NetworkUtils.is_valid_xhs_url``, extracts the note through
    ``XHSNoteExtractor.extract_note_data`` and writes the result as JSON or
    as a two-column ("Field", "Value") CSV to ``--output`` or to stdout.

    Diagnostics (errors and ``--verbose`` progress lines) go to stderr so
    that stdout carries only the extracted data and stays machine-readable.

    Exits with status 1 when the URL is invalid, when no data could be
    extracted, or when any exception is raised during extraction/output.
    """
    parser = argparse.ArgumentParser(
        description="Extract Xiaohongshu (Little Red Book) note data from URLs"
    )
    parser.add_argument(
        "url",
        help="Xiaohongshu note URL to extract data from"
    )
    parser.add_argument(
        "-o", "--output",
        help="Output file path (default: stdout)"
    )
    parser.add_argument(
        "-f", "--format",
        choices=["json", "csv"],
        default="json",
        help="Output format (default: json)"
    )
    parser.add_argument(
        "-v", "--verbose",
        action="store_true",
        help="Enable verbose output"
    )

    args = parser.parse_args()

    # Reject anything that is not a Xiaohongshu note URL before touching a device.
    if not NetworkUtils.is_valid_xhs_url(args.url):
        print(f"Error: Invalid Xiaohongshu URL: {args.url}", file=sys.stderr)
        sys.exit(1)

    try:
        if args.verbose:
            print(f"Extracting data from: {args.url}", file=sys.stderr)

        # Constructing the extractor connects to the Android device and raises
        # RuntimeError when none is available.
        extractor = XHSNoteExtractor()

        # The extractor's public method is ``extract_note_data``; the former
        # call to ``extract_note`` raised AttributeError on every run.
        note_data = extractor.extract_note_data(url=args.url)

        if not note_data:
            print("Error: Failed to extract note data", file=sys.stderr)
            sys.exit(1)

        if args.format == "json":
            import json
            output = json.dumps(note_data, ensure_ascii=False, indent=2)
        else:  # csv
            import csv
            from io import StringIO

            # Simplified CSV: one row per top-level field.
            output_buffer = StringIO()
            writer = csv.writer(output_buffer)
            writer.writerow(["Field", "Value"])
            for key, value in note_data.items():
                # Nested values are flattened to their string representation.
                if isinstance(value, (list, dict)):
                    value = str(value)
                writer.writerow([key, value])

            output = output_buffer.getvalue()

        if args.output:
            output_path = Path(args.output)
            output_path.write_text(output, encoding='utf-8')
            if args.verbose:
                print(f"Output saved to: {output_path}", file=sys.stderr)
        else:
            print(output)

    except Exception as e:
        # Top-level boundary: report the failure and signal it via exit status.
        # ``sys.exit`` above raises SystemExit, which is not caught here.
        print(f"Error: {e}", file=sys.stderr)
        sys.exit(1)


if __name__ == "__main__":
    main()
@@ -0,0 +1,412 @@
1
+ """
2
+ 小红书笔记提取器模块
3
+
4
+ 该模块提供了从小红书URL中提取笔记信息的功能,包括:
5
+ - URL解析和转换
6
+ - 设备连接和页面跳转
7
+ - 笔记内容提取(正文、图片、点赞数等)
8
+ - 结构化数据返回
9
+
10
+ 作者: JoyCode Agent
11
+ 版本: 1.0.0
12
+ """
13
+
14
+ import uiautomator2 as u2
15
+ import time
16
+ import re
17
+ import requests
18
+ import logging
19
+ from typing import Dict, List, Optional, Union
20
+ from urllib.parse import urlparse, parse_qs
21
+
22
# Configure logging.
# NOTE(review): ``logging.basicConfig`` runs at import time, so importing this
# module configures the root logger (INFO level, format below) if the
# application has not configured it already -- confirm this is intended for a
# library module.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
# Module-level logger used by everything in this file.
logger = logging.getLogger(__name__)
28
+
29
+
30
class XHSNoteExtractor:
    """Extract note data from the Xiaohongshu app on an Android device.

    The extractor parses a note URL, opens the note through an
    ``xhsdiscover://`` deep link on a device driven by uiautomator2, and then
    reads the note text, like count and image URLs from the UI.
    """

    def __init__(self, device_serial: Optional[str] = None):
        """Create the extractor and connect to a device.

        Args:
            device_serial (str, optional): Serial of the device to use. When
                None, ``u2.connect()`` is called without arguments.

        Raises:
            RuntimeError: If ``connect_device`` reports failure.
        """
        self.device = None
        self.device_serial = device_serial
        if not self.connect_device():
            raise RuntimeError("未找到可用的Android设备,请连接设备后再试")

    def connect_device(self) -> bool:
        """Connect to the device and store it in ``self.device``.

        Returns:
            bool: True on success, False if any exception was raised while
            connecting (the exception is logged, not propagated).
        """
        try:
            if self.device_serial:
                self.device = u2.connect(self.device_serial)
            else:
                self.device = u2.connect()
            logger.info(f"✓ 已连接设备: {self.device.serial}")
            return True
        except Exception as e:
            logger.error(f"✗ 设备连接失败: {e}")
            return False

    @staticmethod
    def parse_xhs_url(url: str) -> Dict[str, str]:
        """Parse a Xiaohongshu URL into its note id and xsec token.

        Two formats are supported: the ``xhsdiscover://item/<id>`` deep link
        (the token is read from the percent-encoded ``open_url`` parameter,
        if present) and web URLs containing ``xiaohongshu.com`` with an
        ``/explore/<id>`` path.

        Args:
            url (str): URL to parse.

        Returns:
            Dict[str, str]: ``note_id``, ``xsec_token`` (empty string when
            absent) and ``original_url``.

        Raises:
            ValueError: If the URL is in neither format or has no note id.
        """
        # Deep-link format.
        if url.startswith("xhsdiscover://"):
            note_id_match = re.search(r'item/([^?]+)', url)
            if not note_id_match:
                raise ValueError("无法从xhsdiscover URL中提取note_id")

            note_id = note_id_match.group(1)

            # The original web URL, if any, travels percent-encoded in open_url.
            open_url_match = re.search(r'open_url=([^&]+)', url)
            xsec_token = ""
            if open_url_match:
                from urllib.parse import unquote
                decoded_url = unquote(open_url_match.group(1))
                token_match = re.search(r'xsec_token=([^&]+)', decoded_url)
                if token_match:
                    xsec_token = token_match.group(1)

            return {
                "note_id": note_id,
                "xsec_token": xsec_token,
                "original_url": url
            }

        # Standard web URL format.
        elif "xiaohongshu.com" in url:
            parsed_url = urlparse(url)
            path_parts = parsed_url.path.strip('/').split('/')

            # The note id is the path segment right after "explore".
            if 'explore' in path_parts:
                explore_index = path_parts.index('explore')
                if explore_index + 1 < len(path_parts):
                    note_id = path_parts[explore_index + 1]
                else:
                    raise ValueError("URL中缺少note_id")
            else:
                raise ValueError("URL格式不正确,缺少/explore/路径")

            query_params = parse_qs(parsed_url.query)
            xsec_token = query_params.get('xsec_token', [''])[0]

            return {
                "note_id": note_id,
                "xsec_token": xsec_token,
                "original_url": url
            }

        else:
            raise ValueError("不支持的URL格式")

    @staticmethod
    def validate_url(url: str) -> bool:
        """Return True if ``parse_xhs_url`` accepts *url*, False otherwise.

        Args:
            url (str): URL to validate.

        Returns:
            bool: Whether the URL could be parsed.
        """
        try:
            XHSNoteExtractor.parse_xhs_url(url)
            return True
        except ValueError:
            return False

    @staticmethod
    def convert_to_xhsdiscover_format(note_id: str, xsec_token: str = "") -> str:
        """Build the ``xhsdiscover://`` deep link for a note.

        Args:
            note_id (str): Note id.
            xsec_token (str): Optional xsec token. When non-empty, the web URL
                carrying the token is percent-encoded into ``open_url``.

        Returns:
            str: The deep-link URL.
        """
        if xsec_token:
            # Use the stdlib function directly; ``requests.utils.quote`` is
            # only an undocumented re-export of it.
            from urllib.parse import quote
            original_url = f"http://www.xiaohongshu.com/explore/{note_id}?xsec_token={xsec_token}&xsec_source=pc_feed"
            encoded_url = quote(original_url)
            return f"xhsdiscover://item/{note_id}?open_url={encoded_url}"
        else:
            return f"xhsdiscover://item/{note_id}"

    def extract_note_data(self, url: Optional[str] = None, note_id: Optional[str] = None,
                          xsec_token: Optional[str] = None) -> Dict[str, Union[str, List[str]]]:
        """Open a note on the device and extract its data.

        Args:
            url (str, optional): Note URL. When given, ``note_id`` and
                ``xsec_token`` are taken from it and override the arguments.
            note_id (str, optional): Note id, used when no URL is given.
            xsec_token (str, optional): xsec token, used when no URL is given.

        Returns:
            Dict[str, Union[str, List[str]]]: Dictionary with the keys
            ``content``, ``image_urls`` and ``likes``.

        Raises:
            ValueError: If the URL cannot be parsed, or neither ``url`` nor
                ``note_id`` was supplied.
            RuntimeError: If no device is connected.
            Exception: Any error raised while driving the device is logged
                and re-raised.
        """
        # Parsing also validates the URL.
        if url:
            parsed_data = self.parse_xhs_url(url)
            note_id = parsed_data["note_id"]
            xsec_token = parsed_data["xsec_token"]

        # Without this guard the deep link would be "xhsdiscover://item/None".
        if not note_id:
            raise ValueError("必须提供url或note_id")

        if self.device is None:
            raise RuntimeError("设备未连接,请先连接设备")

        jump_url = self.convert_to_xhsdiscover_format(note_id, xsec_token)

        logger.info(f"正在尝试跳转至笔记: {note_id}")

        try:
            self.device.open_url(jump_url)
            logger.info("✓ 已发送跳转指令,等待页面加载...")

            data = self._get_detail_data()

            logger.info(f"✓ 成功提取笔记数据,点赞数: {data['likes']}, 图片数: {len(data['image_urls'])}")

            return data

        except Exception as e:
            logger.error(f"✗ 提取笔记数据失败: {e}")
            raise

    def _get_detail_data(self) -> Dict[str, Union[str, List[str]]]:
        """Scrape text, images and like count from the currently open note.

        Only ``self.device`` is used, so the method also works on an instance
        created without running ``__init__``.

        Returns:
            Dict[str, Union[str, List[str]]]: Dictionary with the keys
            ``content``, ``image_urls`` and ``likes``.
        """
        logger.info("🔍 进入深度提取模式...")

        # 1. Make sure the detail page is showing; continue best-effort if not.
        if not self._wait_for_detail_page():
            logger.warning("⚠ 警告:详情页特征未发现,提取可能不完整")

        # 2. Expand collapsed long text.
        self._expand_full_text()

        # 3-5. Extract the individual fields.
        content = self._extract_content()
        image_urls = self._extract_image_urls()
        likes = self._extract_likes()

        return {
            "content": content,
            "image_urls": image_urls,
            "likes": likes
        }

    def _wait_for_detail_page(self) -> bool:
        """Poll up to 8 times (1 s apart) for any detail-page keyword."""
        detail_keywords = ["说点什么", "写评论", "写点什么", "收藏", "点赞", "评论", "分享", "发弹幕"]
        for i in range(8):
            if any(self.device(textContains=kw).exists or self.device(descriptionContains=kw).exists for kw in detail_keywords):
                return True
            if i == 4:
                # Possibly a video note: tap the screen to bring up the UI.
                # NOTE(review): (540, 900) is a fixed coordinate; presumably
                # chosen for a 1080-pixel-wide screen -- confirm.
                self.device.click(540, 900)
            time.sleep(1)
        return False

    def _expand_full_text(self) -> None:
        """Tap every visible "expand"-style button so the full text is shown."""
        for btn_text in ["展开", "查看全部", "全文"]:
            btn = self.device(text=btn_text)
            if btn.exists:
                logger.info(f"[Action] 点击'{btn_text}'")
                btn.click()
                time.sleep(1)

    def _extract_content(self) -> str:
        """Return the note text using two strategies."""
        content = ""
        # Strategy A: element whose resource id looks like a description/content view.
        desc_el = self.device(resourceIdMatches=".*desc.*|.*content.*")
        if desc_el.exists:
            content = desc_el.get_text()

        # Strategy B: when A found nothing or very little, collect all text views.
        if not content or len(content) < 20:
            texts = []
            for el in self.device(className="android.widget.TextView"):
                try:
                    t = el.get_text()
                    if not t or len(t) < 3:
                        continue
                    # Keep only elements whose top edge lies in the 200-2100 band.
                    b = el.info.get('bounds', {})
                    if 200 < b.get('top', 0) < 2100:
                        if not any(k in t for k in ['收藏', '点赞', '评论', '分享', '发布于', '说点什么', '条评论']):
                            texts.append(t)
                except Exception:
                    # Best effort: skip elements that fail to answer.
                    continue
            # Keep strategy A's short text when B finds nothing, instead of
            # overwriting it with an empty string.
            if texts:
                content = "\n".join(texts)

        return content

    def _extract_image_urls(self) -> List[str]:
        """Return image URLs obtained through the share sheet's copied link."""
        image_urls = []
        try:
            share_btn = self.device(description="分享")
            if share_btn.exists:
                share_btn.click()
                time.sleep(1.5)
                copy_link_btn = self.device(text="复制链接")
                if copy_link_btn.exists:
                    copy_link_btn.click()
                    time.sleep(0.5)
                    share_link = self.device.clipboard
                    if "http" in str(share_link):
                        image_urls = self._fetch_web_images(share_link)
                else:
                    # No "copy link" entry: leave the share sheet again.
                    self.device.press("back")
        except Exception as e:
            logger.warning(f"⚠ 图片提取异常: {e}")
        return image_urls

    def _extract_likes(self) -> str:
        """Return the like count as text ("0" when nothing is found)."""
        likes = "0"
        try:
            for el in self.device(className="android.widget.TextView"):
                txt = el.get_text() or ""
                if any(c.isdigit() for c in txt):
                    b = el.info.get('bounds', {})
                    # Only text views with top > 2000 and left > 500 count.
                    if b.get('top', 0) > 2000 and b.get('left', 0) > 500:
                        likes = ''.join(c for c in txt if c.isdigit() or c in ['.', 'w', 'W'])
                        if likes:
                            break
        except Exception as e:
            # Best effort, but no longer silent (and no longer swallowing
            # KeyboardInterrupt as the bare ``except`` did).
            logger.warning(f"⚠ 点赞数提取异常: {e}")
        return likes

    def _fetch_web_images(self, url: str) -> List[str]:
        """Download the shared page and collect image URLs from its HTML.

        Args:
            url (str): Share link URL.

        Returns:
            List[str]: De-duplicated image URLs in order of discovery; an
            empty list when the request or parsing fails.
        """
        try:
            headers = {"User-Agent": "Mozilla/5.0 (iPhone; CPU iPhone OS 14_8 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.1.2 Mobile/15E148 Safari/604.1"}
            res = requests.get(url, headers=headers, timeout=10)
            html = res.text
            img_patterns = [
                r'property="og:image" content="(https://[^"]+)"',
                r'"url":"(https://sns-img-[^"]+)"',
                r'"url":"(https://sns-img-qc\.xhscdn\.com/[^"]+)"'
            ]
            found = []
            for pattern in img_patterns:
                for m in re.findall(pattern, html):
                    # Slashes may arrive as the literal text "\u002F".
                    clean_url = m.replace('\\u002F', '/')
                    if clean_url not in found:
                        found.append(clean_url)
            return found
        except Exception as e:
            logger.warning(f"⚠ 图片链接解析失败: {e}")
            return []

    def save_note_data(self, data: Dict[str, Union[str, List[str]]],
                       filename: str = "last_extracted_note.txt",
                       note_url: str = "") -> None:
        """Write the note data to a UTF-8 text report.

        Args:
            data (Dict[str, Union[str, List[str]]]): Note data with the keys
                ``likes``, ``image_urls`` and ``content``.
            filename (str): Path of the file to write (overwritten).
            note_url (str): Optional note URL included in the report.

        Raises:
            Exception: Any error while writing is logged and re-raised.
        """
        try:
            with open(filename, "w", encoding="utf-8") as f:
                f.write("=" * 50 + "\n")
                f.write("【小红书笔记提取结果】\n")
                f.write("=" * 50 + "\n")
                if note_url:
                    f.write(f"笔记URL: {note_url}\n")
                    f.write("=" * 50 + "\n")
                f.write(f"点赞数: {data['likes']}\n")
                f.write(f"图片数: {len(data['image_urls'])}\n")
                f.write("=" * 50 + "\n")
                f.write("【正文内容】\n")
                f.write(data['content'])
                f.write("\n" + "=" * 50 + "\n")
                if data['image_urls']:
                    f.write("【图片URL】\n")
                    for i, url in enumerate(data['image_urls'], 1):
                        f.write(f"{i}. {url}\n")
                    f.write("=" * 50 + "\n")

            logger.info(f"✓ 笔记数据已保存到: {filename}")
        except Exception as e:
            logger.error(f"✗ 保存笔记数据失败: {e}")
            raise
381
+
382
+
383
def extract_note_from_url(url: str, device_serial: Optional[str] = None) -> Dict[str, Union[str, List[str]]]:
    """
    Convenience function: extract note data straight from a URL.

    Builds a fresh ``XHSNoteExtractor`` for the given device and asks it for
    the note behind *url*.

    Args:
        url (str): Xiaohongshu note URL
        device_serial (str, optional): Device serial number

    Returns:
        Dict[str, Union[str, List[str]]]: Note data
    """
    return XHSNoteExtractor(device_serial=device_serial).extract_note_data(url=url)
396
+
397
+
398
def convert_url_format(url: str) -> str:
    """
    Convenience function: convert a URL to the xhsdiscover deep-link format.

    Args:
        url (str): Input URL

    Returns:
        str: The URL rewritten in xhsdiscover protocol format
    """
    parts = XHSNoteExtractor.parse_xhs_url(url)
    note_id, token = parts["note_id"], parts["xsec_token"]
    return XHSNoteExtractor.convert_to_xhsdiscover_format(note_id, token)
@@ -0,0 +1,493 @@
1
+ """
2
+ 小红书工具模块
3
+
4
+ 该模块提供了小红书相关的辅助功能,包括:
5
+ - 设备管理和连接
6
+ - 页面操作和元素查找
7
+ - 数据格式化和验证
8
+ - 错误处理和日志记录
9
+
10
+ 作者: JoyCode Agent
11
+ 版本: 1.0.0
12
+ """
13
+
14
+ import uiautomator2 as u2
15
+ import time
16
+ import re
17
+ import requests
18
+ import logging
19
+ from typing import Dict, List, Optional, Union, Any
20
+ from functools import wraps
21
+
22
# Configure logging.
# NOTE(review): ``logging.basicConfig`` runs at import time, so importing this
# module configures the root logger (INFO level, format below) if the
# application has not configured it already -- confirm this is intended for a
# library module.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
# Module-level logger used by everything in this file.
logger = logging.getLogger(__name__)
28
+
29
+
30
def retry(max_attempts: int = 3, delay: float = 1.0, backoff: float = 2.0):
    """
    Decorator factory that retries a function when it raises.

    The wrapped function is called up to ``max_attempts`` times. After each
    failed attempt except the last, the wrapper logs a warning, sleeps, and
    multiplies the sleep time by ``backoff``. The exception of the final
    attempt is logged and re-raised.

    Args:
        max_attempts (int): Total number of attempts; must be at least 1
        delay (float): Sleep before the first retry, in seconds
        backoff (float): Factor applied to the sleep after every retry

    Raises:
        ValueError: If ``max_attempts`` is smaller than 1. Previously such a
            value made the wrapper return None without ever calling the
            wrapped function.
    """
    if max_attempts < 1:
        raise ValueError("max_attempts must be at least 1")

    def decorator(func):
        @wraps(func)
        def wrapper(*args, **kwargs):
            current_delay = delay

            for attempt in range(1, max_attempts + 1):
                try:
                    return func(*args, **kwargs)
                except Exception as e:
                    if attempt >= max_attempts:
                        logger.error(f"函数 {func.__name__} 在 {max_attempts} 次尝试后仍然失败: {e}")
                        raise

                    logger.warning(f"函数 {func.__name__} 第 {attempt} 次尝试失败: {e},{current_delay}秒后重试...")
                    time.sleep(current_delay)
                    current_delay *= backoff

        return wrapper
    return decorator
61
+
62
+
63
class DeviceManager:
    """Device management helpers."""

    @staticmethod
    def connect_device(device_serial: Optional[str] = None) -> u2.Device:
        """
        Connect to a device.

        Args:
            device_serial (str, optional): Device serial number; when falsy,
                ``u2.connect()`` is called without arguments

        Returns:
            u2.Device: The connected device object

        Raises:
            Exception: Any connection error is logged and re-raised
        """
        try:
            if not device_serial:
                device = u2.connect()
                logger.info(f"✓ 已连接设备: {device.serial}")
                return device
            device = u2.connect(device_serial)
            logger.info(f"✓ 已连接指定设备: {device.serial}")
            return device
        except Exception as exc:
            logger.error(f"✗ 设备连接失败: {exc}")
            raise

    @staticmethod
    def check_device_status(device: u2.Device) -> Dict[str, Any]:
        """
        Collect status information about a device.

        Args:
            device (u2.Device): Device object

        Returns:
            Dict[str, Any]: Serial, status, SDK version, screen size and
            battery data; on failure a dict with ``status`` "error" and the
            error text
        """
        try:
            details = device.info
            status: Dict[str, Any] = {}
            status["serial"] = device.serial
            status["status"] = "connected"
            status["sdk_version"] = details.get('sdkInt', 'unknown')
            status["screen_size"] = f"{details.get('displayWidth', 0)}x{details.get('displayHeight', 0)}"
            status["battery"] = details.get('battery', {})
            return status
        except Exception as exc:
            logger.error(f"✗ 获取设备状态失败: {exc}")
            return {"status": "error", "error": str(exc)}
112
+
113
+
114
class ElementFinder:
    """Element lookup helpers bound to one device."""

    def __init__(self, device: u2.Device):
        """
        Create the finder.

        Args:
            device (u2.Device): Device object used for all lookups
        """
        self.device = device

    @retry(max_attempts=3, delay=0.5)
    def find_element_by_text(self, text: str, timeout: float = 5.0) -> Optional[u2.UiObject]:
        """
        Look an element up by its text.

        Args:
            text (str): Text to look for
            timeout (float): Time passed to the selector's ``wait``, in seconds

        Returns:
            Optional[u2.UiObject]: The selector if ``wait`` succeeded, else None
        """
        candidate = self.device(text=text)
        return candidate if candidate.wait(timeout=timeout) else None

    @retry(max_attempts=3, delay=0.5)
    def find_element_by_description(self, description: str, timeout: float = 5.0) -> Optional[u2.UiObject]:
        """
        Look an element up by its description.

        Args:
            description (str): Description to look for
            timeout (float): Time passed to the selector's ``wait``, in seconds

        Returns:
            Optional[u2.UiObject]: The selector if ``wait`` succeeded, else None
        """
        candidate = self.device(description=description)
        return candidate if candidate.wait(timeout=timeout) else None

    @retry(max_attempts=3, delay=0.5)
    def find_element_by_resource_id(self, resource_id: str, timeout: float = 5.0) -> Optional[u2.UiObject]:
        """
        Look an element up by its resource id.

        Args:
            resource_id (str): Resource id
            timeout (float): Time passed to the selector's ``wait``, in seconds

        Returns:
            Optional[u2.UiObject]: The selector if ``wait`` succeeded, else None
        """
        candidate = self.device(resourceId=resource_id)
        return candidate if candidate.wait(timeout=timeout) else None

    def wait_for_element(self, condition_func, timeout: float = 10.0, check_interval: float = 0.5) -> bool:
        """
        Poll a condition until it holds or the timeout elapses.

        Args:
            condition_func: Callable returning a truthy value once the
                element is present
            timeout (float): Maximum time to poll, in seconds
            check_interval (float): Sleep between two checks, in seconds

        Returns:
            bool: True if the condition became truthy in time, else False
        """
        began = time.time()
        while True:
            if not (time.time() - began < timeout):
                return False
            if condition_func():
                return True
            time.sleep(check_interval)
195
+
196
+
197
class DataFormatter:
    """Data formatting helpers."""

    @staticmethod
    def format_like_count(like_text: str) -> str:
        """
        Normalize a like-count string.

        The first number in the text is used. A ``w``/``W``/``万`` suffix
        multiplies it by 10000 and yields an integer string; without a suffix
        the number is returned as written. Thousands separators between digit
        groups ("1,234") are removed first.

        Args:
            like_text (str): Raw like-count text

        Returns:
            str: Normalized like count, "0" when the text is empty or
            contains no number
        """
        if not like_text:
            return "0"

        text = str(like_text)
        # "1,234" used to be cut off at the comma and reported as "1".
        text = re.sub(r'(?<=\d)[,,](?=\d{3}(?!\d))', '', text)

        # A well-formed decimal (or ".5") followed by an optional unit; the
        # former ``[\d.]+`` also accepted junk such as "1.2.3" or "...".
        match = re.search(r'(\d+(?:\.\d+)?|\.\d+)\s*([wW万]?)', text)
        if not match:
            return "0"

        number, unit = match.group(1), match.group(2)
        if not unit:
            return number

        # Scale by 10000 with integer arithmetic so the result never depends
        # on float rounding; digits beyond four decimals are truncated, as
        # ``int()`` did before.
        whole, _, fraction = number.partition('.')
        scaled = int(whole or "0") * 10000 + int((fraction + "0000")[:4])
        return str(scaled)

    @staticmethod
    def extract_image_urls_from_html(html: str) -> List[str]:
        """
        Extract image URLs from HTML.

        Args:
            html (str): HTML content

        Returns:
            List[str]: De-duplicated image URLs in order of discovery
        """
        img_patterns = [
            r'property="og:image" content="(https://[^"]+)"',
            r'"url":"(https://sns-img-[^"]+)"',
            r'"url":"(https://sns-img-qc\.xhscdn\.com/[^"]+)"',
            r'data-src="(https://[^"]+)"',
            r'src="(https://[^"]+\.(?:jpg|jpeg|png|gif))"'
        ]

        found_urls = []
        seen = set()  # O(1) duplicate check; the list keeps the order
        for pattern in img_patterns:
            for match in re.findall(pattern, html):
                # Slashes may arrive as the literal text "\u002F" or "\/".
                clean_url = match.replace('\\u002F', '/').replace('\\/', '/')
                if clean_url not in seen:
                    seen.add(clean_url)
                    found_urls.append(clean_url)

        return found_urls

    @staticmethod
    def clean_text_content(text: str) -> str:
        """
        Clean up text content.

        Collapses every whitespace run into one space, removes characters
        outside the allowed set of the second pattern, and strips the ends.

        Args:
            text (str): Raw text

        Returns:
            str: Cleaned text, "" for empty input
        """
        if not text:
            return ""

        # Collapse runs of whitespace (including newlines) into one space.
        text = re.sub(r'\s+', ' ', text)
        # Remove special characters.
        # NOTE(review): the ``''`` inside the pattern ends one string literal
        # and starts the next, so the two pieces are concatenated and no
        # single-quote character is part of the class -- confirm intent.
        text = re.sub(r'[^\w\s\u4e00-\u9fff,。!?;:""''()【】]', '', text)
        # Strip leading and trailing spaces.
        text = text.strip()

        return text
285
+
286
+
287
class NetworkUtils:
    """Network helpers."""

    @staticmethod
    @retry(max_attempts=3, delay=1.0)
    def fetch_html(url: str, headers: Optional[Dict[str, str]] = None, timeout: int = 10) -> str:
        """
        Fetch the HTML of a web page.

        Args:
            url (str): Target URL
            headers (dict, optional): Extra request headers; they override
                the default mobile User-Agent header on key collision
            timeout (int): Request timeout in seconds

        Returns:
            str: HTML content

        Raises:
            requests.RequestException: If the request fails or the response
                status is an error (after the retries are used up)
        """
        default_headers = {
            "User-Agent": "Mozilla/5.0 (iPhone; CPU iPhone OS 14_8 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.1.2 Mobile/15E148 Safari/604.1"
        }

        if headers:
            default_headers.update(headers)

        response = requests.get(url, headers=default_headers, timeout=timeout)
        response.raise_for_status()

        return response.text

    @staticmethod
    def is_valid_url(url: str) -> bool:
        """
        Check that a URL has both a scheme and a network location.

        Args:
            url (str): URL to validate

        Returns:
            bool: Whether the URL is valid
        """
        try:
            from urllib.parse import urlparse
            result = urlparse(url)
            return all([result.scheme, result.netloc])
        except Exception:
            return False

    @staticmethod
    def is_valid_xhs_url(url: str) -> bool:
        """
        Check that a URL points to a Xiaohongshu note page.

        The URL needs a scheme, one of the known Xiaohongshu host names and a
        path containing ``/explore/`` or ``/discovery/item/``.

        Args:
            url (str): Xiaohongshu note URL to validate

        Returns:
            bool: Whether the URL is a valid Xiaohongshu note URL
        """
        try:
            from urllib.parse import urlparse
            result = urlparse(url)

            # Must be a complete URL.
            if not all([result.scheme, result.netloc]):
                return False

            # Compare the host name only: ``netloc`` also carries an optional
            # port and user info and keeps the original letter case, which
            # made "WWW.xiaohongshu.com" or "www.xiaohongshu.com:443" fail.
            host = (result.hostname or "").lower()
            valid_domains = ['xiaohongshu.com', 'www.xiaohongshu.com', 'm.xiaohongshu.com']
            if host not in valid_domains:
                return False

            # Look for the note markers in the path, not in the whole URL,
            # so a marker inside the query string no longer passes.
            path = result.path
            if '/explore/' not in path and '/discovery/item/' not in path:
                return False

            return True
        except Exception:
            return False
368
+
369
+
370
class FileManager:
    """File helpers."""

    @staticmethod
    def save_data_to_file(data: str, filename: str, encoding: str = "utf-8") -> bool:
        """
        Write text to a file, replacing any existing content.

        Args:
            data (str): Text to store
            filename (str): Path of the file
            encoding (str): File encoding

        Returns:
            bool: True when the write succeeded, False when it raised
        """
        try:
            with open(filename, "w", encoding=encoding) as sink:
                sink.write(data)
            logger.info(f"✓ 数据已保存到: {filename}")
            return True
        except Exception as exc:
            logger.error(f"✗ 保存数据失败: {exc}")
            return False

    @staticmethod
    def load_data_from_file(filename: str, encoding: str = "utf-8") -> Optional[str]:
        """
        Read the whole content of a text file.

        Args:
            filename (str): Path of the file
            encoding (str): File encoding

        Returns:
            Optional[str]: File content, or None when reading raised
        """
        try:
            with open(filename, "r", encoding=encoding) as source:
                content = source.read()
            return content
        except Exception as exc:
            logger.error(f"✗ 加载数据失败: {exc}")
            return None
413
+
414
+
415
class XHSUtils:
    """Xiaohongshu helpers kept for compatibility with the older interface."""

    @staticmethod
    def get_detail_data(device: u2.Device) -> Dict[str, Union[str, List[str]]]:
        """
        Extract text, images and like count from the note page that is
        currently open on the device.

        Kept for backward compatibility; the work is done by
        ``XHSNoteExtractor._get_detail_data`` in extractor.py.

        Args:
            device (u2.Device): Device object

        Returns:
            Dict[str, Union[str, List[str]]]: Dictionary with the note data
        """
        # Imported here rather than at module level.
        from .extractor import XHSNoteExtractor

        # Allocate the extractor without running __init__ (which would open
        # its own device connection) and hand it the caller's device.
        shell = XHSNoteExtractor.__new__(XHSNoteExtractor)
        shell.device = device
        return shell._get_detail_data()
439
+
440
+
441
+ # 便捷函数
442
def connect_device(device_serial: Optional[str] = None) -> u2.Device:
    """
    Convenience function: connect to a device.

    Delegates to ``DeviceManager.connect_device``.

    Args:
        device_serial (str, optional): Device serial number

    Returns:
        u2.Device: Device object
    """
    return DeviceManager.connect_device(device_serial)
453
+
454
+
455
def format_like_count(like_text: str) -> str:
    """
    Convenience function: normalize a like-count string.

    Delegates to ``DataFormatter.format_like_count``.

    Args:
        like_text (str): Raw like-count text

    Returns:
        str: Normalized like count
    """
    return DataFormatter.format_like_count(like_text)
466
+
467
+
468
def extract_image_urls_from_html(html: str) -> List[str]:
    """
    Convenience function: extract image URLs from HTML.

    Delegates to ``DataFormatter.extract_image_urls_from_html``.

    Args:
        html (str): HTML content

    Returns:
        List[str]: List of image URLs
    """
    return DataFormatter.extract_image_urls_from_html(html)
479
+
480
+
481
def fetch_html(url: str, headers: Optional[Dict[str, str]] = None, timeout: int = 10) -> str:
    """
    Convenience function: fetch the HTML of a web page.

    Delegates to ``NetworkUtils.fetch_html``.

    Args:
        url (str): Target URL
        headers (dict, optional): Request headers
        timeout (int): Timeout in seconds

    Returns:
        str: HTML content
    """
    return NetworkUtils.fetch_html(url, headers, timeout)
@@ -0,0 +1,234 @@
1
+ Metadata-Version: 2.4
2
+ Name: xhs-note-extractor
3
+ Version: 0.1.dev2
4
+ Summary: A Python package for extracting Xiaohongshu (Little Red Book) note data from URLs
5
+ Author-email: JoyCode Agent <agent@joycode.com>
6
+ License: MIT
7
+ Project-URL: Homepage, https://github.com/yehao20/xhs-note-extractor
8
+ Project-URL: Repository, https://github.com/yehao20/xhs-note-extractor
9
+ Project-URL: Documentation, https://github.com/yehao20/xhs-note-extractor/blob/main/README.md
10
+ Project-URL: Issues, https://github.com/yehao20/xhs-note-extractor/issues
11
+ Keywords: xiaohongshu,little-red-book,web-scraping,automation,uiautomator
12
+ Classifier: Development Status :: 4 - Beta
13
+ Classifier: Intended Audience :: Developers
14
+ Classifier: License :: OSI Approved :: MIT License
15
+ Classifier: Operating System :: OS Independent
16
+ Classifier: Programming Language :: Python :: 3
17
+ Classifier: Programming Language :: Python :: 3.8
18
+ Classifier: Programming Language :: Python :: 3.9
19
+ Classifier: Programming Language :: Python :: 3.10
20
+ Classifier: Programming Language :: Python :: 3.11
21
+ Classifier: Programming Language :: Python :: 3.12
22
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
23
+ Classifier: Topic :: Internet :: WWW/HTTP
24
+ Classifier: Topic :: Utilities
25
+ Requires-Python: >=3.8
26
+ Description-Content-Type: text/markdown
27
+ License-File: LICENSE
28
+ Requires-Dist: uiautomator2>=2.16.17
29
+ Requires-Dist: requests>=2.25.0
30
+ Provides-Extra: dev
31
+ Requires-Dist: pytest>=6.0; extra == "dev"
32
+ Requires-Dist: pytest-cov>=2.0; extra == "dev"
33
+ Requires-Dist: black>=21.0; extra == "dev"
34
+ Requires-Dist: flake8>=3.8; extra == "dev"
35
+ Dynamic: license-file
36
+
37
+ # 小红书笔记提取器 (Xiaohongshu Note Extractor)
38
+
39
+ 一个用于从小红书提取笔记数据的Python工具,支持命令行界面和编程接口。
40
+
41
+ ## 功能特性
42
+
43
+ - 🔍 从小红书笔记URL提取详细数据
44
+ - 📊 支持JSON和CSV输出格式
45
+ - 🖥️ 命令行界面支持
46
+ - 🔧 可配置的设备连接选项
47
+ - 📱 Android设备集成(通过uiautomator2)
48
+ - 🛡️ 优雅的错误处理和设备状态检查
49
+
50
+ ## 安装
51
+
52
+ ### 从源码安装
53
+
54
+ ```bash
55
+ # 克隆仓库
56
+ git clone https://github.com/yehao20/xhs-note-extractor.git
57
+ cd xhs-note-extractor
58
+
59
+ # 安装依赖
60
+ pip install -r requirements.txt
61
+
62
+ # 安装包(开发模式)
63
+ pip install -e .
64
+ ```
65
+
66
+ ### 依赖要求
67
+
68
+ - Python 3.8+
69
+ - Android设备(用于完整功能)
70
+ - ADB工具
71
+
72
+ ## 使用方法
73
+
74
+ ### 命令行界面(CLI)
75
+
76
+ 安装完成后,可以直接使用 `xhs-extract` 命令:
77
+
78
+ ```bash
79
+ # 提取笔记并输出到控制台(JSON格式)
80
+ xhs-extract https://www.xiaohongshu.com/explore/note_id
81
+
82
+ # 保存到文件
83
+ xhs-extract https://www.xiaohongshu.com/explore/note_id -o note_data.json
84
+
85
+ # 输出CSV格式
86
+ xhs-extract https://www.xiaohongshu.com/explore/note_id -f csv -o note_data.csv
87
+
88
+ # 启用详细输出模式
89
+ xhs-extract https://www.xiaohongshu.com/explore/note_id -v
90
+
91
+ # 查看帮助
92
+ xhs-extract --help
93
+ ```
94
+
95
+ ### 编程接口
96
+
97
+ ```python
98
+ from xhs_note_extractor import XHSNoteExtractor
99
+ import json
100
+
101
+ # 创建提取器实例
102
+ extractor = XHSNoteExtractor()
103
+
104
+ # 检查设备连接状态
105
+ if extractor.device is not None:
106
+ # 提取笔记数据
107
+ note_data = extractor.extract_note_data("https://www.xiaohongshu.com/explore/note_id")
108
+ print(json.dumps(note_data, ensure_ascii=False, indent=2))
109
+ else:
110
+ print("请连接Android设备并启用USB调试")
111
+ ```
112
+
113
+ ## 输出数据结构
114
+
115
+ 提取的数据包含以下字段(注意:当前版本实际只返回 `content`、`image_urls` 和 `likes` 三个字段,且 `likes` 为字符串;其余字段尚未实现):
116
+
117
+ ```json
118
+ {
119
+ "title": "笔记标题",
120
+ "content": "笔记完整内容",
121
+ "author": {
122
+ "nickname": "作者昵称",
123
+ "user_id": "用户ID"
124
+ },
125
+ "likes": 100,
126
+ "collects": 50,
127
+ "comments": 25,
128
+ "shares": 10,
129
+ "image_urls": [
130
+ "图片URL1",
131
+ "图片URL2"
132
+ ],
133
+ "video_url": "视频URL(如果有)",
134
+ "tags": ["标签1", "标签2"],
135
+ "publish_time": "发布时间",
136
+ "note_id": "笔记ID"
137
+ }
138
+ ```
139
+
140
+ ## 设备连接
141
+
142
+ ### 连接Android设备
143
+
144
+ 1. 在Android设备上启用**开发者选项**和**USB调试**
145
+ 2. 通过USB连接设备到电脑
146
+ 3. 授权USB调试权限(设备上会弹出提示)
147
+
148
+ ### 检查设备状态
149
+
150
+ ```bash
151
+ # 使用ADB检查设备
152
+ adb devices
153
+
154
+ # 使用CLI工具检查
155
+ xhs-extract --help  # 显示命令行用法(不会检查设备连接状态)
156
+ ```
157
+
158
+ ## 故障排除
159
+
160
+ ### 设备连接问题
161
+
162
+ 如果CLI工具提示设备未连接:
163
+
164
+ 1. 检查USB连接是否正常
165
+ 2. 确认已在设备上启用USB调试
166
+ 3. 确认已授权USB调试权限
167
+ 4. 尝试重新插拔USB线缆
168
+ 5. 重启ADB服务:
169
+ ```bash
170
+ adb kill-server
171
+ adb start-server
172
+ ```
173
+
174
+ ### 权限问题
175
+
176
+ 在Linux/Mac上,可能需要为ADB添加权限:
177
+
178
+ ```bash
179
+ sudo adb kill-server
180
+ sudo adb start-server
181
+ ```
182
+
183
+ ## 示例
184
+
185
+ 查看 `examples/basic_usage.py` 文件获取更多使用示例:
186
+
187
+ ```bash
188
+ # 运行示例
189
+ python examples/basic_usage.py
190
+ ```
191
+
192
+ ## 开发
193
+
194
+ ### 项目结构
195
+
196
+ ```
197
+ xhs-note-extractor/
198
+ ├── xhs_note_extractor/
199
+ │ ├── __init__.py
200
+ │ ├── cli.py # 命令行界面
201
+ │ ├── extractor.py # 核心提取器
202
+ │ └── utils.py # 工具函数
203
+ ├── examples/
204
+ │ └── basic_usage.py # 使用示例
205
+ ├── tests/
206
+ ├── requirements.txt
207
+ ├── setup.py
208
+ └── README.md
209
+ ```
210
+
211
+ ### 运行测试
212
+
213
+ ```bash
214
+ # 运行示例
215
+ python examples/basic_usage.py
216
+
217
+ # 使用CLI工具
218
+ xhs-extract --help
219
+ ```
220
+
221
+ ## 注意事项
222
+
223
+ - 本工具仅供学习和研究使用
224
+ - 请遵守小红书的使用条款和API限制
225
+ - 过度频繁的请求可能导致IP被封禁
226
+ - 建议在合理范围内使用,避免对平台造成负担
227
+
228
+ ## 许可证
229
+
230
+ MIT License
231
+
232
+ ## 贡献
233
+
234
+ 欢迎提交Issue和Pull Request!
@@ -0,0 +1,11 @@
1
+ xhs_note_extractor/__init__.py,sha256=CjHdqO4W5sj6zbeE7xYkR0_WRfc99G56nR6k2Kmji44,1207
2
+ xhs_note_extractor/_version.py,sha256=59jjKBtTUi_9u6FVZcIpQEDYjyAaqdxzqXyuRuFYKPE,720
3
+ xhs_note_extractor/cli.py,sha256=F5phl4HqnzEe_vTS8vpio_KcZNx4cxmXJnYcQ1FgMbA,2693
4
+ xhs_note_extractor/extractor.py,sha256=Afl-VzMmuRSk82BbAROpIAB6g1BSpC3yRbDJTrO0NCM,14964
5
+ xhs_note_extractor/utils.py,sha256=mOVoLknlflzv7aCjXdmeNniQ7P6WNUcjSKjCm8uwFNk,14364
6
+ xhs_note_extractor-0.1.dev2.dist-info/licenses/LICENSE,sha256=VFtWajKKKkgOoX3cMb2upEjsQmDpU85ymhom2bYY_oI,1069
7
+ xhs_note_extractor-0.1.dev2.dist-info/METADATA,sha256=ohcbdSTJ-ms66W5xBEqXIhczNN_7lpk4mqsvpL1fF0g,5525
8
+ xhs_note_extractor-0.1.dev2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
9
+ xhs_note_extractor-0.1.dev2.dist-info/entry_points.txt,sha256=1IG34snKfX2pzpLSeDXHqlSVSH8p7bf3eaKQfcwGDk4,60
10
+ xhs_note_extractor-0.1.dev2.dist-info/top_level.txt,sha256=at3SqTdQr3DWMFCL5KM0Ofo_LE88WqADjh8MeFLwwO0,19
11
+ xhs_note_extractor-0.1.dev2.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (80.9.0)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ xhs-extract = xhs_note_extractor.cli:main
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 JoyCode Agent
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1 @@
1
+ xhs_note_extractor