web-novel-scraper 1.0.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36)
  1. web_novel_scraper-1.0.2/.github/scripts/update_version.py +65 -0
  2. web_novel_scraper-1.0.2/.github/workflows/build.yaml +31 -0
  3. web_novel_scraper-1.0.2/.github/workflows/publish.yaml +88 -0
  4. web_novel_scraper-1.0.2/.github/workflows/test.yaml +21 -0
  5. web_novel_scraper-1.0.2/.gitignore +15 -0
  6. web_novel_scraper-1.0.2/.readthedocs.yaml +25 -0
  7. web_novel_scraper-1.0.2/PKG-INFO +231 -0
  8. web_novel_scraper-1.0.2/README.md +212 -0
  9. web_novel_scraper-1.0.2/docs/Makefile +20 -0
  10. web_novel_scraper-1.0.2/docs/make.bat +35 -0
  11. web_novel_scraper-1.0.2/docs/requirements.txt +3 -0
  12. web_novel_scraper-1.0.2/docs/source/README.rst +255 -0
  13. web_novel_scraper-1.0.2/docs/source/_static/README.md +1 -0
  14. web_novel_scraper-1.0.2/docs/source/commands/chapters_commands.rst +17 -0
  15. web_novel_scraper-1.0.2/docs/source/commands/creation_commands.rst +41 -0
  16. web_novel_scraper-1.0.2/docs/source/commands/index.rst +14 -0
  17. web_novel_scraper-1.0.2/docs/source/commands/output_commands.rst +8 -0
  18. web_novel_scraper-1.0.2/docs/source/commands/toc_commands.rst +25 -0
  19. web_novel_scraper-1.0.2/docs/source/commands/utils_commands.rst +16 -0
  20. web_novel_scraper-1.0.2/docs/source/concepts.rst +178 -0
  21. web_novel_scraper-1.0.2/docs/source/conf.py +41 -0
  22. web_novel_scraper-1.0.2/docs/source/config_options.rst +33 -0
  23. web_novel_scraper-1.0.2/docs/source/index.rst +23 -0
  24. web_novel_scraper-1.0.2/docs/source/tutorial.rst +115 -0
  25. web_novel_scraper-1.0.2/pyproject.toml +44 -0
  26. web_novel_scraper-1.0.2/requirements.txt +7 -0
  27. web_novel_scraper-1.0.2/web_novel_scraper/__init__.py +0 -0
  28. web_novel_scraper-1.0.2/web_novel_scraper/__main__.py +430 -0
  29. web_novel_scraper-1.0.2/web_novel_scraper/decode.py +141 -0
  30. web_novel_scraper-1.0.2/web_novel_scraper/decode_guide/decode_guide.json +213 -0
  31. web_novel_scraper-1.0.2/web_novel_scraper/file_manager.py +292 -0
  32. web_novel_scraper-1.0.2/web_novel_scraper/logger_manager.py +72 -0
  33. web_novel_scraper-1.0.2/web_novel_scraper/novel_scraper.py +723 -0
  34. web_novel_scraper-1.0.2/web_novel_scraper/request_manager.py +135 -0
  35. web_novel_scraper-1.0.2/web_novel_scraper/utils.py +66 -0
  36. web_novel_scraper-1.0.2/web_novel_scraper/version.py +1 -0
@@ -0,0 +1,65 @@ .github/scripts/update_version.py
+ import sys
+ import re
+
+ def update_version(file_path, increment):
+     """
+     Updates the version in the specified file.
+
+     Args:
+         file_path (str): Path to the file containing the version.
+         increment (str): The type of increment: "patch", "minor", or "major".
+     """
+     try:
+         # Read the file
+         with open(file_path, "r", encoding="utf-8") as file:
+             content = file.read()
+
+         # Find the current version using regex
+         match = re.search(r'__version__ = "(\d+)\.(\d+)\.(\d+)"', content)
+         if not match:
+             raise ValueError("Version not found in the file.")
+
+         # Extract MAJOR, MINOR, and PATCH values
+         major, minor, patch = map(int, match.groups())
+
+         # Increment the appropriate part
+         if increment == "major":
+             major += 1
+             minor = 0
+             patch = 0
+         elif increment == "minor":
+             minor += 1
+             patch = 0
+         elif increment == "patch":
+             patch += 1
+         else:
+             raise ValueError("Increment type must be 'major', 'minor', or 'patch'.")
+
+         # Generate the new version
+         new_version = f'{major}.{minor}.{patch}'
+
+         # Update the file content with the new version
+         updated_content = re.sub(r'__version__ = "(\d+)\.(\d+)\.(\d+)"',
+                                  f'__version__ = "{new_version}"',
+                                  content)
+
+         # Write the updated content back to the file
+         with open(file_path, "w", encoding="utf-8") as file:
+             file.write(updated_content)
+
+         print(new_version)  # Print the new version for the workflow
+
+     except Exception as e:
+         print(f"Error updating the version: {e}")
+         sys.exit(1)
+
+
+ if __name__ == "__main__":
+     if len(sys.argv) != 3:
+         print("Usage: python update_version.py <file_path> <increment>")
+         print("Example: python update_version.py web_novel_scraper/version.py patch")
+         sys.exit(1)
+
+     file_path = sys.argv[1]
+     increment = sys.argv[2].lower()
+     update_version(file_path, increment)
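
The publish workflow (`.github/workflows/publish.yaml`, below) captures this script's stdout, so on success the script must print only the new version string and nothing else. A minimal local smoke test of that contract, assuming it is run from the repository root (the temporary file stands in for `web_novel_scraper/version.py`; this check is illustrative, not part of the package):

```python
# Hypothetical local check of the bump script's stdout contract.
import pathlib
import subprocess
import tempfile

with tempfile.TemporaryDirectory() as tmp:
    version_file = pathlib.Path(tmp) / "version.py"
    version_file.write_text('__version__ = "1.0.2"\n', encoding="utf-8")
    result = subprocess.run(
        ["python", ".github/scripts/update_version.py", str(version_file), "patch"],
        capture_output=True, text=True, check=True,
    )
    assert result.stdout.strip() == "1.0.3"  # stdout carries only the new version
    assert '__version__ = "1.0.3"' in version_file.read_text(encoding="utf-8")
```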
@@ -0,0 +1,31 @@ .github/workflows/build.yaml
+ name: Build Artifacts
+ on:
+   [workflow_call, workflow_dispatch]
+ permissions:
+   contents: read
+
+ jobs:
+   release-build:
+     runs-on: ubuntu-latest
+
+     steps:
+       - uses: actions/checkout@v4
+         with:
+           ref: main
+       - uses: actions/setup-python@v5
+         with:
+           python-version: "3.x"
+
+       - name: Build release distributions
+         run: |
+           python -m pip install build hatchling
+           python -m build
+
+       - name: Verify build artifacts
+         run: test -n "$(ls -A dist)" || exit 1
+
+       - name: Upload distributions
+         uses: actions/upload-artifact@v4
+         with:
+           name: release-dists
+           path: dist/
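
The `Verify build artifacts` step fails the job when `dist/` is empty. For readers less familiar with the `test -n "$(ls -A dist)"` shell idiom, an equivalent check in Python (illustrative only, not part of the workflow):

```python
# Equivalent of: test -n "$(ls -A dist)" || exit 1
from pathlib import Path

dist = Path("dist")
if not dist.is_dir() or not any(dist.iterdir()):
    raise SystemExit("dist/ is missing or empty: the build produced no artifacts")
```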
@@ -0,0 +1,88 @@ .github/workflows/publish.yaml
+ name: Test, Build and Publish
+
+ on:
+   workflow_dispatch:
+     inputs:
+       increment:
+         description: "Version increment type (PATCH, MINOR, MAJOR)"
+         required: false
+         default: "PATCH"
+         type: choice
+         options:
+           - "PATCH"
+           - "MINOR"
+           - "MAJOR"
+       publish_to_pypi:
+         description: "Publish to PyPI (yes or no)"
+         required: false
+         default: true
+         type: boolean
+
+ permissions:
+   contents: write
+   id-token: write
+
+ jobs:
+   run-tests:
+     uses: ./.github/workflows/test.yaml
+
+   bump-version:
+     needs: run-tests
+     runs-on: ubuntu-latest
+     permissions:
+       contents: write
+     steps:
+       - name: Checkout code
+         uses: actions/checkout@v4
+
+       - name: Set up Python
+         uses: actions/setup-python@v5
+         with:
+           python-version: "3.x"
+
+       - name: Bump version
+         run: |
+           NEW_VERSION=$(python .github/scripts/update_version.py web_novel_scraper/version.py ${{ inputs.increment }})
+           echo "NEW_VERSION=$NEW_VERSION" >> $GITHUB_ENV
+
+       - name: Commit updated version
+         run: |
+           git config --global user.name "github-actions[bot]"
+           git config --global user.email "github-actions[bot]@users.noreply.github.com"
+           git add web_novel_scraper/version.py
+           git commit -m "Bump version to ${{ env.NEW_VERSION }}"
+           git push
+
+       - name: Tag the new version
+         run: |
+           git tag -a "${{ env.NEW_VERSION }}" -m "Version ${{ env.NEW_VERSION }}"
+           git push origin "${{ env.NEW_VERSION }}"
+
+   build:
+     needs: bump-version
+     uses: ./.github/workflows/build.yaml
+     permissions:
+       contents: read
+       packages: write
+       actions: write
+
+   pypi-publish:
+     needs: build
+     if: ${{ inputs.publish_to_pypi }}
+     runs-on: ubuntu-latest
+     permissions:
+       id-token: write
+     environment:
+       name: pypi
+       url: https://pypi.org/p/web-novel-scraper
+     steps:
+       - name: Retrieve release distributions
+         uses: actions/download-artifact@v4
+         with:
+           name: release-dists
+           path: dist/
+
+       - name: Publish release distributions to PyPI
+         uses: pypa/gh-action-pypi-publish@release/v1
+         with:
+           packages-dir: dist/
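
The `Bump version` step hands the new version to later steps by appending a `KEY=value` line to the file that `$GITHUB_ENV` points at; `${{ env.NEW_VERSION }}` then resolves in the commit and tag steps. The same handoff sketched from Python (the helper name is illustrative, not part of this package):

```python
import os

def export_github_env(key: str, value: str) -> None:
    """Append KEY=value to the $GITHUB_ENV file; later steps in the
    same job see it as a regular environment variable."""
    with open(os.environ["GITHUB_ENV"], "a", encoding="utf-8") as env_file:
        env_file.write(f"{key}={value}\n")

# e.g. export_github_env("NEW_VERSION", "1.0.3")
```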
@@ -0,0 +1,21 @@ .github/workflows/test.yaml
+ name: Run tests
+ on:
+   [workflow_call, workflow_dispatch]
+ permissions:
+   contents: read
+
+ jobs:
+   run-tests:
+     runs-on: ubuntu-latest
+
+     steps:
+       - uses: actions/checkout@v4
+
+       - uses: actions/setup-python@v5
+         with:
+           python-version: "3.x"
+
+       - name: Install the package and smoke-test the CLI
+         run: |
+           python -m pip install .
+           web-novel-scraper --help
@@ -0,0 +1,15 @@ .gitignore
+ **.jpg
+ **.png
+ **.html
+ **/tmp
+ **/output
+ **.epub
+ **/__pycache__
+ test.py
+ **/.env
+ **.txt
+ !requirements.txt
+ **/novelas
+ web_novel_scraper/web_novel_scraper**
+ dist
+ docs/build
@@ -0,0 +1,25 @@ .readthedocs.yaml
+ # Read the Docs configuration file
+ # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details
+
+ # Required
+ version: 2
+
+ # Set the OS, Python version, and other tools you might need
+ build:
+   os: ubuntu-24.04
+   tools:
+     python: "3.13"
+
+ # Build documentation in the "docs/source" directory with Sphinx
+ sphinx:
+   configuration: docs/source/conf.py
+
+ # Optional but recommended:
+ # declare the Python requirements required to build your documentation
+ # See https://docs.readthedocs.io/en/stable/guides/reproducible-builds.html
+ python:
+   install:
+     - requirements: docs/requirements.txt
+     # We need to install the requirements to autogenerate the click docs
+     - requirements: requirements.txt
+
@@ -0,0 +1,231 @@ PKG-INFO
+ Metadata-Version: 2.4
+ Name: web-novel-scraper
+ Version: 1.0.2
+ Summary: Python tool that allows you to scrape web novels from various sources and save them to more readable formats like EPUB.
+ Project-URL: Homepage, https://github.com/ImagineBrkr/web-novel-scraper
+ Project-URL: Documentation, https://web-novel-scraper.readthedocs.io
+ Project-URL: Repository, https://github.com/ImagineBrkr/web-novel-scraper.git
+ Author-email: ImagineBrkr <salvattore_25@hotmail.com>
+ Keywords: Novel Downloader,Scraper,Web Novel,Web Novel Downloader,Web Novel Scraper
+ Requires-Python: >=3.10
+ Requires-Dist: bs4>=0.0.2
+ Requires-Dist: click<9,>=8.0
+ Requires-Dist: dataclasses-json<1,>=0.6.7
+ Requires-Dist: ebooklib<1,>=0.18
+ Requires-Dist: platformdirs
+ Requires-Dist: python-dotenv
+ Requires-Dist: requests
+ Description-Content-Type: text/markdown
+
+ # Web Novel scraper CLI
+
+ ## Table of Contents
+ - [Introduction](#introduction)
+ - [Installation](#installation)
+ - [Basic Concepts](#basic-concepts)
+ - [Commands](#commands)
+ - [Basic Examples](#basic-examples)
+
+
+ ## Introduction
+ This tool allows you to scrape web novels from various sources. I made it because my hands hurt from scrolling too much.
+
+ ## Installation
+ To install the Web Novel Scraping CLI, you can use pip:
+
+ ```bash
+ pip install web-novel-scraper
+ ```
+ Or you can install it manually:
+
+ 1. Clone the repository:
+    ```bash
+    git clone https://github.com/ImagineBrkr/web-novel-scraper.git
+    ```
+ 2. Navigate to the project directory:
+    ```bash
+    cd web-novel-scraper
+    ```
+ 3. Install the project:
+    ```bash
+    python -m pip install .
+    ```
+ 4. Run the CLI tool:
+    ```bash
+    web-novel-scraper
+    ```
+
+ ## Basic Concepts
+ ### Novel
+ Refers to a novel, which has at least one Table of Contents (there can be more than one) and chapters.
+ It also has metadata that can be saved, such as author, language, tags, and creation or end date.
+
+ ### Table of Contents (TOC)
+ The source of truth for all the chapters the novel will have. It can come from a main URL (which is requested and saved; if there is more than one page, those pages are also requested and saved), or the HTML can be added directly from a file. All chapters are autogenerated from this TOC.
+
+ ### Chapters
+ A chapter comes from a URL; it is requested once and saved as a file on your local machine, so you will not need to request it again.
+ From this chapter you can get the title and the chapter content.
+
+ ### Decoder
+ A set of rules used to extract information from a chapter, such as links, content, and title.
+ The host identifies which set of rules to use. A decoder can be added manually or generated from a TOC URL.
+ Example:
+ ```json
+ {
+     "host": "novelbin.me",
+     "has_pagination": false,
+     "title": {
+         "element": "h2 a.chr-title",
+         "id": null,
+         "class": null,
+         "selector": null,
+         "attributes": null,
+         "array": false,
+         "extract": {
+             "type": "attr",
+             "key": "title"
+         }
+     },
+     "content": {
+         "element": "div#chr-content",
+         "id": null,
+         "class": null,
+         "selector": null,
+         "attributes": null,
+         "array": true
+     },
+     "index": {
+         "element": null,
+         "id": null,
+         "class": null,
+         "selector": "ul.list-chapter li a",
+         "attributes": null,
+         "array": true
+     },
+     "next_page": {
+         "element": null,
+         "id": null,
+         "class": null,
+         "selector": null,
+         "attributes": null,
+         "array": true
+     }
+ }
+ ```
+ The decoder uses BeautifulSoup selectors for flexibility: you can specify the element, the id, the class, a CSS selector, and whether multiple tags should be matched (see the Python sketch after the HTML examples below).
+
+ - `has_pagination`: used when there is a `toc_main_url`, to find the URL of the next page via `next_page`.
+ - `index`: gets the `href` of every tag found when searching the TOC.
+ - `title` and `content`: the title and content of the chapter, respectively.
+
+ In the example above:
+ - The title is in an `a` tag within an `h2` tag with class `chr-title`, extracting the `title` attribute:
+   ```html
+   <h2><a class="chr-title" href="https://url-of-chapter" title="Chapter 1"><span class="chr-text">Chapter 1</span></a></h2>
+   ```
+ - The content is in a `div` with id `chr-content`:
+   ```html
+   <div id="chr-content" class="chr-c" style="font-family: Arial, sans-serif, serif; font-size: 18px; line-height: 160%; margin-top: 15px;">Content...</div>
+   ```
+ - The URL of each chapter is in the `href` of an `a` tag within an `li` tag, which is within a `ul` tag with class `list-chapter`:
+   ```html
+   <ul class="list-chapter">
+     <li><span class="glyphicon glyphicon-certificate"></span>&nbsp;<a href="https://url-of-chapter-1" title="Chapter 1"><span class="nchr-text chapter-title">Chapter 1</span></a></li>
+   </ul>
+   ```
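
To make the mapping concrete, here is a minimal sketch (not part of the package files) of how these rules could be applied with BeautifulSoup. The actual dispatch lives in `web_novel_scraper/decode.py` and may differ; this only illustrates the selector fields against the sample HTML above:

```python
# Illustrative only: applies the decode-guide fields above with BeautifulSoup.
from bs4 import BeautifulSoup

html = """
<h2><a class="chr-title" href="https://url-of-chapter" title="Chapter 1">
  <span class="chr-text">Chapter 1</span></a></h2>
<div id="chr-content">Content...</div>
<ul class="list-chapter">
  <li><a href="https://url-of-chapter-1" title="Chapter 1">Chapter 1</a></li>
</ul>
"""
soup = BeautifulSoup(html, "html.parser")

# "title": element "h2 a.chr-title" with extract {"type": "attr", "key": "title"}
title = soup.select_one("h2 a.chr-title")["title"]

# "content": element "div#chr-content", "array": true -> keep every match
content = [div.get_text(strip=True) for div in soup.select("div#chr-content")]

# "index": selector "ul.list-chapter li a" -> href of every chapter link
chapter_urls = [a["href"] for a in soup.select("ul.list-chapter li a")]

print(title)         # Chapter 1
print(content)       # ['Content...']
print(chapter_urls)  # ['https://url-of-chapter-1']
```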
+ ## Commands
+ The following commands are available in the Web Novel Scraping CLI:
+
+ ```bash
+ Usage: main.py [OPTIONS] COMMAND [ARGS]...
+
+   CLI Tool for web novel scraping.
+
+ Options:
+   --help  Show this message and exit.
+
+ Commands:
+   add-tags               Add tags to a novel.
+   add-toc-html           Add TOC HTML to a novel.
+   clean-files            Clean files of a novel.
+   create-novel           Create a new novel.
+   delete-toc             Delete the TOC of a novel.
+   remove-tags            Remove tags from a novel.
+   request-all-chapters   Request all chapters of a novel.
+   save-novel-to-epub     Save the novel to EPUB format.
+   scrap-chapter          Scrap a chapter of a novel.
+   set-cover-image        Set the cover image for a novel.
+   set-host               Set the host for a novel.
+   set-metadata           Set metadata for a novel.
+   set-scraper-behavior   Set scraper behavior for a novel.
+   set-toc-main-url       Set the main URL for the TOC of a novel.
+   show-chapters          Show chapters of a novel.
+   show-metadata          Show metadata of a novel.
+   show-novel-info        Show information about a novel.
+   show-scraper-behavior  Show scraper behavior of a novel.
+   show-tags              Show tags of a novel.
+   show-toc               Show the TOC of a novel.
+   sync-toc               Sync the TOC of a novel.
+   version                Show program version.
+ ```
+
+ ## Basic Examples
+ Here are some basic examples:
+
+ ### Example 1: Creating a Novel using a main URL
+ ```bash
+ web-novel-scraper create-novel --title 'Novel 1' --author 'ImagineBrkr' --toc-main-url 'https://page.me/Novel-1/toc' --cover 'cover.jpg'
+ ```
+ Some pages rely heavily on JavaScript; for those you can copy the HTML to a file manually and create the novel from it:
+ ```bash
+ web-novel-scraper create-novel --title 'Novel 1' --author 'ImagineBrkr' --toc-html 'toc.html' --host 'page.me' --cover 'cover.jpg'
+ ```
+ If the TOC spans more than one page, you can add the additional pages:
+ ```bash
+ web-novel-scraper add-toc-html --title 'Novel 1' --toc-html 'toc2.html'
+ ```
+ You can create the chapters from this TOC, or synchronize it if the chapters were already created and new ones have appeared:
+ ```bash
+ web-novel-scraper sync-toc --title 'Novel 1'
+ ```
+ On Windows the default directory is %APPDATA%/ImagineBrkr/web-novel-scraper; all files are saved there, but you can change it.
+
+ ### Example 2: Requesting files
+ We can now download all the chapters:
+ ```bash
+ web-novel-scraper request-all-chapters --title 'Novel 1'
+ ```
+
+ ### Example 3: Saving to EPUB
+ Save the novel as an EPUB with:
+ ```bash
+ web-novel-scraper save-novel-to-epub --title 'Novel 1'
+ ```
+
+ For more detailed usage and options, use `--help` with each command.
+
+ ## Configuration
+ ### Environment Variables
+
+ The Web Novel Scraping CLI uses the following environment variables for configuration:
+
+ - `SCRAPER_LOGGING_LEVEL`: Sets the logging level for the application. By default no logs are written; it accepts the following levels: DEBUG, INFO, WARNING, ERROR, CRITICAL.
+   ```bash
+   export SCRAPER_LOGGING_LEVEL=INFO
+   ```
+
+ - `SCRAPER_LOGGING_FILE`: Specifies the file where logs will be written. By default, logs go to the terminal.
+   ```bash
+   export SCRAPER_LOGGING_FILE=/path/to/logfile.log
+   ```
+
+ - `SCRAPER_BASE_DATA_DIR`: Defines the base directory for storing novel data. Defaults to the user data directory.
+   ```bash
+   export SCRAPER_BASE_DATA_DIR=/path/to/data/dir
+   ```
+
+ - `SCRAPER_FLARESOLVER_URL`: URL for the FlareSolverr service. Defaults to `http://localhost:8191/v1`.
+   ```bash
+   export SCRAPER_FLARESOLVER_URL=http://localhost:8191/v1
+   ```
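
A sketch of how these variables might be read at startup; the names mirror the list above, and the `load_dotenv()` call is an assumption based on the declared `python-dotenv` dependency (the actual logic lives in the package's modules and may differ):

```python
import os

from dotenv import load_dotenv  # python-dotenv is a declared dependency

load_dotenv()  # pick up a local .env file, if one exists

logging_level = os.getenv("SCRAPER_LOGGING_LEVEL")  # None -> no logs are written
logging_file = os.getenv("SCRAPER_LOGGING_FILE")    # None -> log to the terminal
base_data_dir = os.getenv("SCRAPER_BASE_DATA_DIR")  # None -> user data directory
flaresolver_url = os.getenv("SCRAPER_FLARESOLVER_URL", "http://localhost:8191/v1")
```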