spio 0.0.6.post12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of spio might be problematic. Click here for more details.

@@ -0,0 +1,1316 @@
1
+ import configparser
2
+ import io
3
+ import json
4
+ import logging
5
+ import os
6
+ import shutil
7
+ import sys
8
+ import tempfile
9
+ from io import BytesIO
10
+ from pathlib import Path
11
+ from typing import Callable, List
12
+ from typing import IO, Final, Optional, Union, Tuple, Iterator
13
+
14
+ import msal
15
+ import requests
16
+ from msal.application import ClientApplication
17
+ from msal_extensions import build_encrypted_persistence, FilePersistence, PersistedTokenCache
18
+ from office365.graph_client import GraphClient
19
+ from office365.onedrive.driveitems.conflict_behavior import ConflictBehavior
20
+ from office365.onedrive.driveitems.driveItem import DriveItem
21
+ from office365.onedrive.driveitems.uploadable_properties import DriveItemUploadableProperties
22
+ from office365.onedrive.files.system_info import FileSystemInfo
23
+ from office365.runtime.client_result import ClientResult
24
+ from office365.runtime.queries.upload_session import UploadSessionQuery
25
+ from typing_extensions import override
26
+
27
# Module-level state assigned by init_spio(). These are annotation-only
# declarations: neither name is bound to a value until init_spio() has run.
_GRAPH_CLIENT: GraphClient  # default client handed out by _get_client()
_SHAREPOINT_HOST: str  # default host used by find_item_in_site()
29
+
30
+
31
def init_spio(cfg: dict):
    """
    Initialize the module-level Microsoft Graph client and SharePoint host.

    Call this once at the start of your application, before using any of the
    functions that rely on the default Graph client.

    Parameters:
    - `cfg` (dict): Configuration settings. Any object offering a dict-like
      `get(key, default)` works. Recognised keys:
        - 'user_account': Microsoft Graph username (e.g. 'john.smith@vito.be').
        - 'sharepoint_host': SharePoint host.
        - 'graph_url': Microsoft Graph API endpoint.
        - 'msal' (dict): MSAL settings; subkeys 'client_id', 'authority',
          'auth_method' ('device' or 'interactive'), 'scopes', 'token_cache'.

    Settings missing from `cfg` are taken from the packaged
    `default_config.ini`; values in `cfg['msal']` override the packaged
    'msal' defaults key by key.

    Raises:
    - `KeyError`: If a required setting is present neither in `cfg` nor in the
      packaged defaults.

    Example Usage:
    ```python
    init_spio({"user_account": "john.smith@vito.be"})
    ```
    """
    global _GRAPH_CLIENT, _SHAREPOINT_HOST

    defaults = _default_settings()
    missing = object()

    def _setting(key):
        # Look the default up lazily. Evaluating defaults[key] up front raised
        # KeyError for keys without a packaged default even when the caller
        # supplied the value in cfg.
        value = cfg.get(key, missing)
        if value is not missing:
            return value
        try:
            return defaults[key]
        except KeyError:
            raise KeyError(
                f"Setting '{key}' is missing from the configuration and has no default value."
            ) from None

    user_account = _setting('user_account')
    sharepoint_host = _setting('sharepoint_host')
    graph_url = _setting('graph_url')

    # Start from the packaged MSAL defaults and let the caller override them.
    msal_cfg = dict(defaults.get('msal', {}))
    msal_cfg.update(cfg.get('msal', {}))

    _GRAPH_CLIENT = _create_graph_client(user_account, graph_url, msal_cfg)
    _SHAREPOINT_HOST = sharepoint_host
83
+
84
+
85
def copy_file(remote_file_source: DriveItem, remote_folder_destination: DriveItem) -> ClientResult:
    """
    Copy a remote file into a remote folder, keeping the file's name.

    The copy is requested with `ConflictBehavior.Replace`.

    Parameters:
    - `remote_file_source` (DriveItem): The file to copy.
    - `remote_folder_destination` (DriveItem): The folder receiving the copy.

    Returns:
    - `ClientResult`: Result of the executed copy query.

    Raises:
    - `Exception`: If the source is not a file or the destination is not a folder.

    Example Usage:
    ```python
    result = copy_file(find_item_in_my_onedrive('/path/to/file.txt'),
                       find_item_in_my_onedrive('/path/to/destination/folder'))
    ```
    """
    source_is_file = remote_file_source.is_file
    if not source_is_file:
        raise Exception(f"'{remote_file_source}' is not a file.")

    destination_is_folder = remote_folder_destination.is_folder
    if not destination_is_folder:
        raise Exception(f"'{remote_folder_destination}' is not a folder.")

    # The destination is passed as a reference holding only the folder id.
    parent_reference = {'id': remote_folder_destination.id}
    copy_request = remote_file_source.copy(
        name=remote_file_source.name,
        parent=parent_reference,
        conflict_behavior=ConflictBehavior.Replace,
    )
    return copy_request.execute_query()
114
+
115
def assert_file_exists(folder: DriveItem, file_name: str) -> DriveItem:
    """
    Make sure a file exists in a remote folder, creating an empty one if needed.

    Parameters:
    - `folder` (DriveItem): The folder that should contain the file.
    - `file_name` (str): Name (relative path) of the file inside `folder`.

    Returns:
    - `DriveItem`: The existing or newly created file.

    Raises:
    - `Exception`: If an item with that name exists but is a folder.
    """
    try:
        existing = find_item_by_rel_path(folder, file_name)
    except Exception:
        # Best effort: a failed lookup is treated as "file does not exist yet".
        # Only the lookup is guarded, so the folder check below is no longer
        # swallowed (a bare `except:` used to hide it and KeyboardInterrupt).
        logging.debug("lookup of '%s' failed, creating the file", file_name, exc_info=True)
        existing = None

    if existing is not None:
        if existing.is_file:
            return existing
        raise Exception(f"'{existing}' is a folder.")

    create_file(folder, file_name, b'')
    return find_item_by_rel_path(folder, file_name)
134
+
135
+
136
def create_file(folder: DriveItem, file_name: str, content):
    """
    Create a new file in a remote folder.

    The content is written to a local temporary file which is then uploaded
    with `upload_file_silent`.

    Parameters:
    - `folder` (DriveItem): Destination folder.
    - `file_name` (str): Name of the new file.
    - `content`: Binary content for the file. A `str` is also accepted and is
      encoded as UTF-8.

    Example Usage:
    ```python
    create_file(find_item_in_my_onedrive('/path/to/folder'), "new_file.txt", b"Hello, World!")
    ```
    """
    logging.debug(f"create file: {file_name} in folder: {folder.web_url}")

    # The file is opened in binary mode, so text content must be encoded first.
    payload = content.encode('utf-8') if isinstance(content, str) else content

    # TemporaryDirectory removes the directory on exit, also when the upload
    # raises; no explicit rmtree is needed (it would delete the directory twice).
    with tempfile.TemporaryDirectory() as tmp_dir:
        tmp_file = Path(tmp_dir) / file_name
        with open(tmp_file, 'wb') as f:
            f.write(payload)
        upload_file_silent(tmp_file, folder)

    print("file created: " + file_name)
165
+
166
+
167
def clear_folder(drive_item: DriveItem):
    """
    Delete every child of a remote folder, leaving the folder itself in place.

    Parameters:
    - `drive_item` (DriveItem): The folder to be cleared.

    Raises:
    - `Exception`: If `drive_item` is not a folder.

    Example Usage:
    ```python
    clear_folder(find_item_in_my_onedrive('/path/to/folder'))
    ```
    """
    # The message used to say "delete folder", copied from delete_folder().
    logging.debug(f"clear folder: {drive_item.web_url}")
    if not drive_item.is_folder:
        raise Exception(f"'{drive_item}' is not a folder.")

    children = drive_item.children.get().execute_query()
    for child in children:
        # FIXME (workaround kept from the original): each child is re-resolved
        # through its web URL, because operating on the child object directly
        # fails in some cases with a 400 'invalidRequest' ClientRequestException.
        child_clone = find_item_by_url(child.web_url)
        child_clone.delete_object().execute_query()
189
+
190
+
191
def delete_folder(drive_item: DriveItem):
    """
    Remove a remote folder.

    Parameters:
    - `drive_item` (DriveItem): The folder to delete.

    Raises:
    - `Exception`: If `drive_item` is not a folder.

    Example Usage:
    ```python
    delete_folder(find_item_in_my_onedrive('/path/to/folder'))
    ```
    """
    logging.debug(f"delete folder: {drive_item.web_url}")
    if not drive_item.is_folder:
        raise Exception(f"'{drive_item}' is not a folder.")
    # The result of the delete query is not used.
    drive_item.delete_object().execute_query()
210
+
211
+
212
def delete_file(drive_item: DriveItem):
    """
    Remove a remote file.

    Parameters:
    - `drive_item` (DriveItem): The file to delete.

    Raises:
    - `Exception`: If `drive_item` is not a file.

    Example Usage:
    ```python
    delete_file(find_item_in_my_onedrive('/path/to/file'))
    ```
    """
    logging.debug(f"delete file: {drive_item.web_url}")
    if not drive_item.is_file:
        raise Exception(f"'{drive_item}' is not a file.")
    # The result of the delete query is not used.
    drive_item.delete_object().execute_query()
231
+
232
def create_text_file(folder: DriveItem, file_name: str, content: str, encoding='utf-8'):
    """
    Create a text file in a remote folder.

    The text is encoded and handed to `create_file`.

    Parameters:
    - `folder` (DriveItem): Destination folder.
    - `file_name` (str): Name of the new file.
    - `content` (str): Text to store in the file.
    - `encoding` (str): Encoding applied to `content` (default: 'utf-8').

    Example Usage:
    ```python
    create_text_file(find_item_in_my_onedrive('/path/to/folder'), "new_text_file.txt", "Hello, World!")
    ```
    """
    encoded_content = bytes(content, encoding=encoding)
    create_file(folder, file_name, encoded_content)
250
+
251
+
252
def create_folder(name_folder: str, remote_parent_folder: DriveItem) -> DriveItem:
    """
    Create a folder inside a remote parent folder.

    The folder is requested with `ConflictBehavior.Replace`.

    Parameters:
    - `name_folder` (str): Name of the new folder.
    - `remote_parent_folder` (DriveItem): Folder in which to create it.

    Returns:
    - `DriveItem`: The result of the executed create-folder query.

    Example Usage:
    ```python
    # create a new folder "NewFolder" in the root of your OneDrive
    new_folder = create_folder("NewFolder", find_item_in_my_onedrive('/'))
    ```
    """
    creation = remote_parent_folder.create_folder(
        name_folder,
        conflict_behavior=ConflictBehavior.Replace,
    )
    return creation.execute_query()
272
+
273
+
274
def download_file_silent(remote_file: DriveItem, local_folder, chunk_size=1024 * 1024) -> Path:
    """
    Download a remote file into a local folder without progress reporting.

    Delegates to `download_file` with a progress callback that does nothing.

    Parameters:
    - `remote_file` (DriveItem): The file to download.
    - `local_folder`: Existing local folder that receives the file.
    - `chunk_size` (int): Download chunk size in bytes (default: 1 MB).

    Returns:
    - `Path`: Local path of the downloaded file.

    Example Usage:
    ```python
    local_path = download_file_silent(find_item_in_my_onedrive('/path/to/one_drive/file.xlsx'), '/your/local/folder')
    ```
    """
    return download_file(
        remote_file,
        local_folder,
        chunk_size=chunk_size,
        progress_call_back=lambda _downloaded: None,
    )
296
+
297
+
298
def download_file(remote_file: DriveItem, local_folder, chunk_size=1024 * 1024, progress_call_back: Callable = None) -> Path:
    """
    Download a remote file into a local folder, optionally reporting progress.

    Parameters:
    - `remote_file` (DriveItem): The file to download.
    - `local_folder`: Existing local folder that receives the file.
    - `chunk_size` (int): Download chunk size in bytes (default: 1 MB).
    - `progress_call_back` (Callable): Called with the download progress value
      passed on by the download session. Defaults to a callback that logs the
      progress with `logging.info()`.

    Returns:
    - `Path`: Local path of the downloaded file (`local_folder` / remote file name).

    Raises:
    - `Exception`: If `local_folder` does not exist or is not a folder.

    Example Usage:
    ```python
    local_path = download_file(find_item_in_my_onedrive('/path/to/one_drive/file.xlsx'), '/your/local/folder')
    ```
    """
    path_local_folder = _to_path(local_folder)
    if not path_local_folder.exists():
        raise Exception(f"local_folder {local_folder} does not exist.")
    if not path_local_folder.is_dir():
        raise Exception(f"local_folder {local_folder} is not a folder.")

    file_name = remote_file.name
    file_size = remote_file.properties.get('size', None)

    def _print_progress(x):
        # A missing or zero size cannot be turned into a percentage (dividing
        # by zero crashed the download of empty files); log the raw count.
        if not file_size:
            logging.info(f"{x} bytes of file '{file_name}' is downloaded to '{local_folder}'")
        else:
            logging.info(f"{((100.0 * x) / file_size):.1f}% of file '{file_name}' is downloaded to '{local_folder}'")

    chunk_downloaded_callback = progress_call_back if progress_call_back is not None else _print_progress

    local_path = path_local_folder / file_name
    with open(local_path, "wb") as local_file:
        remote_file.download_session(
            local_file, chunk_downloaded_callback, chunk_size=chunk_size
        ).execute_query()
    return local_path
341
+
342
+
343
def download_folder(remote_parent_folder: DriveItem, local_folder, chunk_size=1024 * 1024) -> Path:
    """
    Recursively download the contents of a remote folder into a local folder.

    Parameters:
    - `remote_parent_folder` (DriveItem): The remote folder whose children are downloaded.
    - `local_folder`: Local target folder; created (with parents) when missing.
    - `chunk_size` (int): Only forwarded to the recursive calls for sub-folders;
      the file downloads in this function do not use it.

    Returns:
    - `Path`: The local folder as a `Path`.
    """
    target_dir = local_folder if isinstance(local_folder, Path) else Path(local_folder)
    target_dir.mkdir(parents=True, exist_ok=True)

    remote_children = remote_parent_folder.children.get().execute_query()
    for remote_child in remote_children:
        child_target = target_dir / remote_child.name
        # FIXME (workaround kept from the original): the child is re-resolved
        # through its web URL, because using the child object directly fails in
        # some cases with a 400 'invalidRequest' ClientRequestException.
        resolved_child = find_item_by_url(remote_child.web_url)

        if resolved_child.is_folder:
            download_folder(resolved_child, child_target, chunk_size)
            continue

        with open(child_target, "wb") as local_file:
            resolved_child.download(local_file).execute_query()

    return target_dir
372
+
373
def upload_file_silent(local_file, remote_folder: DriveItem, chunk_size=1024 * 1024) -> DriveItem:
    """
    Upload a local file to a remote folder without progress reporting.

    Delegates to `upload_file` with a progress callback that does nothing.

    Parameters:
    - `local_file`: Local file to upload.
    - `remote_folder` (DriveItem): Destination folder.
    - `chunk_size` (int): Upload chunk size in bytes (default: 1 MB).

    Returns:
    - `DriveItem`: The uploaded file as returned by `upload_file`.

    Example Usage:
    ```python
    from pathlib import Path

    upload_result = upload_file_silent(Path('rel/to/local_file.txt'), find_item_in_my_onedrive('/path/to/folder'))
    ```
    """
    return upload_file(
        local_file,
        remote_folder,
        chunk_size=chunk_size,
        progress_call_back=lambda _uploaded: None,
    )
396
+
397
+
398
def upload_file(local_file, remote_folder: DriveItem, chunk_size=1024 * 1024, progress_call_back: Callable = None) -> DriveItem:
    """
    Upload a local file to a remote folder, optionally reporting progress.

    Parameters:
    - `local_file`: Local file to upload.
    - `remote_folder` (DriveItem): Destination folder.
    - `chunk_size` (int): Upload chunk size in bytes (default: 1 MB).
    - `progress_call_back` (Callable): Called with the upload progress value
      passed on by the resumable upload. Defaults to a callback that logs the
      percentage with `logging.info()`.

    Returns:
    - `DriveItem`: The result of the executed upload query.

    Raises:
    - `Exception`: If `local_file` does not exist or is not a file.

    Example Usage:
    ```python
    from pathlib import Path

    upload_result = upload_file(Path('rel/to/local_file.txt'), find_item_in_my_onedrive('/path/to/folder'))
    ```
    """
    logging.debug(f"upload_file() local_file: {local_file} remote_folder: {remote_folder.web_url}")
    path_local_file = _to_path(local_file)
    if not path_local_file.exists():
        raise Exception(f"local_file {local_file} does not exist.")
    if not path_local_file.is_file():
        raise Exception(f"local_file {local_file} is not a file.")

    file_name = path_local_file.name
    file_size = path_local_file.stat().st_size

    def _log_progress(x):
        # An empty file has size 0; report it as complete instead of dividing by zero.
        percent = (100.0 * x) / file_size if file_size else 100.0
        logging.info(f"{percent:.1f}% of file '{file_name}' is uploaded to '{remote_folder.resource_path}'")

    chunk_uploaded_callback = progress_call_back if progress_call_back is not None else _log_progress

    remote_file = (
        remote_folder.resumable_upload(
            str(path_local_file), chunk_size=chunk_size, chunk_uploaded=chunk_uploaded_callback
        )
        .get()
        .execute_query()
    )
    return remote_file
439
+
440
+
441
def upload_folder(local_folder, remote_parent_folder: DriveItem):
    """
    Recursively upload a local folder into a remote parent folder.

    A remote folder named after the local folder is created with
    `create_folder`, then every file and sub-folder is uploaded into it.

    Parameters:
    - `local_folder`: Local folder to upload.
    - `remote_parent_folder` (DriveItem): Remote folder that receives the new folder.

    Raises:
    - `Exception`: If `local_folder` does not exist or is not a folder.

    Example Usage:
    ```python
    from pathlib import Path

    upload_folder(Path('/to/local/folder'), find_item_in_my_onedrive('/path/to/destination/folder'))
    ```
    """
    source_dir = _to_path(local_folder)

    if not source_dir.exists():
        raise Exception(f"local_folder {local_folder} does not exist.")
    if not source_dir.is_dir():
        raise Exception(f"local_folder {local_folder} is not a folder.")

    remote_folder = create_folder(source_dir.name, remote_parent_folder)

    for entry in source_dir.iterdir():
        if entry.is_dir():
            upload_folder(entry, remote_folder)
        elif entry.is_file():
            upload_file(entry, remote_folder)
        # Entries that are neither a file nor a directory are skipped.
474
+
475
+
476
def find_item_by_url(shared_url: str, graph_client: GraphClient = None) -> DriveItem:
    """
    Resolve a shared URL to its DriveItem.

    Parameters:
    - `shared_url` (str): The shared URL of the item.
    - `graph_client` (GraphClient, optional): Client to use. When None, the
      module-level client set by `init_spio()` is used.

    Returns:
    - `DriveItem`: The item behind the shared URL.

    Example Usage:
    ```python
    item = find_item_by_url("https://example.sharepoint.com/:x:/r/sites/site_name/file_path")
    ```
    """
    client = _get_client(graph_client)
    shared_item = client.shares.by_url(shared_url).drive_item
    return shared_item.get().execute_query()
495
+
496
+
497
def find_item_in_site(name_site: str, rel_path: str, sharepoint_host: str = None, graph_client: GraphClient = None) -> DriveItem:
    """
    Resolve a path inside a SharePoint site to its DriveItem.

    Parameters:
    - `name_site` (str): Name of the SharePoint site.
    - `rel_path` (str): Path of the item relative to the root of the site's drive.
    - `sharepoint_host` (str, optional): SharePoint host. When None, the host
      configured through `init_spio()` is used.
    - `graph_client` (GraphClient, optional): Client to use. When None, the
      module-level client set by `init_spio()` is used.

    Returns:
    - `DriveItem`: The item at `rel_path` within the site.

    Example Usage:
    ```python
    item = find_item_in_site('unit-rma', 'RMA-IT/Temporary/mr_sharepoint')
    ```
    """
    if sharepoint_host is None:
        host = _SHAREPOINT_HOST
    else:
        host = sharepoint_host

    client = _get_client(graph_client)
    # The site is first looked up by "<host>:/sites/<name>:" and then
    # addressed by its id to reach the root of its drive.
    site = client.sites[f'{host}:/sites/{name_site}:'].get().execute_query()
    drive_root = client.sites[site.id].drive.root
    return find_item_by_rel_path(drive_root, rel_path)
522
+
523
+
524
def find_item_in_my_onedrive(rel_path: str, graph_client: GraphClient = None) -> DriveItem:
    """
    Resolve a path inside the signed-in user's OneDrive to its DriveItem.

    Parameters:
    - `rel_path` (str): Path of the item relative to the OneDrive root.
    - `graph_client` (GraphClient, optional): Client to use. When None, the
      module-level client set by `init_spio()` is used.

    Returns:
    - `DriveItem`: The item at `rel_path`.

    Example Usage:
    ```python
    item = find_item_in_my_onedrive('/path/to/file.txt')
    ```
    """
    client = _get_client(graph_client)
    return find_item_by_rel_path(client.me.drive.root, rel_path)
545
+
546
+
547
def find_item_by_rel_path(root: DriveItem, rel_path: str) -> DriveItem:
    """
    Resolve a path relative to another DriveItem.

    Parameters:
    - `root` (DriveItem): The item the path is relative to.
    - `rel_path` (str): Path of the wanted item below `root`.

    Returns:
    - `DriveItem`: The item at `rel_path`, loaded by executing the query.

    Example Usage:
    ```python
    root = find_item_in_my_onedrive('/path')
    item = find_item_by_rel_path(root, 'to/file.txt')
    ```
    """
    target = root.get_by_path(rel_path)
    return target.get().execute_query()
567
+
568
+
569
def _get_client(graph_client: GraphClient) -> GraphClient:
    """Return `graph_client`, or the module-level client when it is None."""
    if graph_client is None:
        return _GRAPH_CLIENT
    return graph_client
571
+
572
+
573
+ def _to_path(path) -> Path:
574
+ if isinstance(path, Path):
575
+ return path
576
+ return Path(path)
577
+
578
+
579
def _create_graph_client(account: str, graph_url: str, cfg_msal: dict) -> GraphClient:
    """
    Build a GraphClient that obtains its tokens through MSAL.

    :param account: The SharePoint username (name.familyname@vito.be)
    :param graph_url: Graph endpoint; used as prefix for the default scopes.
    :param cfg_msal: must contain 'client_id' and 'authority';
                     optional: 'token_cache' (default: msal_token_cache.bin in the
                     user's home directory), 'auth_method' (default: 'device'),
                     'scopes' (default: a fixed list of permissions prefixed with graph_url)
    :return: A GraphClient instance
    """
    client_id = cfg_msal['client_id']
    authority = cfg_msal['authority']

    default_cache_file = Path.home() / 'msal_token_cache.bin'
    token_cache = cfg_msal.get('token_cache', default_cache_file)
    auth_method = cfg_msal.get('auth_method', 'device')  # 'interactive' or 'device'

    scopes = cfg_msal.get('scopes', None)
    if scopes is None:
        scopes = []
        for permission in ('Sites.Read.All', 'User.Read', 'AllSites.Read', 'MyFiles.Read', 'MyFiles.Write'):
            scopes.append(graph_url + permission)

    app: ClientApplication = msal.PublicClientApplication(
        client_id=client_id,
        authority=authority,
        token_cache=_get_token_cache(token_cache),
    )

    def _acquire_token():
        # Handed to GraphClient as its token callback.
        return _get_token(app, account, auth_method, scopes)

    return GraphClient(_acquire_token)
599
+
600
+
601
+ def _get_token(app, account, method, scopes):
602
+ cached_accounts = app.get_accounts()
603
+ token = None
604
+ if cached_accounts:
605
+ logging.debug('Try using token from cache')
606
+ token = app.acquire_token_silent(account=cached_accounts[0], scopes=scopes)
607
+ if token is None: # either no cached account, or token refresh failed
608
+ if method == 'interactive':
609
+ token = app.acquire_token_interactive(scopes=scopes, login_hint=account)
610
+ else:
611
+ flow = app.initiate_device_flow(scopes=scopes)
612
+ if 'user_code' not in flow:
613
+ raise ValueError(f'Failed to create device flow. Err: {json.dumps(flow, indent=4)}')
614
+ print(flow['message'])
615
+ sys.stdout.flush()
616
+ input('Press enter after signing in from other device/browser to proceed, CTRL+C to abort.')
617
+ token = app.acquire_token_by_device_flow(flow)
618
+ logging.debug('Acquired new token: %s', token)
619
+ return token
620
+
621
+
622
def _get_token_cache(location):
    """Return a `PersistedTokenCache` backed by the persistence built for `location`."""
    store = _build_persistence(location)
    logging.debug(
        'Type of persistence: %s\nPersistence encrypted? %s',
        type(store).__name__,
        store.is_encrypted,
    )
    return PersistedTokenCache(store)
627
+
628
def _build_persistence(location, fallback_to_plaintext=False):
    """
    Build a persistence instance for the token cache at `location`.

    Encrypted persistence is tried first. When building it fails, the error is
    re-raised unless `fallback_to_plaintext` is true, in which case a plain
    `FilePersistence` is returned instead.

    :param location: Location of the token cache file.
    :param fallback_to_plaintext: Allow an unencrypted cache when encryption is unavailable.
    :return: The persistence instance.
    """
    # Note: encrypted and plaintext persistence share the same location,
    # therefore their data would likely override each other.
    try:
        return build_encrypted_persistence(location)
    except Exception:
        # `except Exception` instead of a bare `except`, so KeyboardInterrupt
        # and SystemExit are never turned into a plaintext fallback.
        # On Linux, an encryption exception is raised during initialization.
        # On Windows and macOS, it is not detected here, but raised during
        # load() or save().
        if not fallback_to_plaintext:
            raise
        logging.warning("Encryption unavailable. Opting in to plain text.")
        return FilePersistence(location)
642
+
643
+
644
+ def _recursive_merge(dct1, dct2):
645
+ merged = dct1.copy()
646
+ for key, value in dct2.items():
647
+ if key in merged:
648
+ if value is None or isinstance(value, (bool, float, int, str)):
649
+ # Use value from dct2 for scalar types
650
+ merged[key] = value
651
+ elif isinstance(value, list):
652
+ # Merge lists without duplicates
653
+ merged[key] = list(set(merged[key] + value))
654
+ else:
655
+ # Recursive merge for nested dictionaries
656
+ merged[key] = _recursive_merge(merged[key], value)
657
+ else:
658
+ # Key not in dct1, add it to the merged dictionary
659
+ merged[key] = value
660
+ return merged
661
+
662
+
663
def _default_settings() -> dict:
    """Read the `default_config.ini` located next to this module."""
    module_dir = Path(__file__).parent
    return _read_ini(module_dir / 'default_config.ini')
665
+
666
+
667
+ def _read_ini(ini_file, default_section='default') -> dict:
668
+ config_dict = {}
669
+ config = configparser.ConfigParser(allow_no_value=True)
670
+ config.read(ini_file)
671
+ for section in config.sections():
672
+ config_dict[section] = {}
673
+ for option in config.options(section):
674
+ val = config.get(section, option).strip()
675
+ if _is_json_list(val):
676
+ config_dict[section][option] = _convert_to_list(val)
677
+ else:
678
+ config_dict[section][option] = val
679
+
680
+ dflt_dict = config_dict.get(default_section, None)
681
+ if isinstance(dflt_dict, dict):
682
+ for key, val in dflt_dict.items():
683
+ if key not in config_dict:
684
+ config_dict[key] = val
685
+ else:
686
+ raise Exception(f'Key {key} in default section is not unique.')
687
+ del config_dict[default_section]
688
+ return config_dict
689
+
690
+ def _is_json_list(input_string):
691
+ try:
692
+ json_list = json.loads(input_string)
693
+ return isinstance(json_list, list)
694
+ except json.JSONDecodeError:
695
+ return False
696
+
697
+ def _convert_to_list(input_string):
698
+ try:
699
+ return json.loads(input_string)
700
+ except json.JSONDecodeError:
701
+ raise ValueError("Input string is not a valid JSON")
702
+
703
+
704
+
705
class _DriveItemWrapper:
    """Wrapper around a DriveItem that adds byte-range download and upload helpers."""

    def __init__(self, _drive_item: DriveItem):
        self.drive_item = _drive_item

    def file_size(self) -> int:
        """Return the 'size' entry of the wrapped item's properties (None when absent)."""
        properties = self.drive_item.properties
        return properties.get('size', None)

    def download_range(self, io_object: IO, start: int, end: int, chunk_size=1024 * 1024):
        """
        Register the download of a specific byte range from start to end.

        A 'Range' header is added to the content request and the response is
        streamed into `io_object`. Nothing is executed here; the hooks are only
        registered on the query and the context.

        :type io_object: typing.IO
        :param int start: The start byte.
        :param int end: The end byte.
        :param int chunk_size: The number of bytes it should read into memory.
        :return: This wrapper.
        """

        def _add_range_header(request):
            # request: the outgoing request options of the content query
            request.stream = True
            request.headers['Range'] = f'bytes={start}-{end}'

        def _write_response(response):
            # response: the streamed HTTP response; copy it chunk by chunk
            for chunk in response.iter_content(chunk_size=chunk_size):
                io_object.write(chunk)

        content_query = self.drive_item.get_content()
        content_query.before_execute(_add_range_header)
        self.drive_item.context.after_execute(_write_response)
        return self

    def upload_range(self, io_object: IO, start: int, end: int, chunk_size=1024 * 1024):
        """
        TODO: Fix me
        Upload a specific byte range from start to end.

        NOTE(review): marked as unfinished in the original source; the
        statements below are kept in their original order.

        :type io_object: typing.IO
        :param int start: The start byte.
        :param int end: The end byte.
        :param int chunk_size: The number of bytes it should read into memory.
        :return: This wrapper.
        """

        def _create_request(request):
            # Currently not registered anywhere (see create_upload_session).
            request.url += "?@microsoft.graph.conflictBehavior={0}".format('replace')

        def create_upload_session(item):
            session_query = UploadSessionQuery(self.drive_item, {"item": item})
            self.drive_item.context.add_query(session_query)
            # .before_query_execute(_create_request)
            return session_query.return_type

        def upload_chunk(upload_url, byte_range, data):
            first_byte, last_byte = byte_range[0], byte_range[1]
            headers = {
                'Content-Length': str(len(data)),
                'Content-Range': f'bytes {first_byte}-{last_byte}/{self.file_size()}'
            }
            response = requests.put(upload_url, headers=headers, data=data)
            response.raise_for_status()

        fsi = FileSystemInfo()
        props = DriveItemUploadableProperties(fsi, name="your_filename.ext")

        # The properties built above are immediately replaced by an empty set.
        props = DriveItemUploadableProperties()

        upload_session = create_upload_session(props).execute_query()

        # Read and upload the byte range chunk by chunk.
        io_object.seek(start)
        offset = start
        while offset <= end:
            wanted = min(chunk_size, end - offset + 1)
            chunk = io_object.read(wanted)
            if not chunk:
                break

            upload_chunk(upload_session.value.uploadUrl, (offset, offset + len(chunk) - 1), chunk)
            offset += len(chunk)

        # Finish the upload session
        upload_session.finish_upload(offset, end).execute_query()
        return self
789
+
790
+
791
class Location:
    """
    Kinds of location a SPIO path can refer to.

    Attributes:
        MY_DRIVE (str): The path refers to a location in your OneDrive.
        SHARED (str): The path is a shared link.

    Examples:
        ```python
        # A path in your OneDrive:
        spio = SPIO('/path/to/SampleData.csv', Location.MY_DRIVE)

        # A shared link:
        spio = SPIO('https://example.sharepoint.com/:x:/r/sites/site_name/SampleData.xlsx', Location.SHARED)
        ```
    """

    # Marker values; code compares against these strings.
    MY_DRIVE = "#my_drive#"
    SHARED = "#shared_url#"

    def __init__(self):
        """Instances carry no state; the class is used for its constants."""
        pass
816
+
817
+
818
class SPIO(io.IOBase):
    """
    A binary file-like object backed by a file on SharePoint / OneDrive.

    The content is kept in a local buffer (an in-memory ``BytesIO`` or a temporary file).
    Reads download the remote content into that buffer on demand; writes go to the
    buffer and are uploaded to SharePoint on ``flush()`` and/or ``close()``.

    Args:
        path (str): The SharePoint URL, or a path inside the drive / site.
        location (str, optional): The location of the path (Location.MY_DRIVE or
            Location.SHARED). Any other value is passed on to ``find_item_in_site``.
            Defaults to Location.SHARED.
        read_chunks (int, optional): When > 0, sized reads download the file piece by
            piece using this many bytes per chunk. Otherwise the whole file is
            downloaded on the first read. Defaults to -1.
        mem_buffered (bool, optional): Buffer in memory (True) or in a temporary file
            (False). Defaults to True.
        writable (bool, optional): When False the object refuses writes. Defaults to True.
        write_on_flush (bool, optional): When True, ``flush()`` uploads pending changes.
            When False, changes are only uploaded by ``close()``. Defaults to True.
        gc (GraphClient, optional): The GraphClient to use. Defaults to None, in which
            case the module-level default client set up at initialisation is used.

    Examples:
        ```python
        # Create an SPIO object for a shared link
        spio = SPIO('https://vitoresearch.sharepoint.com/:x:/r/sites/unit-rma/Shared%20Documents/RMA-IT/Temporary/mr_sharepoint/SampleData/SampleData.xlsx')

        # Create an SPIO object for a OneDrive location
        spio = SPIO('/path/to/SampleData.csv', Location.MY_DRIVE)

        # writing to a SPIO object
        with SPIO('path/to/hi.txt', Location.MY_DRIVE) as file:
            file.write_line('Hello, SPIO!')
            file.write_line('This is a sample text.')

        # with pandas, you can directly read from SPIO object into pd.DataFrame
        df = pd.read_csv(SPIO('https://vitoresearch.sharepoint.com/:x:/r/sites/unit-rma/Shared%20Documents/RMA-IT/Temporary/mr_sharepoint/SampleData/SampleData.csv'), sep=';')

        # and write a pd.DataFrame to a SPIO object
        df.to_excel(SPIO('/path/to/SampleData.xlsx', Location.MY_DRIVE))
        ```
    """

    # Sentinel cached in ``_drive_item`` after a lookup that found nothing, so that
    # the (remote) lookup is not repeated on every access.
    _NO_DRIVE_ITEM: Final = object()

    DEFAULT_CHUNK_SIZE: Final = 8 * 1024 * 1024

    def __init__(self, path, location: str = Location.SHARED, read_chunks=-1, mem_buffered=True, writable=True,
                 write_on_flush=True, gc: GraphClient = None):
        """
        Initialize the SPIO object.

        Raises:
            Exception: When no GraphClient is passed and no default client is available.
        """
        super().__init__()
        # Bookkeeping comes first so that close() / __del__ stay safe even when the
        # constructor fails further down.
        self._closed = False
        self._written = False  # True once anything was written through this object
        self._dirty = False  # True while the buffer holds changes that were not uploaded yet
        self._temp_copies: List[Path] = []
        self._native_close = None  # the delegate's real close(), saved when the delegate gets hooked

        # The module-level default may never have been assigned; look it up without
        # risking a NameError so the caller gets the explanatory exception instead.
        self.graph_client = gc if gc is not None else globals().get("_GRAPH_CLIENT")
        if self.graph_client is None:
            raise Exception("GRAPH_CLIENT is None. Use init_graph() first.")

        if mem_buffered:
            self._io_delegate = BytesIO()
        else:
            self._io_delegate = tempfile.NamedTemporaryFile(delete=True, prefix="SPIO_", suffix=".bin")

        self.path = path
        self.location = location
        self.read_chunks = read_chunks
        self._writable = writable
        self._download_complete = False
        self._downloaded_until = 0
        self._read_until = 0
        self._drive_item = None
        self._delegate_exposed = False
        self._flushed = 0  # buffer size at the time of the last upload
        self._write_on_flush = write_on_flush
        self._checked_writable = None

    @property
    def web_url(self):
        """The web URL of the drive item, or None when the item does not exist."""
        drive_item = self.get_drive_item()
        return drive_item.web_url if drive_item is not None else None

    @property
    def name(self):
        """The name of the drive item, or None when the item does not exist."""
        drive_item = self.get_drive_item()
        return drive_item.name if drive_item is not None else None

    def copy_file(self, assert_fully_downloaded=True) -> Path:
        """
        Write the buffer to a file in a fresh temporary directory and return its path.

        The temporary directory is removed again when this object is closed.
        """
        temp_dir = Path(tempfile.mkdtemp(prefix="SPIO_"))
        # Register the directory immediately so it is cleaned up even if copying fails.
        self._temp_copies.append(temp_dir)
        drive_item = self.get_drive_item()
        # Fall back to the last path segment when the file is not on SharePoint (yet).
        file_name = drive_item.name if drive_item is not None else self._get_parent_and_name(self.path)[1]
        temp_file = temp_dir / file_name
        with open(temp_file, 'wb') as open_file:
            open_file.write(self.copy_bytes(assert_fully_downloaded))
        return temp_file

    def copy_text(self, assert_fully_downloaded=True, encoding='utf-8') -> str:
        """
        Returns: The content of the underlying io buffer, decoded to a string.
        """
        return self.copy_bytes(assert_fully_downloaded).decode(encoding)

    def copy_bytes(self, assert_fully_downloaded=True) -> bytes:
        """
        Returns: The complete content of the underlying io buffer as bytes.

        The current stream position is left untouched.
        """
        if assert_fully_downloaded:
            self._assert_fully_downloaded()
        position = self._io_delegate.tell()
        self._io_delegate.seek(0)
        try:
            return self._io_delegate.read()
        finally:
            self._io_delegate.seek(position)

    def copy_bytes_io(self, assert_fully_downloaded=True) -> io.BytesIO:
        """
        Returns: A copy of the underlying io buffer as a new BytesIO object.
        """
        return BytesIO(self.copy_bytes(assert_fully_downloaded))

    def io_delegate(self):
        """
        Return the underlying buffer object.

        On first use the buffer's write/flush/close are hooked so that changes made
        directly on the buffer are still tracked and uploaded by this object.
        """
        if not self._delegate_exposed:
            self._delegate_exposed = True
            self._hook_io_delegate()
        return self._io_delegate

    def write_to(self, dest) -> str:
        """
        Write the content of the SPIO object to a file.

        Args:
            dest: Either a writable binary file-like object, or a path (str or Path).

        Returns:
            str: The file path (for file-like objects: their ``name``, or None when
            they have none).
        """
        data = self.copy_bytes()  # downloads if needed and preserves the stream position
        if hasattr(dest, 'write'):
            dest.write(data)
            return getattr(dest, 'name', None)
        path = dest if isinstance(dest, Path) else Path(dest)
        with open(path, 'wb') as f:
            f.write(data)
        return str(path)

    @property
    def chunk_size(self):
        """Number of bytes fetched per download request."""
        return self.read_chunks if self.read_chunks > 0 else SPIO.DEFAULT_CHUNK_SIZE

    @property
    def chunked_read(self):
        """True when the file is downloaded piece by piece instead of all at once."""
        return self.read_chunks > 0

    @override
    def seek(self, *args) -> int:
        if len(args) == 2 and args[1] == io.SEEK_END:
            # The end of the stream is only known once everything is downloaded. A file
            # that does not exist remotely (e.g. one being created) has nothing to
            # download, so seeking within the local buffer must still work.
            self._assert_downloaded(raise_when_drive_item_not_found=False)
        return self._io_delegate.seek(*args)

    @override
    def read(self, *args) -> bytes:
        """
        Read up to ``size`` bytes; read everything when size is omitted, None or negative.

        The required part of the remote file is downloaded first.
        """
        if len(args) > 1:
            raise TypeError(f"read() takes at most 1 argument ({len(args)} given)")
        size = args[0] if args else -1
        if size is None or size < 0:
            self._assert_fully_downloaded()
            return self._io_delegate.read()

        # The download target follows the *current* position, so reads after a seek()
        # fetch the range that is actually going to be read.
        target = self._io_delegate.tell() + size
        self._read_until = target
        if not self.chunked_read and self._downloaded_until == 0:
            self._assert_fully_downloaded()  # download the full file from the start
        else:
            self._assert_downloaded(until=target)  # download the file piece by piece
        return self._io_delegate.read(size)

    @override
    def readline(self, __size: Optional[int] = -1) -> bytes:
        """
        Read one line, at most ``__size`` bytes when that is non-negative.

        Lines end at ``\\n``, ``\\r\\n`` or a lone ``\\r``; the terminator is part of the
        returned line.
        """
        limit = -1 if __size is None else __size
        line = bytearray()
        while limit < 0 or len(line) < limit:
            char = self.read(1)
            if not char:
                break  # end of stream
            line += char
            if char == b'\n':
                break
            if char == b'\r':
                # Look one byte ahead to tell "\r\n" from a lone "\r", but only when
                # the size limit leaves room for that extra byte.
                if limit < 0 or len(line) < limit:
                    next_char = self.read(1)
                    if next_char == b'\n':
                        line += next_char
                    elif next_char:
                        # A byte of the next line was consumed: give it back. At end of
                        # stream nothing was consumed, so the cursor must stay put.
                        self.seek(-1, io.SEEK_CUR)
                break
        return bytes(line)

    @override
    def readlines(self, __hint: Optional[int] = None) -> list[bytes]:
        """
        Read lines until end of stream, or until their total size reaches ``__hint``.

        A hint of None, 0 or less means "no limit".
        """
        size_hint = -1 if __hint is None else __hint
        lines = []
        total_size = 0
        while size_hint <= 0 or total_size < size_hint:
            line = self.readline()
            if not line:
                break
            lines.append(line)
            total_size += len(line)
        return lines

    @override
    def close(self) -> None:
        """Upload pending changes, then release the buffer and all temporary copies."""
        if self.closed:
            return
        logging.debug("close()")
        self._closed = True
        try:
            if self._written:
                self._write_to_sharepoint()
        finally:
            # Release local resources even when the upload failed.
            self._release_resources()

    def _release_resources(self):
        """Close and drop the buffer and delete the temporary copies made by copy_file()."""
        delegate = getattr(self, "_io_delegate", None)
        if delegate is not None:
            try:
                # When the delegate was hooked, its own close attribute is a hook that
                # does nothing once this object is closed; call the saved original.
                native_close = getattr(self, "_native_close", None) or delegate.close
                native_close()
            finally:
                del self._io_delegate
        for tmpdir in self._temp_copies:
            try:
                logging.debug(f"unlinking {tmpdir}")
                shutil.rmtree(tmpdir)
            except Exception as e:
                logging.error(f"Error deleting temp dir {tmpdir}: {e}")
        self._temp_copies = []

    @override
    def flush(self, flush_delegate=True) -> None:
        """
        Flush the buffer and, when ``write_on_flush`` is enabled, upload pending changes.

        Args:
            flush_delegate: Also flush the underlying buffer object.
        """
        logging.debug("flush()")
        # TODO: only upload the data that was not written since the last flush... Fix _DriveItemWrapper.upload_range()
        if flush_delegate:
            self._io_delegate.flush()
        if self._written and self._write_on_flush:
            self._write_to_sharepoint()

    def _write_to_sharepoint(self):
        """Upload the whole buffer, unless it is unchanged since the previous upload."""
        # Measure the buffer without disturbing the caller's stream position.
        position = self._io_delegate.tell()
        buffer_size = self._io_delegate.seek(0, io.SEEK_END)
        self._io_delegate.seek(position)

        # The dirty flag catches edits that leave the size unchanged; the size check
        # catches changes the flag does not see (e.g. truncate()).
        if not self._dirty and self._flushed == buffer_size:
            return

        drive_item_parent, file_name = self._try_create_parent_drive_item()
        data = self.copy_bytes(assert_fully_downloaded=False)
        logging.info("write file to sharepoint()")
        create_file(drive_item_parent, file_name, data)
        self._drive_item_might_be_created()
        self._flushed = buffer_size
        self._dirty = False

    @property
    @override
    def closed(self) -> bool:
        return self._closed

    @override
    def readable(self) -> bool:
        """True when the file exists on SharePoint."""
        return self.get_drive_item() is not None

    @override
    def write(self, *args) -> int:
        """
        Write to the buffer and return the number of bytes written.

        Bytes-like objects are written as they are; any other object is converted to
        its string form and encoded as UTF-8.

        Raises:
            io.UnsupportedOperation: When this object is not writable.
        """
        logging.debug(f"write: {len(args)}")
        self._raise_if_not_writble()
        self._written = True
        if len(args) != 1:
            return 0
        payload = args[0]
        if not isinstance(payload, (bytes, bytearray, memoryview)):
            try:
                # Any other object exposing the buffer protocol is binary data too.
                payload = memoryview(payload)
            except TypeError:
                payload = str(payload).encode('utf-8')
        wrote = self._io_delegate.write(payload)
        if wrote:
            self._dirty = True
        logging.debug(f"write: {wrote}")
        return wrote

    @override
    def writelines(self, *args) -> None:
        """Write a sequence of bytes-like lines to the buffer (no separators are added)."""
        self._raise_if_not_writble()
        before = self._io_delegate.tell()
        self._io_delegate.writelines(*args)
        self._written = True
        if self._io_delegate.tell() != before:
            self._dirty = True

    def write_line(self, line: str, encoding='utf-8', linesep=None) -> None:
        """
        Write a text line, appending ``linesep`` (default: ``os.linesep``) when missing.
        """
        if linesep is None:
            linesep = os.linesep
        if not line.endswith(linesep):
            line += linesep
        self.write(bytes(line, encoding=encoding))

    def write_lines(self, lines: list[str], encoding='utf-8', linesep=None) -> None:
        """Write several text lines with write_line() and flush afterwards."""
        for line in lines:
            self.write_line(line, encoding=encoding, linesep=linesep)
        self.flush()

    def _raise_if_not_writble(self):
        """Raise io.UnsupportedOperation when this object cannot be written to."""
        if not self.writable():
            raise io.UnsupportedOperation("The SPIO is not writable.")

    @override
    def writable(self) -> bool:
        # The check needs remote lookups, so its result is cached.
        if self._checked_writable is None:
            self._checked_writable = self._check_writable()
        return self._checked_writable

    def get_drive_item(self) -> Optional[DriveItem]:
        """Return the (cached) drive item for this path, or None when it does not exist."""
        if self._drive_item is None:
            self._drive_item = self._find_drive_item()
        return None if self._drive_item is SPIO._NO_DRIVE_ITEM else self._drive_item

    def __str__(self) -> str:
        return f"SPIO(path='{self.path}', location='{self.location}')"

    @property
    def mode(self):
        return "w+b" if self._writable else "rb"

    @override
    def fileno(self) -> int:
        # io.UnsupportedOperation is what consumers probing for a real file
        # descriptor expect; it is still an Exception subclass.
        raise io.UnsupportedOperation("fileno() is not supported.")

    # IO delegates

    @override
    def isatty(self) -> bool:
        return self._io_delegate.isatty()

    @override
    def tell(self) -> int:
        return self._io_delegate.tell()

    @override
    def seekable(self) -> bool:
        return self._io_delegate.seekable()

    @override
    def truncate(self, __size: Optional[int] = None) -> int:
        """Resize the buffer to ``__size`` bytes (default: the current position)."""
        return self._io_delegate.truncate(__size)

    def _drive_item_might_be_created(self):
        """Forget a cached "not found" result so the next lookup queries SharePoint again."""
        if self._drive_item is SPIO._NO_DRIVE_ITEM:
            self._drive_item = None

    def _hook_io_delegate(self):
        """
        Intercept write/flush/close on the buffer object itself.

        This keeps change tracking and uploading working for code that operates on
        the object returned by io_delegate() instead of on this SPIO.
        """
        native_write = self._io_delegate.write
        native_flush = self._io_delegate.flush
        # Saved so that close() can still really close the buffer.
        self._native_close = self._io_delegate.close

        def _write_hook(*args, **kwargs):
            self._written = True
            wrote = native_write(*args, **kwargs)
            if wrote:
                self._dirty = True
            return wrote

        def _close_hook():
            if not self._closed:
                return self.close()

        def _flush_hook():
            native_flush()
            return self.flush(flush_delegate=False)

        self._io_delegate.write = _write_hook
        self._io_delegate.close = _close_hook
        self._io_delegate.flush = _flush_hook

    def _check_writable(self) -> bool:
        """Writable means: allowed by the caller, and the file or its parent folder exists."""
        if not self._writable:
            return False
        if self.get_drive_item() is not None:
            return True
        drive_item_parent, _ = self._try_create_parent_drive_item()
        return drive_item_parent is not None

    def _find_drive_item(self) -> Union[DriveItem, object]:
        """Look up the drive item; returns the _NO_DRIVE_ITEM sentinel when it cannot be found."""
        try:
            if self.location == Location.SHARED:
                drive_item = find_item_by_url(self.path, self.graph_client)
            elif self.location == Location.MY_DRIVE:
                drive_item = find_item_in_my_onedrive(self.path, self.graph_client)
            else:
                drive_item = find_item_in_site(_SHAREPOINT_HOST, self.path, self.location, self.graph_client)
        except Exception as e:
            # Best effort: a failed lookup is reported as "not found".
            logging.debug(f"Drive item lookup failed for {self.__str__()}: {e}")
            return SPIO._NO_DRIVE_ITEM
        return SPIO._NO_DRIVE_ITEM if drive_item is None else drive_item

    def _try_create_parent_drive_item(self) -> Tuple:
        """
        Look up the drive item of the parent folder.

        Returns:
            Tuple: (parent drive item, file name), or (None, None) when the lookup failed.
        """
        try:
            parent, file_name = self._get_parent_and_name(self.path)
            if self.location == Location.SHARED:
                drive_item = find_item_by_url(parent, self.graph_client)
            elif self.location == Location.MY_DRIVE:
                drive_item = find_item_in_my_onedrive(parent, self.graph_client)
            else:
                drive_item = find_item_in_site(_SHAREPOINT_HOST, parent, self.location, self.graph_client)
            return drive_item, file_name
        except Exception as e:
            logging.debug(f"Parent drive item lookup failed for {self.__str__()}: {e}")
            return None, None

    def _get_parent_and_name(self, url):
        """
        Split a path or URL at its last separator into (parent, name).

        Backslashes count as separators. Without any separator the parent is empty.
        """
        parent_url, _, name_url = str(url).replace('\\', '/').rpartition('/')
        return parent_url, name_url

    def _assert_fully_downloaded(self):
        """
        Make sure the complete remote file is in the buffer.

        Raises:
            Exception: When the file does not exist on SharePoint.
        """
        self._assert_downloaded(until=self._get_file_size(raise_when_drive_item_not_found=True))

    def _assert_downloaded(self, raise_when_drive_item_not_found: bool = True, until: int = None):
        """
        Make sure the remote file is downloaded up to byte offset ``until``.

        Args:
            raise_when_drive_item_not_found: Raise instead of returning silently when
                the file does not exist on SharePoint.
            until: Required offset; None means the whole file. At least one chunk is
                fetched per call, and the offset is capped at the file size.
        """
        if not self._need_download(raise_when_drive_item_not_found, until):
            return

        drive_item = self.get_drive_item()
        if drive_item is None:
            logging.debug(f"The file on SharePoint does not exist ({self.__str__()}).")
            if raise_when_drive_item_not_found:
                raise Exception(f"Cannot find a SharePoint drive item for {self.__str__()}.")
            return

        drive_item_wrapper = _DriveItemWrapper(drive_item)
        file_size = drive_item_wrapper.file_size()
        if until is None:
            until = file_size

        curr_pos = self._io_delegate.tell()

        if until is None:
            # The size is unknown, so ranges cannot be computed: fetch everything at once.
            logging.debug(f"full download: {drive_item.web_url}")
            self._io_delegate.seek(self._downloaded_until)
            drive_item.download(self._io_delegate)
            # NOTE(review): download() only queues the request in the client library;
            # execute it like the ranged download below does - confirm against office365.
            self.graph_client.execute_query()
            self._io_delegate.seek(curr_pos)
            self._download_complete = True
            return

        # Fetch at least one chunk per round trip, but never beyond the end of the file.
        until = max(until, self._downloaded_until + self.chunk_size)
        if until > file_size:
            until = file_size

        if until > self._downloaded_until:
            self._io_delegate.seek(self._downloaded_until)  # continue after the last downloaded position
            drive_item_wrapper.download_range(self._io_delegate, self._downloaded_until, until,
                                              chunk_size=self.chunk_size)
            self.graph_client.execute_query()
            self._io_delegate.seek(curr_pos)

        self._downloaded_until = until
        self._download_complete = until == file_size

    def _need_download(self, raise_when_drive_item_not_found: bool = True, until: int = None):
        """Tell whether bytes up to ``until`` (None: the whole file) are still missing locally."""
        if self._download_complete:
            return False
        if until is None:
            until = self._get_file_size(raise_when_drive_item_not_found)
        if until is None:
            return True  # size unknown: a download is needed to find out
        return self._downloaded_until < until

    def _get_file_size(self, raise_when_drive_item_not_found: bool):
        """
        Return the remote file size (None when the item reports no size).

        For a missing file: raise, or return 0 when raising is disabled.
        """
        drive_item = self.get_drive_item()
        if drive_item is not None:
            return drive_item.properties.get('size', None)
        if raise_when_drive_item_not_found:
            raise Exception(f"Cannot find a SharePoint drive item for {self.__str__()}.")
        return 0

    @override
    def __iter__(self) -> Iterator[bytes]:
        # Iterating always starts at the beginning of the file.
        self.seek(0)
        return self

    @override
    def __next__(self) -> bytes:
        line = self.readline()
        if line == b'':
            raise StopIteration
        return line

    @override
    def __enter__(self) -> 'SPIO':
        return self

    @override
    def __exit__(self, exc_type, exc_val, exc_tb):
        self.close()

    @override
    def __del__(self) -> None:
        try:
            self.close()
        except Exception as e:
            logging.error(f"Error closing SPIO object: {e}")
1316
+