PyPI - thordata-sdk - Versions diffs - 1.5.0__py3-none-any.whl → 1.7.0__py3-none-any.whl - Mend

thordata-sdk 1.5.0__py3-none-any.whl → 1.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (26) hide show

thordata/__init__.py +1 -1
thordata/async_client.py +55 -13
thordata/client.py +64 -13
thordata/enums.py +2 -2
thordata/exceptions.py +80 -20
thordata/models.py +1 -1
thordata/retry.py +1 -1
thordata/tools/__init__.py +11 -1
thordata/tools/code.py +17 -4
thordata/tools/ecommerce.py +194 -10
thordata/tools/professional.py +155 -0
thordata/tools/search.py +47 -5
thordata/tools/social.py +225 -41
thordata/tools/travel.py +100 -0
thordata/tools/video.py +80 -7
thordata/types/serp.py +6 -2
thordata/types/task.py +75 -9
thordata/types/universal.py +37 -5
{thordata_sdk-1.5.0.dist-info → thordata_sdk-1.7.0.dist-info}/METADATA +63 -7
thordata_sdk-1.7.0.dist-info/RECORD +35 -0
{thordata_sdk-1.5.0.dist-info → thordata_sdk-1.7.0.dist-info}/WHEEL +1 -1
thordata/_example_utils.py +0 -77
thordata/demo.py +0 -138
thordata_sdk-1.5.0.dist-info/RECORD +0 -35
{thordata_sdk-1.5.0.dist-info → thordata_sdk-1.7.0.dist-info}/licenses/LICENSE +0 -0
{thordata_sdk-1.5.0.dist-info → thordata_sdk-1.7.0.dist-info}/top_level.txt +0 -0

thordata/tools/social.py CHANGED Viewed

@@ -12,12 +12,47 @@ from .base import ToolRequest
 class TikTok:
     @dataclass
     class Post(ToolRequest):
-        """TikTok Post Information Scraper"""
+        """TikTok Post Information Scraper by URL"""
         SPIDER_ID = "tiktok_posts_by-url"
         SPIDER_NAME = "tiktok.com"
         url: str
-        page_turning: int | None = None
+        country: str | None = None
+    @dataclass
+    class PostsByKeywords(ToolRequest):
+        """TikTok Post Information Scraper by Keywords"""
+        SPIDER_ID = "tiktok_posts_by-keywords"
+        SPIDER_NAME = "tiktok.com"
+        search_keyword: str
+        num_of_posts: int | None = None
+        posts_to_not_include: str | None = None
+        country: str | None = None
+    @dataclass
+    class PostsByProfileUrl(ToolRequest):
+        """TikTok Post Information Scraper by Profile URL"""
+        SPIDER_ID = "tiktok_posts_by-profileurl"
+        SPIDER_NAME = "tiktok.com"
+        url: str
+        start_date: str | None = None
+        end_date: str | None = None
+        num_of_posts: int | None = None
+        what_to_collect: str | None = None
+        post_type: str | None = None
+        posts_to_not_include: str | None = None
+        country: str | None = None
+    @dataclass
+    class PostsByListUrl(ToolRequest):
+        """TikTok Post Information Scraper by List URL"""
+        SPIDER_ID = "tiktok_posts_by-listurl"
+        SPIDER_NAME = "tiktok.com"
+        url: str
+        num_of_posts: int | None = None
     @dataclass
     class Comment(ToolRequest):
@@ -30,33 +65,62 @@ class TikTok:
     @dataclass
     class Profile(ToolRequest):
-        """TikTok Profile Information Scraper"""
+        """TikTok Profile Information Scraper by URL"""
         SPIDER_ID = "tiktok_profiles_by-url"
         SPIDER_NAME = "tiktok.com"
         url: str  # Profile URL (e.g. https://www.tiktok.com/@user)
-        search_url: str | None = None
+        country: str | None = None
+    @dataclass
+    class ProfilesByListUrl(ToolRequest):
+        """TikTok Profile Information Scraper by List URL"""
+        SPIDER_ID = "tiktok_profiles_by-listurl"
+        SPIDER_NAME = "tiktok.com"
+        search_url: str
         country: str | None = None
         page_turning: int | None = None
     @dataclass
     class Shop(ToolRequest):
-        """TikTok Shop Information Scraper"""
+        """TikTok Shop Information Scraper by URL"""
         SPIDER_ID = "tiktok_shop_by-url"
         SPIDER_NAME = "tiktok.com"
         url: str
-        category_url: str | None = None
-        keyword: str | None = None
+    @dataclass
+    class ShopByCategoryUrl(ToolRequest):
+        """TikTok Shop Information Scraper by Category URL"""
+        SPIDER_ID = "tiktok_shop_by-category-url"
+        SPIDER_NAME = "tiktok.com"
+        category_url: str
+    @dataclass
+    class ShopByKeywords(ToolRequest):
+        """TikTok Shop Information Scraper by Keywords"""
+        SPIDER_ID = "tiktok_shop_by-keywords"
+        SPIDER_NAME = "tiktok.com"
+        keyword: str
+        domain: str = "https://www.tiktok.com/shop"
         page_turning: int | None = None
 class Facebook:
+    @dataclass
+    class PostDetails(ToolRequest):
+        """Facebook Post Details Scraper"""
+        SPIDER_ID = "facebook_post_by-posts-url"
+        SPIDER_NAME = "facebook.com"
+        url: str
     @dataclass
     class Posts(ToolRequest):
-        """Facebook Posts Scraper"""
+        """Facebook Posts Scraper by Keywords"""
         SPIDER_ID = "facebook_post_by-keywords"
         SPIDER_NAME = "facebook.com"
@@ -66,27 +130,70 @@ class Facebook:
         number: int = 10
     @dataclass
-    class PostDetails(ToolRequest):
-        """Facebook Post Details Scraper"""
+    class EventByEventListUrl(ToolRequest):
+        """Facebook Events Scraper by Event List URL"""
-        SPIDER_ID = "facebook_post_by-posts-url"
+        SPIDER_ID = "facebook_event_by-eventlist-url"
+        SPIDER_NAME = "facebook.com"
+        url: str
+        upcoming_events_only: str | None = None
+    @dataclass
+    class EventBySearchUrl(ToolRequest):
+        """Facebook Events Scraper by Search URL"""
+        SPIDER_ID = "facebook_event_by-search-url"
+        SPIDER_NAME = "facebook.com"
+        url: str
+    @dataclass
+    class EventByEventsUrl(ToolRequest):
+        """Facebook Events Scraper by Events URL"""
+        SPIDER_ID = "facebook_event_by-events-url"
+        SPIDER_NAME = "facebook.com"
+        url: str
+    @dataclass
+    class Profile(ToolRequest):
+        """Facebook Profile Scraper"""
+        SPIDER_ID = "facebook_profile_by-profiles-url"
+        SPIDER_NAME = "facebook.com"
+        url: str
+    @dataclass
+    class Comment(ToolRequest):
+        """Facebook Post Comments Scraper"""
+        SPIDER_ID = "facebook_comment_by-comments-url"
         SPIDER_NAME = "facebook.com"
         url: str
+        get_all_replies: str | None = None
+        limit_records: str | None = None
+        comments_sort: str | None = None  # All comments
 class Instagram:
     @dataclass
     class Profile(ToolRequest):
-        """Instagram Profile Scraper"""
+        """Instagram Profile Scraper by Username"""
         SPIDER_ID = "ins_profiles_by-username"
         SPIDER_NAME = "instagram.com"
         username: str
-        profileurl: str | None = None
+    @dataclass
+    class ProfileByUrl(ToolRequest):
+        """Instagram Profile Scraper by Profile URL"""
+        SPIDER_ID = "ins_profiles_by-profileurl"
+        SPIDER_NAME = "instagram.com"
+        profileurl: str
     @dataclass
     class Post(ToolRequest):
-        """Instagram Post Information Scraper"""
+        """Instagram Post Information Scraper by Profile URL"""
         SPIDER_ID = "ins_posts_by-profileurl"
         SPIDER_NAME = "instagram.com"
@@ -96,14 +203,45 @@ class Instagram:
         end_date: str | None = None
         post_type: str | None = None  # Post or Reel
+    @dataclass
+    class PostByUrl(ToolRequest):
+        """Instagram Post Information Scraper by Post URL"""
+        SPIDER_ID = "ins_posts_by-posturl"
+        SPIDER_NAME = "instagram.com"
+        posturl: str
     @dataclass
     class Reel(ToolRequest):
-        """Instagram Reel Information Scraper"""
+        """Instagram Reel Information Scraper by URL"""
         SPIDER_ID = "ins_reel_by-url"
         SPIDER_NAME = "instagram.com"
         url: str
+    @dataclass
+    class AllReel(ToolRequest):
+        """Instagram All Reel Information Scraper by URL"""
+        SPIDER_ID = "ins_allreel_by-url"
+        SPIDER_NAME = "instagram.com"
+        url: str
+        num_of_posts: int | None = None
+        posts_to_not_include: str | None = None
+        start_date: str | None = None
+        end_date: str | None = None
+    @dataclass
+    class ReelByListUrl(ToolRequest):
+        """Instagram Reel Information Scraper by List URL"""
+        SPIDER_ID = "ins_reel_by-listurl"
+        SPIDER_NAME = "instagram.com"
+        url: str
         num_of_posts: int | None = None
+        posts_to_not_include: str | None = None
+        start_date: str | None = None
+        end_date: str | None = None
     @dataclass
     class Comment(ToolRequest):
@@ -117,30 +255,35 @@ class Instagram:
 class Twitter:
     @dataclass
     class Profile(ToolRequest):
-        """Twitter(X) Profile Scraper"""
+        """Twitter(X) Profile Scraper by Profile URL"""
-        SPIDER_ID = "twitter_profiles_by-url"
-        SPIDER_NAME = "twitter.com"
+        SPIDER_ID = "twitter_profile_by-profileurl"
+        SPIDER_NAME = "x.com"
         url: str
-        max_number_of_posts: int | None = None
-        user_name: str | None = None
     @dataclass
-    class Post(ToolRequest):
-        """
-        Twitter(X) Post Information Scraper
-        Updates based on integration snippet:
-        - SPIDER_NAME is 'x.com'
-        - Only 'url' is required.
-        """
+    class ProfileByUsername(ToolRequest):
+        """Twitter(X) Profile Scraper by Username"""
+        SPIDER_ID = "twitter_profile_by-username"
+        SPIDER_NAME = "x.com"
+        user_name: str
-        SPIDER_ID = "twitter_by-posturl_by-url"
-        SPIDER_NAME = "x.com"  # Updated from snippet
+    @dataclass
+    class Post(ToolRequest):
+        """Twitter(X) Post Information Scraper by Post URL"""
+        SPIDER_ID = "twitter_post_by-posturl"
+        SPIDER_NAME = "x.com"
         url: str  # Post URL (e.g. https://x.com/user/status/123)
-        start_date: str | None = None
-        end_date: str | None = None
+    @dataclass
+    class PostByProfileUrl(ToolRequest):
+        """Twitter(X) Post Information Scraper by Profile URL"""
+        SPIDER_ID = "twitter_post_by-profileurl"
+        SPIDER_NAME = "x.com"
+        url: str  # Profile URL
 class LinkedIn:
@@ -154,30 +297,70 @@ class LinkedIn:
     @dataclass
     class Jobs(ToolRequest):
-        """LinkedIn Job Listing Scraper"""
+        """LinkedIn Job Listing Scraper by Job Listing URL"""
         SPIDER_ID = "linkedin_job_listings_information_by-job-listing-url"
         SPIDER_NAME = "linkedin.com"
         job_listing_url: str
+        page_turning: int | None = None
+    @dataclass
+    class JobByUrl(ToolRequest):
+        """LinkedIn Job Listing Scraper by Job URL"""
+        SPIDER_ID = "linkedin_job_listings_information_by-job-url"
+        SPIDER_NAME = "linkedin.com"
+        job_url: str
+    @dataclass
+    class JobByKeyword(ToolRequest):
+        """LinkedIn Job Listing Scraper by Keyword"""
+        SPIDER_ID = "linkedin_job_listings_information_by-keyword"
+        SPIDER_NAME = "linkedin.com"
         location: str
-        job_url: str | None = None
+        keyword: str
+        time_range: str | None = None
+        experience_level: str | None = None
+        job_type: str | None = None
+        remote: str | None = None
+        company: str | None = None
+        selective_search: str | None = None
+        jobs_to_not_include: str | None = None
+        location_radius: str | None = None
         page_turning: int | None = None
-        keyword: str | None = None
-        remote: str | None = None  # On_site, Remote, Hybrid
 class Reddit:
     @dataclass
     class Posts(ToolRequest):
-        """Reddit Post Information Scraper"""
+        """Reddit Post Information Scraper by URL"""
         SPIDER_ID = "reddit_posts_by-url"
         SPIDER_NAME = "reddit.com"
         url: str
-        keyword: str | None = None
-        subreddit_url: str | None = None
+    @dataclass
+    class PostsByKeywords(ToolRequest):
+        """Reddit Post Information Scraper by Keywords"""
+        SPIDER_ID = "reddit_posts_by-keywords"
+        SPIDER_NAME = "reddit.com"
+        keyword: str
+        date: str | None = None  # All time
+        num_of_posts: int | None = None
+        sort_by: str | None = None
+    @dataclass
+    class PostsBySubredditUrl(ToolRequest):
+        """Reddit Post Information Scraper by Subreddit URL"""
+        SPIDER_ID = "reddit_posts_by-subredditurl"
+        SPIDER_NAME = "reddit.com"
+        url: str
+        sort_by: str | None = None
         num_of_posts: int | None = None
-        sort_by: str | None = None  # Relevance, Hot, Top, New
+        sort_by_time: str | None = None  # All Time
     @dataclass
     class Comment(ToolRequest):
@@ -187,4 +370,5 @@ class Reddit:
         SPIDER_NAME = "reddit.com"
         url: str
         days_back: int | None = None
-        load_all_replies: bool | None = None
+        load_all_replies: str | None = None
+        comment_limit: int | None = None

thordata/tools/travel.py ADDED Viewed

@@ -0,0 +1,100 @@
+"""
+Travel & Real Estate Scraper Tools (Booking, Zillow, Airbnb)
+"""
+from __future__ import annotations
+from dataclasses import dataclass
+from .base import ToolRequest
+class Booking:
+    """Namespace for Booking.com tools."""
+    @dataclass
+    class HotelByUrl(ToolRequest):
+        """Booking Hotel Information Scraper by URL"""
+        SPIDER_ID = "booking_hotellist_by-url"
+        SPIDER_NAME = "booking.com"
+        url: str
+class Zillow:
+    """Namespace for Zillow tools."""
+    @dataclass
+    class PriceByUrl(ToolRequest):
+        """Zillow Property Price History Information Scraper by URL"""
+        SPIDER_ID = "zillow_price_by-url"
+        SPIDER_NAME = "zillow.com"
+        url: str
+    @dataclass
+    class ProductByUrl(ToolRequest):
+        """Zillow Property Details Information Scraper by URL"""
+        SPIDER_ID = "zillow_product_by-url"
+        SPIDER_NAME = "zillow.com"
+        url: str
+    @dataclass
+    class ProductByFilter(ToolRequest):
+        """Zillow Property Details Information Scraper by Filter"""
+        SPIDER_ID = "zillow_product_by-filter"
+        SPIDER_NAME = "zillow.com"
+        keywords_location: str
+        listingCategory: str | None = None  # For Rent, For Sale
+        HomeType: str | None = None  # Houses
+        days_on_zillow: str | None = None  # Any
+        maximum: int | None = None
+    @dataclass
+    class ProductByListUrl(ToolRequest):
+        """Zillow Property Details Information Scraper by List URL"""
+        SPIDER_ID = "zillow_product_by-listurl"
+        SPIDER_NAME = "zillow.com"
+        url: str
+        maximum: int | None = None
+class Airbnb:
+    """Namespace for Airbnb tools."""
+    @dataclass
+    class ProductBySearchUrl(ToolRequest):
+        """Airbnb Properties Information Scraper by Search URL"""
+        SPIDER_ID = "airbnb_product_by-searchurl"
+        SPIDER_NAME = "airbnb.com"
+        searchurl: str
+        country: str | None = None
+    @dataclass
+    class ProductByLocation(ToolRequest):
+        """Airbnb Properties Information Scraper by Location"""
+        SPIDER_ID = "airbnb_product_by-location"
+        SPIDER_NAME = "airbnb.com"
+        location: str
+        check_in: str | None = None
+        check_out: str | None = None
+        num_of_adults: str | None = None
+        num_of_children: str | None = None
+        num_of_infants: str | None = None
+        num_of_pets: str | None = None
+        country: str | None = None
+        currency: str | None = None
+    @dataclass
+    class ProductByUrl(ToolRequest):
+        """Airbnb Properties Information Scraper by URL"""
+        SPIDER_ID = "airbnb_product_by-url"
+        SPIDER_NAME = "airbnb.com"
+        url: str
+        country: str | None = None

thordata/tools/video.py CHANGED Viewed

@@ -46,14 +46,23 @@ class YouTube:
     @dataclass
     class Profile(VideoToolRequest):
-        """YouTube Profile Scraper. Uses video_builder."""
+        """YouTube Profile Scraper by Keyword. Uses video_builder."""
         SPIDER_ID = "youtube_profiles_by-keyword"
         SPIDER_NAME = "youtube.com"
-        url: str  # Channel URL
+        keyword: str
         page_turning: int = 1
-        keyword: str | None = None
+        common_settings: CommonSettings = field(default_factory=CommonSettings)
+    @dataclass
+    class ProfileByUrl(VideoToolRequest):
+        """YouTube Profile Scraper by URL. Uses video_builder."""
+        SPIDER_ID = "youtube_profiles_by-url"
+        SPIDER_NAME = "youtube.com"
+        url: str  # Channel URL
         common_settings: CommonSettings = field(default_factory=CommonSettings)
     @dataclass
@@ -69,13 +78,77 @@ class YouTube:
         common_settings: CommonSettings = field(default_factory=CommonSettings)
     @dataclass
-    class VideoInfo(ToolRequest):
-        """YouTube Video Post Scraper (Metadata only). Standard builder."""
+    class VideoInfo(VideoToolRequest):
+        """YouTube Video Basic Information Scraper. Uses video_builder."""
+        SPIDER_ID = "youtube_product_by-id"
+        SPIDER_NAME = "youtube.com"
+        video_id: str
+        common_settings: CommonSettings = field(default_factory=CommonSettings)
+    @dataclass
+    class VideoPostByUrl(ToolRequest):
+        """YouTube Video Post Scraper by URL. Uses standard builder."""
-        # Note: This one does NOT inherit from VideoToolRequest because it uses the standard builder
-        # and doesn't support common_settings in the same way.
         SPIDER_ID = "youtube_video-post_by-url"
         SPIDER_NAME = "youtube.com"
         url: str  # Channel Video URL
+        order_by: str | None = None
+        start_index: str | None = None
         num_of_posts: str | None = None
+    @dataclass
+    class VideoPostBySearchFilters(ToolRequest):
+        """YouTube Video Post Scraper by Search Filters. Uses standard builder."""
+        SPIDER_ID = "youtube_video-post_by-search-filters"
+        SPIDER_NAME = "youtube.com"
+        keyword_search: str
+        features: str | None = None
+        type: str | None = None  # Videos
+        duration: str | None = None
+        upload_date: str | None = None
+        num_of_posts: str | None = None
+    @dataclass
+    class VideoPostByHashtag(ToolRequest):
+        """YouTube Video Post Scraper by Hashtag. Uses standard builder."""
+        SPIDER_ID = "youtube_video-post_by-hashtag"
+        SPIDER_NAME = "youtube.com"
+        hashtag: str
+        num_of_posts: str | None = None
+    @dataclass
+    class VideoPostByPodcastUrl(ToolRequest):
+        """YouTube Video Post Scraper by Podcast URL. Uses standard builder."""
+        SPIDER_ID = "youtube_video-post_by-podcast-url"
+        SPIDER_NAME = "youtube.com"
+        url: str  # Playlist URL
+        num_of_posts: str | None = None
+    @dataclass
+    class VideoPostByKeyword(ToolRequest):
+        """YouTube Video Post Scraper by Keyword. Uses standard builder."""
+        SPIDER_ID = "youtube_video-post_by-keyword"
+        SPIDER_NAME = "youtube.com"
+        keyword: str
+        num_of_posts: str | None = None
+    @dataclass
+    class VideoPostByExplore(ToolRequest):
+        """YouTube Video Post Scraper by Explore URL. Uses standard builder."""
+        SPIDER_ID = "youtube_video-post_by-explore"
+        SPIDER_NAME = "youtube.com"
+        url: str
+        all_tabs: str | None = None

thordata/types/serp.py CHANGED Viewed

@@ -117,7 +117,7 @@ class SerpRequest(ThordataBaseConfig):
     render_js: bool | None = None
     no_cache: bool | None = None
-    # Output
+    # Output format: "json" (json=1), "html" (json=3), "light_json" (json=4), or "both" (json=2)
     output_format: str = "json"
     # Advanced Google
@@ -155,13 +155,17 @@ class SerpRequest(ThordataBaseConfig):
         }
         # JSON output handling
+        # Dashboard mapping: json=1 (json), json=3 (html), json=4 (light json), json=2 (both)
         fmt = self.output_format.lower()
         if fmt == "json":
             payload["json"] = "1"
         elif fmt == "html":
-            pass  # No json param means HTML
+            payload["json"] = "3"
+        elif fmt in ("light_json", "light-json", "lightjson"):
+            payload["json"] = "4"
         elif fmt in ("2", "both", "json+html"):
             payload["json"] = "2"
+        # If no json param is set, default to HTML (legacy behavior)
         # Query param handling
         if engine == "yandex":

thordata-sdk 1.5.0__py3-none-any.whl → 1.7.0__py3-none-any.whl

thordata-sdk 1.5.0__py3-none-any.whl → 1.7.0__py3-none-any.whl