feat: users

ostia27 · ostia27 · commit 5a6c33377ba9 · 2026-02-03T19:47:22.000+03:00
diff --git a/LICENSE b/LICENSE
@@ -1,6 +1,6 @@
 MIT License
 
-Copyright (c) 2024
+Copyright (c) 2026
 
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal
diff --git a/README.md b/README.md
@@ -1,6 +1,6 @@
 # rule34scraper
 
-A fast Python API wrapper for booru-style image boards using selectolax (Lexbor engine).
+A high-performance Python API wrapper for **Rule34.xxx** (and other booru-style image boards). Built for speed and reliability, it uses `selectolax` (Lexbor engine) for lightning-fast parsing and `httpx` for both synchronous and asynchronous requests.
 
 ## Installation
 
@@ -10,90 +10,96 @@ pip install rule34scraper
 
 ## Usage
 
-### Basic Usage
+### Basic Searching
+
+`get_posts` returns a `(posts, tags)` tuple: the list of `Post` objects from a search result page and the accompanying tags.
 
 ```python
 from rule34scraper import R34Client
 
 with R34Client() as client:
-    # Search posts by tags
-    posts, tags = client.get_posts(tags="landscape", page=1)
+    # Search posts by tags (landscape, highres)
+    posts, tags = client.get_posts(tags="landscape highres", page=1)
     
     for post in posts:
-        print(f"ID: {post.id}, Score: {post.score}, Rating: {post.rating}")
-    
-    # Get post details
-    details = client.get_post_details(posts[0].id)
-    print(f"Image: {details.image_url}")
-    print(f"Size: {details.width}x{details.height}")
-    
-    # Download image
-    client.download_post(details, directory="downloads/")
+        print(f"ID: {post.id} | Score: {post.score} | Rating: {post.rating}")
 ```
 
-### Custom Base URL
+### Detailed Metadata (and Creator info)
 
-```python
-from rule34scraper import R34Client
+Search results provide basic info. For full metadata (including the **creator** name, high-res URLs, and comments), use `get_post_details`.
 
-# Use a different booru site
-client = R34Client(
-    base_url="https://example.com/index.php",
-    posts_per_page=42,
-    timeout=60.0,
-)
-
-# Custom headers
-client = R34Client(
-    base_url="https://example.com/index.php",
-    headers={"Cookie": "session=abc123"},
-)
+```python
+with R34Client() as client:
+    posts, _ = client.get_posts(tags="fantasy")
+    
+    # Get deep details for a specific post
+    details = client.get_post_details(posts[0].id)
+    
+    print(f"Post #{details.id} | Creator: {details.creator.name}")
+    print(f"Full Image: {details.image_url} ({details.width}x{details.height})")
+    print(f"Tags: {', '.join([t.name for t in details.tags[:5]])}")
 ```
 
-### Async Client
+### Async Usage
+
+Ideal for high-throughput applications.
 
 ```python
 import asyncio
 from rule34scraper import AsyncR34Client
 
 async def main():
     async with AsyncR34Client() as client:
-        posts, tags = await client.get_posts(tags="portrait", page=1)
-        details = await client.get_post_details(posts[0].id)
-        print(f"Image: {details.image_url}")
+        posts, _ = await client.get_posts(tags="portrait")
+        if posts:
+            details = await client.get_post_details(posts[0].id)
+            print(f"Async Detail Result: {details.id}")
 
 asyncio.run(main())
 ```
 
-### User Profiles
+### User Profiles & Favorites
 
 ```python
 with R34Client() as client:
     profile = client.get_user_profile("username")
-    print(f"User: {profile.username} (ID: {profile.id})")
-    print(f"Level: {profile.level}")
-    print(f"Posts: {profile.post_count}")
+    print(f"User: {profile.username} | Level: {profile.level}")
     print(f"Favorites: {profile.favorite_count}")
+    
+    # Access recent uploads or favorite posts
+    for fav in profile.recent_favorites[:5]:
+        print(f"Favorite Post: {fav.detail_url}")
 ```
 
-## Models
+### Downloading Media
+
+```python
+with R34Client() as client:
+    details = client.get_post_details(123456)
+    # Automatically handles file naming and subdirectory creation
+    filepath = client.download_post(details, directory="my_collection")
+    print(f"Saved to: {filepath}")
+```
 
-- `Post` - Thumbnail entry from search results
-- `PostDetails` - Full post metadata (image URL, dimensions, tags, comments)
-- `Tag` - Tag with name, count, and type
-- `PostComment` - User comment on a post
-- `UserProfile` - User profile with stats and recent posts
+## Configuration
 
-## Configuration Options
+| Option | Type | Default | Description |
+|--------|------|---------|-------------|
+| `base_url` | `str` | `https://rule34.xxx` | Base domain for requests. |
+| `timeout` | `float` | `30.0` | Request timeout in seconds. |
+| `posts_per_page` | `int` | `42` | Default count for pagination. |
+| `max_retries` | `int` | `5` | Maximum request attempts when rate-limited or on transport errors. |
+| `headers` | `dict` | *Browser-like* | Custom User-Agent or Cookies. |
+
+## Models
 
-| Parameter | Type | Default | Description |
-|-----------|------|---------|-------------|
-| `base_url` | str | `https://rule34.xxx/index.php` | Base URL for the API |
-| `timeout` | float | 30.0 | Request timeout in seconds |
-| `posts_per_page` | int | 42 | Posts per page for pagination |
-| `headers` | dict | Browser-like headers | Custom HTTP headers |
-| `max_retries` | int | 5 | Max retries for rate limits (429) |
+- **Post**: Basic entry from listings (id, preview_url, tags, score, rating).
+- **PostDetails**: Full data from the post page (creator as a `User` with `name` and optional `id`, image_url, sample_url, dimensions, comments).
+- **Tag**: Tag entry with `name`, `count`, and `type` (e.g., character, artist).
+- **UserProfile**: User stats, join date, and lists of recent uploads/favorites.
+- **PostComment**: Comment on a post with user, text, and timestamp.
 
 ## License
 
-MIT
+MIT - See the [LICENSE](LICENSE) file for details.
diff --git a/pyproject.toml b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
 
 [project]
 name = "rule34scraper"
-version = "1.0.2"
+version = "1.0.3"
 description = "A fast Python API wrapper for rule34 image board"
 readme = "README.md"
 license = "MIT"
@@ -27,6 +27,7 @@ dependencies = [
     "httpx>=0.24.0",
     "selectolax>=0.3.0",
     "tenacity>=8.2.0",
+    "aiofiles>=23.2.0",
 ]
 
 [project.optional-dependencies]
diff --git a/rule34scraper/__init__.py b/rule34scraper/__init__.py
@@ -25,4 +25,4 @@
     "CommentParser",
     "UserProfileParser",
 ]
-__version__ = "0.1.0"
+__version__ = "1.0.3"
diff --git a/rule34scraper/client.py b/rule34scraper/client.py
@@ -5,8 +5,10 @@
 from typing import Dict, List, Optional, Tuple, Union
 
 import httpx
+import aiofiles
 from tenacity import (
-    retry,
+    Retrying,
+    AsyncRetrying,
     stop_after_attempt,
     wait_exponential,
     retry_if_exception_type,
@@ -76,18 +78,17 @@ def client(self) -> httpx.Client:
 
     def _get(self, url: str, params: Dict = None) -> httpx.Response:
         """Execute GET request with retry logic for rate limits."""
-        @retry(
+        for attempt in Retrying(
             stop=stop_after_attempt(self._max_retries),
             wait=wait_exponential(multiplier=1, min=1.0, max=60.0),
             retry=retry_if_exception_type((RateLimitError, httpx.TransportError)),
             before_sleep=before_sleep_log(logger, logging.WARNING),
             reraise=True,
-        )
-        def _request() -> httpx.Response:
-            response = self.client.get(url, params=params)
-            return _check_rate_limit(response)
+        ):
+            with attempt:
+                response = self.client.get(url, params=params)
+                return _check_rate_limit(response)
         
-        return _request()
 
     def close(self) -> None:
         """Close the HTTP client."""
@@ -237,18 +238,19 @@ def client(self) -> httpx.AsyncClient:
 
     async def _get(self, url: str, params: Dict = None) -> httpx.Response:
         """Execute GET request with retry logic for rate limits."""
-        @retry(
+        async for attempt in AsyncRetrying(
             stop=stop_after_attempt(self._max_retries),
             wait=wait_exponential(multiplier=1, min=1.0, max=60.0),
             retry=retry_if_exception_type((RateLimitError, httpx.TransportError)),
             before_sleep=before_sleep_log(logger, logging.WARNING),
             reraise=True,
-        )
-        async def _request() -> httpx.Response:
-            response = await self.client.get(url, params=params)
-            return _check_rate_limit(response)
-        
-        return await _request()
+        ):
+            with attempt:
+                response = await self.client.get(url, params=params)
+                return _check_rate_limit(response)
+                
+        # This line is theoretically unreachable due to reraise=True
+        raise RuntimeError("Unreachable code reached")
 
     async def close(self) -> None:
         """Close the HTTP client."""
@@ -314,9 +316,9 @@ async def download(
         async with httpx.AsyncClient(headers=download_headers, follow_redirects=True, timeout=60.0) as client:
             async with client.stream("GET", url) as response:
                 response.raise_for_status()
-                with open(path, "wb") as f:
+                async with aiofiles.open(path, "wb") as f:
                     async for chunk in response.aiter_bytes(chunk_size=chunk_size):
-                        f.write(chunk)
+                        await f.write(chunk)
 
         return path
 
diff --git a/rule34scraper/models.py b/rule34scraper/models.py
@@ -2,17 +2,6 @@
 from typing import List, Optional
 
 
-@dataclass
-class Post:
-    id: int
-    preview_url: str
-    tags: List[str]
-    score: int
-    rating: str
-    detail_url: str
-    is_video: bool = False
-
-
 @dataclass
 class Tag:
     name: str
@@ -29,6 +18,24 @@ class PostComment:
     timestamp: str
 
 
+
+@dataclass
+class User:
+    name: str
+    id: Optional[int] = None
+
+
+@dataclass
+class Post:
+    id: int
+    preview_url: str
+    tags: List[str]
+    score: int
+    rating: str
+    detail_url: str
+    is_video: bool = False
+
+
 @dataclass
 class PostDetails:
     id: int
@@ -38,7 +45,7 @@ class PostDetails:
     height: int
     rating: str
     score: int
-    uploader: str
+    creator: User
     posted_at: str
     source_url: Optional[str]
     tags: List[Tag]
diff --git a/rule34scraper/parser.py b/rule34scraper/parser.py
@@ -2,14 +2,14 @@
 from typing import List, Optional, Tuple
 from selectolax.lexbor import LexborHTMLParser, LexborNode
 
-from .models import Post, Tag, PostComment, PostDetails, UserProfile
+from .models import Post, Tag, PostComment, PostDetails, UserProfile, User
 
 DEFAULT_BASE_URL = "https://rule34.xxx"
 
 
 class PostParser:
-    SCORE_PATTERN = re.compile(r"score:(\d+)")
-    RATING_PATTERN = re.compile(r"rating:(\w+)")
+    SCORE_PATTERN = re.compile(r"score:(\d+)", re.IGNORECASE)
+    RATING_PATTERN = re.compile(r"rating:(\w+)", re.IGNORECASE)
 
     @classmethod
     def parse_html(cls, html: str, base_url: str = DEFAULT_BASE_URL) -> List[Post]:
@@ -40,11 +40,14 @@ def _parse_thumb(cls, thumb: LexborNode, base_url: str = DEFAULT_BASE_URL) -> Op
         preview_url = img_attrs.get("src", "")
 
         alt_text = img_attrs.get("alt", "")
-        tags = alt_text.split() if alt_text else []
+        title_text = img_attrs.get("title", "")
+        # Combine both for metadata extraction as R34 is inconsistent
+        metadata_source = f"{alt_text} {title_text}"
 
-        title = img_attrs.get("title", "")
-        score = cls._extract_score(title)
-        rating = cls._extract_rating(title)
+        tags = alt_text.split() if alt_text else []
+        
+        score = cls._extract_score(metadata_source)
+        rating = cls._extract_rating(metadata_source)
 
         href = attrs.get("href", "")
         if href.startswith("http"):
@@ -123,7 +126,8 @@ def _parse_tag_item(cls, li: LexborNode) -> Optional[Tag]:
 
 class PostDetailsParser:
     ID_PATTERN = re.compile(r"Id:\s*(\d+)")
-    POSTED_PATTERN = re.compile(r"Posted:\s*(.*?)\s*by")
+    POSTED_PATTERN = re.compile(r"Posted:\s*(.*?)(?:\s*by|$)", re.IGNORECASE)
+    UPLOADER_PATTERN = re.compile(r"by\s+(.*)$", re.IGNORECASE)
     IMAGE_JS_PATTERN = re.compile(r"image\s*=\s*(\{[^}]+\})")
     WIDTH_PATTERN = re.compile(r"['\"]?width['\"]?\s*:\s*(\d+)")
     HEIGHT_PATTERN = re.compile(r"['\"]?height['\"]?\s*:\s*(\d+)")
@@ -146,7 +150,7 @@ def parse_html(cls, html: str) -> Optional[PostDetails]:
         post_id = 0
         rating = "unknown"
         score = 0
-        uploader = ""
+        uploader_name = "unknown"
         posted_at = ""
         source_url = None
 
@@ -173,9 +177,18 @@ def parse_html(cls, html: str) -> Optional[PostDetails]:
                 elif "Posted:" in text:
                     match = cls.POSTED_PATTERN.search(text)
                     posted_at = match.group(1).strip() if match else ""
+                    
+                    # Try to get uploader from link first
                     uploader_link = li.css_first("a")
                     if uploader_link:
-                        uploader = uploader_link.text(strip=True)
+                        uploader_name = uploader_link.text(strip=True)
+                    else:
+                        # Fallback to text parsing (for Anonymous)
+                        up_match = cls.UPLOADER_PATTERN.search(text)
+                        if up_match:
+                            uploader_name = up_match.group(1).strip()
+                        elif "by " in text:
+                            uploader_name = text.split("by ")[-1].strip()
 
                 elif "Source:" in text:
                     source_link = li.css_first("a")
@@ -193,7 +206,7 @@ def parse_html(cls, html: str) -> Optional[PostDetails]:
             height=height,
             rating=rating,
             score=score,
-            uploader=uploader,
+            creator=User(name=uploader_name),
             posted_at=posted_at,
             source_url=source_url,
             tags=tags,

Original file line number	Diff line number	Diff line change
`@@ -25,4 +25,4 @@`
`25`	`25`	`"CommentParser",`
`26`	`26`	`"UserProfileParser",`
`27`	`27`	`]`
`28`		`-__version__ = "0.1.0"`
	`28`	`+__version__ = "1.0.3"`