Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
8a68790
move processing of 'fields' queryparam to pydantic
RayBB Dec 5, 2025
c5a7b22
remove mode, it's already default
RayBB Dec 5, 2025
9ae433a
refactor: Move `q` parameter validation into `PublicQueryOptions` fie…
RayBB Dec 5, 2025
d1cb2da
feat: Add JSON parsing error handling for the `query` parameter in th…
RayBB Dec 5, 2025
df5c438
feat: Add JSON query parameter with validation to `WorkSearchScheme` …
RayBB Dec 5, 2025
138405d
refactor: encapsulate sort and spellcheck_count query parameters with…
RayBB Dec 5, 2025
a4a7c17
refactor: Unify search endpoint parameters into a single `SearchReque…
RayBB Dec 5, 2025
463211f
refactor: Exclude offset from the search query dictionary and add a t…
RayBB Dec 5, 2025
6e973ea
feat: add description and examples to the search language parameter
RayBB Dec 5, 2025
6616f18
feat: Add descriptions and examples to search query facet fields.
RayBB Dec 5, 2025
1b2a6e1
move q back up
RayBB Dec 5, 2025
5c5a18e
refactor: simplify `Field` default assignments and ensure sorted defa…
RayBB Dec 5, 2025
2eb7533
refactor: replace HTTPException with ValueError in Pydantic field val…
RayBB Dec 5, 2025
1b32833
docs: add "search" tag to `/search.json` endpoint
RayBB Dec 5, 2025
a8c54cb
feat: Use BeforeValidator to parse comma-separated fields string into…
RayBB Dec 5, 2025
dce8305
feat: Parse search query parameter as a JSON dictionary and refactor …
RayBB Dec 5, 2025
745c782
test: replace query alias test with precedence test for search parame…
RayBB Dec 5, 2025
df7f55a
feat: introduce Pydantic SearchResponse model for `/search.json` endp…
RayBB Dec 5, 2025
3181dbf
feat: Add API contract tests to compare FastAPI and webpy search endp…
RayBB Dec 6, 2025
4ecb9de
feat: Remove explicit `page` and `limit` addition to search query dic…
RayBB Dec 6, 2025
7f9d60e
feat: Standardize boolean query parameters to string literals, add `a…
RayBB Dec 7, 2025
05ed95a
add all search fields programmatically
RayBB Dec 12, 2025
d5e26a6
test: rename arbitrary query parameter `osp_count` to `osp_count_fake…
RayBB Dec 12, 2025
affc787
refactor: Remove dynamic field addition by inlining WorkSearchScheme …
RayBB Dec 12, 2025
b548926
feat: refactor work search query parameter handling.
RayBB Dec 17, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
240 changes: 162 additions & 78 deletions openlibrary/fastapi/search.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,18 @@
from __future__ import annotations

import json
from typing import Annotated, Any

from fastapi import APIRouter, Depends, Query, Request
from fastapi.responses import JSONResponse
from pydantic import BaseModel, Field, model_validator
from typing import Annotated, Any, Literal, Self

from fastapi import APIRouter, Query, Request
from pydantic import (
BaseModel,
BeforeValidator,
ConfigDict,
Field,
computed_field,
field_validator,
model_validator,
)

from openlibrary.core.fulltext import fulltext_search_async
from openlibrary.plugins.inside.code import RESULTS_PER_PAGE
Expand All @@ -21,12 +28,16 @@

# Ideally this will go in a models file; we'll move it for the 2nd endpoint
class Pagination(BaseModel):
limit: Annotated[int, Query(ge=0)] = 100
offset: Annotated[int | None, Query(ge=0)] = None
page: Annotated[int | None, Query(ge=1)] = None
"""Reusable pagination parameters for API endpoints."""

limit: int = Field(100, ge=0, description="Maximum number of results to return.")
offset: int | None = Field(
None, ge=0, description="Number of results to skip.", exclude=True
)
page: int | None = Field(None, ge=1, description="Page number (1-indexed).")

@model_validator(mode='after')
def normalize_pagination(self) -> Pagination:
def normalize_pagination(self) -> Self:
if self.offset is not None:
self.page = None
elif self.page is None:
Expand All @@ -36,12 +47,12 @@ def normalize_pagination(self) -> Pagination:

class PublicQueryOptions(BaseModel):
"""
This class has all the parameters that are passed as "query"
All parameters (and Pagination) that will be passed to the query.
"""

q: str = Query("", description="The search query, like keyword.")
q: str = Field("", description="The search query string.")

# from check_params in works.py
# from public_api_params in works.py
title: str | None = None
publisher: str | None = None
oclc: str | None = None
Expand All @@ -52,86 +63,159 @@ class PublicQueryOptions(BaseModel):
person: str | None = None
time: str | None = None
# from workscheme facet_fields
has_fulltext: bool | None = None
public_scan_b: bool | None = None
has_fulltext: Literal["true", "false"] | None = None
public_scan_b: list[Literal["true", "false"]] = []

"""
The day will come when someone asks, why do we have Field wrapping Query
The answer seems to be:
1. Depends(): Tells FastAPI to explode the Pydantic model into individual arguments (dependency injection).
2. Field(Query([])): Overrides the default behavior for lists. It forces FastAPI to look for ?author_key=...
in the URL query string instead of expecting a JSON array in the request body.
The Field part is needed because FastAPI's default guess for lists inside Pydantic models is wrong for this use case.
It guesses "JSON Body," and you have to manually correct it to "Query String."
See: https://github.com/internetarchive/openlibrary/pull/11517#issuecomment-3584196385
"""
author_key: list[str] = Field(Query([]))
subject_facet: list[str] = Field(Query([]))
person_facet: list[str] = Field(Query([]))
place_facet: list[str] = Field(Query([]))
time_facet: list[str] = Field(Query([]))
first_publish_year: list[str] = Field(Query([]))
publisher_facet: list[str] = Field(Query([]))
language: list[str] = Field(Query([]))
author_facet: list[str] = Field(Query([]))


@router.get("/search.json")
# List fields (facets)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
# List fields (facets)
# List fields (facets). Note the examples power some of the unit tests.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Won't bother since the unit tests powered by the examples will be deleted once the webpy endpoint is deleted, and the examples don't really need justification.

author_key: list[str] = Field(
[], description="Filter by author key.", examples=["OL1394244A"]
)
subject_facet: list[str] = Field(
[], description="Filter by subject.", examples=["Fiction", "City planning"]
)
person_facet: list[str] = Field(
[],
description="Filter by person. Not the author but the person who is the subject of the work.",
examples=["Jane Jacobs (1916-2006)", "Cory Doctorow"],
)
place_facet: list[str] = Field(
[], description="Filter by place.", examples=["New York", "Xiamen Shi"]
)
time_facet: list[str] = Field(
[],
description="Filter by time. It can be formatted many ways.",
examples=["20th century", "To 70 A.D."],
)
first_publish_year: list[str] = Field(
[], description="Filter by first publish year.", examples=["2020"]
)
publisher_facet: list[str] = Field(
[], description="Filter by publisher.", examples=["Urban Land Institute"]
)
language: list[str] = Field(
[],
description="Filter by language using three-letter language codes.",
examples={
"english": {
"summary": "English",
"description": "Returns results in English",
"value": ["eng"],
},
"spanish": {
"summary": "Spanish",
"description": "Returns results in Spanish",
"value": ["spa"],
},
"english_and_spanish": {
"summary": "English + Spanish",
"description": "Bilingual results",
"value": ["eng", "spa"],
},
},
)
author_facet: list[str] = Field(
[],
description="(alias for author_key) Filter by author key.",
examples=["OL1394244A"],
)

isbn: str | None = None
author: str | None = None

@field_validator('q')
@classmethod
def parse_q_string(cls, v: str) -> str:
    """Validate the raw `q` search string.

    Raising ValueError here (rather than HTTPException) lets Pydantic
    surface the problem as a standard 422 validation error.
    """
    # validate_search_json_query returns an error message, or a falsy
    # value when the query is acceptable.
    if q_error := validate_search_json_query(v):
        raise ValueError(q_error)
    return v


class SearchRequestParams(PublicQueryOptions, Pagination):
fields: Annotated[list[str], BeforeValidator(parse_fields_string)] = Field(
",".join(sorted(WorkSearchScheme.default_fetched_fields)),
description="The fields to return.",
)
query: Annotated[dict[str, Any], BeforeValidator(parse_query_json)] = Field(
None, description="A full JSON encoded solr query.", examples=['{"q": "mark"}']
)
sort: str | None = Field(None, description="The sort order of results.")
spellcheck_count: int | None = Field(
default_spellcheck_count,
description="The number of spellcheck suggestions.",
)

def parse_fields_string(v: str | list[str]) -> list[str]:
if isinstance(v, str):
v = [v]
return [f.strip() for item in v for f in str(item).split(",") if f.strip()]

def parse_query_json(v: str) -> dict[str, Any]:
    """Parse the raw `query` query-parameter string as JSON.

    Raises:
        ValueError: if the string is not valid JSON; Pydantic surfaces
            this as a 422 validation error.
    """
    try:
        return json.loads(v)
    except json.JSONDecodeError as e:
        # Chain the original decode error (`from e`) so the root cause is
        # preserved in tracebacks instead of being silently discarded.
        raise ValueError(f"Invalid JSON in 'query' parameter: {e}") from e

@computed_field
def selected_query(self) -> dict[str, Any]:
    """The effective search query dict: a JSON `query` param, when
    provided, takes precedence over the individual field parameters."""
    if isinstance(self.query, dict):
        return self.query
    else:
        # Include all fields that belong to PublicQueryOptions (the search query part)
        # This automatically excludes SearchRequestParams-specific fields like fields, sort, etc.
        query_fields = set(PublicQueryOptions.model_fields.keys())
        # Add dynamically handled fields from WorkSearchScheme
        query_fields |= WorkSearchScheme.all_fields
        query_fields |= set(WorkSearchScheme.field_name_map.keys())
        # exclude_none drops unset optional params so they don't pollute
        # the Solr query; NOTE(review): fields defaulting to [] are still
        # included — presumably downstream treats empty lists as no-ops.
        q = self.model_dump(include=query_fields, exclude_none=True)
        return q


class SearchResponse(BaseModel):
    """The response from a (books) search query."""

    # extra='allow' passes through any additional keys returned by the
    # search backend without declaring them here.
    model_config = ConfigDict(extra='allow')

    numFound: int
    start: int
    numFoundExact: bool
    # NOTE(review): duplicates numFound under a snake_case name —
    # presumably kept for backward compatibility; confirm with consumers.
    num_found: int

    documentation_url: str = "https://openlibrary.org/dev/docs/api/search"
    q: str
    offset: int | None

    # Defined last so it renders at the bottom.
    # We use dict[str, Any] to avoid documenting the internal book fields.
    # (Mutable default is safe: pydantic deep-copies field defaults per instance.)
    docs: list[dict[str, Any]] = []


@router.get("/search.json", tags=["search"], response_model=SearchResponse)
async def search_json(
request: Request,
pagination: Annotated[Pagination, Depends()],
public_query_options: Annotated[PublicQueryOptions, Depends()],
sort: str | None = Query(None, description="The sort order of results."),
fields: str | None = Query(None, description="The fields to return."),
spellcheck_count: int | None = Query(
default_spellcheck_count, description="The number of spellcheck suggestions."
),
query_str: str | None = Query(
None, alias="query", description="A full JSON encoded solr query."
),
):
params: Annotated[SearchRequestParams, Query()],
) -> Any:
"""
Performs a search for documents based on the provided query.
"""
query: dict[str, Any] = {}
if query_str:
query = json.loads(query_str)
else:
# In an ideal world, we would pass the model instead of the dict but that's a big refactoring down the line
query = public_query_options.model_dump(exclude_none=True)
query.update({"page": pagination.page, "limit": pagination.limit})

_fields: list[str] = list(WorkSearchScheme.default_fetched_fields)
if fields:
_fields = fields.split(',')

if q_error := validate_search_json_query(public_query_options.q):
return JSONResponse(status_code=422, content={"error": q_error})

response = await work_search_async(
query,
sort=sort,
page=pagination.page,
offset=pagination.offset,
limit=pagination.limit,
fields=_fields,
raw_response = await work_search_async(
params.selected_query,
sort=params.sort,
page=params.page,
offset=params.offset,
limit=params.limit,
fields=params.fields,
# We do not support returning facets from /search.json,
# so disable it. This makes it much faster.
facet=False,
spellcheck_count=spellcheck_count,
spellcheck_count=params.spellcheck_count,
request_label='BOOK_SEARCH_API',
lang=request.state.lang,
)

response['documentation_url'] = "https://openlibrary.org/dev/docs/api/search"
response['q'] = public_query_options.q
response['offset'] = pagination.offset

# Put docs at the end of the response
docs = response.pop('docs', [])
response['docs'] = docs
raw_response['q'] = params.q
raw_response['offset'] = params.offset

return response
return raw_response


@router.get("/search/inside.json")
Expand Down
1 change: 1 addition & 0 deletions openlibrary/plugins/worksearch/code.py
Original file line number Diff line number Diff line change
Expand Up @@ -1264,6 +1264,7 @@ class search_json(delegate.page):
def GET(self):
i = web.input(
author_key=[],
author_facet=[],
subject_facet=[],
person_facet=[],
place_facet=[],
Expand Down
44 changes: 22 additions & 22 deletions openlibrary/plugins/worksearch/schemes/works.py
Original file line number Diff line number Diff line change
Expand Up @@ -238,7 +238,8 @@ class WorkSearchScheme(SearchScheme):
): lambda: f'ebook_access:[* TO {get_fulltext_min()}]',
}
)
check_params = frozenset(
# These are extra public api params on top of facets, which are also public
public_api_params = frozenset(
{
'title',
'publisher',
Expand All @@ -250,6 +251,8 @@ class WorkSearchScheme(SearchScheme):
'person',
'time',
'author_key',
'author',
'isbn',
}
)

Expand Down Expand Up @@ -287,27 +290,24 @@ def transform_user_query(

def build_q_from_params(self, params: dict[str, Any]) -> str:
q_list = []
if 'author' in params:
v = params['author'].strip()
m = re_author_key.search(v)
if m:
q_list.append(f"author_key:({m.group(1)})")
else:
v = fully_escape_query(v)
q_list.append(f"(author_name:({v}) OR author_alternative_name:({v}))")

# support web.input fields being either a list or string
# when default values used
q_list += [
f'{k}:({fully_escape_query(val)})'
for k in (self.check_params & set(params))
for val in (params[k] if isinstance(params[k], list) else [params[k]])
]

if params.get('isbn'):
q_list.append(
'isbn:(%s)' % (normalize_isbn(params['isbn']) or params['isbn'])
)
for k in self.public_api_params & set(params):
values = params[k] if isinstance(params[k], list) else [params[k]]
for val in values:
if k == 'author':
v = val.strip()
m = re_author_key.search(v)
if m:
q_list.append(f"author_key:({m.group(1)})")
else:
v = fully_escape_query(v)
q_list.append(
f"(author_name:({v}) OR author_alternative_name:({v}))"
)
elif k == 'isbn':
normalized = normalize_isbn(val)
q_list.append(f'isbn:({normalized or val})')
else:
q_list.append(f'{k}:({fully_escape_query(val)})')

return ' AND '.join(q_list)

Expand Down
Loading
Loading