posit-dev
diff --git a/‎docs/user-guide/test-data-generation.qmd‎
Lines changed: 56 additions & 5 deletions b/‎docs/user-guide/test-data-generation.qmd‎
Lines changed: 56 additions & 5 deletions
diff --git a/‎pointblank/countries/__init__.py‎
Lines changed: 9 additions & 18 deletions b/‎pointblank/countries/__init__.py‎
Lines changed: 9 additions & 18 deletions
diff --git a/‎pointblank/countries/data/AT/address.json‎
Lines changed: 1 addition & 1 deletion b/‎pointblank/countries/data/AT/address.json‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎pointblank/countries/data/BR/address.json‎
Lines changed: 1 addition & 1 deletion b/‎pointblank/countries/data/BR/address.json‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎pointblank/countries/data/CH/address.json‎
Lines changed: 1 addition & 1 deletion b/‎pointblank/countries/data/CH/address.json‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎pointblank/countries/data/DE/address.json‎
Lines changed: 1 addition & 1 deletion b/‎pointblank/countries/data/DE/address.json‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎pointblank/countries/data/ES/address.json‎
Lines changed: 1 addition & 1 deletion b/‎pointblank/countries/data/ES/address.json‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎pointblank/countries/data/FI/address.json‎
Lines changed: 1 addition & 1 deletion b/‎pointblank/countries/data/FI/address.json‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎pointblank/countries/data/HR/address.json‎
Lines changed: 1 addition & 1 deletion b/‎pointblank/countries/data/HR/address.json‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎pointblank/countries/data/IT/address.json‎
Lines changed: 1 addition & 1 deletion b/‎pointblank/countries/data/IT/address.json‎
Lines changed: 1 addition & 1 deletion
@@ -369,6 +369,57 @@ You can use either ISO 3166-1 alpha-2 codes (e.g., `"US"`) or alpha-3 codes (e.g
 
 Additional countries and expanded coverage are planned for future releases.
 
+### Mixing Multiple Countries
+
+When you need test data that spans multiple locales (e.g., simulating an international customer
+base), you can pass a list or dict to the `country=` parameter instead of a single string.
+
+Passing a list of country codes splits rows as evenly as possible across those countries. Here,
+200 rows are divided among the US, Germany, and Japan (66 or 67 rows each):
+
+```{python}
+schema = pb.Schema(
+    name=pb.string_field(preset="name"),
+    city=pb.string_field(preset="city"),
+    postcode=pb.string_field(preset="postcode"),
+)
+
+pb.preview(pb.generate_dataset(schema, n=200, seed=23, country=["US", "DE", "JP"]))
+```
+
+To control the proportion of rows per country, pass a dict mapping country codes to weights. The
+following generates 200 rows with 70% from the US, 20% from Germany, and 10% from France:
+
+```{python}
+pb.preview(
+    pb.generate_dataset(
+        schema, n=200, seed=23,
+        country={"US": 0.7, "DE": 0.2, "FR": 0.1},
+    )
+)
+```
+
+Weights are auto-normalized, so `{"US": 7, "DE": 2, "FR": 1}` is equivalent to the example above.
+Row counts are allocated using largest-remainder apportionment, ensuring they always sum to exactly
+`n`.
+
+By default, rows from different countries are interleaved randomly (`shuffle=True`). Set
+`shuffle=False` to keep rows grouped by country in the order the countries are listed:
+
+```{python}
+pb.preview(
+    pb.generate_dataset(
+        schema, n=120, seed=23,
+        country=["US", "DE", "JP"], shuffle=False,
+    )
+)
+```
+
+All coherence systems (address, person, business) work correctly within each country's batch of
+rows. A French row will have a French name with a matching French email; a Japanese row will have a
+Japanese name with a matching Japanese email. Non-preset columns (integers, floats, booleans, dates)
+are generated independently for each batch but still respect their field constraints.
+
 ## Output Formats
 
 The `generate_dataset()` function supports multiple output formats via the `output=` parameter,
@@ -381,20 +432,20 @@ schema = pb.Schema(
 )
 ```
 
-The default output is a **Polars DataFrame**, which offers excellent performance and a modern API
-for data manipulation:
+The default output is a Polars DataFrame, which offers excellent performance and a modern API for
+data manipulation:
 
 ```{python}
-# Polars DataFrame (default)
 polars_df = pb.generate_dataset(schema, n=100, seed=23, output="polars")
+
 pb.preview(polars_df)
 ```
 
 If your workflow uses Pandas, simply specify `output="pandas"` to get a **Pandas DataFrame**:
 
 ```{python}
-# Pandas DataFrame
 pandas_df = pb.generate_dataset(schema, n=100, seed=23, output="pandas")
+
 pb.preview(pandas_df)
 ```
 
@@ -592,7 +643,7 @@ By incorporating test data generation into your process, you can:
 - create reproducible test fixtures for automated testing and CI/CD pipelines
 - generate locale-specific data for internationalization testing across 55 countries
 - ensure coherent relationships between related fields like names, emails, addresses, jobs, and
-  license plates
+  license plates
 - produce datasets of any size with consistent, realistic values
 
 Whether you're building validation logic, testing data pipelines, or simply need sample data for
 
@@ -1,11 +1,3 @@
-"""
-Country-based data generation for synthetic test data.
-
-This module provides country-specific data generation without external dependencies.
-It supports generating realistic names, addresses, emails, and other data types
-with proper localization based on ISO 3166-1 country codes.
-"""
-
 from __future__ import annotations
 
 import json
@@ -587,9 +579,8 @@ def seed(self, seed: int) -> None:
     def _get_person(self, gender: str | None = None) -> dict[str, str]:
         """Get a coherent person (first_name, last_name, gender) from the data.
 
-        If person data has ``ethnic_groups``, picks a group first (weighted by population
-        share) then draws first and last names from within that group so they remain
-        ethnically coherent.
+        If person data has `ethnic_groups`, picks a group first (weighted by population share) then
+        draws first and last names from within that group so they remain ethnically coherent.
         """
         # If no gender specified, randomly select one (weighted toward male/female)
         if gender is None:
@@ -678,8 +669,8 @@ def _generate_first_name(self, gender: str | None = None) -> str:
     def _generate_last_name(self, gender: str | None = None) -> str:
         """Generate a random last name (internal, no caching).
 
-        If last_names is a dict with 'male'/'female' keys (e.g., IS patronymics),
-        picks from the gender-appropriate list.
+        If last_names is a dict with 'male'/'female' keys (e.g., Icelandic patronymics), picks
+        from the gender-appropriate list.
         """
         names = self._data.person.get("last_names", ["Smith"])
 
@@ -702,9 +693,9 @@ def init_row_persons(self, n_rows: int) -> None:
         """
         Pre-generate person data for multiple rows to ensure coherence across columns.
 
-        This should be called before generating a dataset with person-related columns.
-        When active, first_name(), last_name(), name(), email() will use the person
-        for the current row (set via set_row()).
+        This should be called before generating a dataset with person-related columns. When active,
+        `first_name()`, `last_name()`, `name()`, and `email()` will use the person for the current
+        row (set via `set_row()`).
 
         Parameters
         ----------
@@ -721,8 +712,8 @@ def new_person(self, gender: str | None = None) -> dict[str, str]:
         """
         Select a new random person and cache it for coherent generation.
 
-        Call this before generating related person components (first_name, last_name, email)
-        to ensure they all refer to the same person.
+        Call this before generating related person components (first_name, last_name, email) to
+        ensure they all refer to the same person.
 
         Returns
         -------
 
@@ -1569,7 +1569,7 @@
         "{street} {building_number}, {postcode} {city}",
         "{street} {building_number}/{unit}, {postcode} {city}"
     ],
-    "country": "Österreich",
+    "country": "Austria",
     "country_code": "AT",
     "phone_area_codes": {
         "Wien": [
 
@@ -1748,7 +1748,7 @@
         "{street}, {building_number}, {postcode} {city} - {state_abbr}",
         "{street}, {building_number}, Apto {unit}, {postcode} {city} - {state_abbr}"
     ],
-    "country": "Brasil",
+    "country": "Brazil",
     "country_code": "BR",
     "phone_area_codes": {
         "São Paulo": [
 
@@ -3063,7 +3063,7 @@
         "{street} {building_number}, CH-{postcode} {city}",
         "{street} {building_number}, {postcode} {city} ({state})"
     ],
-    "country": "Schweiz",
+    "country": "Switzerland",
     "country_code": "CH",
     "phone_area_codes": {
         "Zürich": [
 
@@ -4324,7 +4324,7 @@
         "{street} {building_number}, {postcode} {city}",
         "{street} {building_number}, Whg. {unit}, {postcode} {city}"
     ],
-    "country": "Deutschland",
+    "country": "Germany",
     "country_code": "DE",
     "phone_area_codes": {
         "Berlin": [
 
@@ -4002,7 +4002,7 @@
         "{street}, {building_number}, {unit}º, {postcode} {city}",
         "{street}, {building_number}, Piso {unit}, {postcode} {city}, {state}"
     ],
-    "country": "España",
+    "country": "Spain",
     "country_code": "ES",
     "phone_area_codes": {
         "Comunidad de Madrid": [
 
@@ -1568,7 +1568,7 @@
         "{street} {building_number}, {postcode} {city}",
         "{street} {building_number} {unit}, {postcode} {city}"
     ],
-    "country": "Suomi",
+    "country": "Finland",
     "country_code": "FI",
     "phone_area_codes": {
         "Uusimaa": [
 
@@ -849,7 +849,7 @@
         "{street} {building_number}, {city}",
         "{city}, {street} {building_number}"
     ],
-    "country": "Hrvatska",
+    "country": "Croatia",
     "country_code": "HR",
     "phone_area_codes": {
         "Grad Zagreb": [
 
@@ -3476,7 +3476,7 @@
         "{street}, {building_number}, {postcode} {city} ({state})",
         "{street} {building_number}, {postcode} {city}"
     ],
-    "country": "Italia",
+    "country": "Italy",
     "country_code": "IT",
     "phone_area_codes": {
         "Lazio": [