diff --git a/elsevier_coordinate_extraction/extract/__init__.py b/elsevier_coordinate_extraction/extract/__init__.py
index 0cc81b7..459f2ca 100644
--- a/elsevier_coordinate_extraction/extract/__init__.py
+++ b/elsevier_coordinate_extraction/extract/__init__.py
@@ -2,4 +2,16 @@
 from __future__ import annotations
 
-# Implementation forthcoming.
+from elsevier_coordinate_extraction.extract.text import (
+    TextExtractionError,
+    extract_text_from_article,
+    format_article_text,
+    save_article_text,
+)
+
+__all__ = [
+    "TextExtractionError",
+    "extract_text_from_article",
+    "format_article_text",
+    "save_article_text",
+]
 
diff --git a/elsevier_coordinate_extraction/extract/text.py b/elsevier_coordinate_extraction/extract/text.py
new file mode 100644
index 0000000..46583b2
--- /dev/null
+++ b/elsevier_coordinate_extraction/extract/text.py
@@ -0,0 +1,240 @@
+"""Text extraction from Elsevier XML articles."""
+
+from __future__ import annotations
+
+import re
+from functools import lru_cache
+from importlib import resources
+from pathlib import Path
+from typing import Mapping
+
+from lxml import etree
+
+from elsevier_coordinate_extraction.types import ArticleContent
+
+__all__ = [
+    "TextExtractionError",
+    "extract_text_from_article",
+    "format_article_text",
+    "save_article_text",
+]
+
+
+class TextExtractionError(RuntimeError):
+    """Raised when text extraction from an Elsevier article fails."""
+
+
+@lru_cache(maxsize=None)
+def _load_text_stylesheet() -> etree.XSLT:
+    """Load and cache the Elsevier text extraction stylesheet."""
+
+    stylesheet_path = resources.files(
+        "elsevier_coordinate_extraction.stylesheets"
+    ).joinpath("text_extraction.xsl")
+    try:
+        with stylesheet_path.open("rb") as handle:
+            xslt_doc = etree.parse(handle)
+    except (OSError, etree.XMLSyntaxError) as exc:
+        msg = "Failed to load text extraction stylesheet."
+        raise TextExtractionError(msg) from exc
+    return etree.XSLT(xslt_doc)
+
+
+def extract_text_from_article(
+    article: ArticleContent | bytes,
+) -> dict[str, str | None]:
+    """Return structured text content extracted from an Elsevier article.
+
+    Parameters
+    ----------
+    article:
+        Either an :class:`ArticleContent` instance or a raw XML payload of
+        ``bytes``.
+
+    Raises
+    ------
+    TextExtractionError
+        If the payload cannot be parsed or the XSLT transformation fails.
+    """
+
+    payload = (
+        article.payload if isinstance(article, ArticleContent) else article
+    )
+    try:
+        document = etree.fromstring(payload)
+    except etree.XMLSyntaxError as exc:
+        raise TextExtractionError("Article payload is not valid XML.") from exc
+
+    stylesheet = _load_text_stylesheet()
+    try:
+        transformed = stylesheet(document)
+    except etree.XSLTApplyError as exc:
+        msg = "XSLT transformation failed for article payload."
+        raise TextExtractionError(msg) from exc
+
+    root = transformed.getroot()
+    return {
+        "doi": _clean_doi(_extract_text(root, "doi")),
+        "pii": _clean_field(_extract_text(root, "pii")),
+        "title": _clean_field(_extract_text(root, "title")),
+        "keywords": _clean_keywords(_extract_text(root, "keywords")),
+        "abstract": _clean_block(_extract_text(root, "abstract")),
+        "body": _clean_block(_extract_text(root, "body")),
+    }
+
+
+def format_article_text(extracted: Mapping[str, str | None]) -> str:
+    """Compose a plain-text article document from extracted text fields."""
+
+    return _compose_text_document(extracted)
+
+
+def save_article_text(
+    article: ArticleContent,
+    directory: Path | str,
+    *,
+    stem: str | None = None,
+) -> Path:
+    """Extract article text and persist it as a ``.txt`` file on disk.
+
+    Parameters
+    ----------
+    article:
+        Article payload and metadata.
+    directory:
+        Directory where the text file should be written. The directory is
+        created if necessary.
+    stem:
+        Optional file-name stem to use; defaults to a slug derived from the
+        article identifier metadata.
+
+    Returns
+    -------
+    pathlib.Path
+        Full path to the written text file.
+    """
+
+    extracted = extract_text_from_article(article)
+    destination_dir = Path(directory)
+    destination_dir.mkdir(parents=True, exist_ok=True)
+    file_stem = stem or _default_stem(article, extracted)
+    destination = destination_dir / f"{file_stem}.txt"
+    document = _compose_text_document(extracted)
+    destination.write_text(document, encoding="utf-8")
+    return destination
+
+
+def _extract_text(root: etree._Element, tag: str) -> str | None:
+    element = root.find(tag)
+    if element is None:
+        return None
+    text = "".join(element.itertext())
+    return text or None
+
+
+def _clean_doi(value: str | None) -> str | None:
+    cleaned = _clean_field(value)
+    if cleaned and cleaned.lower().startswith("doi:"):
+        cleaned = cleaned.split(":", 1)[1].strip()
+    return cleaned or None
+
+
+def _clean_field(value: str | None) -> str | None:
+    if value is None:
+        return None
+    cleaned = " ".join(value.split())
+    return cleaned or None
+
+
+def _clean_block(value: str | None) -> str | None:
+    if value is None:
+        return None
+    normalized = value.replace("\r\n", "\n").replace("\r", "\n")
+    lines = [" ".join(line.split()) for line in normalized.split("\n")]
+    cleaned_lines: list[str] = []
+    blank_run = False
+    for line in lines:
+        if not line:
+            if not blank_run:
+                cleaned_lines.append("")
+            blank_run = True
+            continue
+        cleaned_lines.append(line)
+        blank_run = False
+    cleaned = "\n".join(cleaned_lines).strip()
+    return cleaned or None
+
+
+def _clean_keywords(value: str | None) -> str | None:
+    if value is None:
+        return None
+    normalized = value.replace("\r\n", "\n").replace("\r", "\n")
+    keywords: list[str] = []
+    for line in normalized.split("\n"):
+        keyword = " ".join(line.split())
+        if keyword and keyword not in keywords:
+            keywords.append(keyword)
+    return "\n".join(keywords) or None
+
+
+def _compose_text_document(extracted: Mapping[str, str | None]) -> str:
+    parts: list[str] = []
+
+    title = extracted.get("title")
+    if title:
+        parts.append(f"# {title}")
+
+    metadata_lines: list[str] = []
+    doi = extracted.get("doi")
+    if doi:
+        metadata_lines.append(f"DOI: {doi}")
+    pii = extracted.get("pii")
+    if pii:
+        metadata_lines.append(f"PII: {pii}")
+    if metadata_lines:
+        parts.append("\n".join(metadata_lines))
+
+    keywords = extracted.get("keywords")
+    if keywords:
+        parts.append(f"## Keywords\n\n{keywords}")
+
+    abstract = extracted.get("abstract")
+    if abstract:
+        parts.append(f"## Abstract\n\n{abstract}")
+
+    body = extracted.get("body")
+    if body:
+        parts.append(body)
+
+    chunks = (
+        part.strip()
+        for part in parts
+        if part and part.strip()
+    )
+    text = "\n\n".join(chunks)
+    return f"{text}\n" if text else ""
+
+
+def _default_stem(
+    article: ArticleContent,
+    extracted: Mapping[str, str | None],
+) -> str:
+    candidates = (
+        article.doi,
+        extracted.get("pii"),
+        article.metadata.get("pii"),
+        article.metadata.get("identifier"),
+    )
+    for candidate in candidates:
+        slug = _sanitize_slug(candidate)
+        if slug:
+            return slug
+    return "article"
+
+
+def _sanitize_slug(value: str | None) -> str:
+    if not value:
+        return ""
+    slug = re.sub(r"[^A-Za-z0-9._-]+", "_", value)
+    slug = slug.strip("._")
+    return slug[:120]
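For reference, a minimal usage sketch of the new public API. The `article.xml` path and `out` directory are hypothetical; the `build_article_content` arguments mirror the ones used in the test suite:

```python
from pathlib import Path

from elsevier_coordinate_extraction.extract import (
    extract_text_from_article,
    format_article_text,
    save_article_text,
)
from elsevier_coordinate_extraction.types import build_article_content

# Hypothetical input: a full-text XML response previously fetched from
# ScienceDirect and stored on disk.
payload = Path("article.xml").read_bytes()
article = build_article_content(
    doi="10.1016/j.nbd.2012.03.039",  # illustrative DOI from the test suite
    payload=payload,
    content_type="text/xml",
    fmt="xml",
    metadata={},
)

fields = extract_text_from_article(article)  # dict of cleaned text fields
document = format_article_text(fields)       # single plain-text document
# The DOI slug becomes the file stem: out/10.1016_j.nbd.2012.03.039.txt
destination = save_article_text(article, "out")
```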
diff --git a/elsevier_coordinate_extraction/stylesheets/__init__.py b/elsevier_coordinate_extraction/stylesheets/__init__.py
new file mode 100644
index 0000000..c6281fe
--- /dev/null
+++ b/elsevier_coordinate_extraction/stylesheets/__init__.py
@@ -0,0 +1 @@
+"""Stylesheet resources for Elsevier transformations."""
diff --git a/elsevier_coordinate_extraction/stylesheets/text_extraction.xsl b/elsevier_coordinate_extraction/stylesheets/text_extraction.xsl
new file mode 100644
index 0000000..75863ab
--- /dev/null
+++ b/elsevier_coordinate_extraction/stylesheets/text_extraction.xsl
@@ -0,0 +1,158 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+  Text-extraction stylesheet for Elsevier full-text XML responses.
+  Emits a flat <article> document whose doi, pii, title, keywords,
+  abstract, and body children match the contract consumed by
+  extract/text.py. The XPath selections assume the published Elsevier
+  and Dublin Core namespaces; adjust them if the source schema differs.
+-->
+<xsl:stylesheet version="1.0"
+    xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
+    xmlns:dc="http://purl.org/dc/elements/1.1/"
+    xmlns:ce="http://www.elsevier.com/xml/common/dtd"
+    xmlns:xocs="http://www.elsevier.com/xml/xocs/dtd"
+    exclude-result-prefixes="dc ce xocs">
+
+  <xsl:output method="xml" encoding="UTF-8"/>
+  <xsl:strip-space elements="*"/>
+
+  <xsl:template match="/">
+    <article>
+      <!-- dc:identifier carries a "doi:" prefix; _clean_doi() strips it. -->
+      <doi>
+        <xsl:value-of select="normalize-space((//dc:identifier)[1])"/>
+      </doi>
+      <pii>
+        <xsl:value-of select="normalize-space((//*[local-name()='pii'])[1])"/>
+      </pii>
+      <title>
+        <xsl:value-of select="normalize-space((//dc:title)[1])"/>
+      </title>
+      <!-- One keyword per line; _clean_keywords() deduplicates. -->
+      <keywords>
+        <xsl:for-each select="//ce:keyword/ce:text">
+          <xsl:value-of select="normalize-space(.)"/>
+          <xsl:text>&#10;</xsl:text>
+        </xsl:for-each>
+      </keywords>
+      <abstract>
+        <xsl:apply-templates select="(//ce:abstract)[1]"/>
+      </abstract>
+      <body>
+        <xsl:apply-templates select="(//ce:sections | //xocs:rawtext)[1]"/>
+      </body>
+    </article>
+  </xsl:template>
+
+  <!-- Section titles become "# "-prefixed heading lines. -->
+  <xsl:template match="ce:section-title">
+    <xsl:text>&#10;&#10;# </xsl:text>
+    <xsl:value-of select="normalize-space(.)"/>
+    <xsl:text>&#10;&#10;</xsl:text>
+  </xsl:template>
+
+  <!-- Paragraphs are separated by blank lines. -->
+  <xsl:template match="ce:para">
+    <xsl:apply-templates/>
+    <xsl:text>&#10;&#10;</xsl:text>
+  </xsl:template>
+
+  <!-- List items become "- "-prefixed bullet lines. -->
+  <xsl:template match="ce:list-item">
+    <xsl:text>&#10;- </xsl:text>
+    <xsl:apply-templates/>
+    <xsl:text>&#10;</xsl:text>
+  </xsl:template>
+</xsl:stylesheet>
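The Python layer depends only on the shape of the transformed document: `extract_text_from_article` looks each field up with `root.find(tag)` and then normalizes it. A small sketch of that contract, with made-up element values:

```python
from lxml import etree

# Illustrative intermediate document produced by the stylesheet; the
# element names are fixed by extract_text_from_article's lookups.
intermediate = etree.fromstring(
    b"<article>"
    b"<doi>doi:10.1016/j.example.2024.01.001</doi>"
    b"<pii>S0000-0000(24)00001-1</pii>"
    b"<title>An example article title</title>"
    b"<keywords>first keyword\nsecond keyword</keywords>"
    b"<abstract>Abstract text...</abstract>"
    b"<body># Introduction\n\nBody paragraph text...</body>"
    b"</article>"
)
assert intermediate.find("title").text == "An example article title"
```

The `doi:` prefix is stripped by `_clean_doi`, duplicate keywords are dropped by `_clean_keywords`, and blank-line runs in the abstract and body are collapsed by `_clean_block`.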
parts.append(f"## Abstract\n\n{abstract}") + + body = extracted.get("body") + if body: + parts.append(body) + + chunks = ( + part.strip() + for part in parts + if part and part.strip() + ) + text = "\n\n".join(chunks) + return f"{text}\n" if text else "" + + +def _default_stem( + article: ArticleContent, + extracted: Mapping[str, str | None], +) -> str: + candidates = ( + article.doi, + extracted.get("pii"), + article.metadata.get("pii"), + article.metadata.get("identifier"), + ) + for candidate in candidates: + slug = _sanitize_slug(candidate) + if slug: + return slug + return "article" + + +def _sanitize_slug(value: str | None) -> str: + if not value: + return "" + slug = re.sub(r"[^A-Za-z0-9._-]+", "_", value) + slug = slug.strip("._") + return slug[:120] diff --git a/elsevier_coordinate_extraction/stylesheets/__init__.py b/elsevier_coordinate_extraction/stylesheets/__init__.py new file mode 100644 index 0000000..c6281fe --- /dev/null +++ b/elsevier_coordinate_extraction/stylesheets/__init__.py @@ -0,0 +1 @@ +"""Stylesheet resources for Elsevier transformations.""" diff --git a/elsevier_coordinate_extraction/stylesheets/text_extraction.xsl b/elsevier_coordinate_extraction/stylesheets/text_extraction.xsl new file mode 100644 index 0000000..75863ab --- /dev/null +++ b/elsevier_coordinate_extraction/stylesheets/text_extraction.xsl @@ -0,0 +1,158 @@ + + + + + + + + + + + + + + + + <xsl:value-of select="normalize-space((//dc:title)[1])"/> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + # + + + + + + + + + + + + + + + + + + + + - + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/tests/extract/conftest.py b/tests/extract/conftest.py new file mode 100644 index 0000000..9a77ac0 --- /dev/null +++ b/tests/extract/conftest.py @@ -0,0 +1,55 @@ +"""Shared fixtures for extraction integration tests.""" + +from __future__ import annotations + +import asyncio + +import httpx +import pytest + +from elsevier_coordinate_extraction import settings +from elsevier_coordinate_extraction.client import ScienceDirectClient +from elsevier_coordinate_extraction.download.api import download_articles +from elsevier_coordinate_extraction.types import ArticleContent + + +@pytest.fixture(scope="function", params=("doi", "pmid"), ids=("doi", "pmid")) +def downloaded_articles( + request: pytest.FixtureRequest, + test_dois: list[str], + sample_test_pmids: list[str], +) -> list[ArticleContent]: + """Download real articles for integration-style extraction tests.""" + + identifier_type: str = request.param + identifiers = test_dois if identifier_type == "doi" else sample_test_pmids + + async def _download() -> list[ArticleContent]: + cfg = settings.get_settings() + async with ScienceDirectClient(cfg) as client: + try: + records = [{identifier_type: value} for value in identifiers] + article_list = await download_articles(records, client=client) + except httpx.HTTPStatusError as exc: # type: ignore[attr-defined] + if exc.response.status_code in {401, 403}: + pytest.skip( + "ScienceDirect credentials unavailable for test run." 
diff --git a/tests/extract/test_coordinates.py b/tests/extract/test_coordinates.py
index 0f9648e..5f7e7e1 100644
--- a/tests/extract/test_coordinates.py
+++ b/tests/extract/test_coordinates.py
@@ -2,57 +2,15 @@
 
 from __future__ import annotations
 
-import asyncio
-from typing import Any
-
-import httpx
 import pytest
 
-from elsevier_coordinate_extraction import settings
-from elsevier_coordinate_extraction.client import ScienceDirectClient
-from elsevier_coordinate_extraction.download.api import download_articles
-from elsevier_coordinate_extraction.extract.coordinates import extract_coordinates
-from elsevier_coordinate_extraction.types import ArticleContent, build_article_content
-
-
-@pytest.fixture(scope="function", params=("doi", "pmid"), ids=("doi", "pmid"))
-def downloaded_articles(
-    request: pytest.FixtureRequest,
-    test_dois: list[str],
-    sample_test_pmids: list[str],
-) -> list[ArticleContent]:
-    """Download real articles for integration-style coordinate tests."""
-
-    identifier_type: str = request.param
-    identifiers = test_dois if identifier_type == "doi" else sample_test_pmids
-
-    async def _download() -> list[ArticleContent]:
-        cfg = settings.get_settings()
-        async with ScienceDirectClient(cfg) as client:
-            try:
-                records = [{identifier_type: value} for value in identifiers]
-                article_list = await download_articles(records, client=client)
-            except httpx.HTTPStatusError as exc:  # type: ignore[attr-defined]
-                if exc.response.status_code in {401, 403}:
-                    pytest.skip("ScienceDirect credentials unavailable for test run.")
-                raise
-            return list(article_list)
-
-    articles = asyncio.run(_download())
-    if identifier_type == "pmid":
-        for identifier, article in zip(identifiers, articles):
-            assert article.metadata.get("identifier") == identifier
-            assert article.metadata.get("identifier_type") == "pmid"
-
-    class ArticleList(list[ArticleContent]):
-        """Annotated list carrying identifier metadata."""
-
-        pass
-
-    wrapped = ArticleList(articles)
-    setattr(wrapped, "identifier_type", identifier_type)
-    setattr(wrapped, "identifiers", identifiers)
-    return wrapped
+from elsevier_coordinate_extraction.extract.coordinates import (
+    extract_coordinates,
+)
+from elsevier_coordinate_extraction.types import (
+    ArticleContent,
+    build_article_content,
+)
 
 
 def _find_points(result: dict) -> list[dict]:
@@ -66,8 +24,10 @@ def _find_points(result: dict) -> list[dict]:
 
 
 @pytest.mark.vcr()
-def test_extract_returns_coordinates_for_real_articles(downloaded_articles: list[ArticleContent]) -> None:
-    """Aggregated extraction should preserve structure, metadata, and infer coordinate space."""
+def test_extract_returns_coordinates_for_real_articles(
+    downloaded_articles: list[ArticleContent],
+) -> None:
+    """Aggregated extraction preserves metadata and infers coordinate space."""
 
     result = extract_coordinates(downloaded_articles)
     studies = result["studyset"]["studies"]
@@ -75,7 +35,9 @@ def test_extract_returns_coordinates_for_real_articles(downloaded_articles: list
     analysis_names: set[str] = set()
     spaces_by_article: dict[str, set[str | None]] = {}
     missing_coordinates: list[str] = []
-    is_doi_source = getattr(downloaded_articles, "identifier_type", "doi") == "doi"
+    is_doi_source = (
+        getattr(downloaded_articles, "identifier_type", "doi") == "doi"
+    )
     for article, study in zip(downloaded_articles, studies):
         assert study["doi"] == article.doi
         analyses = study["analyses"]
@@ -88,7 +50,9 @@
             assert points, f"Expected coordinate points for {study['doi']}"
             analysis_names.add(analysis["name"])
             analysis_meta = analysis.get("metadata", {})
-            assert analysis_meta.get("raw_table_xml"), "raw table XML should be retained"
+            assert analysis_meta.get(
+                "raw_table_xml"
+            ), "raw table XML should be retained"
             table_id = analysis_meta.get("table_id")
             if is_doi_source:
                 assert table_id, "table ID should accompany raw table XML"
@@ -99,24 +63,34 @@
                 spaces_by_article[article.doi].add(point.get("space"))
     assert analysis_names, "Expected at least one named analysis"
     if is_doi_source:
-        assert "Coordinate Table" not in analysis_names, "Fallback analysis name should be replaced"
+        assert (
+            "Coordinate Table" not in analysis_names
+        ), "Fallback analysis name should be replaced"
     for doi, spaces in spaces_by_article.items():
         assert spaces, f"No coordinate space inferred for {doi}"
-        assert any(space in {"MNI", "TAL"} for space in spaces if space), (
-            f"No canonical coordinate space detected for {doi}: {spaces}"
-        )
+        assert any(
+            space in {"MNI", "TAL"} for space in spaces if space
+        ), f"No canonical coordinate space detected for {doi}: {spaces}"
     if is_doi_source:
-        assert not missing_coordinates, f"Missing coordinate tables for: {missing_coordinates}"
+        assert (
+            not missing_coordinates
+        ), f"Missing coordinate tables for: {missing_coordinates}"
     else:
         assert len(missing_coordinates) < len(downloaded_articles), (
             "No coordinates extracted for any PMID-sourced article."
         )
+
 
 @pytest.mark.vcr()
-def test_extract_preserves_article_metadata(downloaded_articles: list[ArticleContent]) -> None:
+def test_extract_preserves_article_metadata(
+    downloaded_articles: list[ArticleContent],
+) -> None:
     """Ensure DOI and PII are propagated to the study metadata."""
 
     result = extract_coordinates(downloaded_articles)
-    for study, article in zip(result["studyset"]["studies"], downloaded_articles):
+    studies = result["studyset"]["studies"]
+    for study, article in zip(studies, downloaded_articles):
         assert study["doi"] == article.doi
         if "pii" in article.metadata:
             assert study["metadata"]["pii"] == article.metadata.get("pii")
diff --git a/tests/extract/test_text.py b/tests/extract/test_text.py
new file mode 100644
index 0000000..8799a7c
--- /dev/null
+++ b/tests/extract/test_text.py
@@ -0,0 +1,58 @@
+"""Text extraction tests."""
+
+from __future__ import annotations
+
+from pathlib import Path
+
+import pytest
+import yaml
+
+from elsevier_coordinate_extraction.extract import (
+    TextExtractionError,
+    extract_text_from_article,
+    format_article_text,
+    save_article_text,
+)
+from elsevier_coordinate_extraction.types import build_article_content
+
+
+def _load_cassette_payload() -> bytes:
+    cassette_path = (
+        Path(__file__).parent.parent
+        / "cassettes"
+        / "test_extract_returns_coordinates_for_real_articles[doi].yaml"
+    )
+    with cassette_path.open(encoding="utf-8") as handle:
+        data = yaml.safe_load(handle)
+    string_payload = data["interactions"][0]["response"]["body"]["string"]
+    return string_payload.encode("utf-8")
+
+
+def test_extract_text_from_real_article(tmp_path: Path) -> None:
+    """Structured text should be extracted and persisted for real articles."""
+
+    payload = _load_cassette_payload()
+    article = build_article_content(
+        doi="10.1016/j.nbd.2012.03.039",
+        payload=payload,
+        content_type="text/xml",
+        fmt="xml",
+        metadata={"pii": "S0969-9961(12)00128-3"},
+    )
+    extracted = extract_text_from_article(article)
+    assert extracted["title"], "Expected article title to be present"
+    assert extracted["body"], "Expected article body text to be present"
+
+    formatted = format_article_text(extracted)
+    output_dir = tmp_path / "articles"
+    destination = save_article_text(article, output_dir)
+    saved = destination.read_text(encoding="utf-8")
+    assert destination.name.endswith(".txt")
+    assert saved == formatted
+
+
+def test_extract_text_invalid_payload() -> None:
+    """Invalid XML payloads should raise a text extraction error."""
+
+    with pytest.raises(TextExtractionError):
+        extract_text_from_article(b"")
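The new text-extraction tests replay the article payload from the checked-in `[doi]` VCR cassette on disk instead of recording HTTP traffic, so, assuming the project's standard pytest setup, they can run offline with `pytest tests/extract/test_text.py`.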