Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ This MCP server is **free** and **open source**, supported by [**Unipile**](http
| `search_people` | Search for people by keywords, location, connection degree (1st/2nd/3rd), and current company | working |
| `get_job_details` | Get detailed information about a specific job posting | working |
| `get_feed` | Get recent posts from the authenticated user's home feed | working |
| `search_posts` | Search posts/content globally by keyword (the "Posts" tab) with an optional recency filter (past-24h/past-week/past-month) | working |
| `close_session` | Close browser session and clean up resources | working |

<br/>
Expand Down
1 change: 1 addition & 0 deletions docs/docker-hub.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ A Model Context Protocol (MCP) server that connects AI assistants to LinkedIn. A
- **Person Posts**: Get recent activity/posts from a person's profile
- **Company Posts**: Get recent posts from a company's LinkedIn feed
- **Home Feed**: Get recent posts from the authenticated user's LinkedIn home feed
- **Post Search**: Search posts/content globally by keyword (the "Posts" tab) with an optional recency filter
- **Compact References**: Return typed per-section links alongside readable text without shipping full-page markdown

## Quick Start
Expand Down
112 changes: 112 additions & 0 deletions linkedin_mcp_server/scraping/extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,25 @@

_SORT_BY_MAP = {"date": "DD", "relevance": "R"}

# Content (post) search uses literal ``datePosted`` tokens inside a JSON-list
# facet, e.g. ``datePosted=["past-week"]`` — unlike job search, which uses
# ``f_TPR=r<seconds>`` codes. Human-friendly underscore aliases map onto
# LinkedIn's exact tokens; the tokens themselves also pass through unchanged.
_CONTENT_DATE_POSTED_MAP = {
"past-24h": "past-24h",
"past_24_hours": "past-24h",
"past-24-hours": "past-24h",
"past-week": "past-week",
"past_week": "past-week",
"past-month": "past-month",
"past_month": "past-month",
}

# Content search is an infinite scroll (no ``&start=`` pagination), so
# ``search_posts`` expresses depth as result "pages" of roughly this many
# scrolls each: the scroll cap it requests is
# ``max(1, max_pages) * _CONTENT_SCROLLS_PER_PAGE``.
_CONTENT_SCROLLS_PER_PAGE = 5

# Valid tokens for the people-search ``network`` facet.
# LinkedIn accepts "F" (1st-degree), "S" (2nd-degree), "O" (3rd-degree and beyond).
_NETWORK_TOKENS = ("F", "S", "O")
Expand Down Expand Up @@ -3345,6 +3364,99 @@ async def search_companies(
result["section_errors"] = section_errors
return result

@staticmethod
def _build_content_search_url(
    keywords: str,
    date_posted: str | None = None,
) -> str:
    """Assemble the LinkedIn content (post) search URL for a query.

    The result has the ``FACETED_SEARCH`` shape LinkedIn generates from the
    Posts results tab. For "Buscamos Unity" limited to the past week:
    ``/search/results/content/?keywords=Buscamos+Unity&origin=FACETED_SEARCH&datePosted=%5B%22past-week%22%5D``

    Args:
        keywords: Free-text query; encoded with ``quote_plus``.
        date_posted: Optional recency filter. ``None``, empty, or
            whitespace-only values add no ``datePosted`` facet. Otherwise
            the stripped value is normalized through
            ``_CONTENT_DATE_POSTED_MAP`` and sent as a one-element list
            facet. Values missing from the map are forwarded as-is, so
            callers are expected to validate beforehand.

    Returns:
        The absolute ``https://www.linkedin.com/search/results/content/``
        URL including its query string.
    """
    query_parts = [
        f"keywords={quote_plus(keywords)}",
        "origin=FACETED_SEARCH",
    ]

    requested = date_posted.strip() if date_posted else ""
    if requested:
        # Aliases collapse to LinkedIn's literal token; anything unknown is
        # left untouched rather than rejected here.
        canonical = _CONTENT_DATE_POSTED_MAP.get(requested, requested)
        query_parts.append(f"datePosted={_encode_list_facet([canonical])}")

    return "https://www.linkedin.com/search/results/content/?" + "&".join(
        query_parts
    )

async def search_posts(
    self,
    keywords: str,
    date_posted: str | None = None,
    max_pages: int = 3,
) -> dict[str, Any]:
    """Run a LinkedIn post/content search and extract the results page.

    This targets the LinkedIn "Posts" content-search tab, which is where
    informal "we're hiring" / "Buscamos ..." posts show up before a formal
    job listing exists.

    Args:
        keywords: Free-text query (e.g. "Buscamos Unity",
            "estamos contratando").
        date_posted: Optional recency filter: ``"past-24h"``,
            ``"past-week"`` or ``"past-month"`` (underscore aliases are
            accepted too). ``None`` or a blank string means no filter.
        max_pages: Scroll depth in result "pages" of roughly
            ``_CONTENT_SCROLLS_PER_PAGE`` scrolls each (default 3). Values
            below 1 are treated as 1. Because content search is an
            infinite scroll with no per-page URL, this bounds how far the
            page is scrolled instead of fetching discrete ``&start=``
            pages.

    Returns:
        ``{url, sections: {search_results: text}}`` plus optional
        ``references`` (``feed_post`` permalinks, post authors, companies)
        and ``section_errors``. The text is raw; the LLM is expected to
        pull out each post's author, headline, body, date, and reaction
        counts.

    Raises:
        FilterValidationError: ``date_posted`` is non-blank and not a key
            of ``_CONTENT_DATE_POSTED_MAP`` (a ``ValueError`` subclass).
    """
    # Reject unknown filters up front; the URL builder forwards them as-is.
    requested = date_posted.strip() if date_posted is not None else ""
    if requested and requested not in _CONTENT_DATE_POSTED_MAP:
        raise FilterValidationError(
            f"Invalid date_posted {date_posted!r}; expected one of "
            "'past-24h', 'past-week', 'past-month'."
        )

    url = self._build_content_search_url(keywords, date_posted=date_posted)
    page_count = max(1, max_pages)
    extracted = await self.extract_page(
        url,
        section_name="search_results",
        max_scrolls=page_count * _CONTENT_SCROLLS_PER_PAGE,
    )

    result: dict[str, Any] = {"url": url, "sections": {}}
    text = extracted.text

    if text == _RATE_LIMITED_MSG:
        # The rate-limit notice arrives as page text; report it as an
        # error rather than as search results.
        result["section_errors"] = {
            "search_results": {
                "error_type": "rate_limit",
                "error_message": text,
            }
        }
    elif text:
        result["sections"]["search_results"] = text
        if extracted.references:
            result["references"] = {"search_results": extracted.references}
    elif extracted.error:
        result["section_errors"] = {"search_results": extracted.error}

    return result

async def get_inbox(self, limit: int = 20) -> dict[str, Any]:
"""List recent conversations from the messaging inbox."""
url = "https://www.linkedin.com/messaging/"
Expand Down
2 changes: 2 additions & 0 deletions linkedin_mcp_server/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
from linkedin_mcp_server.tools.job import register_job_tools
from linkedin_mcp_server.tools.messaging import register_messaging_tools
from linkedin_mcp_server.tools.person import register_person_tools
from linkedin_mcp_server.tools.post import register_post_tools

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -62,6 +63,7 @@ def create_mcp_server(*, tool_timeout: float = DEFAULT_TOOL_TIMEOUT_SECONDS) ->
register_job_tools(mcp, tool_timeout=tool_timeout)
register_messaging_tools(mcp, tool_timeout=tool_timeout)
register_feed_tools(mcp, tool_timeout=tool_timeout)
register_post_tools(mcp, tool_timeout=tool_timeout)

# Register session management tool
@mcp.tool(
Expand Down
1 change: 1 addition & 0 deletions linkedin_mcp_server/tools/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
- Job tools: Job posting details and search functionality
- Messaging tools: Inbox, conversations, search, and sending messages
- Feed tools: Home feed scraping
- Post tools: Global post/content search

Architecture:
- FastMCP integration for MCP-compliant tool registration
Expand Down
114 changes: 114 additions & 0 deletions linkedin_mcp_server/tools/post.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
"""
LinkedIn post/content search tool.

Performs LinkedIn's global content search (the "Posts" results tab) using
innerText extraction, so informal "we're hiring" / "Buscamos ..." posts can
be found before a formal job listing is published. Mirrors search_people:
build a /search/results/content/ URL, scroll to load results, and return the
raw innerText for the LLM to parse, plus post-permalink references.
"""

import logging
from typing import Annotated, Any

from fastmcp import Context, FastMCP
from fastmcp.exceptions import ToolError
from pydantic import Field

from linkedin_mcp_server.config.schema import DEFAULT_TOOL_TIMEOUT_SECONDS
from linkedin_mcp_server.core.exceptions import AuthenticationError
from linkedin_mcp_server.dependencies import get_ready_extractor, handle_auth_error
from linkedin_mcp_server.error_handler import raise_tool_error
from linkedin_mcp_server.scraping.extractor import FilterValidationError

logger = logging.getLogger(__name__)


def register_post_tools(
    mcp: FastMCP, *, tool_timeout: float = DEFAULT_TOOL_TIMEOUT_SECONDS
) -> None:
    """Attach the post/content-search tool to an MCP server.

    Args:
        mcp: FastMCP server on which the ``search_posts`` tool is registered.
        tool_timeout: Timeout passed to ``mcp.tool`` for the registered tool.
    """
    # One name shared by extractor acquisition and error reporting.
    tool_name = "search_posts"

    @mcp.tool(
        timeout=tool_timeout,
        title="Search Posts",
        annotations={"readOnlyHint": True, "openWorldHint": True},
        tags={"post", "search"},
        exclude_args=["extractor"],
    )
    async def search_posts(
        keywords: str,
        ctx: Context,
        date_posted: str | None = None,
        max_pages: Annotated[int, Field(ge=1, le=10)] = 3,
        extractor: Any | None = None,
    ) -> dict[str, Any]:
        # NOTE(review): this docstring is presumably published to clients as
        # the tool description, so its wording is kept verbatim.
        """
        Search LinkedIn posts/content globally by keyword (the "Posts" tab).

        Use this to catch informal hiring posts ("we're hiring", "Buscamos
        ...", "estamos contratando", "join our team") that often appear before
        a formal job listing exists. This is global content search, distinct
        from get_feed (your own home feed) and get_company_posts (one
        company's page).

        Args:
            keywords: Search keywords (e.g., "Buscamos Unity", "AI automation hiring")
            ctx: FastMCP context for progress reporting
            date_posted: Optional recency filter. One of "past-24h",
                "past-week", "past-month" (underscore aliases like
                "past_week" also accepted). Omit for any time.
            max_pages: Scroll depth as result "pages" of ~5 scrolls each
                (1-10, default 3). Content search is an infinite scroll, so
                this caps how far the page is scrolled rather than fetching
                discrete pages.

        Returns:
            Dict with url, sections (search_results -> raw text), and optional
            references (post permalinks, authors, companies) and section_errors.
            The LLM should parse the raw text to extract each post's author,
            headline/role, company, body, posted date, and reaction/comment
            counts.
        """
        try:
            # An injected extractor takes precedence over acquiring one.
            if not extractor:
                extractor = await get_ready_extractor(ctx, tool_name=tool_name)

            logger.info(
                "Searching posts: keywords='%s', date_posted='%s', max_pages=%d",
                keywords,
                date_posted,
                max_pages,
            )
            await ctx.report_progress(
                progress=0, total=100, message="Starting post search"
            )

            try:
                found = await extractor.search_posts(
                    keywords,
                    date_posted=date_posted,
                    max_pages=max_pages,
                )
            except FilterValidationError as validation_exc:
                # The validation text tells the caller how to fix the input;
                # re-raising as ToolError keeps mask_error_details from
                # flattening it into a generic
                # "Error calling tool 'search_posts'".
                raise ToolError(str(validation_exc)) from validation_exc
            else:
                await ctx.report_progress(
                    progress=100, total=100, message="Complete"
                )
                return found

        except ToolError:
            # Already client-facing; passing it through raise_tool_error
            # would log it as an "Unexpected error".
            raise
        except AuthenticationError as auth_exc:
            try:
                await handle_auth_error(auth_exc, ctx)
            except Exception as relogin_exc:
                raise_tool_error(relogin_exc, tool_name)
        except Exception as unexpected_exc:
            raise_tool_error(unexpected_exc, tool_name)  # NoReturn
4 changes: 4 additions & 0 deletions manifest.json
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,10 @@
"name": "get_feed",
"description": "Get recent posts from the authenticated user's LinkedIn home feed"
},
{
"name": "search_posts",
"description": "Search LinkedIn posts/content globally by keyword (the 'Posts' tab) with an optional recency filter (past-24h/past-week/past-month)"
},
{
"name": "close_session",
"description": "Properly close browser session and clean up resources"
Expand Down
Loading
Loading