Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ This MCP server is **free** and **open source**, supported by [**Unipile**](http
| `search_companies` | Search for companies on LinkedIn by keywords | working |
| `get_company_employees` | List employees at a company from the /people/ page, with optional keyword filter | working |
| `search_jobs` | Search for jobs with keywords and location filters | working |
| `search_people` | Search for people by keywords, location, connection degree (1st/2nd/3rd), and current company | working |
| `search_people` | Search for people by keywords, location, connection degree (1st/2nd/3rd), and current company, with optional multi-page pagination | working |
| `get_job_details` | Get detailed information about a specific job posting | working |
| `get_feed` | Get recent posts from the authenticated user's home feed | working |
| `close_session` | Close browser session and clean up resources | working |
Expand Down
66 changes: 54 additions & 12 deletions linkedin_mcp_server/scraping/extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -3245,8 +3245,15 @@ async def search_people(
location: str | None = None,
network: list[str] | None = None,
current_company: str | None = None,
max_pages: int = 1,
) -> dict[str, Any]:
"""Search for people and extract the results page.
"""Search for people and extract the results page(s).

Paginates through LinkedIn's people search via the ``&page=N`` URL
parameter (1-based). Each page yields ~10 results; ``max_pages`` caps
how many are fetched. Pagination stops early when a page surfaces no
new ``person`` references (the locale-independent end-of-results
signal), so requesting more pages than exist is harmless.

Args:
keywords: Free-text query ("software engineer", "recruiter at Google").
Expand All @@ -3262,9 +3269,13 @@ async def search_people(
unfiltered result set. Look up a company's URN via
``get_company_profile`` -- it is exposed under
``references["about"]``.
max_pages: Maximum number of result pages to load (default 1).

Returns:
{url, sections: {name: text}}
{url, sections: {search_results: text}} where ``url`` is the
first-page URL and ``search_results`` joins each page's text with
``\\n---\\n``. Optional ``references`` and ``section_errors`` keys
follow the standard tool return shape.
"""
if network is not None:
invalid = [t for t in network if t not in _NETWORK_TOKENS]
Expand All @@ -3290,21 +3301,52 @@ async def search_people(
if current_company:
params += f"&currentCompany={_encode_list_facet([current_company])}"

url = f"https://www.linkedin.com/search/results/people/?{params}"
extracted = await self.extract_page(url, section_name="search_results")
base_url = f"https://www.linkedin.com/search/results/people/?{params}"

sections: dict[str, str] = {}
references: dict[str, list[Reference]] = {}
page_texts: list[str] = []
all_references: list[Reference] = []
seen_person_urls: set[str] = set()
section_errors: dict[str, dict[str, Any]] = {}
if extracted.text and extracted.text != _RATE_LIMITED_MSG:
sections["search_results"] = extracted.text

for page_num in range(max_pages):
if page_num > 0:
await asyncio.sleep(_NAV_DELAY)

url = base_url if page_num == 0 else f"{base_url}&page={page_num + 1}"
extracted = await self.extract_page(url, section_name="search_results")

if not extracted.text or extracted.text == _RATE_LIMITED_MSG:
if extracted.error:
section_errors["search_results"] = extracted.error
# Navigation failed or rate-limited; nothing more to paginate.
break

# End-of-results detection (locale-independent): a page beyond the
# first that surfaces no new /in/ profile anchors means we have run
# past the last page of results.
page_person_urls = {
ref["url"] for ref in extracted.references if ref["kind"] == "person"
}
new_person_urls = page_person_urls - seen_person_urls
if page_num > 0 and not new_person_urls:
logger.debug("No new person results on page %d, stopping", page_num + 1)
break

seen_person_urls |= page_person_urls
page_texts.append(extracted.text)
if extracted.references:
references["search_results"] = extracted.references
elif extracted.error:
section_errors["search_results"] = extracted.error
all_references.extend(extracted.references)

sections: dict[str, str] = {}
references: dict[str, list[Reference]] = {}
if page_texts:
sections["search_results"] = "\n---\n".join(page_texts)
deduped = dedupe_references(all_references)
Comment thread
greptile-apps[bot] marked this conversation as resolved.
if deduped:
references["search_results"] = deduped

result: dict[str, Any] = {
"url": url,
"url": base_url,
"sections": sections,
}
if references:
Expand Down
9 changes: 8 additions & 1 deletion linkedin_mcp_server/tools/person.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,7 @@ async def search_people(
location: str | None = None,
network: list[str] | None = None,
current_company: str | None = None,
max_pages: Annotated[int, Field(ge=1, le=10)] = 1,
extractor: Any | None = None,
) -> dict[str, Any]:
"""
Expand All @@ -126,6 +127,9 @@ async def search_people(
network: Optional connection-degree filter. Each element is one of
"F" (1st-degree), "S" (2nd-degree), "O" (3rd-degree and beyond).
Example: ["F"] to only return 1st-degree connections.
max_pages: Maximum number of result pages to load (1-10, default 1).
Each page holds ~10 results. Pagination stops early once a page
returns no new people, so over-requesting pages is safe.
current_company: Optional current-employer filter. LinkedIn's
currentCompany facet only filters on the numeric company URN id
(e.g. "1115" for SAP); plain company names are accepted by the
Expand All @@ -144,11 +148,13 @@ async def search_people(
ctx, tool_name="search_people"
)
logger.info(
"Searching people: keywords='%s', location='%s', network=%s, current_company='%s'",
"Searching people: keywords='%s', location='%s', network=%s, "
"current_company='%s', max_pages=%d",
keywords,
location,
network,
current_company,
max_pages,
)

await ctx.report_progress(
Expand All @@ -161,6 +167,7 @@ async def search_people(
location,
network=network,
current_company=current_company,
max_pages=max_pages,
)
except FilterValidationError as e:
# Validation messages carry actionable detail; surface
Expand Down
2 changes: 1 addition & 1 deletion manifest.json
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@
},
{
"name": "search_people",
"description": "Search for people on LinkedIn by keywords, location, connection degree (1st/2nd/3rd), and current company"
"description": "Search for people on LinkedIn by keywords, location, connection degree (1st/2nd/3rd), and current company, with optional multi-page pagination via max_pages"
},
{
"name": "get_inbox",
Expand Down
83 changes: 83 additions & 0 deletions tests/test_scraping.py
Original file line number Diff line number Diff line change
Expand Up @@ -2706,6 +2706,89 @@ async def test_search_people_combines_all_filters(self, mock_page):
assert "network=%5B%22F%22%5D" in result["url"]
assert "currentCompany=%5B%221115%22%5D" in result["url"]

async def test_search_people_default_loads_single_page(self, mock_page):
    """With the default ``max_pages``, only one results page is fetched."""
    extractor = LinkedInExtractor(mock_page)
    single_page = extracted(
        "Jane Doe",
        [{"kind": "person", "url": "/in/jane/", "text": "Jane Doe"}],
    )
    fake_extract = AsyncMock(return_value=single_page)

    with patch.object(extractor, "extract_page", new=fake_extract):
        outcome = await extractor.search_people("engineer")

    # One navigation only, and the reported URL has no pagination cursor.
    assert fake_extract.await_count == 1
    assert "&page=" not in outcome["url"]
    assert outcome["sections"]["search_results"] == "Jane Doe"

async def test_search_people_paginates_and_joins_pages(self, mock_page):
    """Two pages are fetched, their text joined, and references deduped.

    The first navigation must use the plain search URL; only later pages
    carry the ``&page=N`` cursor. The inter-page delay is patched out so
    the test does not actually sleep.
    """
    extractor = LinkedInExtractor(mock_page)
    pages = [
        extracted(
            "Jane Doe",
            [{"kind": "person", "url": "/in/jane/", "text": "Jane Doe"}],
        ),
        extracted(
            "John Roe",
            [
                {"kind": "person", "url": "/in/jane/", "text": "Jane Doe"},
                {"kind": "person", "url": "/in/john/", "text": "John Roe"},
            ],
        ),
    ]
    with (
        patch.object(
            extractor,
            "extract_page",
            new_callable=AsyncMock,
            side_effect=pages,
        ) as mock_extract,
        patch(
            "linkedin_mcp_server.scraping.extractor.asyncio.sleep",
            new_callable=AsyncMock,
        ),
    ):
        result = await extractor.search_people("engineer", max_pages=2)

    assert mock_extract.await_count == 2
    # Second navigation carries the &page=2 cursor; first one does not.
    first_url = mock_extract.await_args_list[0].args[0]
    second_url = mock_extract.await_args_list[1].args[0]
    assert "&page=" not in first_url
    assert "&page=2" in second_url
    # The URL reported to the caller is the first-page URL, not the last
    # page that happened to be visited.
    assert "&page=" not in result["url"]
    assert result["sections"]["search_results"] == "Jane Doe\n---\nJohn Roe"
    # References are deduped by URL across pages (jane appears on both).
    urls = [ref["url"] for ref in result["references"]["search_results"]]
    assert urls == ["/in/jane/", "/in/john/"]

async def test_search_people_stops_when_page_adds_no_new_people(self, mock_page):
    """Pagination halts once a later page repeats only already-seen people."""
    extractor = LinkedInExtractor(mock_page)
    same_person: list[Reference] = [
        {"kind": "person", "url": "/in/jane/", "text": "Jane Doe"}
    ]
    # Page 2 repeats the same person (no new /in/ anchors), which signals
    # the end of results; page 3 exists only to prove it is never requested.
    page_texts = ("Jane Doe", "Jane Doe again", "should not be reached")
    responses = [extracted(text, same_person) for text in page_texts]

    extract_patch = patch.object(
        extractor,
        "extract_page",
        new_callable=AsyncMock,
        side_effect=responses,
    )
    sleep_patch = patch(
        "linkedin_mcp_server.scraping.extractor.asyncio.sleep",
        new_callable=AsyncMock,
    )
    with extract_patch as fake_extract, sleep_patch:
        outcome = await extractor.search_people("engineer", max_pages=5)

    assert fake_extract.await_count == 2
    assert outcome["sections"]["search_results"] == "Jane Doe"


class TestStripLinkedInNoise:
def test_strips_footer(self):
Expand Down
24 changes: 24 additions & 0 deletions tests/test_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -263,6 +263,7 @@ async def test_search_people(self, mock_context):
"New York",
network=None,
current_company=None,
max_pages=1,
)

async def test_search_people_with_network_and_company_filters(self, mock_context):
Expand Down Expand Up @@ -297,6 +298,29 @@ async def test_search_people_with_network_and_company_filters(self, mock_context
None,
network=["F"],
current_company="1115",
max_pages=1,
)

async def test_search_people_forwards_max_pages(self, mock_context):
    """The ``search_people`` tool hands ``max_pages`` to the extractor as-is."""
    canned_result = {
        "url": "https://www.linkedin.com/search/results/people/?keywords=engineer",
        "sections": {"search_results": "Jane Doe\n---\nJohn Roe"},
    }
    fake_extractor = _make_mock_extractor(canned_result)

    from linkedin_mcp_server.tools.person import register_person_tools

    server = FastMCP("test")
    register_person_tools(server)
    search_people_tool = await get_tool_fn(server, "search_people")

    await search_people_tool(
        "engineer", mock_context, max_pages=3, extractor=fake_extractor
    )

    # Unset filters are forwarded as None; max_pages keeps the caller's value.
    fake_extractor.search_people.assert_awaited_once_with(
        "engineer",
        None,
        network=None,
        current_company=None,
        max_pages=3,
    )

async def test_search_people_validation_error_surfaced_as_tool_error(
Expand Down
Loading