Skip to content

Commit a72d1b2

Browse files
added 10-K/10-Q section extraction API
1 parent d280eb6 commit a72d1b2

File tree

6 files changed

+108
-6
lines changed

6 files changed

+108
-6
lines changed

.gitignore

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,4 +5,5 @@ build
55
.pypirc
66
sec_api.egg-info
77
sec_api.egg-info
8-
deploy.sh
8+
deploy.sh
9+
sec_api/__pycache__

README.md

Lines changed: 51 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ It includes:
66
- Query and Full-Text Search API
77
- Real-Time Stream API
88
- XBRL-to-JSON Converter API + Financial Statements
9+
- 10-K/10-Q Section Extraction API
910
- Filing Render & Download API
1011

1112

@@ -356,6 +357,53 @@ Note: response is shortened.
356357

357358
> See the documentation for more details: https://sec-api.io/docs/xbrl-to-json-converter-api
358359
360+
# 10-K/10-Q Section Extractor API
361+
362+
The Extractor API returns individual sections from 10-Q and 10-K filings. Each extracted section is cleaned and standardized, and is returned either as raw text or as standardized HTML. You can programmatically extract one or multiple sections from any 10-Q or 10-K filing.
363+
364+
All 10-K and 10-Q sections can be extracted:
365+
366+
- 1 - Business
367+
- 1A - Risk Factors
368+
- 1B - Unresolved Staff Comments
369+
- 2 - Properties
370+
- 3 - Legal Proceedings
371+
- 4 - Mine Safety Disclosures
372+
- 5 - Market for Registrant’s Common Equity, Related Stockholder Matters and Issuer Purchases of Equity Securities
373+
- 6 - Selected Financial Data (prior to February 2021)
374+
- 7 - Management’s Discussion and Analysis of Financial Condition and Results of Operations
375+
- 7A - Quantitative and Qualitative Disclosures about Market Risk
376+
- 8 - Financial Statements and Supplementary Data
377+
- 9 - Changes in and Disagreements with Accountants on Accounting and Financial Disclosure
378+
- 9A - Controls and Procedures
379+
- 9B - Other Information
380+
- 10 - Directors, Executive Officers and Corporate Governance
381+
- 11 - Executive Compensation
382+
- 12 - Security Ownership of Certain Beneficial Owners and Management and Related Stockholder Matters
383+
- 13 - Certain Relationships and Related Transactions, and Director Independence
384+
- 14 - Principal Accountant Fees and Services
385+
386+
## Usage
387+
388+
```python
389+
from sec_api import ExtractorApi
390+
391+
extractorApi = ExtractorApi("YOUR_API_KEY")
392+
393+
# Tesla 10-K filing
394+
filing_url = "https://www.sec.gov/Archives/edgar/data/1318605/000156459021004599/tsla-10k_20201231.htm"
395+
396+
# get the standardized and cleaned text of section 1A "Risk Factors"
397+
section_text = extractorApi.get_section(filing_url, "1A", "text")
398+
399+
# get the original HTML of section 7 "Management’s Discussion and Analysis of Financial Condition and Results of Operations"
400+
section_html = extractorApi.get_section(filing_url, "7", "html")
401+
402+
print(section_text)
403+
print(section_html)
404+
```
405+
406+
> See the documentation for more details: https://sec-api.io/docs/sec-filings-item-extraction-api
359407
360408

361409
# Filing Render & Download API
@@ -377,7 +425,7 @@ print(filing)
377425
> See the documentation for more details: https://sec-api.io/docs/sec-filings-render-api
378426
379427

380-
# Response Format
428+
# Query API Response Format
381429

382430
- `accessionNo` (string) - Accession number of filing, e.g. 0000028917-20-000033
383431
- `cik` (string) - CIK of the filing issuer. Important: leading `0`s are removed.
@@ -572,7 +620,7 @@ print(filing)
572620

573621
# Contact
574622

575-
Let me know how I can improve the library or if you have any feature
576-
suggestions. I'm happy to implement them.
623+
Let us know how we can improve the library or if you have any feature
624+
suggestions. We're happy to implement them.
577625

578626

examples.py

Lines changed: 25 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,13 @@
1-
from sec_api.index import XbrlApi
1+
from sec_api.index import XbrlApi, ExtractorApi
22

3+
#
4+
# XBRL-to-JSON API example
5+
#
6+
7+
# """
38
xbrlApi = XbrlApi("YOUR_API_KEY")
49

10+
511
# 10-K HTM File URL example
612
xbrl_json_1 = xbrlApi.xbrl_to_json(
713
htm_url="https://www.sec.gov/Archives/edgar/data/320193/000032019320000096/aapl-20200926.htm"
@@ -18,3 +24,21 @@
1824

1925
# 10-K XBRL File URL example
2026
xbrl_json_3 = xbrlApi.xbrl_to_json(accession_no="0001564590-21-004599")
27+
# """
28+
29+
#
30+
# Extractor API Example
31+
#
32+
33+
# """
34+
extractorApi = ExtractorApi("YOUR_API_KEY")
35+
36+
# Tesla 10-K filing
37+
filing_url = "https://www.sec.gov/Archives/edgar/data/1318605/000156459021004599/tsla-10k_20201231.htm"
38+
39+
section_text = extractorApi.get_section(filing_url, "1A", "text")
40+
section_html = extractorApi.get_section(filing_url, "1A", "html")
41+
42+
print(section_text)
43+
print(section_html)
44+
# """

sec_api/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,3 +3,4 @@
33
from sec_api.index import FullTextSearchApi
44
from sec_api.index import RenderApi
55
from sec_api.index import XbrlApi
6+
from sec_api.index import ExtractorApi

sec_api/index.py

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
full_text_search_api_endpoint = "https://api.sec-api.io/full-text-search"
66
render_api_endpoint = "https://api.sec-api.io/filing-reader"
77
xbrl_api_endpoint = "https://api.sec-api.io/xbrl-to-json"
8+
extractor_api_endpoint = "https://api.sec-api.io/extractor"
89

910

1011
class QueryApi:
@@ -76,3 +77,30 @@ def xbrl_to_json(self, htm_url="", xbrl_url="", accession_no=""):
7677

7778
response = requests.get(_url)
7879
return json.loads(response.text)
80+
81+
82+
class ExtractorApi:
    """
    Client for the 10-K/10-Q item/section extractor API.

    The API key is attached to the endpoint as the ``token`` query parameter
    when the client is constructed and is reused for every request.
    """

    def __init__(self, api_key):
        """Store the API key and build the token-authenticated endpoint URL."""
        self.api_key = api_key
        self.api_endpoint = extractor_api_endpoint + "?token=" + api_key

    def get_section(self, filing_url="", section="1A", return_type="text"):
        """
        Request a single section of a filing from the extractor endpoint.

        Args:
            filing_url: URL of the filing to extract from. Must be non-empty.
            section: Item identifier, sent as the ``item`` query parameter.
            return_type: Output format, sent as the ``type`` query parameter.

        Returns:
            The response body as a string.

        Raises:
            ValueError: If ``filing_url`` is empty or None.
        """
        if not filing_url:
            raise ValueError("filing_url must be present")

        # Let requests percent-encode the values: a raw filing URL containing
        # "&", "#", "?" or spaces would otherwise corrupt the query string.
        # requests appends these to the existing "?token=..." query.
        query = {"url": filing_url, "item": section, "type": return_type}

        response = requests.get(self.api_endpoint, params=query)
        return response.text

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55

66
setup(
77
name="sec-api",
8-
version="1.0.6",
8+
version="1.0.7",
99
author="SEC API",
1010
author_email="[email protected]",
1111
description="SEC EDGAR Filings API",

0 commit comments

Comments
 (0)