Skip to content

Commit 2309f9f

Browse files
authored
feat!: extend HTTP extractor (#461)
- Add script global function `execute_request` for being able to execute 1 or more HTTP requests using the request configuration options similar to recipe config. - Instead of only providing the response body to the script, provide the {status_code, header, body}. - Allow configuration of concurrency to control the number of concurrent HTTP requests made from inside the script. BREAKING CHANGE: The response provided to the script in HTTP extractor has the {status_code, header, body} instead of just the response body.
1 parent 72abc18 commit 2309f9f

10 files changed

Lines changed: 619 additions & 116 deletions

File tree

plugins/extractors/http/README.md

Lines changed: 104 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -20,10 +20,10 @@ The user specified script has access to the response, if the API call was
2020
successful, and can use it for constructing and emitting assets using a custom
2121
script. Currently, [Tengo][tengo] is the only supported script engine.
2222

23-
Refer Tengo documentation for script language syntax and supported functionality
24-
\- https://github.com/d5/tengo/tree/v2.13.0#references
25-
. [Tengo standard library modules][tengo-stdlib] can also be imported and used
26-
if required.
23+
Refer to the Tengo documentation for script language syntax and supported
24+
functionality - https://github.com/d5/tengo/tree/v2.13.0#references.
25+
[Tengo standard library modules][tengo-stdlib] can also be imported and used if
26+
required (except the `os` module).
2727

2828
## Usage
2929

@@ -46,6 +46,7 @@ source:
4646
key: value
4747
timeout: 5s
4848
success_codes: [ 200 ]
49+
concurrency: 3
4950
script:
5051
engine: tengo
5152
source: |
@@ -56,19 +57,26 @@ source:
5657
5758
## Inputs
5859
59-
| Key | Value | Example | Description | Required? |
60-
|:-----------------------|:--------------------|:---------------------------------------|:------------------------------------------------------------------------------------------------|:----------|
61-
| `request.url` | `string` | `http://example.com/api/v1/endpoint` | The HTTP endpoint to send request to | ✅ |
62-
| `request.query_params` | `[]{key, value}` | `[{"key":"s","value":"One Piece"}]` | The query parameters to be added to the request URL. | ✘ |
63-
| `request.method` | `string` | `GET`/`POST` | The HTTP verb/method to use with request. Default is `GET`. | ✘ |
64-
| `request.headers` | `map[string]string` | `{"Api-Token": "..."}` | Headers to send in the HTTP request. | ✘ |
65-
| `request.content_type` | `string` | `application/json` | Content type for encoding request body. Also sent as a header. | ✅ |
66-
| `request.accept` | `string` | `application/json` | Sent as the `Accept` header. Also indicates the format to use for decoding. | ✅ |
67-
| `request.body` | `Object` | `{"key": "value"}` | The request body to be sent. | ✘ |
68-
| `request.timeout` | `string` | `1s` | Timeout for the HTTP request. Default is 5s. | ✘ |
69-
| `success_codes` | `[]int` | `[200]` | The list of status codes that would be considered as a successful response. Default is `[200]`. | ✘ |
70-
| `script.engine` | `string` | `tengo` | Script engine. Only `"tengo"` is supported currently | ✅ |
71-
| `script.source` | `string` | see [Worked Example](#worked-example). | [Tengo][tengo] script used to map the response into 0 or more assets. | ✅ |
60+
| Key | Value | Example | Description | Required? |
61+
|:----------------|:---------|:---------------------------------------|:------------------------------------------------------------------------------------------------|:----------|
62+
| `request` | `Object` | see [Request](#request) | The configuration for constructing and sending HTTP request. | ✅ |
63+
| `success_codes` | `[]int` | `[200]` | The list of status codes that would be considered as a successful response. Default is `[200]`. | ✘ |
64+
| `concurrency` | `int` | `5` | Number of concurrent requests to execute when the script calls `execute_request`. Default is `5`. | ✘ |
65+
| `script.engine` | `string` | `tengo` | Script engine. Only `"tengo"` is supported currently | ✅ |
66+
| `script.source` | `string` | see [Worked Example](#worked-example). | [Tengo][tengo] script used to map the response into 0 or more assets. | ✅ |
67+
68+
### Request
69+
70+
| Key | Value | Example | Description | Required? |
71+
|:---------------|:--------------------|:-------------------------------------|:----------------------------------------------------------------------------|:----------|
72+
| `url` | `string` | `http://example.com/api/v1/endpoint` | The HTTP endpoint to send request to | ✅ |
73+
| `query_params` | `[]{key, value}` | `[{"key":"s","value":"One Piece"}]` | The query parameters to be added to the request URL. | ✘ |
74+
| `method` | `string` | `GET`/`POST` | The HTTP verb/method to use with request. Default is `GET`. | ✘ |
75+
| `headers` | `map[string]string` | `{"Api-Token": "..."}` | Headers to send in the HTTP request. | ✘ |
76+
| `content_type` | `string` | `application/json` | Content type for encoding request body. Also sent as a header. | ✅ |
77+
| `accept` | `string` | `application/json` | Sent as the `Accept` header. Also indicates the format to use for decoding. | ✅ |
78+
| `body` | `Object` | `{"key": "value"}` | The request body to be sent. | ✘ |
79+
| `timeout` | `string` | `1s` | Timeout for the HTTP request. Default is 5s. | ✘ |
7280

7381
### Notes
7482

@@ -86,7 +94,24 @@ source:
8694

8795
#### `response`
8896

89-
HTTP response received.
97+
HTTP response received with the `status_code`, `header` and `body`. Ex:
98+
99+
```json
100+
{
101+
"status_code": 200,
102+
"header": {
103+
"link": "</products?page=5&perPage=20>;rel=self,</products?page=0&perPage=20>;rel=first,</products?page=4&perPage=20>;rel=previous,</products?page=6&perPage=20>;rel=next,</products?page=26&perPage=20>;rel=last"
104+
},
105+
"body": [
106+
{"id": 1, "name": "Widget #1"},
107+
{"id": 2, "name": "Widget #2"},
108+
{"id": 3, "name": "Widget #3"}
109+
]
110+
}
111+
```
112+
113+
The header names are always in lower case. See
114+
[Worked Example](#worked-example) for detailed usage.
90115

91116
#### `new_asset(string): Asset`
92117

@@ -125,6 +150,60 @@ asset.data.full_name = "Daiyamondo Jozu"
125150
Takes an asset and emits the asset that can then be consumed by the
126151
processor/sink.
127152

153+
#### `execute_request(...requests)`
154+
155+
Takes 1 or more requests and executes the requests with the concurrency defined
156+
in the recipe. The results are returned as an array. Each item in the array can
157+
be an error or the HTTP response. The request object supports the properties
158+
defined in the [Request](#request) input section.
159+
160+
When a request is executed, it can fail due to temporary errors such as network
161+
errors. These instances need to be handled in the script.
162+
163+
[//]: # (@formatter:off)
164+
165+
```go
166+
if !response.body.success {
167+
exit()
168+
}
169+
170+
reqs := []
171+
for j in response.body.jobs {
172+
reqs = append(reqs, {
173+
url: format("http://my.server.com/jobs/%s/config", j.id),
174+
method: "GET",
175+
content_type: "application/json",
176+
accept: "application/json",
177+
timeout: "5s"
178+
})
179+
}
180+
181+
responses := execute_request(reqs...)
182+
for r in responses {
183+
if is_error(r) {
184+
// TODO: Handle it appropriately. The error value has the request and
185+
// error string:
186+
// r.value.{request, error}
187+
continue
188+
}
189+
190+
asset := new_asset("job")
191+
asset.name = r.body.name
192+
exec_cfg := r.body["execution-config"]
193+
asset.data.attributes = {
194+
"job_id": r.body.jid,
195+
"job_parallelism": exec_cfg["job-parallelism"],
196+
"config": exec_cfg["user-config"]
197+
}
198+
emit(asset)
199+
}
200+
```
201+
202+
[//]: # (@formatter:on)
203+
204+
If the request passed to the function fails validation, a runtime error is
205+
thrown.
206+
128207
#### `exit()`
129208

130209
Terminates the script execution.
@@ -200,11 +279,11 @@ source:
200279
script:
201280
engine: tengo
202281
source: |
203-
if !response.success {
282+
if !response.body.success {
204283
exit()
205284
}
206285
207-
users := response.data
286+
users := response.body.data
208287
for u in users {
209288
if u.email == "" {
210289
continue
@@ -229,19 +308,21 @@ source:
229308
manager_id: u.manager_id,
230309
cost_center_id: u.cost_center_id,
231310
supervisory_org_name: u.supervisory_org_name,
232-
location_id: u.location_id
311+
location_id: u.location_id,
312+
service_job_id: response.header["x-job-id"]
233313
}
234314
emit(asset)
235315
}
236316
```
237317

238-
This would emit a 'User' asset for each user object in `response.data`.
318+
This would emit a 'User' asset for each user object in `response.body.data`. Note
319+
that the response headers can be accessed under `response.header` and can be
320+
used as needed.
239321

240322
## Caveats
241323

242324
The following features are currently not supported:
243325

244-
- Pagination.
245326
- Explicit authentication support, ex: Basic auth/OAuth/OAuth2/JWT etc.
246327
- Retries with configurable backoff.
247328
- Content type for request/response body other than `application/json`.

plugins/extractors/http/execute_request.go

Lines changed: 32 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -7,31 +7,32 @@ import (
77
"fmt"
88
"io"
99
"net/http"
10+
"strings"
1011
)
1112

12-
func (e *Extractor) executeRequest(ctx context.Context) (interface{}, error) {
13-
cfg := e.config
13+
type executeRequestFunc func(ctx context.Context, reqCfg RequestConfig) (map[string]interface{}, error)
1414

15-
ctx, cancel := context.WithTimeout(ctx, cfg.Request.Timeout)
16-
defer cancel()
15+
func makeRequestExecutor(successCodes []int, httpClient *http.Client) executeRequestFunc {
16+
return func(ctx context.Context, reqCfg RequestConfig) (map[string]interface{}, error) {
17+
ctx, cancel := context.WithTimeout(ctx, reqCfg.Timeout)
18+
defer cancel()
1719

18-
req, err := buildRequest(ctx, cfg)
19-
if err != nil {
20-
return nil, err
21-
}
20+
req, err := buildRequest(ctx, reqCfg)
21+
if err != nil {
22+
return nil, err
23+
}
2224

23-
resp, err := e.http.Do(req)
24-
defer drainBody(resp)
25-
if err != nil {
26-
return nil, fmt.Errorf("do request: %w", err)
27-
}
25+
resp, err := httpClient.Do(req)
26+
defer drainBody(resp)
27+
if err != nil {
28+
return nil, fmt.Errorf("do request: %w", err)
29+
}
2830

29-
return handleResponse(cfg, resp)
31+
return handleResponse(successCodes, resp)
32+
}
3033
}
3134

32-
func buildRequest(ctx context.Context, cfg Config) (*http.Request, error) {
33-
reqCfg := cfg.Request
34-
35+
func buildRequest(ctx context.Context, reqCfg RequestConfig) (*http.Request, error) {
3536
body, err := asReader(reqCfg.Body)
3637
if err != nil {
3738
return nil, fmt.Errorf("encode request body: %w", err)
@@ -72,17 +73,26 @@ func addQueryParams(req *http.Request, params []QueryParam) {
7273
req.URL.RawQuery = q.Encode()
7374
}
7475

75-
func handleResponse(cfg Config, resp *http.Response) (interface{}, error) {
76-
if !has(cfg.SuccessCodes, resp.StatusCode) {
76+
func handleResponse(successCodes []int, resp *http.Response) (map[string]interface{}, error) {
77+
if !has(successCodes, resp.StatusCode) {
7778
return nil, fmt.Errorf("unsuccessful request: response status code: %d", resp.StatusCode)
7879
}
7980

80-
var res interface{}
81-
if err := json.NewDecoder(resp.Body).Decode(&res); err != nil {
81+
h := make(map[string]interface{}, len(resp.Header))
82+
for k := range resp.Header {
83+
h[strings.ToLower(k)] = resp.Header.Get(k)
84+
}
85+
86+
var body interface{}
87+
if err := json.NewDecoder(resp.Body).Decode(&body); err != nil {
8288
return nil, fmt.Errorf("decode response: %w", err)
8389
}
8490

85-
return res, nil
91+
return map[string]interface{}{
92+
"status_code": resp.StatusCode,
93+
"header": h,
94+
"body": body,
95+
}, nil
8696
}
8797

8898
func asReader(v interface{}) (io.Reader, error) {

0 commit comments

Comments
 (0)