Skip to content

Using -v should output the status code of the URL #248

Closed
@yuzhe-Mortal

Description

@yuzhe-Mortal

image

image

// getRequest issues a single HTTP request described by request using
// httpclient and converts the outcome into a navigation.Response.
//
// The returned response always carries Depth (request.Depth + 1), the
// crawler options and rootHostname. Body, Resp and Reader are filled in only
// when the exchange gets far enough to produce them:
//   - on a transport error, or a failure while reading the body, the
//     partially filled response is returned together with the error;
//   - on "101 Switching Protocols" the response is returned with a nil error
//     and without Body, Resp or Reader;
//   - otherwise at most c.options.Options.BodyReadSize bytes of the body are
//     read and parsed with goquery.
//
// depth is attached to the request context under the navigation.Depth{} key.
func (c *Crawler) getRequest(ctx context.Context, request navigation.Request, rootHostname string, depth int, httpclient *retryablehttp.Client) (navigation.Response, error) {
	response := navigation.Response{
		Depth:        request.Depth + 1,
		Options:      c.options,
		RootHostname: rootHostname,
	}
	ctx = context.WithValue(ctx, navigation.Depth{}, depth)

	// Hand the payload to the constructor instead of assigning httpReq.Body
	// afterwards: for a *strings.Reader net/http then fills in ContentLength
	// and GetBody, which a late assignment leaves unset.
	var body io.Reader
	if request.Body != "" && request.Method != "GET" {
		body = strings.NewReader(request.Body)
	}
	httpReq, err := http.NewRequestWithContext(ctx, request.Method, request.URL, body)
	if err != nil {
		return response, err
	}
	req, err := retryablehttp.FromRequest(httpReq)
	if err != nil {
		return response, err
	}
	req.Header.Set("User-Agent", utils.WebUserAgent())

	// Per-request headers are applied first so that the crawler-wide headers
	// set below win on conflicting keys.
	for k, v := range request.Headers {
		req.Header.Set(k, v)
	}
	for k, v := range c.headers {
		req.Header.Set(k, v)
	}

	resp, err := httpclient.Do(req)
	// A response may be returned alongside an error, so schedule the cleanup
	// before looking at err. Both the drain and the Close are skipped when
	// there is no body, which avoids a nil dereference on Close.
	if resp != nil && resp.Body != nil {
		defer func() {
			// Best-effort drain of up to 8 KiB before closing; errors are
			// deliberately ignored because this is cleanup only.
			// NOTE(review): presumably 101 responses are excluded because
			// their body is the upgraded connection stream — confirm.
			if resp.StatusCode != http.StatusSwitchingProtocols {
				_, _ = io.CopyN(io.Discard, resp.Body, 8*1024)
			}
			_ = resp.Body.Close()
		}()
	}
	if err != nil {
		return response, err
	}
	if resp.StatusCode == http.StatusSwitchingProtocols {
		return response, nil
	}

	// Cap how much of the body is buffered in memory.
	limitReader := io.LimitReader(resp.Body, int64(c.options.Options.BodyReadSize))
	data, err := io.ReadAll(limitReader)
	if err != nil {
		return response, err
	}

	response.Body = data
	response.Resp = resp
	response.Reader, err = goquery.NewDocumentFromReader(bytes.NewReader(data))
	if err != nil {
		return response, errors.Wrap(err, "could not make document from reader")
	}
	return response, nil
}

Metadata

Metadata

Assignees

Labels

Status: Abandoned — This issue is no longer important to the requestor and no one else has shown an interest in it.
Type: Enhancement — Most issues will probably ask for additions or changes.
Type: Question — A query or seeking clarification on parts of the spec. Probably doesn't need the attention of all.

Type

No type

Projects

No projects

Milestone

No milestone

Relationships

None yet

Development

No branches or pull requests

Issue actions