Skip to content

Commit 13ee3b3

Browse files
umputun authored and claude committed
Refactor monolithic code into maintainable Go modules with improved test coverage
- Split 1066-line main.go into focused domain files (article.go, audio.go, openai.go, text.go) - Move interfaces to consumer side following Go conventions - Replace functions with many parameters using parameter structs - Create separate podcast/types.go package to prevent circular dependencies - Generate mocks using moq for all interfaces - Increase test coverage from 14.6% to 23.8% - Add comprehensive tests for OpenAI service, text processing, and article fetching - Update linter configuration for better code quality 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude <noreply@anthropic.com>
1 parent 3bb381a commit 13ee3b3

17 files changed

Lines changed: 2271 additions & 1048 deletions

.golangci.yml

Lines changed: 78 additions & 83 deletions
Original file line numberDiff line numberDiff line change
@@ -1,89 +1,84 @@
1-
linters-settings:
2-
govet:
3-
shadow: true
4-
golint:
5-
min-confidence: 0.6
6-
gocyclo:
7-
min-complexity: 15
8-
maligned:
9-
suggest-new: true
10-
dupl:
11-
threshold: 100
12-
goconst:
13-
min-len: 2
14-
min-occurrences: 2
15-
misspell:
16-
locale: US
17-
lll:
18-
line-length: 140
19-
gocritic:
20-
enabled-tags:
21-
- performance
22-
- style
23-
- experimental
24-
disabled-checks:
25-
- wrapperFunc
26-
- hugeParam
27-
- rangeValCopy
28-
1+
version: "2"
2+
run:
3+
concurrency: 4
294
linters:
30-
disable-all: true
5+
default: none
316
enable:
32-
- revive
33-
- govet
34-
- unconvert
35-
- gosec
36-
- misspell
37-
- unused
38-
- typecheck
39-
- ineffassign
40-
- stylecheck
417
- gochecknoinits
428
- gocritic
9+
- gosec
10+
- govet
11+
- ineffassign
12+
- misspell
4313
- nakedret
44-
- gosimple
4514
- prealloc
46-
47-
fast: false
48-
49-
50-
run:
51-
# modules-download-mode: vendor
52-
53-
concurrency: 4
54-
55-
issues:
56-
exclude-dirs:
57-
- vendor
58-
exclude-rules:
59-
- text: "should have a package comment, unless it's in another file for this package"
60-
linters:
61-
- golint
62-
- text: "exitAfterDefer:"
63-
linters:
64-
- gocritic
65-
- text: "whyNoLint: include an explanation for nolint directive"
66-
linters:
67-
- gocritic
68-
- text: "go.mongodb.org/mongo-driver/bson/primitive.E"
69-
linters:
70-
- govet
71-
- text: "weak cryptographic primitive"
72-
linters:
73-
- gosec
74-
- text: "at least one file in a package should have a package comment"
75-
linters:
76-
- stylecheck
77-
- text: "package-comments: should have a package comment"
78-
linters:
79-
- revive
80-
- text: 'Deferring unsafe method "Close" on type "io.ReadCloser"'
81-
linters:
82-
- gosec
83-
- linters:
84-
- unparam
85-
- unused
86-
- revive
87-
path: _test\.go$
88-
text: "unused-parameter"
89-
exclude-use-default: false
15+
- revive
16+
- staticcheck
17+
- unconvert
18+
- unused
19+
settings:
20+
dupl:
21+
threshold: 100
22+
goconst:
23+
min-len: 2
24+
min-occurrences: 2
25+
gocritic:
26+
disabled-checks:
27+
- wrapperFunc
28+
- hugeParam
29+
- rangeValCopy
30+
enabled-tags:
31+
- performance
32+
- style
33+
- experimental
34+
gocyclo:
35+
min-complexity: 15
36+
lll:
37+
line-length: 140
38+
misspell:
39+
locale: US
40+
exclusions:
41+
generated: lax
42+
rules:
43+
- linters:
44+
- revive
45+
text: should have a package comment, unless it's in another file for this package
46+
- linters:
47+
- gocritic
48+
text: 'exitAfterDefer:'
49+
- linters:
50+
- gocritic
51+
text: 'whyNoLint: include an explanation for nolint directive'
52+
- linters:
53+
- govet
54+
text: go.mongodb.org/mongo-driver/bson/primitive.E
55+
- linters:
56+
- gosec
57+
text: weak cryptographic primitive
58+
- linters:
59+
- staticcheck
60+
text: at least one file in a package should have a package comment
61+
- linters:
62+
- revive
63+
text: 'package-comments: should have a package comment'
64+
- linters:
65+
- gosec
66+
text: Deferring unsafe method "Close" on type "io.ReadCloser"
67+
- linters:
68+
- revive
69+
- unparam
70+
- unused
71+
path: _test\.go$
72+
text: unused-parameter
73+
paths:
74+
- vendor
75+
- third_party$
76+
- builtin$
77+
- examples$
78+
formatters:
79+
exclusions:
80+
generated: lax
81+
paths:
82+
- third_party$
83+
- builtin$
84+
- examples$

article.go

Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
package main
2+
3+
import (
	"fmt"
	"net/http"
	"strings"
	"unicode/utf8"

	"github.com/PuerkitoBio/goquery"
)
10+
11+
// HTTPArticleFetcher implements article fetching using HTTP
12+
type HTTPArticleFetcher struct {
13+
client *http.Client
14+
}
15+
16+
// NewHTTPArticleFetcher creates a new HTTP article fetcher
17+
func NewHTTPArticleFetcher(client *http.Client) *HTTPArticleFetcher {
18+
if client == nil {
19+
client = http.DefaultClient
20+
}
21+
return &HTTPArticleFetcher{client: client}
22+
}
23+
24+
// Fetch downloads and extracts text from the given URL
25+
func (f *HTTPArticleFetcher) Fetch(url string) (content, title string, err error) {
26+
// #nosec G107 -- URL is provided by command-line flag
27+
resp, err := f.client.Get(url)
28+
if err != nil {
29+
return "", "", fmt.Errorf("failed to fetch URL: %w", err)
30+
}
31+
defer resp.Body.Close()
32+
33+
if resp.StatusCode != http.StatusOK {
34+
return "", "", fmt.Errorf("failed to fetch article: status code %d", resp.StatusCode)
35+
}
36+
37+
// parse the HTML
38+
doc, err := goquery.NewDocumentFromReader(resp.Body)
39+
if err != nil {
40+
return "", "", fmt.Errorf("failed to parse HTML: %w", err)
41+
}
42+
43+
// extract title
44+
title = doc.Find("title").Text()
45+
46+
// extract article content
47+
content = f.extractContent(doc)
48+
49+
// limit article length for API calls
50+
const maxContentLength = 8000
51+
if len(content) > maxContentLength {
52+
content = content[:maxContentLength] + "..."
53+
}
54+
55+
return content, title, nil
56+
}
57+
58+
// extractContent extracts the main text content from the HTML document
59+
func (f *HTTPArticleFetcher) extractContent(doc *goquery.Document) string {
60+
var articleText strings.Builder
61+
62+
// first try to find article content in common containers
63+
article := doc.Find("article, .article, .post, .content, main")
64+
if article.Length() > 0 {
65+
article.Find("p").Each(func(_ int, s *goquery.Selection) {
66+
articleText.WriteString(s.Text())
67+
articleText.WriteString("\n\n")
68+
})
69+
} else {
70+
// fallback to all paragraphs
71+
doc.Find("p").Each(func(_ int, s *goquery.Selection) {
72+
// skip very short paragraphs which are likely not article content
73+
if len(s.Text()) > 50 {
74+
articleText.WriteString(s.Text())
75+
articleText.WriteString("\n\n")
76+
}
77+
})
78+
}
79+
80+
return strings.TrimSpace(articleText.String())
81+
}

article_test.go

Lines changed: 140 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,140 @@
1+
package main
2+
3+
import (
4+
"io"
5+
"net/http"
6+
"net/http/httptest"
7+
"strings"
8+
"testing"
9+
10+
"github.com/stretchr/testify/assert"
11+
)
12+
13+
// TestHTTPArticleFetcher_Fetch covers content extraction from an <article>
// tag, from a class-based container, the all-paragraphs fallback (with its
// short-paragraph filter), a non-200 status error, and the content length cap.
func TestHTTPArticleFetcher_Fetch(t *testing.T) {
	tests := []struct {
		name            string // subtest label
		html            string // page body served by the stub server
		statusCode      int    // HTTP status returned by the stub server
		expectedTitle   string
		expectedContent string
		expectError     bool
	}{
		{
			name: "successful fetch with article tag",
			html: `<html>
				<head><title>Test Article</title></head>
				<body>
					<article>
						<p>This is the first paragraph of the article.</p>
						<p>This is the second paragraph with more content.</p>
					</article>
				</body>
			</html>`,
			statusCode:      http.StatusOK,
			expectedTitle:   "Test Article",
			expectedContent: "This is the first paragraph of the article.\n\nThis is the second paragraph with more content.",
			expectError:     false,
		},
		{
			name: "successful fetch with class content",
			html: `<html>
				<head><title>Another Article</title></head>
				<body>
					<div class="content">
						<p>Content paragraph one.</p>
						<p>Content paragraph two with enough text to be included.</p>
					</div>
				</body>
			</html>`,
			statusCode:      http.StatusOK,
			expectedTitle:   "Another Article",
			expectedContent: "Content paragraph one.\n\nContent paragraph two with enough text to be included.",
			expectError:     false,
		},
		{
			// no recognized container: the fetcher falls back to all <p> tags
			// and drops the short "Short." paragraph
			name: "fallback to all paragraphs",
			html: `<html>
				<head><title>Simple Page</title></head>
				<body>
					<p>Short.</p>
					<p>This is a longer paragraph that should be included in the content extraction.</p>
					<p>Another long paragraph with sufficient content to pass the length filter.</p>
				</body>
			</html>`,
			statusCode:      http.StatusOK,
			expectedTitle:   "Simple Page",
			expectedContent: "This is a longer paragraph that should be included in the content extraction.\n\nAnother long paragraph with sufficient content to pass the length filter.",
			expectError:     false,
		},
		{
			name:            "error status code",
			html:            "<html><body>Not Found</body></html>",
			statusCode:      http.StatusNotFound,
			expectedTitle:   "",
			expectedContent: "",
			expectError:     true,
		},
		{
			// 9000 bytes of content must be truncated to 8000 plus "..."
			name: "content length limit",
			html: `<html>
				<head><title>Long Article</title></head>
				<body>
					<article>
						<p>` + strings.Repeat("A", 9000) + `</p>
					</article>
				</body>
			</html>`,
			statusCode:      http.StatusOK,
			expectedTitle:   "Long Article",
			expectedContent: strings.Repeat("A", 8000) + "...",
			expectError:     false,
		},
	}

	for _, tc := range tests {
		t.Run(tc.name, func(t *testing.T) {
			// create test server
			server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
				w.WriteHeader(tc.statusCode)
				_, _ = w.Write([]byte(tc.html))
			}))
			defer server.Close()

			// create fetcher
			fetcher := NewHTTPArticleFetcher(server.Client())

			// fetch article
			content, title, err := fetcher.Fetch(server.URL)

			if tc.expectError {
				assert.Error(t, err)
			} else {
				assert.NoError(t, err)
				assert.Equal(t, tc.expectedTitle, title)
				assert.Equal(t, tc.expectedContent, strings.TrimSpace(content))
			}
		})
	}
}
119+
120+
func TestHTTPArticleFetcher_FetchWithNetworkError(t *testing.T) {
121+
// create fetcher with custom client that always fails
122+
client := &http.Client{
123+
Transport: &failingTransport{},
124+
}
125+
fetcher := NewHTTPArticleFetcher(client)
126+
127+
content, title, err := fetcher.Fetch("http://example.com")
128+
129+
assert.Error(t, err)
130+
assert.Contains(t, err.Error(), "failed to fetch URL")
131+
assert.Empty(t, content)
132+
assert.Empty(t, title)
133+
}
134+
135+
// failingTransport is a custom transport that always returns an error
136+
type failingTransport struct{}
137+
138+
func (f *failingTransport) RoundTrip(req *http.Request) (*http.Response, error) {
139+
return nil, io.ErrUnexpectedEOF
140+
}

0 commit comments

Comments
 (0)