Skip to content

Commit abd44ee

Browse files
feat(gmail): add --full flag and HTML stripping for thread display
- Add HTML stripping patterns to remove script/style blocks and tags
- Add --full flag to show complete message bodies without truncation
- Default behavior now truncates to 500 chars with hint to use --full
- Use runes for truncation to avoid breaking UTF-8 characters
- Show message count upfront (e.g., "Thread contains 5 message(s)")
- Improve message header format (=== Message 1/5: <id> ===)
- Add comprehensive tests for stripHTMLTags()

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
1 parent 19d833f commit abd44ee

2 files changed

Lines changed: 93 additions & 3 deletions

File tree

internal/cmd/gmail_thread.go

Lines changed: 35 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ import (
88
"net/url"
99
"os"
1010
"path/filepath"
11+
"regexp"
1112
"strings"
1213

1314
"google.golang.org/api/gmail/v1"
@@ -16,6 +17,22 @@ import (
1617
"github.com/steipete/gogcli/internal/ui"
1718
)
1819

20+
// HTML stripping patterns for cleaner text output.
//
// All patterns are compiled once at package scope so stripHTMLTags never
// recompiles a regexp per call.
var (
	// scriptPattern matches a whole <script>...</script> element including its
	// body. The closing tag may carry whitespace before '>' ("</script >").
	scriptPattern = regexp.MustCompile(`(?is)<script\b[^>]*>.*?</script\s*>`)
	// stylePattern matches a whole <style>...</style> element including its body.
	stylePattern = regexp.MustCompile(`(?is)<style\b[^>]*>.*?</style\s*>`)
	// htmlCommentPattern matches <!-- ... --> comments. They are handled
	// separately because a comment body may itself contain '>'.
	htmlCommentPattern = regexp.MustCompile(`(?s)<!--.*?-->`)
	// htmlTagPattern matches real markup only: an opening/closing tag whose name
	// starts with a letter and is followed by whitespace, "/" or ">", or a
	// declaration / processing instruction ("<!DOCTYPE ...>", "<?xml ...?>").
	// Requiring a well-formed tag name keeps plain-text uses of angle brackets
	// intact, e.g. "1 < 2 and 3 > 2", "<john@example.com>", "<https://host/>".
	htmlTagPattern = regexp.MustCompile(`</?[a-zA-Z][a-zA-Z0-9:-]*(?:\s[^>]*)?/?>|<[!?][^>]*>`)
	// whitespacePattern matches any run of whitespace (including newlines).
	whitespacePattern = regexp.MustCompile(`\s+`)
)

// stripHTMLTags returns s with HTML markup removed and whitespace normalized.
//
// Processing order matters:
//  1. <script> and <style> elements are dropped together with their contents.
//  2. HTML comments are dropped.
//  3. Remaining tags are replaced by a single space so that adjacent block
//     elements ("<p>a</p><p>b</p>") do not fuse their text together.
//  4. Every run of whitespace is collapsed to one space and the result is
//     trimmed.
//
// Text that merely contains '<' or '>' but is not a tag is left untouched.
// Character entities (such as "&amp;") are not decoded.
func stripHTMLTags(s string) string {
	if s == "" {
		return ""
	}
	s = scriptPattern.ReplaceAllString(s, "")
	s = stylePattern.ReplaceAllString(s, "")
	s = htmlCommentPattern.ReplaceAllString(s, "")
	s = htmlTagPattern.ReplaceAllString(s, " ")
	s = whitespacePattern.ReplaceAllString(s, " ")
	return strings.TrimSpace(s)
}
35+
1936
type GmailThreadCmd struct {
2037
Get GmailThreadGetCmd `cmd:"" name:"get" help:"Get a thread with all messages (optionally download attachments)"`
2138
Modify GmailThreadModifyCmd `cmd:"" name:"modify" help:"Modify labels on all messages in a thread"`
@@ -25,6 +42,7 @@ type GmailThreadGetCmd struct {
2542
ThreadID string `arg:"" name:"threadId" help:"Thread ID"`
2643
Download bool `name:"download" help:"Download attachments"`
2744
OutDir string `name:"out-dir" help:"Directory to write attachments to (default: current directory)"`
45+
Full bool `name:"full" help:"Show complete message bodies without truncation"`
2846
}
2947

3048
func (c *GmailThreadGetCmd) Run(ctx context.Context, flags *RootFlags) error {
@@ -103,11 +121,15 @@ func (c *GmailThreadGetCmd) Run(ctx context.Context, flags *RootFlags) error {
103121
return nil
104122
}
105123

106-
for _, msg := range thread.Messages {
124+
// Show message count upfront so users know how many messages to expect
125+
u.Out().Printf("Thread contains %d message(s)\n", len(thread.Messages))
126+
u.Out().Println("")
127+
128+
for i, msg := range thread.Messages {
107129
if msg == nil {
108130
continue
109131
}
110-
u.Out().Printf("Message: %s", msg.Id)
132+
u.Out().Printf("=== Message %d/%d: %s ===", i+1, len(thread.Messages), msg.Id)
111133
u.Out().Printf("From: %s", headerValue(msg.Payload, "From"))
112134
u.Out().Printf("To: %s", headerValue(msg.Payload, "To"))
113135
u.Out().Printf("Subject: %s", headerValue(msg.Payload, "Subject"))
@@ -116,7 +138,17 @@ func (c *GmailThreadGetCmd) Run(ctx context.Context, flags *RootFlags) error {
116138

117139
body := bestBodyText(msg.Payload)
118140
if body != "" {
119-
u.Out().Println(body)
141+
// Strip HTML tags for cleaner text output
142+
cleanBody := stripHTMLTags(body)
143+
// Truncate unless --full is specified
144+
if !c.Full {
145+
// Use runes to avoid breaking multi-byte UTF-8 characters
146+
runes := []rune(cleanBody)
147+
if len(runes) > 500 {
148+
cleanBody = string(runes[:500]) + "... [truncated, use --full for complete output]"
149+
}
150+
}
151+
u.Out().Println(cleanBody)
120152
u.Out().Println("")
121153
}
122154

internal/cmd/gmail_thread_test.go

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,3 +61,61 @@ func TestDecodeBase64URL(t *testing.T) {
6161
t.Fatalf("expected error")
6262
}
6363
}
64+
65+
func TestStripHTMLTags(t *testing.T) {
66+
tests := []struct {
67+
name string
68+
input string
69+
want string
70+
}{
71+
{
72+
name: "basic HTML tags",
73+
input: "<p>Hello</p>",
74+
want: "Hello",
75+
},
76+
{
77+
name: "script block removed",
78+
input: "<script>alert(1)</script>text",
79+
want: "text",
80+
},
81+
{
82+
name: "style block removed",
83+
input: "<style>body{color:red}</style>content",
84+
want: "content",
85+
},
86+
{
87+
name: "nested tags",
88+
input: "<div><span>text</span></div>",
89+
want: "text",
90+
},
91+
{
92+
name: "plain text unchanged",
93+
input: "plain text",
94+
want: "plain text",
95+
},
96+
{
97+
name: "empty input",
98+
input: "",
99+
want: "",
100+
},
101+
{
102+
name: "whitespace collapsed",
103+
input: "<p>hello</p> <p>world</p>",
104+
want: "hello world",
105+
},
106+
{
107+
name: "complex HTML email",
108+
input: "<html><head><style>.foo{}</style></head><body><p>Hi there</p></body></html>",
109+
want: "Hi there",
110+
},
111+
}
112+
113+
for _, tt := range tests {
114+
t.Run(tt.name, func(t *testing.T) {
115+
got := stripHTMLTags(tt.input)
116+
if got != tt.want {
117+
t.Errorf("stripHTMLTags(%q) = %q, want %q", tt.input, got, tt.want)
118+
}
119+
})
120+
}
121+
}

0 commit comments

Comments
 (0)