Skip to content

Commit 408fb67

Browse files
committed
feat: add discord attachment and mention indexing
1 parent b357916 commit 408fb67

17 files changed

Lines changed: 888 additions & 28 deletions

README.md

Lines changed: 21 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,8 @@ It is a bot-token crawler. No user-token hacks. Data stays local.
99
- discovers every guild the configured bot can access
1010
- syncs channels, threads, members, and message history into SQLite
1111
- maintains FTS5 search indexes for fast local text search
12+
- extracts small text-like attachments into the local search index
13+
- records structured user and role mentions for direct querying
1214
- tails Gateway events for live updates, with periodic repair syncs
1315
- exposes read-only SQL for ad hoc analysis
1416
- keeps schema multi-guild ready while preserving a simple single-guild default UX
@@ -163,7 +165,7 @@ bin/discrawl search --include-empty "GitHub"
163165
bin/discrawl --json search "websocket closed"
164166
```
165167

166-
By default, `search` skips empty-body rows such as attachment-only or system-style messages. Use `--include-empty` to opt back in.
168+
By default, `search` skips rows with no searchable content. Attachment text, attachment filenames, embeds, and replies still count as content. Use `--include-empty` to opt back in.
167169

168170
### `messages`
169171

@@ -182,9 +184,26 @@ Notes:
182184
- `--channel` accepts a channel id, exact name, `#name`, or partial name match
183185
- `--days` is shorthand for "since now minus N days"
184186
- `--all` removes the safety limit; default is `200`
185-
- empty-body rows are skipped by default; `--include-empty` opts back in
187+
- rows with no displayable/searchable content are skipped by default; `--include-empty` opts back in
186188
- at least one filter is required
187189

190+
### `mentions`
191+
192+
Lists structured user and role mentions.
193+
194+
```bash
195+
bin/discrawl mentions --channel maintainers --days 7
196+
bin/discrawl mentions --target steipete --type user --limit 50
197+
bin/discrawl mentions --target 1456406468898197625
198+
bin/discrawl --json mentions --type role --days 1
199+
```
200+
201+
Notes:
202+
203+
- `--target` accepts an id, exact name, or partial name match
204+
- `--type` can be `user` or `role`
205+
- same guild/time filters as `messages`
206+
188207
### `sql`
189208

190209
Runs read-only SQL against the local database.

internal/cli/cli.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -124,6 +124,8 @@ func (r *runtime) dispatch(rest []string) error {
124124
return r.withServices(false, func() error { return r.runSearch(rest[1:]) })
125125
case "messages":
126126
return r.withServices(false, func() error { return r.runMessages(rest[1:]) })
127+
case "mentions":
128+
return r.withServices(false, func() error { return r.runMentions(rest[1:]) })
127129
case "sql":
128130
return r.withServices(false, func() error { return r.runSQL(rest[1:]) })
129131
case "members":

internal/cli/cli_test.go

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,14 +75,42 @@ func TestStatusSearchSQLAndListings(t *testing.T) {
7575
NormalizedContent: "",
7676
RawJSON: `{"author":{"username":"Peter"}}`,
7777
}))
78+
require.NoError(t, s.UpsertMessages(ctx, []store.MessageMutation{{
79+
Record: store.MessageRecord{
80+
ID: "m3",
81+
GuildID: "g1",
82+
ChannelID: "c1",
83+
ChannelName: "general",
84+
AuthorID: "u1",
85+
AuthorName: "Peter",
86+
MessageType: 0,
87+
CreatedAt: time.Now().UTC().Add(2 * time.Second).Format(time.RFC3339Nano),
88+
Content: "",
89+
NormalizedContent: "trace.txt stack trace line one",
90+
HasAttachments: true,
91+
RawJSON: `{"author":{"username":"Peter"}}`,
92+
},
93+
Mentions: []store.MentionEventRecord{{
94+
MessageID: "m3",
95+
GuildID: "g1",
96+
ChannelID: "c1",
97+
AuthorID: "u1",
98+
TargetType: "user",
99+
TargetID: "u2",
100+
TargetName: "Shadow",
101+
EventAt: time.Now().UTC().Add(2 * time.Second).Format(time.RFC3339Nano),
102+
}},
103+
}}))
78104
require.NoError(t, s.Close())
79105

80106
tests := [][]string{
81107
{"--config", cfgPath, "status"},
82108
{"--config", cfgPath, "search", "panic"},
109+
{"--config", cfgPath, "search", "stack"},
83110
{"--config", cfgPath, "search", "--include-empty", "Peter"},
84111
{"--config", cfgPath, "messages", "--channel", "general", "--days", "7", "--all"},
85112
{"--config", cfgPath, "messages", "--channel", "general", "--days", "7", "--all", "--include-empty"},
113+
{"--config", cfgPath, "mentions", "--target", "Shadow", "--limit", "10"},
86114
{"--config", cfgPath, "sql", "select count(*) as total from messages"},
87115
{"--config", cfgPath, "members", "list"},
88116
{"--config", cfgPath, "channels", "list"},
@@ -273,6 +301,7 @@ func TestRuntimeHelpersAndSubcommands(t *testing.T) {
273301
rt.now = func() time.Time { return time.Date(2026, 3, 8, 12, 0, 0, 0, time.UTC) }
274302
require.NoError(t, rt.runMessages([]string{"--channel", "#general", "--days", "7", "--all"}))
275303
require.NoError(t, rt.runMessages([]string{"--channel", "#general", "--days", "7", "--all", "--include-empty"}))
304+
require.NoError(t, rt.runMentions([]string{"--channel", "#general", "--target", "u2"}))
276305
require.NoError(t, rt.runSearch([]string{"--include-empty", "Peter"}))
277306
require.NoError(t, rt.runChannels([]string{"show", "c1"}))
278307
require.NoError(t, rt.runChannels([]string{"list"}))
@@ -281,6 +310,19 @@ func TestRuntimeHelpersAndSubcommands(t *testing.T) {
281310
}))
282311
}
283312

313+
func TestRunMentionsValidation(t *testing.T) {
314+
t.Parallel()
315+
316+
rt := &runtime{stderr: &bytes.Buffer{}}
317+
rt.now = func() time.Time { return time.Date(2026, 3, 8, 12, 0, 0, 0, time.UTC) }
318+
319+
require.Equal(t, 2, ExitCode(rt.runMentions([]string{"--days", "-1", "--target", "u1"})))
320+
require.Equal(t, 2, ExitCode(rt.runMentions([]string{"--days", "1", "--since", "2026-03-01T00:00:00Z", "--target", "u1"})))
321+
require.Equal(t, 2, ExitCode(rt.runMentions([]string{"--since", "bad", "--target", "u1"})))
322+
require.Equal(t, 2, ExitCode(rt.runMentions([]string{"--type", "nope", "--target", "u1"})))
323+
require.Equal(t, 2, ExitCode(rt.runMentions([]string{})))
324+
}
325+
284326
func TestPrintJSONAndPlain(t *testing.T) {
285327
t.Parallel()
286328

@@ -296,6 +338,10 @@ func TestPrintJSONAndPlain(t *testing.T) {
296338
require.NoError(t, rt.print([]store.SearchResult{{GuildID: "g1", ChannelName: "general", AuthorName: "Peter", Content: "hello"}}))
297339
require.Contains(t, rt.stdout.(*bytes.Buffer).String(), "hello")
298340

341+
rt = &runtime{stdout: &bytes.Buffer{}}
342+
require.NoError(t, rt.print([]store.MentionRow{{GuildID: "g1", ChannelName: "general", AuthorName: "Peter", TargetType: "user", TargetName: "Shadow", Content: "hello"}}))
343+
require.Contains(t, rt.stdout.(*bytes.Buffer).String(), "Shadow")
344+
299345
rt = &runtime{stdout: &bytes.Buffer{}, plain: true}
300346
require.NoError(t, rt.print([]store.MemberRow{{GuildID: "g1", UserID: "u1", Username: "peter"}}))
301347
require.Contains(t, rt.stdout.(*bytes.Buffer).String(), "peter")
@@ -308,6 +354,10 @@ func TestPrintJSONAndPlain(t *testing.T) {
308354
require.NoError(t, rt.print([]store.MessageRow{{GuildID: "g1", ChannelID: "c1", AuthorID: "u1", MessageID: "m1", Content: "hello", CreatedAt: time.Unix(1, 0).UTC()}}))
309355
require.Contains(t, rt.stdout.(*bytes.Buffer).String(), "m1")
310356

357+
rt = &runtime{stdout: &bytes.Buffer{}, plain: true}
358+
require.NoError(t, rt.print([]store.MentionRow{{GuildID: "g1", ChannelID: "c1", AuthorID: "u1", TargetType: "user", TargetID: "u2", Content: "hello", CreatedAt: time.Unix(1, 0).UTC()}}))
359+
require.Contains(t, rt.stdout.(*bytes.Buffer).String(), "u2")
360+
311361
rt = &runtime{stdout: &bytes.Buffer{}}
312362
require.NoError(t, rt.print(struct{ OK bool }{OK: true}))
313363
require.Contains(t, rt.stdout.(*bytes.Buffer).String(), "\"OK\": true")

internal/cli/mentions.go

Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,93 @@
1+
package cli
2+
3+
import (
4+
"flag"
5+
"fmt"
6+
"io"
7+
"strings"
8+
"time"
9+
10+
"github.com/steipete/discrawl/internal/store"
11+
)
12+
13+
func (r *runtime) runMentions(args []string) error {
14+
fs := flag.NewFlagSet("mentions", flag.ContinueOnError)
15+
fs.SetOutput(io.Discard)
16+
channel := fs.String("channel", "", "")
17+
author := fs.String("author", "", "")
18+
target := fs.String("target", "", "")
19+
targetType := fs.String("type", "", "")
20+
days := fs.Int("days", 0, "")
21+
since := fs.String("since", "", "")
22+
before := fs.String("before", "", "")
23+
limit := fs.Int("limit", defaultMessageLimit, "")
24+
guildsFlag := fs.String("guilds", "", "")
25+
guildFlag := fs.String("guild", "", "")
26+
if err := fs.Parse(args); err != nil {
27+
return usageErr(err)
28+
}
29+
if fs.NArg() != 0 {
30+
return usageErr(fmt.Errorf("mentions takes flags only"))
31+
}
32+
if *days < 0 {
33+
return usageErr(fmt.Errorf("--days must be >= 0"))
34+
}
35+
if *days > 0 && strings.TrimSpace(*since) != "" {
36+
return usageErr(fmt.Errorf("use either --days or --since"))
37+
}
38+
if *limit < 0 {
39+
return usageErr(fmt.Errorf("--limit must be >= 0"))
40+
}
41+
if targetTypeValue := strings.TrimSpace(*targetType); targetTypeValue != "" && targetTypeValue != "user" && targetTypeValue != "role" {
42+
return usageErr(fmt.Errorf("--type must be user or role"))
43+
}
44+
45+
var sinceTime time.Time
46+
var beforeTime time.Time
47+
var err error
48+
if *days > 0 {
49+
now := time.Now().UTC()
50+
if r.now != nil {
51+
now = r.now().UTC()
52+
}
53+
sinceTime = now.Add(-time.Duration(*days) * 24 * time.Hour)
54+
}
55+
if strings.TrimSpace(*since) != "" {
56+
sinceTime, err = time.Parse(time.RFC3339, *since)
57+
if err != nil {
58+
return usageErr(fmt.Errorf("invalid --since: %w", err))
59+
}
60+
}
61+
if strings.TrimSpace(*before) != "" {
62+
beforeTime, err = time.Parse(time.RFC3339, *before)
63+
if err != nil {
64+
return usageErr(fmt.Errorf("invalid --before: %w", err))
65+
}
66+
}
67+
68+
guildIDs := r.resolveSearchGuilds(*guildFlag, *guildsFlag)
69+
if strings.TrimSpace(*channel) == "" &&
70+
strings.TrimSpace(*author) == "" &&
71+
strings.TrimSpace(*target) == "" &&
72+
strings.TrimSpace(*targetType) == "" &&
73+
sinceTime.IsZero() &&
74+
beforeTime.IsZero() &&
75+
len(guildIDs) == 0 {
76+
return usageErr(fmt.Errorf("mentions needs at least one filter"))
77+
}
78+
79+
rows, err := r.store.ListMentions(r.ctx, store.MentionListOptions{
80+
GuildIDs: guildIDs,
81+
Channel: *channel,
82+
Author: *author,
83+
Target: *target,
84+
TargetType: *targetType,
85+
Since: sinceTime,
86+
Before: beforeTime,
87+
Limit: *limit,
88+
})
89+
if err != nil {
90+
return err
91+
}
92+
return r.print(rows)
93+
}

internal/cli/output.go

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,11 @@ func printPlain(w io.Writer, value any) error {
5454
_, _ = fmt.Fprintf(w, "%s\t%s\t%s\t%s\t%s\t%s\n", formatTime(row.CreatedAt), row.GuildID, row.ChannelID, row.AuthorID, row.MessageID, row.Content)
5555
}
5656
return nil
57+
case []store.MentionRow:
58+
for _, row := range v {
59+
_, _ = fmt.Fprintf(w, "%s\t%s\t%s\t%s\t%s\t%s\t%s\n", formatTime(row.CreatedAt), row.GuildID, row.ChannelID, row.AuthorID, row.TargetType, row.TargetID, row.Content)
60+
}
61+
return nil
5762
default:
5863
return fmt.Errorf("no plain printer")
5964
}
@@ -71,6 +76,7 @@ Commands:
7176
tail
7277
search
7378
messages
79+
mentions
7480
sql
7581
members
7682
channels
@@ -112,6 +118,13 @@ func printHuman(w io.Writer, value any) error {
112118
}
113119
}
114120
return nil
121+
case []store.MentionRow:
122+
for _, row := range v {
123+
if _, err := fmt.Fprintf(w, "[%s/%s] %s -> %s:%s %s\n%s\n\n", row.GuildID, row.ChannelName, row.AuthorName, row.TargetType, firstNonEmpty(row.TargetName, row.TargetID), formatTime(row.CreatedAt), row.Content); err != nil {
124+
return err
125+
}
126+
}
127+
return nil
115128
case []store.MemberRow:
116129
tw := tabwriter.NewWriter(w, 2, 4, 2, ' ', 0)
117130
_, _ = fmt.Fprintln(tw, "GUILD\tUSER\tNAME\tDISPLAY")

internal/store/mentions.go

Lines changed: 105 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,105 @@
1+
package store
2+
3+
import (
4+
"context"
5+
"strings"
6+
)
7+
8+
func (s *Store) ListMentions(ctx context.Context, opts MentionListOptions) ([]MentionRow, error) {
9+
if opts.Limit <= 0 {
10+
opts.Limit = 200
11+
}
12+
args := []any{}
13+
clauses := []string{"1=1"}
14+
if len(opts.GuildIDs) > 0 {
15+
clauses = append(clauses, "me.guild_id in ("+placeholders(len(opts.GuildIDs))+")")
16+
for _, guildID := range opts.GuildIDs {
17+
args = append(args, guildID)
18+
}
19+
}
20+
if channel := normalizeChannelFilter(opts.Channel); channel != "" {
21+
clauses = append(clauses, "(me.channel_id = ? or c.name = ? or c.name like ?)")
22+
args = append(args, channel, channel, "%"+channel+"%")
23+
}
24+
if author := strings.TrimSpace(opts.Author); author != "" {
25+
clauses = append(clauses, `(me.author_id = ? or coalesce(mem.username, '') = ? or coalesce(mem.display_name, '') = ? or coalesce(mem.username, '') like ? or coalesce(mem.display_name, '') like ?)`)
26+
args = append(args, author, author, author, "%"+author+"%", "%"+author+"%")
27+
}
28+
if target := strings.TrimSpace(opts.Target); target != "" {
29+
clauses = append(clauses, `(me.target_id = ? or me.target_name = ? or me.target_name like ?)`)
30+
args = append(args, target, target, "%"+target+"%")
31+
}
32+
if targetType := strings.TrimSpace(opts.TargetType); targetType != "" {
33+
clauses = append(clauses, "me.target_type = ?")
34+
args = append(args, targetType)
35+
}
36+
if !opts.Since.IsZero() {
37+
clauses = append(clauses, "me.event_at >= ?")
38+
args = append(args, opts.Since.UTC().Format(timeLayout))
39+
}
40+
if !opts.Before.IsZero() {
41+
clauses = append(clauses, "me.event_at < ?")
42+
args = append(args, opts.Before.UTC().Format(timeLayout))
43+
}
44+
args = append(args, opts.Limit)
45+
rows, err := s.db.QueryContext(ctx, `
46+
select
47+
me.message_id,
48+
me.guild_id,
49+
me.channel_id,
50+
coalesce(c.name, ''),
51+
coalesce(me.author_id, ''),
52+
coalesce(
53+
nullif(mem.display_name, ''),
54+
nullif(mem.nick, ''),
55+
nullif(mem.global_name, ''),
56+
nullif(mem.username, ''),
57+
nullif(json_extract(m.raw_json, '$.author.global_name'), ''),
58+
nullif(json_extract(m.raw_json, '$.author.username'), ''),
59+
''
60+
),
61+
me.target_type,
62+
me.target_id,
63+
me.target_name,
64+
case
65+
when trim(coalesce(m.content, '')) <> '' then m.content
66+
else m.normalized_content
67+
end,
68+
me.event_at
69+
from mention_events me
70+
left join messages m on m.id = me.message_id
71+
left join channels c on c.id = me.channel_id
72+
left join members mem on mem.guild_id = me.guild_id and mem.user_id = me.author_id
73+
where `+strings.Join(clauses, " and ")+`
74+
order by me.event_at desc, me.event_id desc
75+
limit ?
76+
`, args...)
77+
if err != nil {
78+
return nil, err
79+
}
80+
defer func() { _ = rows.Close() }()
81+
82+
var out []MentionRow
83+
for rows.Next() {
84+
var row MentionRow
85+
var created string
86+
if err := rows.Scan(
87+
&row.MessageID,
88+
&row.GuildID,
89+
&row.ChannelID,
90+
&row.ChannelName,
91+
&row.AuthorID,
92+
&row.AuthorName,
93+
&row.TargetType,
94+
&row.TargetID,
95+
&row.TargetName,
96+
&row.Content,
97+
&created,
98+
); err != nil {
99+
return nil, err
100+
}
101+
row.CreatedAt = parseTime(created)
102+
out = append(out, row)
103+
}
104+
return out, rows.Err()
105+
}

0 commit comments

Comments
 (0)