Skip to content

Commit e4bf53b

Browse files
authored
Merge pull request openclaw#382 from Danielkweber/daniel/feat/sedmat-paragraph-addressing
feat(sedmat): paragraph addressing and structure introspection
2 parents cb16e4f + ef0e8aa commit e4bf53b

10 files changed

Lines changed: 1379 additions & 5 deletions

docs/sedmat.md

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -309,6 +309,48 @@ s/![logo]/!(https:\/\/new-logo.png)/ # match by alt text
309309
310310
---
311311
312+
## Paragraph Addressing
313+
314+
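Target specific paragraphs by number using address prefixes. Numbers are 1-based and count every top-level paragraph, table, and table of contents in document order; section breaks are not numbered. Use `gog docs structure` to see the number assigned to each element.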
315+
316+
```bash
317+
# Introspection — see paragraph numbers, types, and content
318+
gog docs structure <DOC_ID> # show numbered structure
319+
gog docs cat <DOC_ID> -N # cat with [N] prefixes
320+
321+
# Delete by paragraph number
322+
gog docs sed <DOC_ID> '5d' # delete paragraph 5
323+
gog docs sed <DOC_ID> '3,7d' # delete paragraphs 3-7
324+
gog docs sed <DOC_ID> '$d' # delete last paragraph
325+
326+
# Substitute within addressed paragraphs
327+
gog docs sed <DOC_ID> '5s/.*/New text/' # replace all text in paragraph 5
328+
gog docs sed <DOC_ID> '3,7s/old/new/g' # replace within paragraphs 3-7
329+
330+
# Insert/Append around addressed paragraphs
331+
gog docs sed <DOC_ID> '5a/New line/' # append after paragraph 5
332+
gog docs sed <DOC_ID> '3i/Before text/' # insert before paragraph 3
333+
gog docs sed <DOC_ID> '$a/Last line/' # append after last paragraph
334+
```
335+
336+
### Address Syntax
337+
338+
| Address | Meaning |
339+
|---------|---------|
340+
| `N` | Paragraph number N (1-based) |
341+
| `N,M` | Range from paragraph N to M |
342+
| `$` | Last paragraph |
343+
| `N,$` | From paragraph N to end |
344+
345+
### Multi-Tab Support
346+
347+
```bash
348+
gog docs structure <DOC_ID> --tab "Sheet1"
349+
gog docs sed <DOC_ID> --tab "Sheet1" '3d'
350+
```
351+
352+
---
353+
312354
## Batch Mode
313355
314356
Create a `.sed` file with one expression per line. Comments start with `#`.

internal/cmd/docs.go

Lines changed: 102 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ type DocsCmd struct {
3838
Edit DocsEditCmd `cmd:"" name:"edit" help:"Find and replace text in a Google Doc"`
3939
Sed DocsSedCmd `cmd:"" name:"sed" help:"Regex find/replace (sed-style: s/pattern/replacement/g)"`
4040
Clear DocsClearCmd `cmd:"" name:"clear" help:"Clear all content from a Google Doc"`
41+
Structure DocsStructureCmd `cmd:"" name:"structure" aliases:"struct" help:"Show document structure with numbered paragraphs"`
4142
}
4243
type DocsExportCmd struct {
4344
DocID string `arg:"" name:"docId" help:"Doc ID"`
@@ -276,6 +277,7 @@ type DocsCatCmd struct {
276277
Tab string `name:"tab" help:"Tab title or ID to read (omit for default behavior)"`
277278
AllTabs bool `name:"all-tabs" help:"Show all tabs with headers"`
278279
Raw bool `name:"raw" help:"Output the raw Google Docs API JSON response without modifications"`
280+
Numbered bool `name:"numbered" short:"N" help:"Prefix each paragraph with its number"`
279281
}
280282

281283
func (c *DocsCatCmd) Run(ctx context.Context, flags *RootFlags) error {
@@ -340,6 +342,10 @@ func (c *DocsCatCmd) Run(ctx context.Context, flags *RootFlags) error {
340342
return errors.New("doc not found")
341343
}
342344

345+
if c.Numbered {
346+
return c.printNumbered(ctx, doc, "")
347+
}
348+
343349
text := docsPlainText(doc, c.MaxBytes)
344350

345351
if outfmt.IsJSON(ctx) {
@@ -545,6 +551,9 @@ func (c *DocsCatCmd) runWithTabs(ctx context.Context, svc *docs.Service, id stri
545551
if tab == nil {
546552
return fmt.Errorf("tab not found: %s", c.Tab)
547553
}
554+
if c.Numbered {
555+
return c.printNumbered(ctx, doc, c.Tab)
556+
}
548557
text := tabPlainText(tab, c.MaxBytes)
549558
if outfmt.IsJSON(ctx) {
550559
return outfmt.WriteJSON(ctx, os.Stdout, map[string]any{
@@ -937,6 +946,99 @@ func (c *DocsClearCmd) Run(ctx context.Context, flags *RootFlags) error {
937946
return sedCmd.Run(ctx, flags)
938947
}
939948

949+
// --- Structure / Numbered commands ---
950+
951+
// DocsStructureCmd holds the arguments of the `docs structure` subcommand,
// which lists a document's elements together with their 1-based numbers.
type DocsStructureCmd struct {
	// DocID is the positional document identifier.
	DocID string `arg:"" name:"docId" help:"Doc ID"`
	// Tab optionally selects a single tab; empty means the default body.
	Tab string `name:"tab" help:"Tab title or ID (omit for default)"`
}
956+
957+
func (c *DocsStructureCmd) Run(ctx context.Context, flags *RootFlags) error {
958+
u := ui.FromContext(ctx)
959+
account, err := requireAccount(flags)
960+
if err != nil {
961+
return err
962+
}
963+
964+
id := strings.TrimSpace(c.DocID)
965+
if id == "" {
966+
return usage("empty docId")
967+
}
968+
969+
svc, err := newDocsService(ctx, account)
970+
if err != nil {
971+
return err
972+
}
973+
974+
getCall := svc.Documents.Get(id)
975+
if c.Tab != "" {
976+
getCall = getCall.IncludeTabsContent(true)
977+
}
978+
doc, err := getCall.Context(ctx).Do()
979+
if err != nil {
980+
if isDocsNotFound(err) {
981+
return fmt.Errorf("doc not found or not a Google Doc (id=%s)", id)
982+
}
983+
return err
984+
}
985+
if doc == nil {
986+
return errors.New("doc not found")
987+
}
988+
989+
pm, err := buildParagraphMap(doc, c.Tab)
990+
if err != nil {
991+
return err
992+
}
993+
994+
if outfmt.IsJSON(ctx) {
995+
return outfmt.WriteJSON(ctx, os.Stdout, pm)
996+
}
997+
998+
u.Out().Printf(" # TYPE CONTENT")
999+
for _, p := range pm.Paragraphs {
1000+
prefix := ""
1001+
if p.IsBullet {
1002+
prefix = strings.Repeat(" ", p.NestLevel) + "* "
1003+
}
1004+
1005+
text := p.Text
1006+
if len(text) > 60 {
1007+
text = text[:57] + "..."
1008+
}
1009+
1010+
if p.ElemType == "table" {
1011+
text = fmt.Sprintf("[table %dx%d] %s", p.TableRows, p.TableCols, text)
1012+
}
1013+
1014+
u.Out().Printf("%2d %-18s %s%s", p.Num, p.Type, prefix, text)
1015+
}
1016+
return nil
1017+
}
1018+
1019+
// printNumbered prints document content with [N] paragraph number prefixes.
1020+
func (c *DocsCatCmd) printNumbered(ctx context.Context, doc *docs.Document, tabID string) error {
1021+
pm, err := buildParagraphMap(doc, tabID)
1022+
if err != nil {
1023+
return err
1024+
}
1025+
1026+
if outfmt.IsJSON(ctx) {
1027+
return outfmt.WriteJSON(ctx, os.Stdout, pm)
1028+
}
1029+
1030+
for _, p := range pm.Paragraphs {
1031+
text := p.Text
1032+
if p.ElemType == "table" {
1033+
text = fmt.Sprintf("[table %dx%d] %s", p.TableRows, p.TableCols, text)
1034+
}
1035+
if _, err := fmt.Fprintf(os.Stdout, "[%d] %s\n", p.Num, text); err != nil {
1036+
return err
1037+
}
1038+
}
1039+
return nil
1040+
}
1041+
9401042
type DocsFindReplaceCmd struct {
9411043
DocID string `arg:"" name:"docId" help:"Doc ID"`
9421044
Find string `arg:"" name:"find" help:"Text to find"`

internal/cmd/docs_paragraphs.go

Lines changed: 209 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,209 @@
1+
package cmd
2+
3+
import (
4+
"context"
5+
"errors"
6+
"fmt"
7+
"strings"
8+
9+
"google.golang.org/api/docs/v1"
10+
)
11+
12+
// docParagraph is one numbered entry of a document's structure: a paragraph,
// a table, or a table of contents.
type docParagraph struct {
	// Num is the 1-based position of the entry within the paragraph map.
	Num int `json:"num"`
	// StartIndex and EndIndex are copied from the structural element.
	StartIndex int64 `json:"startIndex"`
	EndIndex   int64 `json:"endIndex"`
	// Type is the paragraph's named style type, or "TABLE" /
	// "TABLE_OF_CONTENTS" for the non-paragraph entries.
	Type string `json:"type"`
	// IsBullet reports whether the paragraph carries bullet information;
	// NestLevel is the bullet's nesting level.
	IsBullet  bool `json:"bullet"`
	NestLevel int  `json:"nestLevel,omitempty"`
	// Text is the plain-text content, or a preview for tables.
	Text string `json:"text"`
	// ElemType is the element kind; buildParagraphMap emits "paragraph",
	// "table", and "toc".
	ElemType string `json:"elemType"`
	// TableRows and TableCols are only populated for table entries.
	TableRows int `json:"tableRows,omitempty"`
	TableCols int `json:"tableCols,omitempty"`
}
25+
26+
// paragraphMap holds the structured view of a Google Doc's content.
27+
type paragraphMap struct {
28+
DocumentID string `json:"documentId"`
29+
RevisionID string `json:"revisionId"`
30+
TabID string `json:"tab,omitempty"`
31+
Paragraphs []docParagraph `json:"paragraphs"`
32+
}
33+
34+
// buildParagraphMap traverses the document body and numbers each paragraph
35+
// and table sequentially (1-based). The initial SectionBreak at index 0 is
36+
// skipped as it is not user-editable.
37+
func buildParagraphMap(doc *docs.Document, tabID string) (*paragraphMap, error) {
38+
if doc == nil {
39+
return nil, fmt.Errorf("nil document")
40+
}
41+
42+
var content []*docs.StructuralElement
43+
var revisionID string
44+
45+
if tabID != "" && len(doc.Tabs) > 0 {
46+
tabs := flattenTabs(doc.Tabs)
47+
tab := findTab(tabs, tabID)
48+
if tab == nil {
49+
return nil, fmt.Errorf("tab not found: %s", tabID)
50+
}
51+
if tab.DocumentTab == nil || tab.DocumentTab.Body == nil {
52+
return nil, fmt.Errorf("tab has no content: %s", tabID)
53+
}
54+
content = tab.DocumentTab.Body.Content
55+
if tab.TabProperties != nil {
56+
tabID = tab.TabProperties.TabId
57+
}
58+
} else {
59+
if doc.Body == nil {
60+
return nil, fmt.Errorf("document has no body")
61+
}
62+
content = doc.Body.Content
63+
}
64+
revisionID = doc.RevisionId
65+
66+
pm := &paragraphMap{
67+
DocumentID: doc.DocumentId,
68+
RevisionID: revisionID,
69+
TabID: tabID,
70+
}
71+
72+
num := 0
73+
for _, el := range content {
74+
if el == nil {
75+
continue
76+
}
77+
78+
switch {
79+
case el.SectionBreak != nil:
80+
// Skip section breaks — not user-editable.
81+
continue
82+
83+
case el.Paragraph != nil:
84+
num++
85+
dp := docParagraph{
86+
Num: num,
87+
StartIndex: el.StartIndex,
88+
EndIndex: el.EndIndex,
89+
ElemType: "paragraph",
90+
Text: paragraphText(el.Paragraph),
91+
}
92+
93+
// Extract named style type.
94+
if el.Paragraph.ParagraphStyle != nil {
95+
dp.Type = el.Paragraph.ParagraphStyle.NamedStyleType
96+
}
97+
if dp.Type == "" {
98+
dp.Type = "NORMAL_TEXT"
99+
}
100+
101+
// Extract bullet info.
102+
if el.Paragraph.Bullet != nil {
103+
dp.IsBullet = true
104+
dp.NestLevel = int(el.Paragraph.Bullet.NestingLevel)
105+
}
106+
107+
pm.Paragraphs = append(pm.Paragraphs, dp)
108+
109+
case el.Table != nil:
110+
num++
111+
rows := len(el.Table.TableRows)
112+
cols := 0
113+
if rows > 0 && len(el.Table.TableRows[0].TableCells) > 0 {
114+
cols = len(el.Table.TableRows[0].TableCells)
115+
}
116+
dp := docParagraph{
117+
Num: num,
118+
StartIndex: el.StartIndex,
119+
EndIndex: el.EndIndex,
120+
Type: "TABLE",
121+
ElemType: "table",
122+
Text: tablePreviewText(el.Table),
123+
TableRows: rows,
124+
TableCols: cols,
125+
}
126+
pm.Paragraphs = append(pm.Paragraphs, dp)
127+
128+
case el.TableOfContents != nil:
129+
num++
130+
dp := docParagraph{
131+
Num: num,
132+
StartIndex: el.StartIndex,
133+
EndIndex: el.EndIndex,
134+
Type: "TABLE_OF_CONTENTS",
135+
ElemType: "toc",
136+
Text: "[table of contents]",
137+
}
138+
pm.Paragraphs = append(pm.Paragraphs, dp)
139+
}
140+
}
141+
142+
return pm, nil
143+
}
144+
145+
// paragraphText extracts the plain text from a Paragraph element.
146+
func paragraphText(p *docs.Paragraph) string {
147+
if p == nil {
148+
return ""
149+
}
150+
var sb strings.Builder
151+
for _, elem := range p.Elements {
152+
if elem.TextRun != nil {
153+
sb.WriteString(elem.TextRun.Content)
154+
}
155+
}
156+
// Trim the trailing newline that Google Docs adds to every paragraph.
157+
return strings.TrimRight(sb.String(), "\n")
158+
}
159+
160+
// get returns the paragraph at the given 1-based number.
161+
func (pm *paragraphMap) get(num int) (*docParagraph, error) {
162+
if num < 1 || num > len(pm.Paragraphs) {
163+
return nil, fmt.Errorf("paragraph %d out of range (document has %d paragraphs)", num, len(pm.Paragraphs))
164+
}
165+
return &pm.Paragraphs[num-1], nil
166+
}
167+
168+
// fetchAndBuildMap fetches the document and builds a paragraph map.
169+
func fetchAndBuildMap(ctx context.Context, svc *docs.Service, docID, tabID string) (*paragraphMap, error) {
170+
getCall := svc.Documents.Get(docID)
171+
if tabID != "" {
172+
getCall = getCall.IncludeTabsContent(true)
173+
}
174+
doc, err := getCall.Context(ctx).Do()
175+
if err != nil {
176+
if isDocsNotFound(err) {
177+
return nil, fmt.Errorf("doc not found or not a Google Doc (id=%s)", docID)
178+
}
179+
return nil, err
180+
}
181+
if doc == nil {
182+
return nil, errors.New("doc not found")
183+
}
184+
185+
return buildParagraphMap(doc, tabID)
186+
}
187+
188+
// tablePreviewText returns a short preview of the table content.
189+
func tablePreviewText(t *docs.Table) string {
190+
if t == nil || len(t.TableRows) == 0 {
191+
return "[empty table]"
192+
}
193+
// Show first row cells as a preview.
194+
var cells []string
195+
for _, cell := range t.TableRows[0].TableCells {
196+
var text strings.Builder
197+
for _, el := range cell.Content {
198+
if el.Paragraph != nil {
199+
text.WriteString(paragraphText(el.Paragraph))
200+
}
201+
}
202+
cells = append(cells, strings.TrimSpace(text.String()))
203+
}
204+
preview := strings.Join(cells, " | ")
205+
if len(preview) > 60 {
206+
preview = preview[:57] + "..."
207+
}
208+
return preview
209+
}

0 commit comments

Comments
 (0)