|
| 1 | +package cmd |
| 2 | + |
| 3 | +import ( |
| 4 | + "context" |
| 5 | + "errors" |
| 6 | + "fmt" |
| 7 | + "strings" |
| 8 | + |
| 9 | + "google.golang.org/api/docs/v1" |
| 10 | +) |
| 11 | + |
// docParagraph represents a single numbered element in a Google Doc's structure.
// Despite the name it is also used for tables and tables of contents; ElemType
// tells the kinds apart.
type docParagraph struct {
	// Num is the 1-based sequence number assigned by buildParagraphMap.
	Num int `json:"num"`
	// StartIndex and EndIndex are copied verbatim from the structural element.
	StartIndex int64 `json:"startIndex"`
	EndIndex   int64 `json:"endIndex"`
	// Type is the paragraph's named style type ("NORMAL_TEXT" when the element
	// carries none), or "TABLE" / "TABLE_OF_CONTENTS" for non-paragraph elements.
	Type string `json:"type"`
	// IsBullet is true when the paragraph has bullet information attached.
	IsBullet bool `json:"bullet"`
	// NestLevel is the bullet nesting level; it is only set for bulleted
	// paragraphs and is omitted from JSON when zero.
	NestLevel int `json:"nestLevel,omitempty"`
	// Text is the paragraph's plain text, a preview of the first row for
	// tables, or a fixed placeholder for a table of contents.
	Text string `json:"text"`
	// NOTE(review): buildParagraphMap in this file only emits the first three
	// values; "sectionBreak" is listed but section breaks are skipped there.
	ElemType string `json:"elemType"` // "paragraph", "table", "toc", "sectionBreak"
	// TableRows is the number of rows and TableCols the number of cells in the
	// first row; both are only populated for tables.
	TableRows int `json:"tableRows,omitempty"`
	TableCols int `json:"tableCols,omitempty"`
}
| 25 | + |
// paragraphMap holds the structured view of a Google Doc's content.
type paragraphMap struct {
	// DocumentID and RevisionID are copied from the fetched document.
	DocumentID string `json:"documentId"`
	RevisionID string `json:"revisionId"`
	// TabID identifies the tab the paragraphs were read from. It is serialized
	// under the key "tab" and omitted when empty.
	TabID string `json:"tab,omitempty"`
	// Paragraphs lists the numbered elements in document order, so the element
	// with Num == n lives at index n-1.
	Paragraphs []docParagraph `json:"paragraphs"`
}
| 33 | + |
| 34 | +// buildParagraphMap traverses the document body and numbers each paragraph |
| 35 | +// and table sequentially (1-based). The initial SectionBreak at index 0 is |
| 36 | +// skipped as it is not user-editable. |
| 37 | +func buildParagraphMap(doc *docs.Document, tabID string) (*paragraphMap, error) { |
| 38 | + if doc == nil { |
| 39 | + return nil, fmt.Errorf("nil document") |
| 40 | + } |
| 41 | + |
| 42 | + var content []*docs.StructuralElement |
| 43 | + var revisionID string |
| 44 | + |
| 45 | + if tabID != "" && len(doc.Tabs) > 0 { |
| 46 | + tabs := flattenTabs(doc.Tabs) |
| 47 | + tab := findTab(tabs, tabID) |
| 48 | + if tab == nil { |
| 49 | + return nil, fmt.Errorf("tab not found: %s", tabID) |
| 50 | + } |
| 51 | + if tab.DocumentTab == nil || tab.DocumentTab.Body == nil { |
| 52 | + return nil, fmt.Errorf("tab has no content: %s", tabID) |
| 53 | + } |
| 54 | + content = tab.DocumentTab.Body.Content |
| 55 | + if tab.TabProperties != nil { |
| 56 | + tabID = tab.TabProperties.TabId |
| 57 | + } |
| 58 | + } else { |
| 59 | + if doc.Body == nil { |
| 60 | + return nil, fmt.Errorf("document has no body") |
| 61 | + } |
| 62 | + content = doc.Body.Content |
| 63 | + } |
| 64 | + revisionID = doc.RevisionId |
| 65 | + |
| 66 | + pm := ¶graphMap{ |
| 67 | + DocumentID: doc.DocumentId, |
| 68 | + RevisionID: revisionID, |
| 69 | + TabID: tabID, |
| 70 | + } |
| 71 | + |
| 72 | + num := 0 |
| 73 | + for _, el := range content { |
| 74 | + if el == nil { |
| 75 | + continue |
| 76 | + } |
| 77 | + |
| 78 | + switch { |
| 79 | + case el.SectionBreak != nil: |
| 80 | + // Skip section breaks — not user-editable. |
| 81 | + continue |
| 82 | + |
| 83 | + case el.Paragraph != nil: |
| 84 | + num++ |
| 85 | + dp := docParagraph{ |
| 86 | + Num: num, |
| 87 | + StartIndex: el.StartIndex, |
| 88 | + EndIndex: el.EndIndex, |
| 89 | + ElemType: "paragraph", |
| 90 | + Text: paragraphText(el.Paragraph), |
| 91 | + } |
| 92 | + |
| 93 | + // Extract named style type. |
| 94 | + if el.Paragraph.ParagraphStyle != nil { |
| 95 | + dp.Type = el.Paragraph.ParagraphStyle.NamedStyleType |
| 96 | + } |
| 97 | + if dp.Type == "" { |
| 98 | + dp.Type = "NORMAL_TEXT" |
| 99 | + } |
| 100 | + |
| 101 | + // Extract bullet info. |
| 102 | + if el.Paragraph.Bullet != nil { |
| 103 | + dp.IsBullet = true |
| 104 | + dp.NestLevel = int(el.Paragraph.Bullet.NestingLevel) |
| 105 | + } |
| 106 | + |
| 107 | + pm.Paragraphs = append(pm.Paragraphs, dp) |
| 108 | + |
| 109 | + case el.Table != nil: |
| 110 | + num++ |
| 111 | + rows := len(el.Table.TableRows) |
| 112 | + cols := 0 |
| 113 | + if rows > 0 && len(el.Table.TableRows[0].TableCells) > 0 { |
| 114 | + cols = len(el.Table.TableRows[0].TableCells) |
| 115 | + } |
| 116 | + dp := docParagraph{ |
| 117 | + Num: num, |
| 118 | + StartIndex: el.StartIndex, |
| 119 | + EndIndex: el.EndIndex, |
| 120 | + Type: "TABLE", |
| 121 | + ElemType: "table", |
| 122 | + Text: tablePreviewText(el.Table), |
| 123 | + TableRows: rows, |
| 124 | + TableCols: cols, |
| 125 | + } |
| 126 | + pm.Paragraphs = append(pm.Paragraphs, dp) |
| 127 | + |
| 128 | + case el.TableOfContents != nil: |
| 129 | + num++ |
| 130 | + dp := docParagraph{ |
| 131 | + Num: num, |
| 132 | + StartIndex: el.StartIndex, |
| 133 | + EndIndex: el.EndIndex, |
| 134 | + Type: "TABLE_OF_CONTENTS", |
| 135 | + ElemType: "toc", |
| 136 | + Text: "[table of contents]", |
| 137 | + } |
| 138 | + pm.Paragraphs = append(pm.Paragraphs, dp) |
| 139 | + } |
| 140 | + } |
| 141 | + |
| 142 | + return pm, nil |
| 143 | +} |
| 144 | + |
| 145 | +// paragraphText extracts the plain text from a Paragraph element. |
| 146 | +func paragraphText(p *docs.Paragraph) string { |
| 147 | + if p == nil { |
| 148 | + return "" |
| 149 | + } |
| 150 | + var sb strings.Builder |
| 151 | + for _, elem := range p.Elements { |
| 152 | + if elem.TextRun != nil { |
| 153 | + sb.WriteString(elem.TextRun.Content) |
| 154 | + } |
| 155 | + } |
| 156 | + // Trim the trailing newline that Google Docs adds to every paragraph. |
| 157 | + return strings.TrimRight(sb.String(), "\n") |
| 158 | +} |
| 159 | + |
| 160 | +// get returns the paragraph at the given 1-based number. |
| 161 | +func (pm *paragraphMap) get(num int) (*docParagraph, error) { |
| 162 | + if num < 1 || num > len(pm.Paragraphs) { |
| 163 | + return nil, fmt.Errorf("paragraph %d out of range (document has %d paragraphs)", num, len(pm.Paragraphs)) |
| 164 | + } |
| 165 | + return &pm.Paragraphs[num-1], nil |
| 166 | +} |
| 167 | + |
| 168 | +// fetchAndBuildMap fetches the document and builds a paragraph map. |
| 169 | +func fetchAndBuildMap(ctx context.Context, svc *docs.Service, docID, tabID string) (*paragraphMap, error) { |
| 170 | + getCall := svc.Documents.Get(docID) |
| 171 | + if tabID != "" { |
| 172 | + getCall = getCall.IncludeTabsContent(true) |
| 173 | + } |
| 174 | + doc, err := getCall.Context(ctx).Do() |
| 175 | + if err != nil { |
| 176 | + if isDocsNotFound(err) { |
| 177 | + return nil, fmt.Errorf("doc not found or not a Google Doc (id=%s)", docID) |
| 178 | + } |
| 179 | + return nil, err |
| 180 | + } |
| 181 | + if doc == nil { |
| 182 | + return nil, errors.New("doc not found") |
| 183 | + } |
| 184 | + |
| 185 | + return buildParagraphMap(doc, tabID) |
| 186 | +} |
| 187 | + |
| 188 | +// tablePreviewText returns a short preview of the table content. |
| 189 | +func tablePreviewText(t *docs.Table) string { |
| 190 | + if t == nil || len(t.TableRows) == 0 { |
| 191 | + return "[empty table]" |
| 192 | + } |
| 193 | + // Show first row cells as a preview. |
| 194 | + var cells []string |
| 195 | + for _, cell := range t.TableRows[0].TableCells { |
| 196 | + var text strings.Builder |
| 197 | + for _, el := range cell.Content { |
| 198 | + if el.Paragraph != nil { |
| 199 | + text.WriteString(paragraphText(el.Paragraph)) |
| 200 | + } |
| 201 | + } |
| 202 | + cells = append(cells, strings.TrimSpace(text.String())) |
| 203 | + } |
| 204 | + preview := strings.Join(cells, " | ") |
| 205 | + if len(preview) > 60 { |
| 206 | + preview = preview[:57] + "..." |
| 207 | + } |
| 208 | + return preview |
| 209 | +} |
0 commit comments