Skip to content

Commit 93c7aba

Browse files
committed
Uma funcionalidade importante que pode ser feita agora está relacionada às notícias: ao listar, (vibe-kanban ba018684)
pode ser interessante que "Enter" abra no browser, mas que Ctrl+Enter faça o crawler e leia diretamente na assistente. Faça com consistência.
1 parent e121ecf commit 93c7aba

File tree

4 files changed

+283
-16
lines changed

4 files changed

+283
-16
lines changed

src/index.ts

Lines changed: 23 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ import { loadPlugins, pluginsToTools, formatPluginList, getPluginDir } from './p
2020
import { formatApprovalPrompt, formatEditDiff } from './approval'
2121
import { extractImages, extractFiles } from './images'
2222
import { openApp, openFile, openUrl, getRunningApps, getSystemInfo, getDateTimeInfo, getOutlookEvents, getKnownApps } from './windows'
23-
import { fetchNews, fetchNewsItems, getNewsCategories, initNews, addNewsFeed, removeNewsFeed, disableNewsFeed, enableNewsFeed, listNewsFeeds, type NewsCategory, type NewsItem } from './news'
23+
import { fetchNews, fetchNewsItems, fetchNewsContent, getNewsCategories, initNews, addNewsFeed, removeNewsFeed, disableNewsFeed, enableNewsFeed, listNewsFeeds, type NewsCategory, type NewsItem } from './news'
2424
import { generateBriefing, getTimeContext, type TimeContext, type PersonaMode } from './briefing'
2525
import { initTasks, stopTasks, addTask, completeTask, removeTask, listTasks, formatTaskList, parseTime, type Task } from './tasks'
2626
import { initPeople, addPerson, findPerson, listPeople, logInteraction, delegateTask, getDelegations, getPendingFollowUps, markFollowUpDone, formatPeopleList, formatPersonDetail, formatDelegationList, formatFollowUps, generatePeopleDashboard, type PersonGroup, type InteractionType } from './people'
@@ -1087,11 +1087,28 @@ async function runInteractive(
10871087
: '',
10881088
}))
10891089

1090-
const selectedLink = await tui.promptNewsPicker(pickerEntries)
1091-
if (selectedLink) {
1092-
const { openUrl } = await import('./windows')
1093-
openUrl(selectedLink)
1094-
tui.showSystem(`Abrindo: ${selectedLink}`)
1090+
const result = await tui.promptNewsPicker(pickerEntries)
1091+
if (result) {
1092+
if (result.action === 'open') {
1093+
// Open in browser
1094+
const { openUrl } = await import('./windows')
1095+
openUrl(result.link)
1096+
tui.showSystem(`Abrindo: ${result.link}`)
1097+
} else if (result.action === 'read') {
1098+
// Fetch and display content
1099+
tui.showSystem(`Buscando conteudo...`)
1100+
const content = await fetchNewsContent(result.link)
1101+
if (typeof content === 'string') {
1102+
tui.showError(content)
1103+
} else {
1104+
// Send content to assistant for summarization
1105+
const newsContext = `Noticia: ${content.title}\nFonte: ${result.link}\n\n${content.content}`
1106+
const prompt = `Por favor, resuma esta noticia de forma objetiva e destaque os pontos principais:\n\n${newsContext}`
1107+
tui.enableInput()
1108+
handleSubmit(prompt)
1109+
return
1110+
}
1111+
}
10951112
}
10961113
} catch (err) {
10971114
tui.showError(`Falha ao buscar noticias: ${err instanceof Error ? err.message : String(err)}`)

src/news.ts

Lines changed: 203 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -584,3 +584,206 @@ export function getNewsCategories(): string {
584584
const categories = [...new Set(active.map((f) => f.category))].sort()
585585
return `Categorias: ${categories.join(', ')}\nUso: /news [categoria]`
586586
}
587+
588+
// ─── News Content Fetcher ───────────────────────────────────
589+
590+
const CONTENT_FETCH_TIMEOUT_MS = 15_000
const MAX_CONTENT_BYTES = 5 * 1024 * 1024 // 5 MB

/**
 * Fetch a news article page and extract its readable content.
 *
 * Only `http://` and `https://` URLs are accepted. The request headers and
 * the body download share a single CONTENT_FETCH_TIMEOUT_MS deadline, and the
 * body is capped at MAX_CONTENT_BYTES.
 *
 * @param url - Absolute URL of the article.
 * @returns `{ title, content }` on success, or a string starting with
 *   `Error:` describing the failure. Fetch and parsing failures are reported
 *   through the return value instead of being thrown.
 */
export async function fetchNewsContent(url: string): Promise<{ title: string; content: string } | string> {
  // Validate URL
  if (!url.startsWith('http://') && !url.startsWith('https://')) {
    return 'Error: URL invalida'
  }

  const controller = new AbortController()
  const timeout = setTimeout(() => controller.abort(), CONTENT_FETCH_TIMEOUT_MS)

  try {
    const resp = await fetch(url, {
      signal: controller.signal,
      headers: {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Language': 'pt-BR,pt;q=0.9,en;q=0.8',
      },
    })

    if (!resp.ok) {
      // Release the unread body so the connection is not left dangling.
      void resp.body?.cancel().catch(() => {})
      return `Error: HTTP ${resp.status}`
    }

    // Cheap early rejection when the server announces an oversized body.
    const contentLength = resp.headers.get('content-length')
    if (contentLength && Number(contentLength) > MAX_CONTENT_BYTES) {
      void resp.body?.cancel().catch(() => {})
      return 'Error: pagina muito grande'
    }

    // Read body with size cap (content-length may be absent or wrong).
    const reader = resp.body?.getReader()
    if (!reader) {
      return 'Error: sem corpo de resposta'
    }

    const chunks: Uint8Array[] = []
    let totalBytes = 0

    while (true) {
      // The abort timer is still armed here, so a stalled body rejects
      // with AbortError instead of hanging forever.
      const { done, value } = await reader.read()
      if (done) break
      totalBytes += value.byteLength
      if (totalBytes > MAX_CONTENT_BYTES) {
        // Swallow cancel failures: the size error below is what matters.
        void reader.cancel().catch(() => {})
        return 'Error: pagina muito grande'
      }
      chunks.push(value)
    }

    const raw = Buffer.concat(chunks)
    const contentType = resp.headers.get('content-type')
    const html = decodeHtml(raw, contentType)

    // Extract article content
    return extractArticleContent(html)
  } catch (err) {
    if (err instanceof Error) {
      if (err.name === 'AbortError') {
        return 'Error: timeout ao buscar pagina'
      }
      return `Error: ${err.message.slice(0, 100)}`
    }
    return 'Error: falha ao buscar pagina'
  } finally {
    // Single cleanup point: covers success, early returns and failures.
    clearTimeout(timeout)
  }
}
666+
667+
/**
 * Turn raw response bytes into a string.
 *
 * Tries the encoding reported by detectHtmlEncoding first, then `latin1`;
 * if both decoders fail, falls back to non-fatal UTF-8.
 */
function decodeHtml(raw: Buffer, contentType: string | null): string {
  const labels = [detectHtmlEncoding(raw, contentType), 'latin1']

  for (const label of labels) {
    try {
      return new TextDecoder(label).decode(raw)
    } catch {
      // Unsupported label or decode failure: try the next candidate.
    }
  }

  return new TextDecoder('utf-8', { fatal: false }).decode(raw)
}
682+
683+
/**
 * Work out which charset a page declares.
 *
 * Precedence: the `charset` parameter of the Content-Type header, then a
 * `<meta ... charset=...>` tag, then an `<?xml ... encoding="..."?>`
 * declaration. The markup checks only look at the first 2048 bytes.
 * Returns 'utf-8' when nothing is declared.
 */
function detectHtmlEncoding(raw: Buffer, contentType: string | null): string {
  // 1) HTTP Content-Type header
  const fromHeader = contentType
    ? /charset\s*=\s*["']?([^\s;"']+)/i.exec(contentType)
    : null
  if (fromHeader) return normalizeEncoding(fromHeader[1])

  // 2) and 3) declarations inside the document, checked in order
  const sniffed = raw.subarray(0, 2048).toString('ascii')
  const inDocumentPatterns = [
    /<meta[^>]+charset\s*=\s*["']?([^"'\s>]+)/i,
    /<\?xml[^?]+encoding\s*=\s*["']([^"']+)["']/i,
  ]
  for (const pattern of inDocumentPatterns) {
    const found = pattern.exec(sniffed)
    if (found) return normalizeEncoding(found[1])
  }

  return 'utf-8'
}
704+
705+
/**
 * Extract a title and readable body text from an HTML page using regex
 * heuristics (no DOM parser).
 *
 * Title: first of `<title>`, `<h1>`, or `og:title`; 'Sem titulo' otherwise.
 *
 * Body: container candidates are tried in order — `<article>`, well-known
 * content `<div>` classes, `itemprop="articleBody"`, `<main>`. A candidate
 * is accepted only when it yields non-empty text; otherwise the next one is
 * tried. If none yields text, every `<p>` in the page longer than 50
 * characters is used.
 *
 * @param html - Decoded HTML source of the page.
 * @returns The title and body text; `content` is a fixed fallback message
 *   when nothing could be extracted.
 */
function extractArticleContent(html: string): { title: string; content: string } {
  // Extract title
  const titleMatch = html.match(/<title[^>]*>([^<]+)<\/title>/i)
    || html.match(/<h1[^>]*>([^<]+)<\/h1>/i)
    || html.match(/<meta[^>]+property="og:title"[^>]+content="([^"]+)"/i)
  const title = titleMatch ? cleanHtml(titleMatch[1]) : 'Sem titulo'

  // Container candidates, most specific first.
  const containerPatterns = [
    /<article[^>]*>([\s\S]*?)<\/article>/i,
    /<div[^>]+class="[^"]*(?:article-body|post-content|entry-content|story-body|content-body|article-content|news-content|materia-corpo)[^"]*"[^>]*>([\s\S]*?)<\/div>/i,
    /<div[^>]+itemprop="articleBody"[^>]*>([\s\S]*?)<\/div>/i,
    /<main[^>]*>([\s\S]*?)<\/main>/i,
  ]

  let body = ''
  for (const pattern of containerPatterns) {
    const match = html.match(pattern)
    if (!match) continue
    // A matching container can still be empty (e.g. a teaser card);
    // keep looking instead of giving up on the first hit.
    body = extractParagraphs(match[1]).trim()
    if (body) break
  }

  // Last resort: every paragraph on the page.
  if (!body) {
    const paragraphs: string[] = []
    const pRegex = /<p[^>]*>([\s\S]*?)<\/p>/gi
    let pMatch: RegExpExecArray | null
    while ((pMatch = pRegex.exec(html)) !== null) {
      const text = cleanHtml(pMatch[1]).trim()
      // Filter out short paragraphs (likely navigation, ads)
      if (text.length > 50) {
        paragraphs.push(text)
      }
    }
    body = paragraphs.join('\n\n').trim()
  }

  const content = body || 'Nao foi possivel extrair o conteudo do artigo.'

  return { title, content }
}
764+
765+
/**
 * Collect the text of every `<p>` element in an HTML fragment.
 *
 * Paragraphs whose cleaned text is 20 characters or shorter are dropped.
 * When no paragraph survives, the whole fragment is cleaned and returned if
 * it is longer than 50 characters; otherwise the result is an empty string.
 * Surviving paragraphs are joined with a blank line.
 */
function extractParagraphs(html: string): string {
  const kept = Array.from(
    html.matchAll(/<p[^>]*>([\s\S]*?)<\/p>/gi),
    (hit) => cleanHtml(hit[1]).trim(),
  ).filter((text) => text.length > 20) // Skip very short paragraphs

  // No usable <p> elements: fall back to the fragment's plain text.
  if (kept.length === 0) {
    const wholeText = cleanHtml(html).trim()
    if (wholeText.length > 50) {
      return wholeText
    }
  }

  return kept.join('\n\n')
}

src/tui.ts

Lines changed: 24 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -716,18 +716,20 @@ export class TUI {
716716
// ── News Picker ────────────────────────────────────────────
717717

718718
/**
719-
* Interactive news picker. Navigate with W/S or arrows, Enter to open in browser.
720-
* Returns the selected item's link or null if cancelled.
719+
* Interactive news picker. Navigate with W/S or arrows.
720+
* - Enter: open in browser
721+
* - Ctrl+Enter: fetch and read content in assistant
722+
* Returns NewsPickerResult or null if cancelled.
721723
*/
722724
promptNewsPicker(
723725
items: NewsPickerEntry[],
724-
): Promise<string | null> {
726+
): Promise<NewsPickerResult | null> {
725727
if (items.length === 0) {
726728
this.showSystem('Nenhuma noticia encontrada.')
727729
return Promise.resolve(null)
728730
}
729731

730-
return new Promise<string | null>((resolve) => {
732+
return new Promise<NewsPickerResult | null>((resolve) => {
731733
this.pickerActive = true
732734
let cursor = 0
733735
let filter = ''
@@ -832,10 +834,10 @@ export class TUI {
832834
const hintRow = this.height - footerH
833835
w(A.to(hintRow, 1))
834836
w(A.clearLine)
835-
w(` ${A.dim}W/S or \u2191\u2193 navigate Enter open Esc cancel / filter Tab category${A.reset}`)
837+
w(` ${A.dim}↑↓ navegar Enter abrir Ctrl+Enter ler aqui Esc cancelar / filtrar Tab categoria${A.reset}`)
836838
}
837839

838-
const cleanup = (result: string | null): void => {
840+
const cleanup = (result: NewsPickerResult | null): void => {
839841
this.pickerActive = false
840842
process.stdin.removeListener('data', handler)
841843
this.renderAll()
@@ -846,7 +848,7 @@ export class TUI {
846848
const key = data.toString('utf-8')
847849
const list = filtered()
848850

849-
// Esc
851+
// Esc (bare escape, not part of sequence)
850852
if (key === '\x1b' && data.length === 1) {
851853
if (filterMode) {
852854
filterMode = false
@@ -864,10 +866,19 @@ export class TUI {
864866
return
865867
}
866868

867-
// Enter — open link
869+
// Ctrl+Enter (Ctrl+J = \x0a or some terminals send \x1b\r or similar)
870+
// Common Ctrl+Enter sequences: \x0a (Ctrl+J), \x1b\r, \x1bOM
871+
if (key === '\x0a' || key === '\x1b\r' || key === '\x1bOM') {
872+
if (list.length > 0 && cursor < list.length) {
873+
cleanup({ action: 'read', link: list[cursor].link })
874+
}
875+
return
876+
}
877+
878+
// Regular Enter — open in browser
868879
if (key === '\r' || key === '\n') {
869880
if (list.length > 0 && cursor < list.length) {
870-
cleanup(list[cursor].link)
881+
cleanup({ action: 'open', link: list[cursor].link })
871882
}
872883
return
873884
}
@@ -1961,6 +1972,10 @@ export interface NewsPickerEntry {
19611972
time: string // formatted time string e.g. "21:30"
19621973
}
19631974

1975+
/**
 * What the user chose in the news picker, tagged by `action`:
 * - 'open': Enter — open the link in the browser.
 * - 'read': Ctrl+Enter — fetch the article and read its content.
 */
export type NewsPickerResult =
  | { action: 'open'; link: string }
  | { action: 'read'; link: string }
1978+
19641979
function formatPickerAge(timestamp: number): string {
19651980
const diff = Date.now() - timestamp
19661981
const secs = Math.floor(diff / 1000)

tests/news.test.ts

Lines changed: 33 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ import { join } from 'node:path'
44
import { tmpdir } from 'node:os'
55
import {
66
getNewsCategories, initNews, addNewsFeed, removeNewsFeed,
7-
disableNewsFeed, enableNewsFeed, listNewsFeeds,
7+
disableNewsFeed, enableNewsFeed, listNewsFeeds, fetchNewsContent,
88
} from '../src/news'
99

1010
const TEST_DIR = join(tmpdir(), `smolerclaw-news-test-${Date.now()}`)
@@ -191,3 +191,35 @@ describe('news — feed management', () => {
191191
expect(removeNewsFeed('myfeed')).toBe(true)
192192
})
193193
})
194+
195+
describe('news — fetchNewsContent', () => {
  // Failures are reported as strings, so each failure case checks the
  // type first and then the message fragment.
  const expectFailure = (outcome: unknown, fragment: string): void => {
    expect(typeof outcome).toBe('string')
    expect(outcome).toContain(fragment)
  }

  test('fetchNewsContent rejects invalid URL', async () => {
    expectFailure(await fetchNewsContent('ftp://example.com'), 'URL invalida')
  })

  test('fetchNewsContent rejects javascript: URL', async () => {
    expectFailure(await fetchNewsContent('javascript:alert(1)'), 'URL invalida')
  })

  test('fetchNewsContent handles unreachable host', async () => {
    expectFailure(
      await fetchNewsContent('https://this-domain-does-not-exist-12345.test/'),
      'Error',
    )
  })

  test('fetchNewsContent returns title and content for valid article', async () => {
    // Use a simple, stable public page for testing.
    // The request may fail offline, so an error string is also accepted.
    const outcome = await fetchNewsContent('https://example.com')
    if (typeof outcome !== 'string') {
      expect(outcome).toHaveProperty('title')
      expect(outcome).toHaveProperty('content')
      return
    }
    expect(outcome).toContain('Error')
  })
})

0 commit comments

Comments
 (0)