@@ -2,6 +2,7 @@ package cmd
22
33import (
44 "bytes"
5+ "errors"
56 "fmt"
67 "image"
78 "image/color"
@@ -25,6 +26,7 @@ import (
2526
2627 md "github.com/JohannesKaufmann/html-to-markdown"
2728 mdplug "github.com/JohannesKaufmann/html-to-markdown/plugin"
29+ "github.com/emersion/go-message"
2830 "github.com/spf13/cobra"
2931 "golang.org/x/crypto/ssh/terminal"
3032
3739 noPretty bool
3840 noReadability bool
3941 noCycleTLS bool
42+ isEML bool
43+ rawOut bool
4044 imageMode string
4145 terminalWidth int
4246 validImageModes = []string {"none" , "ansi" , "ansi-dither" , "kitty" , "sixel" }
@@ -47,8 +51,10 @@ type InlineImage struct {
4751 Title string
4852}
4953
50- var mdImgRegex = regexp .MustCompile (`(?m)\[{0,1}!\[(:?\]\(.*\)){0,1}(.*)\]\((.+)\)` )
51- var mdImgPlaceholderRegex = regexp .MustCompile (`(?m)\$\$\$([0-9]*)\$` )
54+ var (
55+ mdImgRegex = regexp .MustCompile (`(?m)\[{0,1}!\[(:?\]\(.*\)){0,1}(.*)\]\((.+)\)` )
56+ mdImgPlaceholderRegex = regexp .MustCompile (`(?m)\$\$\$([0-9]*)\$` )
57+ )
5258
5359func MakeReadable (rawUrl * string , logger * zap.Logger , cycleTLS bool ) (string , string , error ) {
5460 var crwlr * crawler.Crawler = crawler .New (logger )
@@ -73,7 +79,93 @@ func MakeReadable(rawUrl *string, logger *zap.Logger, cycleTLS bool) (string, st
7379 return article .Title , article .ContentHtml , nil
7480}
7581
76- func HTMLtoMarkdown (html * string ) (string , error ) {
// ioReaderToString drains r until EOF and returns everything that was read
// as a string.
//
// If r reports any error other than io.EOF, the partial data is discarded
// and an empty string is returned together with that error.
func ioReaderToString(r io.Reader) (string, error) {
	var text strings.Builder

	// io.Copy treats io.EOF as normal termination and picks its own buffer
	// size, so there is no need for a hand-rolled loop over a tiny scratch
	// buffer.
	if _, err := io.Copy(&text, r); err != nil {
		return "", err
	}

	return text.String(), nil
}
100+
101+ func EMLToMarkdown (eml * string , rawOutput bool ) (string , error ) {
102+ m , err := message .Read (strings .NewReader (* eml ))
103+ if message .IsUnknownCharset (err ) {
104+ // TODO
105+ } else if err != nil {
106+ return "" , err
107+ }
108+
109+ var txt string = ""
110+ if mr := m .MultipartReader (); mr != nil {
111+ var noCT bool = false
112+
113+ for {
114+ p , err := mr .NextPart ()
115+ if err == io .EOF {
116+ break
117+ } else if err != nil {
118+ return "" , err
119+ }
120+
121+ t , _ , _ := p .Header .ContentType ()
122+ if t != "text/html" {
123+ noCT = true
124+ continue
125+ }
126+ if txt , err = ioReaderToString (p .Body ); err != nil {
127+ return "" , err
128+ }
129+ noCT = false
130+ break
131+ }
132+ if noCT {
133+ return "" , errors .New (
134+ fmt .Sprintf ("Expected text/html content type, found others\n " ),
135+ )
136+ }
137+ } else {
138+ t , _ , _ := m .Header .ContentType ()
139+ if t != "text/html" {
140+ return "" , errors .New (
141+ fmt .Sprintf ("Expected text/html content type, found: %s\n " , t ),
142+ )
143+ }
144+ if txt , err = ioReaderToString (m .Body ); err != nil {
145+ return "" , err
146+ }
147+ }
148+
149+ if rawOutput {
150+ return txt , nil
151+ }
152+
153+ converter := md .NewConverter ("" , true , nil )
154+ converter .Use (mdplug .GitHubFlavored ())
155+
156+ markdown , err := converter .ConvertString (txt )
157+ if err != nil {
158+ return "" , err
159+ }
160+
161+ return markdown , nil
162+ }
163+
164+ func HTMLtoMarkdown (html * string , rawOutput bool ) (string , error ) {
165+ if rawOutput {
166+ return * html , nil
167+ }
168+
77169 converter := md .NewConverter ("" , true , nil )
78170 converter .Use (mdplug .GitHubFlavored ())
79171
@@ -111,7 +203,6 @@ func RenderImg(md string) (string, []InlineImage, error) {
111203}
112204
113205func renderImage (img image.Image , imgTitle string , mode string , width int ) (string , error ) {
114-
115206 switch mode {
116207 case "sixel" :
117208 var b bytes.Buffer
@@ -150,16 +241,14 @@ func renderImage(img image.Image, imgTitle string, mode string, width int) (stri
150241}
151242
152243func RenderMarkdown (title , markdown string , images []InlineImage , width int ) (string , error ) {
153-
154244 renderer , _ := glamour .NewTermRenderer (
155245 glamour .WithEnvironmentConfig (),
156246 glamour .WithWordWrap (width ),
157247 )
158248
159- output , err :=
160- renderer .Render (
161- fmt .Sprintf ("# %s\n \n %s" , title , markdown ),
162- )
249+ output , err := renderer .Render (
250+ fmt .Sprintf ("# %s\n \n %s" , title , markdown ),
251+ )
163252 if err != nil {
164253 output = fmt .Sprintf ("%v" , err )
165254 } else {
@@ -212,7 +301,7 @@ func RenderMarkdown(title, markdown string, images []InlineImage, width int) (st
212301}
213302
214303var rootCmd = & cobra.Command {
215- Use : "reader <url/file/->" ,
304+ Use : "reader < url/file/- >" ,
216305 Short : "Reader is a command line web reader" ,
217306 Long : "A minimal command line reader offering better readability of web " +
218307 "pages on the CLI. [https://github.com/mrusme/reader]" ,
@@ -250,18 +339,30 @@ var rootCmd = &cobra.Command{
250339 }
251340 }
252341
342+ if isEML {
343+ noReadability = true
344+ }
253345 title , content , err := MakeReadable (& rawUrl , logger , ! noCycleTLS )
254346 if err != nil {
255347 fmt .Fprintln (os .Stderr , err )
256348 os .Exit (1 )
257349 }
258350
259- markdown , err := HTMLtoMarkdown (& content )
351+ var markdown string = ""
352+ if isEML {
353+ markdown , err = EMLToMarkdown (& content , rawOut )
354+ } else {
355+ markdown , err = HTMLtoMarkdown (& content , rawOut )
356+ }
260357 if err != nil {
261358 fmt .Fprintln (os .Stderr , err )
262359 os .Exit (1 )
263360 }
264361
362+ if rawOut == true {
363+ fmt .Print (markdown )
364+ os .Exit (0 )
365+ }
265366 if noPretty == true {
266367 fmt .Printf ("# %s\n \n " , title )
267368 fmt .Print (markdown )
@@ -301,6 +402,18 @@ func Execute() {
301402 false ,
302403 "disable use of CycleTLS" ,
303404 )
405+ rootCmd .Flags ().BoolVar (
406+ & isEML ,
407+ "eml" ,
408+ false ,
409+ "input is EML (email) format" ,
410+ )
411+ rootCmd .Flags ().BoolVar (
412+ & rawOut ,
413+ "raw" ,
414+ false ,
415+ "output raw text" ,
416+ )
304417 rootCmd .Flags ().BoolVarP (
305418 & verbose ,
306419 "verbose" ,
0 commit comments