1+ // Package lolhtml provides the ability to rewrite or parse HTML on the fly
2+ // with a CSS-selector based API.
3+ //
4+ // It is a binding for the Rust crate lol_html.
5+ // https://github.com/cloudflare/lol-html
16package lolhtml
27
38/*
@@ -27,29 +32,42 @@ import (
2732
// ErrCannotGetErrorMessage is a sentinel error value. Presumably it is reported
// when the error message cannot be retrieved from lol-html; the use site is not
// visible in this view, so confirm against getError.
2833var ErrCannotGetErrorMessage = errors .New ("cannot get error message from underlying lol-html lib" )
2934
35+ // RewriterDirective is returned by callback handlers to tell the rewriter whether to continue or stop parsing.
3036type RewriterDirective int
3137
3238const (
39+ // Continue lets the normal parsing process continue. It is the zero value (iota).
3340 Continue RewriterDirective = iota
41+
42+ // Stop stops the rewriter immediately. Content currently buffered is discarded, and an error is returned.
3443 Stop
3544)
3645
46+ // rewriterBuilder is used to build a rewriter.
3747type rewriterBuilder C.lol_html_rewriter_builder_t
3848
39- type Rewriter C.lol_html_rewriter_t
49+ // rewriter represents an actual HTML rewriter.
50+ // rewriterBuilder, rewriter and selector are kept private to simplify the public API.
51+ // If you would find it useful to have them exported, please let me know.
52+ type rewriter C.lol_html_rewriter_t
53+
54+ // selector represents a parsed CSS selector.
55+ type selector C.lol_html_selector_t
4056
// Doctype is the Go name for the C type lol_html_doctype_t; a *Doctype is what
// DoctypeHandlerFunc callbacks receive.
4157type Doctype C.lol_html_doctype_t
// DocumentEnd is the Go name for the C type lol_html_doc_end_t; a *DocumentEnd
// is what DocumentEndHandlerFunc callbacks receive.
4258type DocumentEnd C.lol_html_doc_end_t
// Comment is the Go name for the C type lol_html_comment_t.
4359type Comment C.lol_html_comment_t
// TextChunk is the Go name for the C type lol_html_text_chunk_t.
4460type TextChunk C.lol_html_text_chunk_t
// Element is the Go name for the C type lol_html_element_t; a *Element is what
// ElementHandlerFunc callbacks receive.
4561type Element C.lol_html_element_t
62+ // AttributeIterator cannot be iterated with "range" syntax. Use AttributeIterator.Next() instead.
4663type AttributeIterator C.lol_html_attributes_iterator_t
// Attribute is the Go name for the C type lol_html_attribute_t.
4764type Attribute C.lol_html_attribute_t
4865
49- type selector C.lol_html_selector_t
5066type str C.lol_html_str_t
67+ // textChunkContent does not need to be de-allocated manually.
5168type textChunkContent C.lol_html_text_chunk_content_t
5269
70+ // OutputSink takes each chunk of output as a byte slice.
5371type OutputSink func ([]byte )
5472
// DoctypeHandlerFunc is a callback that receives a *Doctype and returns a
// RewriterDirective.
5573type DoctypeHandlerFunc func (* Doctype ) RewriterDirective
@@ -58,10 +76,15 @@ type TextChunkHandlerFunc func(*TextChunk) RewriterDirective
// ElementHandlerFunc is a callback that receives a *Element and returns a
// RewriterDirective.
5876type ElementHandlerFunc func (* Element ) RewriterDirective
// DocumentEndHandlerFunc is a callback that receives a *DocumentEnd and returns
// a RewriterDirective.
5977type DocumentEndHandlerFunc func (* DocumentEnd ) RewriterDirective
6078
79+ // Config defines settings for the rewriter.
6180type Config struct {
81+ // Encoding defaults to "utf-8".
6282 Encoding string
83+ // Memory defaults to PreallocatedParsingBufferSize: 1024, MaxAllowedMemoryUsage: 1<<63 - 1.
6384 Memory * MemorySettings
85+ // Sink defaults to func([]byte) {}. In other words, the output is discarded entirely.
6486 Sink OutputSink
87+ // Strict defaults to true. If true, bail out for security reasons when ambiguous.
// NOTE(review): the zero value of a Go bool is false, so a caller-supplied
// Config that leaves Strict unset may not get the documented default; confirm
// how NewWriter applies it.
6588 Strict bool
6689}
6790
@@ -101,6 +124,7 @@ type Handlers struct {
101124 ElementContentHandler []ElementContentHandler
102125}
103126
127+ // RewriteString rewrites the given string with the provided Handlers and Config.
104128func RewriteString (s string , h * Handlers , config ... Config ) (string , error ) {
105129 var buf bytes.Buffer
106130 var w * Writer
@@ -130,9 +154,10 @@ func RewriteString(s string, h *Handlers, config ...Config) (string, error) {
130154
// Writer pairs a destination io.Writer with the underlying rewriter. It is the
// type returned by NewWriter.
131155type Writer struct {
// w is the destination io.Writer.
132156 w io.Writer
// r is the underlying lol-html rewriter.
133- r * Rewriter
157+ r * rewriter
134158}
135159
160+ // NewWriter returns a new Writer with Handlers and Config configured, writing to w.
136161func NewWriter (w io.Writer , handlers * Handlers , config ... Config ) (* Writer , error ) {
137162 var c Config
138163 var sink OutputSink
@@ -290,7 +315,7 @@ func (rb *rewriterBuilder) AddElementContentHandlers(
290315 )
291316}
292317
293- func (rb * rewriterBuilder ) Build (sink OutputSink , config Config ) (* Rewriter , error ) {
318+ func (rb * rewriterBuilder ) Build (sink OutputSink , config Config ) (* rewriter , error ) {
294319 encodingC := C .CString (config .Encoding )
295320 defer C .free (unsafe .Pointer (encodingC ))
296321 encodingLen := len (config .Encoding )
@@ -299,7 +324,7 @@ func (rb *rewriterBuilder) Build(sink OutputSink, config Config) (*Rewriter, err
299324 max_allowed_memory_usage : C .size_t (config .Memory .MaxAllowedMemoryUsage ),
300325 }
301326 p := pointer .Save (sink )
302- r := (* Rewriter )(C .lol_html_rewriter_build (
327+ r := (* rewriter )(C .lol_html_rewriter_build (
303328 (* C .lol_html_rewriter_builder_t )(rb ),
304329 encodingC ,
305330 C .size_t (encodingLen ),
@@ -314,7 +339,7 @@ func (rb *rewriterBuilder) Build(sink OutputSink, config Config) (*Rewriter, err
314339 return nil , getError ()
315340}
316341
317- func (r * Rewriter ) Write (p []byte ) (n int , err error ) {
342+ func (r * rewriter ) Write (p []byte ) (n int , err error ) {
318343 pLen := len (p )
319344 // avoid 0-sized array
320345 if pLen == 0 {
@@ -328,7 +353,7 @@ func (r *Rewriter) Write(p []byte) (n int, err error) {
328353 return 0 , getError ()
329354}
330355
331- func (r * Rewriter ) WriteString (chunk string ) (n int , err error ) {
356+ func (r * rewriter ) WriteString (chunk string ) (n int , err error ) {
332357 chunkC := C .CString (chunk )
333358 defer C .free (unsafe .Pointer (chunkC ))
334359 chunkLen := len (chunk )
@@ -339,15 +364,15 @@ func (r *Rewriter) WriteString(chunk string) (n int, err error) {
339364 return 0 , getError ()
340365}
341366
// End calls lol_html_rewriter_end on the underlying rewriter. It returns nil
// when that call reports 0, and otherwise the error produced by getError.
342- func (r * Rewriter ) End () error {
367+ func (r * rewriter ) End () error {
343368 errCode := C .lol_html_rewriter_end ((* C .lol_html_rewriter_t )(r ))
// A zero return code from the C call is treated as success.
344369 if errCode == 0 {
345370 return nil
346371 }
347372 return getError ()
348373}
349374
350- func (r * Rewriter ) Free () {
375+ func (r * rewriter ) Free () {
351376 if r != nil {
352377 C .lol_html_rewriter_free ((* C .lol_html_rewriter_t )(r ))
353378 }
@@ -392,7 +417,7 @@ func (c *Comment) SetText(text string) error {
392417 return getError ()
393418}
394419
395- func (c * Comment ) InsertBeforeAsRaw (content string ) error {
420+ func (c * Comment ) InsertBeforeAsText (content string ) error {
396421 contentC := C .CString (content )
397422 defer C .free (unsafe .Pointer (contentC ))
398423 contentLen := len (content )
@@ -414,7 +439,7 @@ func (c *Comment) InsertBeforeAsHtml(content string) error {
414439 return getError ()
415440}
416441
417- func (c * Comment ) InsertAfterAsRaw (content string ) error {
442+ func (c * Comment ) InsertAfterAsText (content string ) error {
418443 contentC := C .CString (content )
419444 defer C .free (unsafe .Pointer (contentC ))
420445 contentLen := len (content )
@@ -436,7 +461,7 @@ func (c *Comment) InsertAfterAsHtml(content string) error {
436461 return getError ()
437462}
438463
439- func (c * Comment ) ReplaceAsRaw (content string ) error {
464+ func (c * Comment ) ReplaceAsText (content string ) error {
440465 contentC := C .CString (content )
441466 defer C .free (unsafe .Pointer (contentC ))
442467 contentLen := len (content )
@@ -475,7 +500,7 @@ func (t *TextChunk) IsLastInTextNode() bool {
475500 return (bool )(C .lol_html_text_chunk_is_last_in_text_node ((* C .lol_html_text_chunk_t )(t )))
476501}
477502
478- func (t * TextChunk ) InsertBeforeAsRaw (content string ) error {
503+ func (t * TextChunk ) InsertBeforeAsText (content string ) error {
479504 contentC := C .CString (content )
480505 defer C .free (unsafe .Pointer (contentC ))
481506 contentLen := len (content )
@@ -497,7 +522,7 @@ func (t *TextChunk) InsertBeforeAsHtml(content string) error {
497522 return getError ()
498523}
499524
500- func (t * TextChunk ) InsertAfterAsRaw (content string ) error {
525+ func (t * TextChunk ) InsertAfterAsText (content string ) error {
501526 contentC := C .CString (content )
502527 defer C .free (unsafe .Pointer (contentC ))
503528 contentLen := len (content )
@@ -519,7 +544,7 @@ func (t *TextChunk) InsertAfterAsHtml(content string) error {
519544 return getError ()
520545}
521546
522- func (t * TextChunk ) ReplaceAsRaw (content string ) error {
547+ func (t * TextChunk ) ReplaceAsText (content string ) error {
523548 contentC := C .CString (content )
524549 defer C .free (unsafe .Pointer (contentC ))
525550 contentLen := len (content )
@@ -634,7 +659,7 @@ func (e *Element) RemoveAttribute(name string) error {
634659 return getError ()
635660}
636661
637- func (e * Element ) InsertBeforeStartTagAsRaw (content string ) error {
662+ func (e * Element ) InsertBeforeStartTagAsText (content string ) error {
638663 contentC := C .CString (content )
639664 defer C .free (unsafe .Pointer (contentC ))
640665 contentLen := len (content )
@@ -656,7 +681,7 @@ func (e *Element) InsertBeforeStartTagAsHtml(content string) error {
656681 return getError ()
657682}
658683
659- func (e * Element ) InsertAfterStartTagAsRaw (content string ) error {
684+ func (e * Element ) InsertAfterStartTagAsText (content string ) error {
660685 contentC := C .CString (content )
661686 defer C .free (unsafe .Pointer (contentC ))
662687 contentLen := len (content )
@@ -678,7 +703,7 @@ func (e *Element) InsertAfterStartTagAsHtml(content string) error {
678703 return getError ()
679704}
680705
681- func (e * Element ) InsertBeforeEndTagAsRaw (content string ) error {
706+ func (e * Element ) InsertBeforeEndTagAsText (content string ) error {
682707 contentC := C .CString (content )
683708 defer C .free (unsafe .Pointer (contentC ))
684709 contentLen := len (content )
@@ -700,7 +725,7 @@ func (e *Element) InsertBeforeEndTagAsHtml(content string) error {
700725 return getError ()
701726}
702727
703- func (e * Element ) InsertAfterEndTagAsRaw (content string ) error {
728+ func (e * Element ) InsertAfterEndTagAsText (content string ) error {
704729 contentC := C .CString (content )
705730 defer C .free (unsafe .Pointer (contentC ))
706731 contentLen := len (content )
@@ -722,7 +747,7 @@ func (e *Element) InsertAfterEndTagAsHtml(content string) error {
722747 return getError ()
723748}
724749
725- func (e * Element ) SetInnerContentAsRaw (content string ) error {
750+ func (e * Element ) SetInnerContentAsText (content string ) error {
726751 contentC := C .CString (content )
727752 defer C .free (unsafe .Pointer (contentC ))
728753 contentLen := len (content )
@@ -744,7 +769,7 @@ func (e *Element) SetInnerContentAsHtml(content string) error {
744769 return getError ()
745770}
746771
747- func (e * Element ) ReplaceAsRaw (content string ) error {
772+ func (e * Element ) ReplaceAsText (content string ) error {
748773 contentC := C .CString (content )
749774 defer C .free (unsafe .Pointer (contentC ))
750775 contentLen := len (content )
@@ -798,7 +823,7 @@ func (a *Attribute) Value() string {
798823 return strToGoString2 (valueC )
799824}
800825
801- func (d * DocumentEnd ) AppendAsRaw (content string ) error {
826+ func (d * DocumentEnd ) AppendAsText (content string ) error {
802827 contentC := C .CString (content )
803828 defer C .free (unsafe .Pointer (contentC ))
804829 contentLen := len (content )
0 commit comments