@@ -113,6 +113,7 @@ func (auto *Automaton) ToDoubleArray() *DaTokenizer {
113113 var atrans * edge
114114 var s , s1 int
115115 var t , t1 uint32
116+ var diff int
116117
117118 // Create a mapping from s (in Ms aka Intermediate FSA)
118119 // to t (in Mt aka Double Array FSA)
@@ -215,9 +216,10 @@ func (auto *Automaton) ToDoubleArray() *DaTokenizer {
215216 // Store a final transition
216217 dat .array [base + uint32 (dat .final )].setCheck (t )
217218
218- if dat .maxSize < int (base )+ dat .final {
219- dat .maxSize = int (base ) + dat .final
220- }
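219+ // Branchless max of dat.maxSize and base+final: diff>>31 is an all-ones mask when diff is negative (valid while |diff| < 2^31)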
220+ // see https://dev.to/jobinrjohnson/branchless-programming-does-it-really-matter-20j4
221+ diff = dat .maxSize - (int (base ) + dat .final )
222+ dat .maxSize -= (diff & (diff >> 31 ))
221223 }
222224 }
223225 }
@@ -461,6 +463,8 @@ func (dat *DaTokenizer) TransCount() int {
461463
462464 dat .transCount = 0
463465 for x := 1 ; x < len (dat .array ); x ++ {
466+
467+ // Hopefully the compiler turns this conditional increment into branchless code
464468 if dat .array [x ].getBase () != 0 {
465469 dat .transCount ++
466470 }
@@ -512,9 +516,12 @@ func (dat *DaTokenizer) WriteTo(w io.Writer) (n int64, err error) {
512516 max := 0
513517 for sym , num := range dat .sigma {
514518 sigmalist [num ] = sym
515- if num > max {
516- max = num
517- }
519+
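520+ // Branchless max of max and num: (max-num)>>31 is an all-ones mask when num > max (valid while |max-num| < 2^31)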
521+ max -= ((max - num ) & ((max - num ) >> 31 ))
522+ // if num > max {
523+ // max = num
524+ // }
518525 }
519526
520527 sigmalist = sigmalist [:max + 1 ]
@@ -852,9 +859,7 @@ PARSECHAR:
852859 // Better not repeatedly check for a!
853860 // Possibly keep a buffer with a.
854861 if int (char ) < 256 {
855- if int (char ) == EOT {
856- eot = true
857- }
862+ eot = int (char ) == EOT
858863 a = dat .sigmaASCII [int (char )]
859864 } else {
860865 a , ok = dat .sigma [char ]
@@ -933,6 +938,7 @@ PARSECHAR:
933938 // token and start blank at the root node of the automaton for the remaining data.
934939 // It may be beneficial to have something like a "drop()" event to capture these cases,
935940 // as they are likely the result of a bad automaton design.
941+ // Hopefully this is branchless code
936942 if buffc - bufft <= 0 {
937943 buffc ++
938944 if buffc == 0 {
@@ -953,9 +959,7 @@ PARSECHAR:
953959 log .Println ("-> Rewind buffer" , bufft , buffc , buffi , epsilonOffset )
954960 }
955961
956- for x , i := range buffer [buffc :buffi ] {
957- buffer [x ] = i
958- }
962+ copy (buffer [0 :], buffer [buffc :buffi ])
959963
960964 buffi -= buffc
961965 epsilonState = 0
@@ -986,6 +990,7 @@ PARSECHAR:
986990 buffc ++
987991
988992 // Transition does not produce a character
993+ // Hopefully this is branchless
989994 if buffc - bufft == 1 && ta .isNonToken () {
990995 if DEBUG {
991996 log .Println ("Nontoken forward" , showBufferNew (buffer , bufft , buffc , buffi ))
@@ -1028,10 +1033,7 @@ PARSECHAR:
10281033 }
10291034
10301035 // TODO: Better as a ring buffer
1031- // buffer = buffer[buffc:] !slower
1032- for x , i := range buffer [buffc :buffi ] {
1033- buffer [x ] = i
1034- }
1036+ copy (buffer [0 :], buffer [buffc :buffi ])
10351037
10361038 buffi -= buffc
10371039 // epsilonOffset -= buffo
0 commit comments