Skip to content

Commit b43bee1

Browse files
committed
[xml] Optimize character class BitSet creation
The measured gains are not as drastic as when making this change in a different scala-2.12 library, but creating one-length strings and comparing them to a regex, or concatenating ranges into a list just to convert that list into a BitSet, is more work than adding those ranges to a BitSet builder.
1 parent 22154e4 commit b43bee1

File tree

1 file changed

+34
-18
lines changed

1 file changed

+34
-18
lines changed

XmlParser/src/main/scala/XmlParser.scala

Lines changed: 34 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -25,28 +25,44 @@ private[xml] object XmlParser {
2525
final case class InterpolatedAttribute(data: Expr[Any]) extends ThingInAttributePosition
2626

2727

28-
val xmlNameFirstChar:BitSet = {
29-
import scala.Predef.intWrapper
30-
val pattern = java.util.regex.Pattern.compile("""[A-Z_a-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD\x{10000}-\x{EFFFF}]""")
28+
private val xmlNameFirstChar:BitSet = {
3129
val builder = BitSet.newBuilder
32-
(0 until 0x110000)
33-
.filter(x => pattern.matcher(new String(Array[Int](x), 0, 1)).matches)
34-
.foreach(builder.+= _)
35-
builder.result
30+
builder ++= 'A'.toInt to 'Z'
31+
builder += '_'
32+
builder ++= 'a'.toInt to 'z'
33+
builder ++= 0x00C0 to 0x00D6
34+
builder ++= 0x00D8 to 0x00F6
35+
builder ++= 0x00F8 to 0x02FF
36+
builder ++= 0x0370 to 0x037D
37+
builder ++= 0x037F to 0x1FFF
38+
builder ++= 0x200C to 0x200D
39+
builder ++= 0x2070 to 0x218F
40+
builder ++= 0x2C00 to 0x2FEF
41+
builder ++= 0x3001 to 0xD7FF
42+
builder ++= 0xF900 to 0xFDCF
43+
builder ++= 0xFDF0 to 0xFFFD
44+
builder ++= 0x10000 to 0xEFFFF
45+
builder.result()
3646
}
37-
val xmlNameRestChar:BitSet = {
38-
import scala.Predef.intWrapper
39-
val pattern = java.util.regex.Pattern.compile("""[\-\.0-9\u00B7\u0300-\u036F\u203F-\u2040]""")
47+
private val xmlNameRestChar:BitSet = {
4048
val builder = BitSet.newBuilder
41-
(0 until 0x110000)
42-
.filter(x => pattern.matcher(new String(Array[Int](x), 0, 1)).matches)
43-
.foreach(builder.+= _)
44-
builder.result
49+
builder += '-'
50+
builder += '.'
51+
builder ++= '0'.toInt to '9'
52+
builder += 0x00B7
53+
builder ++= 0x0300 to 0x036F
54+
builder ++= 0x203F to 0x2040
55+
builder.result()
4556
}
46-
val xmlAllowedChar:BitSet = {
47-
import scala.Predef.intWrapper
48-
val bits = Seq(0x9, 0xA, 0xD) ++ (0x20 to 0xD7FF) ++ (0xE000 to 0xFFFD) ++ (0x10000 to 0x10FFFF)
49-
BitSet(bits:_*)
57+
private val xmlAllowedChar:BitSet = {
58+
val builder = BitSet.newBuilder
59+
builder += 0x9
60+
builder += 0xA
61+
builder += 0xD
62+
builder ++= 0x20 to 0xD7FF
63+
builder ++= 0xE000 to 0xFFFD
64+
builder ++= 0x10000 to 0x10FFFF
65+
builder.result()
5066
}
5167

5268
private val whitespace:Interpolator[Unit] = charIn("\n\r\t ").map(_ => ())

0 commit comments

Comments
 (0)