Skip to content

Commit 2a2f265

Browse files
RuslanLa and meta-codesync[bot]
authored and committed
Add lexer token counting to kotlincd_actions telemetry
Summary: Add num_kotlin_tokens and num_java_tokens fields to the kotlincd_actions Scuba table. Token counts are computed at compilation time using KotlinLexer for .kt/.kts files and StreamTokenizer for .java files. This enables measuring Kotlin build speed normalized by token count (tokens/second) to identify abnormally slow targets and compare across apps. Token counting is best-effort — failures are caught and logged as extras without breaking the build. Performance impact: benchmarking with a realistic workload (200 .kt files at ~6.7 KB each + 50 .java files at ~5.4 KB each, ~1.6 MB total) shows token counting completes in ~22 ms on average (~0.09 ms per file). This is negligible relative to compilation time (seconds to minutes), and source file contents will already be in the OS page cache since the compiler reads the same files immediately after. Differential Revision: D94540188 fbshipit-source-id: d49877ae92a87b5c061618ff74299d39cbb1d111
1 parent 07ae185 commit 2a2f265

8 files changed

Lines changed: 251 additions & 0 deletions

File tree

prelude/toolchains/android/src/com/facebook/buck/jvm/kotlin/buildtools/BuildToolsKotlinc.kt

Lines changed: 13 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -16,6 +16,7 @@ import com.facebook.buck.core.filesystems.RelPath
1616
import com.facebook.buck.core.util.log.Logger
1717
import com.facebook.buck.jvm.core.BuildTargetValue
1818
import com.facebook.buck.jvm.kotlin.cd.analytics.KotlinCDLoggingContext
19+
import com.facebook.buck.jvm.kotlin.cd.analytics.SourceTokenCounter
1920
import com.facebook.buck.jvm.kotlin.kotlinc.Kotlinc
2021
import com.facebook.buck.jvm.kotlin.kotlinc.incremental.KotlincMode
2122
import com.facebook.buck.util.ClassLoaderCache
@@ -134,6 +135,18 @@ class BuildToolsKotlinc : Kotlinc {
134135
)
135136
}
136137

138+
try {
139+
val tokenCounts = SourceTokenCounter.countTokens(expandedSources, ruleCellRoot.path)
140+
kotlinCDLoggingContext.numKotlinTokens = tokenCounts.kotlinTokens
141+
kotlinCDLoggingContext.numJavaTokens = tokenCounts.javaTokens
142+
} catch (e: Exception) {
143+
// Token counting is best-effort telemetry; don't fail the build
144+
kotlinCDLoggingContext.addExtras(
145+
BuildToolsKotlinc::class.java.simpleName,
146+
"Token counting failed: ${e.message}",
147+
)
148+
}
149+
137150
val resolvedExpandedSources =
138151
expandedSources.map { path -> ruleCellRoot.resolve(path).toString() }
139152

prelude/toolchains/android/src/com/facebook/buck/jvm/kotlin/cd/analytics/BUCK

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -17,5 +17,6 @@ buck_kotlin_library(
1717
deps = [
1818
"prelude//toolchains/android/src/com/facebook/buck/core/filesystems:filesystems",
1919
"prelude//toolchains/android/src/com/facebook/buck/jvm/cd/command/kotlin:language-version",
20+
"prelude//toolchains/android/third-party:kotlin-compiler",
2021
],
2122
)

prelude/toolchains/android/src/com/facebook/buck/jvm/kotlin/cd/analytics/KotlinCDLoggingContext.kt

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -21,6 +21,9 @@ class KotlinCDLoggingContext(
2121

2222
val classpathChangesParam = (mode as? ModeParam.Incremental)?.classpathChangesParam
2323

24+
var numKotlinTokens: Long = 0L
25+
var numJavaTokens: Long = 0L
26+
2427
private val _extras: MutableMap<String, MutableList<String>> = mutableMapOf()
2528
val extras: Map<String, List<String>>
2629
get() = _extras
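
prelude/toolchains/android/src/com/facebook/buck/jvm/kotlin/cd/analytics/SourceTokenCounter.kt

Lines changed: 75 additions & 0 deletions
Original file line number | Diff line number | Diff line change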
@@ -0,0 +1,75 @@
1+
/*
2+
* Copyright (c) Meta Platforms, Inc. and affiliates.
3+
*
4+
* This source code is dual-licensed under either the MIT license found in the
5+
* LICENSE-MIT file in the root directory of this source tree or the Apache
6+
* License, Version 2.0 found in the LICENSE-APACHE file in the root directory
7+
* of this source tree. You may select, at your option, one of the
8+
* above-listed licenses.
9+
*/
10+
11+
package com.facebook.buck.jvm.kotlin.cd.analytics
12+
13+
import java.nio.file.Files
14+
import java.nio.file.Path
15+
import org.jetbrains.kotlin.lexer.KotlinLexer
16+
import org.jetbrains.kotlin.lexer.KtTokens
17+
18+
/**
 * Counts lexical tokens in Kotlin and Java source files.
 *
 * Kotlin files are lexed with [KotlinLexer], ignoring whitespace and comments. Java files are
 * tokenized with [java.io.StreamTokenizer], which is not a Java lexer: the Java figure is an
 * approximation (for example, a quoted string is one token and a multi-character operator such as
 * `==` is one token per character).
 */
object SourceTokenCounter {

  /**
   * Sums the token counts of every `.kt`/`.kts` and `.java` file in [sourcePaths].
   *
   * @param sourcePaths files to inspect; each entry is resolved against [rootPath] before it is
   *   read. Entries with any other extension are ignored.
   * @param rootPath base path used to resolve the entries of [sourcePaths].
   * @return the Kotlin and Java totals.
   * @throws java.io.IOException if a matching file cannot be read or is not valid UTF-8.
   */
  fun countTokens(sourcePaths: List<Path>, rootPath: Path): TokenCounts {
    var kotlinTokens = 0L
    var javaTokens = 0L
    for (path in sourcePaths) {
      val resolved = rootPath.resolve(path)
      // The extension is taken from the unresolved path; no '.' at all yields "" and is skipped.
      when (path.toString().substringAfterLast('.', "")) {
        "kt",
        "kts" -> kotlinTokens += countKotlinTokens(resolved)
        "java" -> javaTokens += countJavaTokens(resolved)
      }
    }
    return TokenCounts(kotlinTokens, javaTokens)
  }

  /** Kotlin token types that are not counted: whitespace and every kind of comment. */
  private val SKIPPED_TOKEN_TYPES =
      setOf(
          KtTokens.WHITE_SPACE,
          KtTokens.EOL_COMMENT,
          KtTokens.BLOCK_COMMENT,
          KtTokens.DOC_COMMENT,
          KtTokens.SHEBANG_COMMENT,
      )

  /** Lexes [file] with [KotlinLexer] and counts the tokens not in [SKIPPED_TOKEN_TYPES]. */
  private fun countKotlinTokens(file: Path): Long {
    val text = Files.readString(file)
    val lexer = KotlinLexer()
    lexer.start(text)
    var count = 0L
    // A null token type marks the end of the input.
    while (lexer.tokenType != null) {
      if (lexer.tokenType !in SKIPPED_TOKEN_TYPES) {
        count++
      }
      lexer.advance()
    }
    return count
  }

  /**
   * Approximates the number of Java tokens in [file] with [java.io.StreamTokenizer].
   *
   * StreamTokenizer is used instead of a real Java lexer because it is lightweight and needs no
   * extra dependencies. Its defaults are adjusted so that identifiers and member accesses are
   * split the way Java splits them.
   */
  private fun countJavaTokens(file: Path): Long =
      // Stream the file instead of loading it into a String; `use` closes the reader even if
      // decoding fails part-way through.
      Files.newBufferedReader(file).use { source ->
        val tokenizer = java.io.StreamTokenizer(source)
        tokenizer.slashSlashComments(true)
        tokenizer.slashStarComments(true)
        // By default a lone '/' starts a line comment; make it a plain token (division). The
        // "//" and "/*" comment forms enabled above are still recognized.
        tokenizer.ordinaryChar('/'.code)
        // By default '.' and '-' are number constituents, and a word token absorbs number
        // constituents, so `System.out.println` or `a-b` would be a single token. Making them
        // ordinary splits those apart; a literal such as `1.5` is still read as one number
        // because number parsing starts at the digit.
        tokenizer.ordinaryChar('.'.code)
        tokenizer.ordinaryChar('-'.code)
        // '_' and '$' are legal in Java identifiers but are not word constituents by default,
        // which would split `MAX_VALUE` into three tokens.
        tokenizer.wordChars('_'.code, '_'.code)
        tokenizer.wordChars('$'.code, '$'.code)
        var count = 0L
        while (tokenizer.nextToken() != java.io.StreamTokenizer.TT_EOF) {
          count++
        }
        count
      }
}
71+
72+
/**
 * Token totals split by source language.
 *
 * @property kotlinTokens number of tokens counted in Kotlin sources.
 * @property javaTokens number of tokens counted in Java sources.
 */
data class TokenCounts(
    val kotlinTokens: Long,
    val javaTokens: Long,
) {
  /** Combined Kotlin and Java token count, computed on each access. */
  val totalTokens: Long get() = kotlinTokens + javaTokens
}

prelude/toolchains/android/src/com/facebook/buck/jvm/kotlin/cd/analytics/logger/KotlinCDLoggerAnalytics.kt

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -89,6 +89,8 @@ constructor(
8989
extras = buildJson(context.extras),
9090
addedAndModifiedFiles = addedAndModifiedFiles,
9191
removedFiles = removedFiles,
92+
numKotlinTokens = context.numKotlinTokens.takeIf { it > 0 },
93+
numJavaTokens = context.numJavaTokens.takeIf { it > 0 },
9294
)
9395
}
9496

prelude/toolchains/android/src/com/facebook/buck/jvm/kotlin/cd/analytics/logger/model/KotlinCDLogEntry.kt

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -27,4 +27,6 @@ data class KotlinCDLogEntry(
2727
val extras: String?,
2828
val addedAndModifiedFiles: Set<String>?,
2929
val removedFiles: Set<String>?,
30+
val numKotlinTokens: Long? = null,
31+
val numJavaTokens: Long? = null,
3032
)

prelude/toolchains/android/test/com/facebook/buck/jvm/kotlin/cd/analytics/logger/KotlinCDLoggerAnalyticsTest.kt

Lines changed: 31 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -165,15 +165,42 @@ internal class KotlinCDLoggerAnalyticsTest {
165165
verify(kotlinCDLogger, times(1)).log(expectedEntry)
166166
}
167167

168+
@Test
fun `when token counts are set, they are logged`() {
  // Non-zero counts set on the context are expected to appear on the logged entry.
  val analytics = createFakeKotlinCDAnalytics()
  val expected = createExpectedKotlinCDLogEntry(numKotlinTokens = 1000L, numJavaTokens = 500L)
  val context = createKotlinCDLoggingContext(numKotlinTokens = 1000L, numJavaTokens = 500L)

  analytics.log(context)

  verify(kotlinCDLogger, times(1)).log(expected)
}
180+
181+
@Test
fun `when token counts are zero, they are not logged`() {
  // The expected entry is built with its default (null) token counts: zero counts on the
  // context must not be written to the log entry.
  val analytics = createFakeKotlinCDAnalytics()
  val expected = createExpectedKotlinCDLogEntry()
  val context = createKotlinCDLoggingContext(numKotlinTokens = 0L, numJavaTokens = 0L)

  analytics.log(context)

  verify(kotlinCDLogger, times(1)).log(expected)
}
190+
168191
/**
 * Builds a [KotlinCDLoggingContext] for a test.
 *
 * Every entry of [extras] is added to the context one value at a time, then the two token counts
 * are set on it.
 */
private fun createKotlinCDLoggingContext(
    step: StepParam = StepParam.KOTLINC,
    languageVersion: String = DEFAULT_LANGUAGE_VERSION,
    kotlincMode: ModeParam? =
        ModeParam.Incremental(ClasspathChangesParam.NO_CHANGES, emptySet(), emptySet()),
    extras: Map<String, List<String>> = mapOf(),
    numKotlinTokens: Long = 0L,
    numJavaTokens: Long = 0L,
): KotlinCDLoggingContext =
    KotlinCDLoggingContext(step, LanguageVersion(languageVersion), kotlincMode).apply {
      for ((key, values) in extras) {
        for (value in values) {
          addExtras(key, value)
        }
      }
      // `this.` selects the context property; the bare name is the function parameter.
      this.numKotlinTokens = numKotlinTokens
      this.numJavaTokens = numJavaTokens
    }
179206

@@ -198,6 +225,8 @@ internal class KotlinCDLoggerAnalyticsTest {
198225
extras: String? = null,
199226
modifiedFiles: Set<String> = emptySet(),
200227
removedFiles: Set<String> = emptySet(),
228+
numKotlinTokens: Long? = null,
229+
numJavaTokens: Long? = null,
201230
) =
202231
KotlinCDLogEntry(
203232
time = Instant.now(clock).epochSecond,
@@ -216,6 +245,8 @@ internal class KotlinCDLoggerAnalyticsTest {
216245
extras = extras,
217246
addedAndModifiedFiles = modifiedFiles,
218247
removedFiles = removedFiles,
248+
numKotlinTokens = numKotlinTokens,
249+
numJavaTokens = numJavaTokens,
219250
)
220251

221252
companion object TestParams {
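
prelude/toolchains/android/test/com/facebook/buck/jvm/kotlin/cd/analytics/SourceTokenCounterTest.kt

Lines changed: 124 additions & 0 deletions
Original file line number | Diff line number | Diff line change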
@@ -0,0 +1,124 @@
1+
/*
2+
* Copyright (c) Meta Platforms, Inc. and affiliates.
3+
*
4+
* This source code is dual-licensed under either the MIT license found in the
5+
* LICENSE-MIT file in the root directory of this source tree or the Apache
6+
* License, Version 2.0 found in the LICENSE-APACHE file in the root directory
7+
* of this source tree. You may select, at your option, one of the
8+
* above-listed licenses.
9+
*/
10+
11+
package com.facebook.buck.jvm.kotlin.cd.analytics
12+
13+
import java.nio.file.Path
14+
import org.junit.Assert.assertEquals
15+
import org.junit.Rule
16+
import org.junit.Test
17+
import org.junit.rules.TemporaryFolder
18+
19+
/**
 * Tests for [SourceTokenCounter.countTokens], run against files created in a JUnit
 * [TemporaryFolder].
 */
internal class SourceTokenCounterTest {

  @get:Rule val tempDir = TemporaryFolder()

  @Test
  fun `countTokens returns zero for empty source list`() {
    val result = SourceTokenCounter.countTokens(emptyList(), tempDir.root.toPath())
    assertEquals(0L, result.kotlinTokens)
    assertEquals(0L, result.javaTokens)
    assertEquals(0L, result.totalTokens)
  }

  @Test
  fun `countTokens counts Kotlin tokens`() {
    val ktFile = tempDir.newFile("Test.kt")
    ktFile.writeText("fun main() { println(\"hello\") }")
    val sourcePaths = listOf(Path.of("Test.kt"))

    val result = SourceTokenCounter.countTokens(sourcePaths, tempDir.root.toPath())

    assertPositive("Expected positive Kotlin token count", result.kotlinTokens)
    assertEquals(0L, result.javaTokens)
    assertEquals(result.kotlinTokens, result.totalTokens)
  }

  @Test
  fun `countTokens counts Java tokens`() {
    val javaFile = tempDir.newFile("Test.java")
    javaFile.writeText("public class Test { public static void main(String[] args) {} }")
    val sourcePaths = listOf(Path.of("Test.java"))

    val result = SourceTokenCounter.countTokens(sourcePaths, tempDir.root.toPath())

    assertEquals(0L, result.kotlinTokens)
    assertPositive("Expected positive Java token count", result.javaTokens)
    assertEquals(result.javaTokens, result.totalTokens)
  }

  @Test
  fun `countTokens counts both Kotlin and Java files`() {
    val ktFile = tempDir.newFile("Test.kt")
    ktFile.writeText("val x = 1")
    val javaFile = tempDir.newFile("Test.java")
    javaFile.writeText("class Test { int x = 1; }")
    val sourcePaths = listOf(Path.of("Test.kt"), Path.of("Test.java"))

    val result = SourceTokenCounter.countTokens(sourcePaths, tempDir.root.toPath())

    assertPositive("Expected positive Kotlin token count", result.kotlinTokens)
    assertPositive("Expected positive Java token count", result.javaTokens)
    assertEquals(result.kotlinTokens + result.javaTokens, result.totalTokens)
  }

  @Test
  fun `countTokens handles kts extension`() {
    val ktsFile = tempDir.newFile("build.gradle.kts")
    ktsFile.writeText("plugins { id(\"java\") }")
    val sourcePaths = listOf(Path.of("build.gradle.kts"))

    val result = SourceTokenCounter.countTokens(sourcePaths, tempDir.root.toPath())

    assertPositive("Expected positive Kotlin token count for .kts file", result.kotlinTokens)
  }

  @Test
  fun `countTokens ignores non-kotlin-non-java files`() {
    val txtFile = tempDir.newFile("readme.txt")
    txtFile.writeText("this is not source code")
    val sourcePaths = listOf(Path.of("readme.txt"))

    val result = SourceTokenCounter.countTokens(sourcePaths, tempDir.root.toPath())

    assertEquals(0L, result.kotlinTokens)
    assertEquals(0L, result.javaTokens)
  }

  @Test
  fun `countTokens excludes whitespace and comments from Kotlin token count`() {
    val ktFile = tempDir.newFile("Commented.kt")

    // Code without comments
    ktFile.writeText("val x = 1")
    val baseResult =
        SourceTokenCounter.countTokens(listOf(Path.of("Commented.kt")), tempDir.root.toPath())

    // Same code with added comments and extra whitespace — should yield same count
    ktFile.writeText(
        "// line comment\nval x = 1 /* block comment */ /** doc comment */\n// trailing"
    )
    val commentedResult =
        SourceTokenCounter.countTokens(listOf(Path.of("Commented.kt")), tempDir.root.toPath())

    assertEquals(baseResult.kotlinTokens, commentedResult.kotlinTokens)
  }

  @Test
  fun `countTokens handles empty source file`() {
    val ktFile = tempDir.newFile("Empty.kt")
    ktFile.writeText("")
    val sourcePaths = listOf(Path.of("Empty.kt"))

    val result = SourceTokenCounter.countTokens(sourcePaths, tempDir.root.toPath())

    assertEquals(0L, result.kotlinTokens)
  }

  /**
   * Fails the test with [message] unless [count] is strictly positive.
   *
   * Uses JUnit's `assertTrue` rather than Kotlin's `assert`: the latter only throws when the JVM
   * runs with assertions enabled (`-ea`), so a failing check could otherwise pass silently.
   */
  private fun assertPositive(message: String, count: Long) {
    org.junit.Assert.assertTrue(message, count > 0)
  }
}

0 commit comments

Comments
 (0)