Skip to content

Commit 623f613

Browse files
Track model loading duration in RunMetrics and include it in timing reports
1 parent 83998cc commit 623f613

3 files changed

Lines changed: 11 additions & 10 deletions

File tree

src/main/java/org/beehive/gpullama3/LlamaApp.java

Lines changed: 0 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -35,9 +35,7 @@ private static void runSingleInstruction(Model model, Sampler sampler, Options o
3535
*/
3636
static void main(String[] args) throws IOException {
3737
Options options = Options.parseOptions(args);
38-
long loadStart = System.nanoTime();
3938
Model model = loadModel(options);
40-
RunMetrics.setLoadDuration(System.nanoTime() - loadStart);
4139
Sampler sampler = createSampler(model, options);
4240

4341
if (options.interactive()) {

src/main/java/org/beehive/gpullama3/auxiliary/RunMetrics.java

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -120,9 +120,11 @@ public static void printMetrics() {
120120
if (Boolean.parseBoolean(System.getProperty("llama.EnableTimingForTornadoVMInit", "false"))
121121
&& m.tornadoPlanCreationNs > 0) {
122122
System.err.printf(
123+
"GGUF Model Load: %.2f ms%n" +
123124
"Compilation & CodeGen: %.2f ms%n" +
124125
"Warmup: %.2f ms%n" +
125126
"Read-only weights Copy-in: %.2f ms%n",
127+
m.loadDurationNs / 1_000_000.0,
126128
m.tornadoPlanCreationNs / 1_000_000.0,
127129
m.tornadoJitNs / 1_000_000.0,
128130
m.readOnlyWeightsCopyInNs / 1_000_000.0);

src/main/java/org/beehive/gpullama3/model/loader/ModelLoader.java

Lines changed: 9 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,7 @@
11
package org.beehive.gpullama3.model.loader;
22

33
import org.beehive.gpullama3.Options;
4+
import org.beehive.gpullama3.auxiliary.RunMetrics;
45
import org.beehive.gpullama3.tensor.GGMLType;
56
import org.beehive.gpullama3.tensor.GGUF;
67
import org.beehive.gpullama3.tensor.*;
@@ -91,24 +92,24 @@ public static Model loadModel(Options options) throws IOException {
9192
int contextLength = options.maxTokens();
9293
boolean useTornadovm = options.useTornadovm();
9394

94-
// initial load of metadata from gguf file
95+
long start = System.nanoTime();
9596
GGUF gguf = GGUF.loadGGUFMetadata(ggufPath);
96-
// detect model type
9797
ModelType modelType = detectModelType(gguf.getMetadata());
98-
// model type-specific load
99-
return modelType.loadModel(gguf.getFileChannel(), gguf, contextLength, useTornadovm);
98+
Model model = modelType.loadModel(gguf.getFileChannel(), gguf, contextLength, useTornadovm);
99+
RunMetrics.setLoadDuration(System.nanoTime() - start);
100+
return model;
100101
}
101102

102103
/**
103104
* For compatibility with langchain4j and quarkus.
104105
*/
105106
public static Model loadModel(Path ggufPath, int contextLength, boolean loadWeights, boolean useTornadovm) throws IOException {
106-
// initial load of metadata from gguf file
107+
long start = System.nanoTime();
107108
GGUF gguf = GGUF.loadGGUFMetadata(ggufPath);
108-
// detect model type
109109
ModelType modelType = detectModelType(gguf.getMetadata());
110-
// model type-specific load
111-
return modelType.loadModel(gguf.getFileChannel(), gguf, contextLength, useTornadovm);
110+
Model model = modelType.loadModel(gguf.getFileChannel(), gguf, contextLength, useTornadovm);
111+
RunMetrics.setLoadDuration(System.nanoTime() - start);
112+
return model;
112113
}
113114

114115
/**

0 commit comments

Comments
 (0)