Open
Description
(word_embeddings): Embedding(65024, 4544)
Traceback (most recent call last):
File "/jet/assets/recipe/mixology-huggingface-transformers-tiiuae-falcon-7b__benchmarks-develop-pyt_perf-infer_--h100-pcie-80gb-_-1_gpus-1_bs--1_dld-synthetic_seed-42_-source/benchmark.py", line 372, in <module>
CLI(benchmark)
│ └ <function benchmark at 0x7fec8b78d1b0>
└ <function CLI at 0x7fec8b6f0310>
File "/usr/local/lib/python3.10/dist-packages/jsonargparse/_cli.py", line 96, in CLI
return _run_component(components, cfg_init)
│ │ └ Namespace(cfg=Config(model_callable='transformers.AutoModelForCausalLM.from_pretrained', model_device=None, model_args=['tiiu...
│ └ <function benchmark at 0x7fec8b78d1b0>
└ <function _run_component at 0x7fec8b78c790>
File "/usr/local/lib/python3.10/dist-packages/jsonargparse/_cli.py", line 196, in _run_component
return component(**cfg)
│ └ Namespace(cfg=Config(model_callable='transformers.AutoModelForCausalLM.from_pretrained', model_device=None, model_args=['tiiu...
└ <function benchmark at 0x7fec8b78d1b0>
File "/jet/assets/recipe/mixology-huggingface-transformers-tiiuae-falcon-7b__benchmarks-develop-pyt_perf-infer_--h100-pcie-80gb-_-1_gpus-1_bs--1_dld-synthetic_seed-42_-source/benchmark.py", line 299, in benchmark
nav.optimize(
│ └ <function optimize at 0x7fec8b92ea70>
└ <module 'model_navigator' from '/usr/local/lib/python3.10/dist-packages/model_navigator/__init__.py'>
File "/usr/local/lib/python3.10/dist-packages/model_navigator/inplace/__init__.py", line 119, in optimize
module_registry.optimize()
│ └ <function ModuleRegistry.optimize at 0x7fec8ca35b40>
└ <model_navigator.inplace.registry.ModuleRegistry object at 0x7fec8ca477c0>
File "/usr/local/lib/python3.10/dist-packages/model_navigator/inplace/registry.py", line 75, in optimize
module.optimize()
│ └ <function Module.optimize at 0x7fec8b913640>
└ <Module at 0x7fec8e037c40 for FalconForCausalLM at 0x7fec8ad0cca0>
File "/usr/local/lib/python3.10/dist-packages/model_navigator/inplace/wrapper.py", line 207, in optimize
self._wrapper.optimize()
└ <Module at 0x7fec8e037c40 for FalconForCausalLM at 0x7fec8ad0cca0>
File "/usr/local/lib/python3.10/dist-packages/model_navigator/inplace/model.py", line 203, in optimize
optimize(model=self._module, dataloader=TorchDataloader(samples), **updated_config_dict)
│ │ │ │ │ └ {'sample_count': 1, 'batching': True, 'input_names': None, 'output_names': None, 'target_formats': [<Format.TORCHSCRIPT: 'tor...
│ │ │ │ └ [PosixPath('/tmp/transformers.models.falcon.modeling_falcon.FalconForCausalLM_6fuf1a6k/0.pt')]
│ │ │ └ <class 'model_navigator.inplace.utils.TorchDataloader'>
│ │ └ FalconForCausalLM(
│ │ (transformer): FalconModel(
│ │ (word_embeddings): Embedding(65024, 4544)
│ │ (h): ModuleList(
│ │ (0-...
│ └ <model_navigator.inplace.model.RecordingModule object at 0x7fec8b794c70>
└ <function optimize at 0x7fef3571ce50>
File "/usr/local/lib/python3.10/dist-packages/model_navigator/torch/__init__.py", line 152, in optimize
package = optimize_pipeline(
└ <function optimize_pipeline at 0x7fec8ca0e4d0>
File "/usr/local/lib/python3.10/dist-packages/model_navigator/pipelines/wrappers/optimize.py", line 73, in optimize_pipeline
context = pipeline_manager.run(
│ └ <function PipelineManager.run at 0x7fec8ca0f0a0>
└ <model_navigator.pipelines.pipeline_manager.PipelineManager object at 0x7fec88467820>
File "/usr/local/lib/python3.10/dist-packages/model_navigator/pipelines/pipeline_manager.py", line 88, in run
pipeline.run(workspace=workspace, config=config, context=context)
│ │ │ │ └ <model_navigator.pipelines.pipeline_context.PipelineContext object at 0x7fec88466b90>
│ │ │ └ CommonConfig(framework=<Framework.TORCH: 'torch'>, model=FalconForCausalLM(
│ │ │ (transformer): FalconModel(
│ │ │ (word_embedding...
│ │ └ <model_navigator.core.workspace.Workspace object at 0x7fec8add2e30>
│ └ <function Pipeline.run at 0x7fec8cdd0280>
└ Correctness
File "/usr/local/lib/python3.10/dist-packages/model_navigator/pipelines/pipeline.py", line 68, in run
command_output = self._execute_unit(
│ └ <function Pipeline._execute_unit at 0x7fec8cdd05e0>
└ Correctness
File "/usr/local/lib/python3.10/dist-packages/model_navigator/pipelines/pipeline.py", line 121, in _execute_unit
command_output = execution_unit.command().run(
│ └ Correctness
└ Cmd:Correctness, Config:torch, Runner:TorchTensorRTCompile
File "/usr/local/lib/python3.10/dist-packages/model_navigator/commands/base.py", line 127, in run
output = self._run(*args, **_filter_dict_for_func(kwargs, self._run))
│ │ │ │ │ │ └ <function Correctness._run at 0x7fec8cec0820>
│ │ │ │ │ └ <model_navigator.commands.correctness.correctness.Correctness object at 0x7fd6fc2c9870>
│ │ │ │ └ {'framework': <Framework.TORCH: 'torch'>, 'model': FalconForCausalLM(
│ │ │ │ (transformer): FalconModel(
│ │ │ │ (word_embeddings): Em...
│ │ │ └ <function Command.run.<locals>._filter_dict_for_func at 0x7fec3efe4ca0>
│ │ └ ()
│ └ <function Correctness._run at 0x7fec8cec0820>
└ <model_navigator.commands.correctness.correctness.Correctness object at 0x7fd6fc2c9870>
File "/usr/local/lib/python3.10/dist-packages/model_navigator/commands/correctness/correctness.py", line 150, in _run
context.execute_python_script(
│ └ <function ExecutionContext.execute_python_script at 0x7fec8cec0160>
└ <model_navigator.commands.execution_context.ExecutionContext object at 0x7fd6fc2c9ea0>
File "/usr/local/lib/python3.10/dist-packages/model_navigator/commands/execution_context.py", line 142, in execute_python_script
self._execute_function(func, unwrapped_args, allow_failure, cmd)
│ │ │ │ │ └ ['/bin/bash', 'torch/reproduce_correctness-torchtensorrtcompilerunner.sh']
│ │ │ │ └ False
│ │ │ └ ['--navigator_workspace', '/root/.cache/model_navigator/transformers.models.falcon.modeling_falcon.FalconForCausalLM/0', '--b...
│ │ └ <function correctness at 0x7fddd16e9000>
│ └ <function ExecutionContext._execute_function at 0x7fec8cec01f0>
└ <model_navigator.commands.execution_context.ExecutionContext object at 0x7fd6fc2c9ea0>
File "/usr/local/lib/python3.10/dist-packages/model_navigator/commands/execution_context.py", line 156, in _execute_function
fire.Fire(func, unwrapped_args)
│ │ │ └ ['--navigator_workspace', '/root/.cache/model_navigator/transformers.models.falcon.modeling_falcon.FalconForCausalLM/0', '--b...
│ │ └ <function correctness at 0x7fddd16e9000>
│ └ <function Fire at 0x7fec8ce0cd30>
└ <module 'fire' from '/usr/local/lib/python3.10/dist-packages/fire/__init__.py'>
File "/usr/local/lib/python3.10/dist-packages/fire/core.py", line 141, in Fire
component_trace = _Fire(component, args, parsed_flag_args, context, name)
│ │ │ │ │ └ 'benchmark.py'
│ │ │ │ └ {}
│ │ │ └ Namespace(verbose=False, interactive=False, separator='-', completion=None, help=False, trace=False)
│ │ └ ['--navigator_workspace', '/root/.cache/model_navigator/transformers.models.falcon.modeling_falcon.FalconForCausalLM/0', '--b...
│ └ <function correctness at 0x7fddd16e9000>
└ <function _Fire at 0x7fec8ce74f70>
File "/usr/local/lib/python3.10/dist-packages/fire/core.py", line 466, in _Fire
component, remaining_args = _CallAndUpdateTrace(
│ └ <function _CallAndUpdateTrace at 0x7fec8ce75090>
└ <function correctness at 0x7fddd16e9000>
File "/usr/local/lib/python3.10/dist-packages/fire/core.py", line 681, in _CallAndUpdateTrace
component = fn(*varargs, **kwargs)
│ │ └ {}
│ └ [0, '/tmp/tmp8bzfvp1_', 'TorchTensorRTCompile', {'metadata': [{'name': 'input__1', 'shape': (-1, 16), 'dtype': 'int64'}, {'na...
└ <function correctness at 0x7fddd16e9000>
File "/usr/local/lib/python3.10/dist-packages/model_navigator/commands/correctness/correctness_script.py", line 93, in correctness
comp_output = runner.infer(sample)
│ │ └ {'input__1': array([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]]), 'input__0': array([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
│ └ <function NavigatorRunner.infer at 0x7fef35b1f250>
└ <model_navigator_custom_runners.torch_trt_compile.runner.TorchTensorRTCompileRunner object at 0x7fec28ef5e70>
File "/usr/local/lib/python3.10/dist-packages/model_navigator/runners/base.py", line 325, in infer
output = self.infer_impl(feed_dict, *args, **kwargs)
│ │ │ │ └ {}
│ │ │ └ ()
│ │ └ {'input__1': array([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]]), 'input__0': array([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
│ └ <function _BaseTorchRunner.infer_impl at 0x7fef3571da20>
└ <model_navigator_custom_runners.torch_trt_compile.runner.TorchTensorRTCompileRunner object at 0x7fec28ef5e70>
File "/usr/local/lib/python3.10/dist-packages/model_navigator/runners/torch.py", line 94, in infer_impl
outputs = self._infer(feed_dict=feed_dict)
│ │ └ {'input__1': array([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]]), 'input__0': array([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
│ └ <bound method _BaseTorchRunner._infer_v1 of <model_navigator_custom_runners.torch_trt_compile.runner.TorchTensorRTCompileRunn...
└ <model_navigator_custom_runners.torch_trt_compile.runner.TorchTensorRTCompileRunner object at 0x7fec28ef5e70>
File "/usr/local/lib/python3.10/dist-packages/model_navigator/runners/torch.py", line 135, in _infer_v1
outputs = self._loaded_model(*args, **kwargs)
│ │ │ └ {'attention_mask': tensor([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]], device='cuda:0'), 'input_ids': tensor([[0, 0, 0...
│ │ └ ()
│ └ OptimizedModule(
│ (_orig_mod): FalconForCausalLM(
│ (transformer): FalconModel(
│ (word_embeddings): Embedding(65024, ...
└ <model_navigator_custom_runners.torch_trt_compile.runner.TorchTensorRTCompileRunner object at 0x7fec28ef5e70>
File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1736, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
│ │ │ └ {'attention_mask': tensor([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]], device='cuda:0'), 'input_ids': tensor([[0, 0, 0...
│ │ └ ()
│ └ <function Module._call_impl at 0x7fefc3611c60>
└ OptimizedModule(
(_orig_mod): FalconForCausalLM(
(transformer): FalconModel(
(word_embeddings): Embedding(65024, ...
File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1747, in _call_impl
return forward_call(*args, **kwargs)
│ │ └ {'attention_mask': tensor([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]], device='cuda:0'), 'input_ids': tensor([[0, 0, 0...
│ └ ()
└ <function Module._wrapped_call_impl at 0x7fdd7f4e0280>
File "/usr/local/lib/python3.10/dist-packages/torch/_dynamo/eval_frame.py", line 464, in _fn
return fn(*args, **kwargs)
│ │ └ {'attention_mask': tensor([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]], device='cuda:0'), 'input_ids': tensor([[0, 0, 0...
│ └ ()
└ <bound method Module._wrapped_call_impl of FalconForCausalLM(
(transformer): FalconModel(
(word_embeddings): Embedding(...
File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1736, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
│ │ │ └ {'attention_mask': tensor([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]], device='cuda:0'), 'input_ids': tensor([[0, 0, 0...
│ │ └ ()
│ └ <function Module._call_impl at 0x7fefc3611c60>
└ FalconForCausalLM(
(transformer): FalconModel(
(word_embeddings): Embedding(65024, 4544)
(h): ModuleList(
(0-...
File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1747, in _call_impl
return forward_call(*args, **kwargs)
│ │ └ {'attention_mask': tensor([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]], device='cuda:0'), 'input_ids': tensor([[0, 0, 0...
│ └ ()
└ <bound method FalconForCausalLM.forward of FalconForCausalLM(
(transformer): FalconModel(
(word_embeddings): Embedding(...
File "/usr/local/lib/python3.10/dist-packages/torch/_dynamo/convert_frame.py", line 1224, in __call__
return self._torchdynamo_orig_callable(
│ └ <torch._dynamo.convert_frame.ConvertFrame object at 0x7fec28f639d0>
└ <torch._dynamo.convert_frame.CatchErrorsWrapper object at 0x7fec28f60370>
File "/usr/local/lib/python3.10/dist-packages/torch/_dynamo/convert_frame.py", line 1025, in __call__
result = self._inner_convert(
│ └ <torch._dynamo.convert_frame.ConvertFrameAssert object at 0x7fec28f61060>
└ <torch._dynamo.convert_frame.ConvertFrame object at 0x7fec28f639d0>
File "/usr/local/lib/python3.10/dist-packages/torch/_dynamo/convert_frame.py", line 514, in __call__
return _compile(
└ <function _compile at 0x7fef25d1e560>
File "/usr/lib/python3.10/contextlib.py", line 79, in inner
return func(*args, **kwds)
│ │ └ {'frame_state': {'_id': 0, "L['input_ids']": FrameStateSizeEntry(scalar=None, size=[1, 16], stride=[16, 1]), "L['position_ids...
│ └ (<code object forward at 0x7fec8e2a6970, file "/usr/local/lib/python3.10/dist-packages/transformers/models/falcon/modeling_fa...
└ <function _compile at 0x7fef25d1e200>
File "/usr/local/lib/python3.10/dist-packages/torch/_dynamo/convert_frame.py", line 896, in _compile
guarded_code = compile_inner(code, one_graph, hooks, transform)
│ │ │ │ └ <function _compile.<locals>.transform at 0x7febe3aba7a0>
│ │ │ └ Hooks(guard_export_fn=None, guard_fail_fn=None)
│ │ └ False
│ └ <code object forward at 0x7fec8e2a6970, file "/usr/local/lib/python3.10/dist-packages/transformers/models/falcon/modeling_fal...
└ <function _compile.<locals>.compile_inner at 0x7febe3aba320>
File "/usr/local/lib/python3.10/dist-packages/torch/_dynamo/convert_frame.py", line 662, in compile_inner
return _compile_inner(code, one_graph, hooks, transform)
│ │ │ │ └ <function _compile.<locals>.transform at 0x7febe3aba7a0>
│ │ │ └ Hooks(guard_export_fn=None, guard_fail_fn=None)
│ │ └ False
│ └ <code object forward at 0x7fec8e2a6970, file "/usr/local/lib/python3.10/dist-packages/transformers/models/falcon/modeling_fal...
└ <function _compile.<locals>._compile_inner at 0x7febe3ab8940>
File "/usr/local/lib/python3.10/dist-packages/torch/_utils_internal.py", line 85, in wrapper_function
return StrobelightCompileTimeProfiler.profile_compile_time(
│ └ <classmethod(<function StrobelightCompileTimeProfiler.profile_compile_time at 0x7ff029dcd360>)>
└ <class 'torch._strobelight.compile_time_profiler.StrobelightCompileTimeProfiler'>
File "/usr/local/lib/python3.10/dist-packages/torch/_strobelight/compile_time_profiler.py", line 129, in profile_compile_time
return func(*args, **kwargs)
│ │ └ {}
│ └ (<code object forward at 0x7fec8e2a6970, file "/usr/local/lib/python3.10/dist-packages/transformers/models/falcon/modeling_fa...
└ <function _compile.<locals>._compile_inner at 0x7febe3ab8a60>
File "/usr/local/lib/python3.10/dist-packages/torch/_dynamo/convert_frame.py", line 697, in _compile_inner
out_code = transform_code_object(code, transform)
│ │ └ <function _compile.<locals>.transform at 0x7febe3aba7a0>
│ └ <code object forward at 0x7fec8e2a6970, file "/usr/local/lib/python3.10/dist-packages/transformers/models/falcon/modeling_fal...
└ <function transform_code_object at 0x7fef2946a200>
File "/usr/local/lib/python3.10/dist-packages/torch/_dynamo/bytecode_transformation.py", line 1322, in transform_code_object
transformations(instructions, code_options)
│ │ └ {'co_argcount': 12, 'co_posonlyargcount': 0, 'co_kwonlyargcount': 0, 'co_nlocals': 23, 'co_stacksize': 12, 'co_flags': 67, 'c...
│ └ [Instruction(opcode=124, opname='LOAD_FAST', arg=11, argval='return_dict', offset=0, starts_line=1169, is_jump_target=False, ...
└ <function _compile.<locals>.transform at 0x7febe3aba7a0>
File "/usr/local/lib/python3.10/dist-packages/torch/_dynamo/convert_frame.py", line 209, in _fn
return fn(*args, **kwargs)
│ │ └ {}
│ └ ([Instruction(opcode=124, opname='LOAD_FAST', arg=11, argval='return_dict', offset=0, starts_line=1169, is_jump_target=False,...
└ <function _compile.<locals>.transform at 0x7febe3aba710>
File "/usr/local/lib/python3.10/dist-packages/torch/_dynamo/convert_frame.py", line 631, in transform
tracer.run()
│ └ <function InstructionTranslator.run at 0x7fef25d3a950>
└ <torch._dynamo.symbolic_convert.InstructionTranslator object at 0x7fec28edd8a0>
File "/usr/local/lib/python3.10/dist-packages/torch/_dynamo/symbolic_convert.py", line 2722, in run
super().run()
File "/usr/local/lib/python3.10/dist-packages/torch/_dynamo/symbolic_convert.py", line 957, in run
while self.step():
│ └ <function InstructionTranslatorBase.step at 0x7fef25d25120>
└ <torch._dynamo.symbolic_convert.InstructionTranslator object at 0x7fec28edd8a0>
File "/usr/local/lib/python3.10/dist-packages/torch/_dynamo/symbolic_convert.py", line 869, in step
self.dispatch_table[inst.opcode](self, inst)
│ │ │ │ │ └ Instruction(opcode=83, opname='RETURN_VALUE', arg=None, argval=None, offset=242, starts_line=1203, is_jump_target=False, posi...
│ │ │ │ └ <torch._dynamo.symbolic_convert.InstructionTranslator object at 0x7fec28edd8a0>
│ │ │ └ 83
│ │ └ Instruction(opcode=83, opname='RETURN_VALUE', arg=None, argval=None, offset=242, starts_line=1203, is_jump_target=False, posi...
│ └ [None, <function InstructionTranslatorBase.POP_TOP at 0x7fef25d1caf0>, <function InstructionTranslatorBase.ROT_TWO at 0x7fef2...
└ <torch._dynamo.symbolic_convert.InstructionTranslator object at 0x7fec28edd8a0>
File "/usr/local/lib/python3.10/dist-packages/torch/_dynamo/symbolic_convert.py", line 2913, in RETURN_VALUE
self._return(inst)
│ │ └ Instruction(opcode=83, opname='RETURN_VALUE', arg=None, argval=None, offset=242, starts_line=1203, is_jump_target=False, posi...
│ └ <function InstructionTranslator._return at 0x7fef25d3ac20>
└ <torch._dynamo.symbolic_convert.InstructionTranslator object at 0x7fec28edd8a0>
File "/usr/local/lib/python3.10/dist-packages/torch/_dynamo/symbolic_convert.py", line 2898, in _return
self.output.compile_subgraph(
│ │ └ <function OutputGraph.compile_subgraph at 0x7fef25d12560>
│ └ <torch._dynamo.output_graph.OutputGraph object at 0x7febe26ebbe0>
└ <torch._dynamo.symbolic_convert.InstructionTranslator object at 0x7fec28edd8a0>
File "/usr/local/lib/python3.10/dist-packages/torch/_dynamo/output_graph.py", line 1133, in compile_subgraph
self.compile_and_call_fx_graph(tx, pass2.graph_output_vars(), root)
│ │ │ │ │ └ FakeRootModule(...)
│ │ │ │ └ <function PyCodegen.graph_output_vars at 0x7fef25e70310>
│ │ │ └ <torch._dynamo.codegen.PyCodegen object at 0x7fec28efa7a0>
│ │ └ <torch._dynamo.symbolic_convert.InstructionTranslator object at 0x7fec28edd8a0>
│ └ <function OutputGraph.compile_and_call_fx_graph at 0x7fef25d12a70>
└ <torch._dynamo.output_graph.OutputGraph object at 0x7febe26ebbe0>
File "/usr/lib/python3.10/contextlib.py", line 79, in inner
return func(*args, **kwds)
│ │ └ {}
│ └ (<torch._dynamo.output_graph.OutputGraph object at 0x7febe26ebbe0>, <torch._dynamo.symbolic_convert.InstructionTranslator obj...
└ <function OutputGraph.compile_and_call_fx_graph at 0x7fef25d129e0>
File "/usr/local/lib/python3.10/dist-packages/torch/_dynamo/output_graph.py", line 1360, in compile_and_call_fx_graph
compiled_fn = self.call_user_compiler(gm)
│ │ └ GraphModule()
│ └ <function OutputGraph.call_user_compiler at 0x7fef25d12c20>
└ <torch._dynamo.output_graph.OutputGraph object at 0x7febe26ebbe0>
File "/usr/local/lib/python3.10/dist-packages/torch/_dynamo/output_graph.py", line 1407, in call_user_compiler
return self._call_user_compiler(gm)
│ │ └ GraphModule()
│ └ <function OutputGraph._call_user_compiler at 0x7fef25d12cb0>
└ <torch._dynamo.output_graph.OutputGraph object at 0x7febe26ebbe0>
File "/usr/local/lib/python3.10/dist-packages/torch/_dynamo/output_graph.py", line 1437, in _call_user_compiler
compiled_fn = compiler_fn(gm, self.example_inputs())
│ │ │ └ <function OutputGraph.example_inputs at 0x7fef25d12d40>
│ │ └ <torch._dynamo.output_graph.OutputGraph object at 0x7febe26ebbe0>
│ └ GraphModule()
└ <torch._dynamo.repro.after_dynamo.WrapBackendDebug object at 0x7fec28f608b0>
File "/usr/local/lib/python3.10/dist-packages/torch/_dynamo/repro/after_dynamo.py", line 129, in __call__
compiled_gm = compiler_fn(gm, example_inputs)
│ │ └ [tensor([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]], device='cuda:0'), Parameter containing:
│ │ tensor([[-0.0179, 0.0201...
│ └ GraphModule()
└ functools.partial(<torch._TorchCompileWrapper object at 0x7fec28f60550>)
File "/usr/local/lib/python3.10/dist-packages/torch/__init__.py", line 2276, in __call__
return self.compiler_fn(model_, inputs_, **self.kwargs)
│ │ │ │ │ └ {'options': {'truncate_long_and_double': True, 'enabled_precisions': {torch.float32, torch.float16}}}
│ │ │ │ └ <torch._TorchCompileWrapper object at 0x7fec28f60550>
│ │ │ └ [tensor([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]], device='cuda:0'), Parameter containing:
│ │ │ tensor([[-0.0179, 0.0201...
│ │ └ GraphModule()
│ └ <function torch_tensorrt_backend at 0x7fec8e0684c0>
└ <torch._TorchCompileWrapper object at 0x7fec28f60550>
File "/usr/local/lib/python3.10/dist-packages/torch_tensorrt/dynamo/backend/backends.py", line 44, in torch_tensorrt_backend
return DEFAULT_BACKEND(gm, sample_inputs, **kwargs)
│ │ │ └ {'options': {'truncate_long_and_double': True, 'enabled_precisions': {torch.float32, torch.float16}}}
│ │ └ [tensor([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]], device='cuda:0'), Parameter containing:
│ │ tensor([[-0.0179, 0.0201...
│ └ GraphModule()
└ <function aot_torch_tensorrt_aten_backend at 0x7fec8e068550>
File "/usr/local/lib/python3.10/dist-packages/torch_tensorrt/dynamo/backend/backends.py", line 52, in aot_torch_tensorrt_aten_backend
return _pretraced_backend(gm, sample_inputs, settings)
│ │ │ └ CompilationSettings(enabled_precisions={<dtype.f16: 6>, <dtype.f32: 7>}, debug=False, workspace_size=0, min_block_size=5, tor...
│ │ └ [tensor([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]], device='cuda:0'), Parameter containing:
│ │ tensor([[-0.0179, 0.0201...
│ └ GraphModule()
└ <function _pretraced_backend at 0x7fec8e0685e0>
> File "/usr/local/lib/python3.10/dist-packages/torch_tensorrt/dynamo/backend/backends.py", line 108, in _pretraced_backend
trt_compiled = compile_module(
└ <function compile_module at 0x7fec8e0023b0>
File "/usr/local/lib/python3.10/dist-packages/torch_tensorrt/dynamo/_compiler.py", line 440, in compile_module
*get_torch_inputs(submodule_inputs, to_torch_device(settings.device))
│ │ │ │ └ Device(type=DeviceType.GPU, gpu_id=0)
│ │ │ └ CompilationSettings(enabled_precisions={<dtype.f16: 6>, <dtype.f32: 7>}, debug=False, workspace_size=0, min_block_size=5, tor...
│ │ └ <function to_torch_device at 0x7fece7286440>
│ └ [Input(shape=(65024, 4544), dtype=dtype.f32, format=memory_format.linear, domain=[0.0, 2.0)), Input(shape=(1, 16), dtype=dtyp...
└ <function get_torch_inputs at 0x7fece7286170>
File "/usr/local/lib/python3.10/dist-packages/torch_tensorrt/dynamo/utils.py", line 166, in get_torch_inputs
return [
File "/usr/local/lib/python3.10/dist-packages/torch_tensorrt/dynamo/utils.py", line 167, in <listcomp>
input.torch_tensor.to(device) if isinstance(input, Input) else input
│ │ │ │ │ │ └ Input(shape=(65024, 4544), dtype=dtype.f32, format=memory_format.linear, domain=[0.0, 2.0))
│ │ │ │ │ └ <class 'torch_tensorrt._Input.Input'>
│ │ │ │ └ Input(shape=(65024, 4544), dtype=dtype.f32, format=memory_format.linear, domain=[0.0, 2.0))
│ │ │ └ device(type='cuda', index=0)
│ │ └ <method 'to' of 'torch._C.TensorBase' objects>
│ └ tensor([[0.7543, 0.5797, 0.8699, ..., 0.8869, 0.0986, 0.1086],
│ [0.2815, 0.0157, 0.6210, ..., 0.2740, 0.1452, 0.2492...
└ Input(shape=(65024, 4544), dtype=dtype.f32, format=memory_format.linear, domain=[0.0, 2.0))
File "/usr/local/lib/python3.10/dist-packages/torch/utils/_stats.py", line 21, in wrapper
return fn(*args, **kwargs)
│ │ └ {}
│ └ (<torch._subclasses.fake_tensor.FakeTensorMode object at 0x7fd6fc747d00>, <OpOverload(op='aten._to_copy', overload='default')...
└ <function FakeTensorMode.__torch_dispatch__ at 0x7fef5056e3b0>
File "/usr/local/lib/python3.10/dist-packages/torch/_subclasses/fake_tensor.py", line 1194, in __torch_dispatch__
return self.dispatch(func, types, args, kwargs)
│ │ │ │ │ └ {'dtype': torch.float32, 'layout': torch.strided, 'device': device(type='cuda', index=0)}
│ │ │ │ └ (tensor([[0.7543, 0.5797, 0.8699, ..., 0.8869, 0.0986, 0.1086],
│ │ │ │ [0.2815, 0.0157, 0.6210, ..., 0.2740, 0.1452, 0.249...
│ │ │ └ ()
│ │ └ <OpOverload(op='aten._to_copy', overload='default')>
│ └ <function FakeTensorMode.dispatch at 0x7fef5056eb90>
└ <torch._subclasses.fake_tensor.FakeTensorMode object at 0x7fd6fc747d00>
File "/usr/local/lib/python3.10/dist-packages/torch/_subclasses/fake_tensor.py", line 1658, in dispatch
return self._cached_dispatch_impl(func, types, args, kwargs)
│ │ │ │ │ └ {'dtype': torch.float32, 'layout': torch.strided, 'device': device(type='cuda', index=0)}
│ │ │ │ └ (tensor([[0.7543, 0.5797, 0.8699, ..., 0.8869, 0.0986, 0.1086],
│ │ │ │ [0.2815, 0.0157, 0.6210, ..., 0.2740, 0.1452, 0.249...
│ │ │ └ ()
│ │ └ <OpOverload(op='aten._to_copy', overload='default')>
│ └ <function FakeTensorMode._cached_dispatch_impl at 0x7fef5056e7a0>
└ <torch._subclasses.fake_tensor.FakeTensorMode object at 0x7fd6fc747d00>
File "/usr/local/lib/python3.10/dist-packages/torch/_subclasses/fake_tensor.py", line 1302, in _cached_dispatch_impl
output = self._dispatch_impl(func, types, args, kwargs)
│ │ │ │ │ └ {'dtype': torch.float32, 'layout': torch.strided, 'device': device(type='cuda', index=0)}
│ │ │ │ └ (tensor([[0.7543, 0.5797, 0.8699, ..., 0.8869, 0.0986, 0.1086],
│ │ │ │ [0.2815, 0.0157, 0.6210, ..., 0.2740, 0.1452, 0.249...
│ │ │ └ ()
│ │ └ <OpOverload(op='aten._to_copy', overload='default')>
│ └ <function FakeTensorMode._dispatch_impl at 0x7fef5056ec20>
└ <torch._subclasses.fake_tensor.FakeTensorMode object at 0x7fd6fc747d00>
File "/usr/local/lib/python3.10/dist-packages/torch/_subclasses/fake_tensor.py", line 1700, in _dispatch_impl
out = func(*const_args, **const_kwargs)
│ │ └ {'dtype': torch.float32, 'layout': torch.strided, 'device': device(type='cuda', index=0)}
│ └ (tensor([[0.7543, 0.5797, 0.8699, ..., 0.8869, 0.0986, 0.1086],
│ [0.2815, 0.0157, 0.6210, ..., 0.2740, 0.1452, 0.249...
└ <OpOverload(op='aten._to_copy', overload='default')>
File "/usr/local/lib/python3.10/dist-packages/torch/_ops.py", line 720, in __call__
return self._op(*args, **kwargs)
│ │ │ └ {'dtype': torch.float32, 'layout': torch.strided, 'device': device(type='cuda', index=0)}
│ │ └ (tensor([[0.7543, 0.5797, 0.8699, ..., 0.8869, 0.0986, 0.1086],
│ │ [0.2815, 0.0157, 0.6210, ..., 0.2740, 0.1452, 0.249...
│ └ <built-in method of PyCapsule object at 0x7fef4bf27060>
└ <OpOverload(op='aten._to_copy', overload='default')>
torch.OutOfMemoryError: CUDA out of memory. Tried to allocate 1.10 GiB. GPU 0 has a total capacity of 79.11 GiB of which 654.94 MiB is free. Process 2868791 has 78.46 GiB memory in use. Of the allocated memory 77.60 GiB is allocated by PyTorch, and 216.33 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation. See documentation for Memory Management (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)
2024-09-17 20:29:33.113 | ERROR | MainProcess | /usr/local/lib/python3.10/dist-packages/model_navigator/commands/correctness/correctness_script.py:106 - Comparison output output__0 contains NaN
2024-09-17 20:29:34.050 | INFO | MainProcess | /usr/local/lib/python3.10/dist-packages/model_navigator/pipelines/pipeline.py:128 - 1
2024-09-17 20:29:34.051 | WARNING | MainProcess | /usr/local/lib/python3.10/dist-packages/model_navigator/pipelines/pipeline.py:131 - Command finished with ModelNavigatorUserInputError. The error is considered as external error. Usually caused by incompatibilities between the model and the target formats and/or runtimes. Please review the command output.
Traceback (most recent call last):
File "/usr/local/lib/python3.10/dist-packages/model_navigator/commands/correctness/correctness.py", line 150, in _run
context.execute_python_script(
File "/usr/local/lib/python3.10/dist-packages/model_navigator/commands/execution_context.py", line 142, in execute_python_script
self._execute_function(func, unwrapped_args, allow_failure, cmd)
File "/usr/local/lib/python3.10/dist-packages/model_navigator/commands/execution_context.py", line 156, in _execute_function
fire.Fire(func, unwrapped_args)
File "/usr/local/lib/python3.10/dist-packages/fire/core.py", line 141, in Fire
component_trace = _Fire(component, args, parsed_flag_args, context, name)
File "/usr/local/lib/python3.10/dist-packages/fire/core.py", line 466, in _Fire
component, remaining_args = _CallAndUpdateTrace(
File "/usr/local/lib/python3.10/dist-packages/fire/core.py", line 681, in _CallAndUpdateTrace
component = fn(*varargs, **kwargs)
File "/usr/local/lib/python3.10/dist-packages/model_navigator/commands/correctness/correctness_script.py", line 107, in correctness
sys.exit(1)
SystemExit: 1
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/usr/local/lib/python3.10/dist-packages/model_navigator/pipelines/pipeline.py", line 121, in _execute_unit
command_output = execution_unit.command().run(
File "/usr/local/lib/python3.10/dist-packages/model_navigator/commands/base.py", line 127, in run
output = self._run(*args, **_filter_dict_for_func(kwargs, self._run))
File "/usr/local/lib/python3.10/dist-packages/model_navigator/commands/correctness/correctness.py", line 125, in _run
with ExecutionContext(
File "/usr/local/lib/python3.10/dist-packages/model_navigator/commands/execution_context.py", line 102, in __exit__
raise ModelNavigatorUserInputError(message=message)
model_navigator.exceptions.ModelNavigatorUserInputError: 1
File "/usr/local/lib/python3.10/dist-packages/model_navigator/commands/correctness/correctness.py", line 150, in _run
context.execute_python_script(
File "/usr/local/lib/python3.10/dist-packages/model_navigator/commands/execution_context.py", line 142, in execute_python_script
self._execute_function(func, unwrapped_args, allow_failure, cmd)
File "/usr/local/lib/python3.10/dist-packages/model_navigator/commands/execution_context.py", line 156, in _execute_function
fire.Fire(func, unwrapped_args)
File "/usr/local/lib/python3.10/dist-packages/fire/core.py", line 141, in Fire
component_trace = _Fire(component, args, parsed_flag_args, context, name)
File "/usr/local/lib/python3.10/dist-packages/fire/core.py", line 466, in _Fire
component, remaining_args = _CallAndUpdateTrace(
File "/usr/local/lib/python3.10/dist-packages/fire/core.py", line 681, in _CallAndUpdateTrace
component = fn(*varargs, **kwargs)
File "/usr/local/lib/python3.10/dist-packages/model_navigator/commands/correctness/correctness_script.py", line 107, in correctness
sys.exit(1)
transformers.models.falcon.modeling_falcon.FalconForCausalLM: Validating model
torch on TorchTensorRTCompile backend FAIL
2024-09-17 20:29:34.052 | INFO | MainProcess | /usr/local/lib/python3.10/dist-packages/model_navigator/pipelines/pipeline.py:148 - Execution time: 43.40[s]
Metadata
Metadata
Assignees
Labels
No labels