diff --git a/ci/builder/requirements.txt b/ci/builder/requirements.txt index 427966f1073da..4dc83d0d84228 100644 --- a/ci/builder/requirements.txt +++ b/ci/builder/requirements.txt @@ -36,6 +36,7 @@ pdoc==15.0.3 # We can revert back to standard pg8000 versions once https://github.com/tlocke/pg8000/pull/161 is released pg8000@git+https://github.com/tlocke/pg8000@46c00021ade1d19466b07ed30392386c5f0a6b8e prettytable==3.16.0 +protobuf==5.29.3 psutil==7.0.0 # psycopg 3.2.8 causes Scalability test failures psycopg==3.2.7 diff --git a/ci/deploy_mz-debug/README.md b/ci/deploy_mz-debug/README.md index 17b045b7735a5..b653bbc84536d 100644 --- a/ci/deploy_mz-debug/README.md +++ b/ci/deploy_mz-debug/README.md @@ -22,6 +22,7 @@ You can manually deploy by following steps 1-2 above and running the following c ```bash # Set a tag version. export BUILDKITE_TAG=mz-debug-vx.y.z +export AWS_PROFILE=... # macOS bin/pyactivate -m ci.deploy_mz-debug.macos @@ -29,6 +30,7 @@ bin/pyactivate -m ci.deploy_mz-debug.macos # Linux bin/pyactivate -m ci.deploy_mz-debug.linux ``` +where AWS_PROFILE is the profile with access to the materialize-binaries S3 bucket in the Materialize Core account. 
**Important Notes:** - When running on macOS, modify `linux.py` to use `target` instead of `target-xcompile` diff --git a/ci/test/lint-main/checks/check-copyright.sh b/ci/test/lint-main/checks/check-copyright.sh index 51ab97372dad4..6402e573994c4 100755 --- a/ci/test/lint-main/checks/check-copyright.sh +++ b/ci/test/lint-main/checks/check-copyright.sh @@ -70,6 +70,7 @@ copyright_files=$(grep -vE \ -e '^src/repr/src/adt/snapshots/.*' \ -e '^src/environmentd/tests/testdata/timezones/.*\.csv' \ -e '^test/fivetran-destination/.*\/00-README$' \ + -e '^misc/python/materialize/visualize_pprof_profile/(profile\.proto|profile_pb2\.py)' \ <<< "$files" ) diff --git a/misc/python/materialize/visualize_pprof_profile/README.md b/misc/python/materialize/visualize_pprof_profile/README.md new file mode 100644 index 0000000000000..1609d936bec5e --- /dev/null +++ b/misc/python/materialize/visualize_pprof_profile/README.md @@ -0,0 +1,53 @@ +# Materialize pprof Profile Visualization Tool + +This tool allows you to symbolize and visualize pprof profiles offline using debug symbols stored in Materialize's S3 bucket. + +## Prerequisites + +- Docker installed and running on your system +- AWS credentials with access to the materialize-debuginfo S3 bucket in the Materialize Core account + +## Setup + +1. Set up your AWS credentials: +```bash +export AWS_PROFILE=<your-aws-profile> +``` +Where `<your-aws-profile>` is your AWS profile with access to the materialize-debuginfo S3 bucket in the Materialize Core account. + +## Usage + +```bash +python3 visualize_pprof_profile.py <profile_path> [--port PORT] +``` + +### Arguments + +- `<profile_path>`: Path to your pprof.gz profile file (required) +- `--port`: Port number to run the pprof web UI (optional, defaults to 8080) + +## How It Works + +1. The tool reads your pprof profile and extracts the build ID +2. It automatically fetches the corresponding debug symbols from S3 +3. Creates a Docker container with the necessary tools (pprof, graphviz) +4. 
Starts a web UI where you can analyze the profile + +## Important Notes + +- The web UI will be available at `http://localhost:<PORT>` (default: http://localhost:8080) +- Initial symbolization might take a few moments - wait until you see the "Serving web UI on http://localhost:8080" message +- The Docker container continues running even after you quit the program +- The profile.proto file is sourced from: https://raw.githubusercontent.com/google/pprof/main/proto/profile.proto + +## Example + +```bash +# Set up AWS credentials +export AWS_PROFILE=mz-cloud-production-engineering-on-call + +# Run the visualization tool +python3 visualize_pprof_profile.py /path/to/your/profile.pprof.gz +``` + +After running the command, open your web browser and navigate to http://localhost:8080 to view the profile visualization. diff --git a/misc/python/materialize/visualize_pprof_profile/profile.proto b/misc/python/materialize/visualize_pprof_profile/profile.proto new file mode 100644 index 0000000000000..9cb816e663889 --- /dev/null +++ b/misc/python/materialize/visualize_pprof_profile/profile.proto @@ -0,0 +1,233 @@ +// Copyright 2016 Google Inc. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Profile is a common stacktrace profile format. +// +// Measurements represented with this format should follow the +// following conventions: +// +// - Consumers should treat unset optional fields as if they had been +// set with their default value. 
+// +// - When possible, measurements should be stored in "unsampled" form +// that is most useful to humans. There should be enough +// information present to determine the original sampled values. +// +// - On-disk, the serialized proto must be gzip-compressed. +// +// - The profile is represented as a set of samples, where each sample +// references a sequence of locations, and where each location belongs +// to a mapping. +// - There is a N->1 relationship from sample.location_id entries to +// locations. For every sample.location_id entry there must be a +// unique Location with that id. +// - There is an optional N->1 relationship from locations to +// mappings. For every nonzero Location.mapping_id there must be a +// unique Mapping with that id. + +syntax = "proto3"; + +package perftools.profiles; + +option java_package = "com.google.perftools.profiles"; +option java_outer_classname = "ProfileProto"; + +message Profile { + // A description of the samples associated with each Sample.value. + // For a cpu profile this might be: + // [["cpu","nanoseconds"]] or [["wall","seconds"]] or [["syscall","count"]] + // For a heap profile, this might be: + // [["allocations","count"], ["space","bytes"]], + // If one of the values represents the number of events represented + // by the sample, by convention it should be at index 0 and use + // sample_type.unit == "count". + repeated ValueType sample_type = 1; + // The set of samples recorded in this profile. + repeated Sample sample = 2; + // Mapping from address ranges to the image/binary/library mapped + // into that address range. mapping[0] will be the main binary. + repeated Mapping mapping = 3; + // Locations referenced by samples. + repeated Location location = 4; + // Functions referenced by locations. + repeated Function function = 5; + // A common table for strings referenced by various messages. + // string_table[0] must always be "". 
+ repeated string string_table = 6; + // frames with Function.function_name fully matching the following + // regexp will be dropped from the samples, along with their successors. + int64 drop_frames = 7; // Index into string table. + // frames with Function.function_name fully matching the following + // regexp will be kept, even if it matches drop_frames. + int64 keep_frames = 8; // Index into string table. + + // The following fields are informational, do not affect + // interpretation of results. + + // Time of collection (UTC) represented as nanoseconds past the epoch. + int64 time_nanos = 9; + // Duration of the profile, if a duration makes sense. + int64 duration_nanos = 10; + // The kind of events between sampled occurrences. + // e.g [ "cpu","cycles" ] or [ "heap","bytes" ] + ValueType period_type = 11; + // The number of events between sampled occurrences. + int64 period = 12; + // Free-form text associated with the profile. The text is displayed as is + // to the user by the tools that read profiles (e.g. by pprof). This field + // should not be used to store any machine-readable information, it is only + // for human-friendly content. The profile must stay functional if this field + // is cleaned. + repeated int64 comment = 13; // Indices into string table. + // Index into the string table of the type of the preferred sample + // value. If unset, clients should default to the last sample value. + int64 default_sample_type = 14; + // Documentation link for this profile. The URL must be absolute, + // e.g., http://pprof.example.com/cpu-profile.html + // + // The URL may be missing if the profile was generated by older code or code + // that did not bother to supply a link. + int64 doc_url = 15; // Index into string table. +} + +// ValueType describes the semantics and measurement units of a value. +message ValueType { + int64 type = 1; // Index into string table. + int64 unit = 2; // Index into string table. 
+} + +// Each Sample records values encountered in some program +// context. The program context is typically a stack trace, perhaps +// augmented with auxiliary information like the thread-id, some +// indicator of a higher level request being handled etc. +message Sample { + // The ids recorded here correspond to a Profile.location.id. + // The leaf is at location_id[0]. + repeated uint64 location_id = 1; + // The type and unit of each value is defined by the corresponding + // entry in Profile.sample_type. All samples must have the same + // number of values, the same as the length of Profile.sample_type. + // When aggregating multiple samples into a single sample, the + // result has a list of values that is the element-wise sum of the + // lists of the originals. + repeated int64 value = 2; + // label includes additional context for this sample. It can include + // things like a thread id, allocation size, etc. + // + // NOTE: While possible, having multiple values for the same label key is + // strongly discouraged and should never be used. Most tools (e.g. pprof) do + // not have good (or any) support for multi-value labels. And an even more + // discouraged case is having a string label and a numeric label of the same + // name on a sample. Again, possible to express, but should not be used. + repeated Label label = 3; +} + +message Label { + // Index into string table. An annotation for a sample (e.g. + // "allocation_size") with an associated value. + // Keys with "pprof::" prefix are reserved for internal use by pprof. + int64 key = 1; + + // At most one of the following must be present + int64 str = 2; // Index into string table + int64 num = 3; + + // Should only be present when num is present. + // Specifies the units of num. + // Use arbitrary string (for example, "requests") as a custom count unit. + // If no unit is specified, consumer may apply heuristic to deduce the unit. 
+ // Consumers may also interpret units like "bytes" and "kilobytes" as memory + // units and units like "seconds" and "nanoseconds" as time units, + // and apply appropriate unit conversions to these. + int64 num_unit = 4; // Index into string table +} + +message Mapping { + // Unique nonzero id for the mapping. + uint64 id = 1; + // Address at which the binary (or DLL) is loaded into memory. + uint64 memory_start = 2; + // The limit of the address range occupied by this mapping. + uint64 memory_limit = 3; + // Offset in the binary that corresponds to the first mapped address. + uint64 file_offset = 4; + // The object this entry is loaded from. This can be a filename on + // disk for the main binary and shared libraries, or virtual + // abstractions like "[vdso]". + int64 filename = 5; // Index into string table + // A string that uniquely identifies a particular program version + // with high probability. E.g., for binaries generated by GNU tools, + // it could be the contents of the .note.gnu.build-id field. + int64 build_id = 6; // Index into string table + + // The following fields indicate the resolution of symbolic info. + bool has_functions = 7; + bool has_filenames = 8; + bool has_line_numbers = 9; + bool has_inline_frames = 10; +} + +// Describes function and line table debug information. +message Location { + // Unique nonzero id for the location. A profile could use + // instruction addresses or any integer sequence as ids. + uint64 id = 1; + // The id of the corresponding profile.Mapping for this location. + // It can be unset if the mapping is unknown or not applicable for + // this profile type. + uint64 mapping_id = 2; + // The instruction address for this location, if available. It + // should be within [Mapping.memory_start...Mapping.memory_limit] + // for the corresponding mapping. A non-leaf address may be in the + // middle of a call instruction. It is up to display tools to find + // the beginning of the instruction if necessary. 
+ uint64 address = 3; + // Multiple line indicates this location has inlined functions, + // where the last entry represents the caller into which the + // preceding entries were inlined. + // + // E.g., if memcpy() is inlined into printf: + // line[0].function_name == "memcpy" + // line[1].function_name == "printf" + repeated Line line = 4; + // Provides an indication that multiple symbols map to this location's + // address, for example due to identical code folding by the linker. In that + // case the line information above represents one of the multiple + // symbols. This field must be recomputed when the symbolization state of the + // profile changes. + bool is_folded = 5; +} + +message Line { + // The id of the corresponding profile.Function for this line. + uint64 function_id = 1; + // Line number in source code. + int64 line = 2; + // Column number in source code. + int64 column = 3; +} + +message Function { + // Unique nonzero id for the function. + uint64 id = 1; + // Name of the function, in human-readable form if available. + int64 name = 2; // Index into string table + // Name of the function, as identified by the system. + // For instance, it can be a C++ mangled name. + int64 system_name = 3; // Index into string table + // Source file containing the function. + int64 filename = 4; // Index into string table + // Line number in source file. + int64 start_line = 5; +} diff --git a/misc/python/materialize/visualize_pprof_profile/profile_pb2.py b/misc/python/materialize/visualize_pprof_profile/profile_pb2.py new file mode 100644 index 0000000000000..1f0bb5365f89f --- /dev/null +++ b/misc/python/materialize/visualize_pprof_profile/profile_pb2.py @@ -0,0 +1,49 @@ +# type: ignore +# Generated by the protocol buffer compiler. DO NOT EDIT! 
+# NO CHECKED-IN PROTOBUF GENCODE +# source: profile.proto +# Protobuf Python Version: 5.29.3 +"""Generated protocol buffer code.""" +from google.protobuf import descriptor as _descriptor +from google.protobuf import descriptor_pool as _descriptor_pool +from google.protobuf import runtime_version as _runtime_version +from google.protobuf import symbol_database as _symbol_database +from google.protobuf.internal import builder as _builder + +_runtime_version.ValidateProtobufRuntimeVersion( + _runtime_version.Domain.PUBLIC, 5, 29, 3, "", "profile.proto" +) +# @@protoc_insertion_point(imports) + +_sym_db = _symbol_database.Default() + + +DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile( + b'\n\rprofile.proto\x12\x12perftools.profiles"\xe6\x03\n\x07Profile\x12\x32\n\x0bsample_type\x18\x01 \x03(\x0b\x32\x1d.perftools.profiles.ValueType\x12*\n\x06sample\x18\x02 \x03(\x0b\x32\x1a.perftools.profiles.Sample\x12,\n\x07mapping\x18\x03 \x03(\x0b\x32\x1b.perftools.profiles.Mapping\x12.\n\x08location\x18\x04 \x03(\x0b\x32\x1c.perftools.profiles.Location\x12.\n\x08\x66unction\x18\x05 \x03(\x0b\x32\x1c.perftools.profiles.Function\x12\x14\n\x0cstring_table\x18\x06 \x03(\t\x12\x13\n\x0b\x64rop_frames\x18\x07 \x01(\x03\x12\x13\n\x0bkeep_frames\x18\x08 \x01(\x03\x12\x12\n\ntime_nanos\x18\t \x01(\x03\x12\x16\n\x0e\x64uration_nanos\x18\n \x01(\x03\x12\x32\n\x0bperiod_type\x18\x0b \x01(\x0b\x32\x1d.perftools.profiles.ValueType\x12\x0e\n\x06period\x18\x0c \x01(\x03\x12\x0f\n\x07\x63omment\x18\r \x03(\x03\x12\x1b\n\x13\x64\x65\x66\x61ult_sample_type\x18\x0e \x01(\x03\x12\x0f\n\x07\x64oc_url\x18\x0f \x01(\x03"\'\n\tValueType\x12\x0c\n\x04type\x18\x01 \x01(\x03\x12\x0c\n\x04unit\x18\x02 \x01(\x03"V\n\x06Sample\x12\x13\n\x0blocation_id\x18\x01 \x03(\x04\x12\r\n\x05value\x18\x02 \x03(\x03\x12(\n\x05label\x18\x03 \x03(\x0b\x32\x19.perftools.profiles.Label"@\n\x05Label\x12\x0b\n\x03key\x18\x01 \x01(\x03\x12\x0b\n\x03str\x18\x02 \x01(\x03\x12\x0b\n\x03num\x18\x03 
\x01(\x03\x12\x10\n\x08num_unit\x18\x04 \x01(\x03"\xdd\x01\n\x07Mapping\x12\n\n\x02id\x18\x01 \x01(\x04\x12\x14\n\x0cmemory_start\x18\x02 \x01(\x04\x12\x14\n\x0cmemory_limit\x18\x03 \x01(\x04\x12\x13\n\x0b\x66ile_offset\x18\x04 \x01(\x04\x12\x10\n\x08\x66ilename\x18\x05 \x01(\x03\x12\x10\n\x08\x62uild_id\x18\x06 \x01(\x03\x12\x15\n\rhas_functions\x18\x07 \x01(\x08\x12\x15\n\rhas_filenames\x18\x08 \x01(\x08\x12\x18\n\x10has_line_numbers\x18\t \x01(\x08\x12\x19\n\x11has_inline_frames\x18\n \x01(\x08"v\n\x08Location\x12\n\n\x02id\x18\x01 \x01(\x04\x12\x12\n\nmapping_id\x18\x02 \x01(\x04\x12\x0f\n\x07\x61\x64\x64ress\x18\x03 \x01(\x04\x12&\n\x04line\x18\x04 \x03(\x0b\x32\x18.perftools.profiles.Line\x12\x11\n\tis_folded\x18\x05 \x01(\x08"9\n\x04Line\x12\x13\n\x0b\x66unction_id\x18\x01 \x01(\x04\x12\x0c\n\x04line\x18\x02 \x01(\x03\x12\x0e\n\x06\x63olumn\x18\x03 \x01(\x03"_\n\x08\x46unction\x12\n\n\x02id\x18\x01 \x01(\x04\x12\x0c\n\x04name\x18\x02 \x01(\x03\x12\x13\n\x0bsystem_name\x18\x03 \x01(\x03\x12\x10\n\x08\x66ilename\x18\x04 \x01(\x03\x12\x12\n\nstart_line\x18\x05 \x01(\x03\x42-\n\x1d\x63om.google.perftools.profilesB\x0cProfileProtob\x06proto3' +) + +_globals = globals() +_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) +_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, "profile_pb2", _globals) +if not _descriptor._USE_C_DESCRIPTORS: + _globals["DESCRIPTOR"]._loaded_options = None + _globals["DESCRIPTOR"]._serialized_options = ( + b"\n\035com.google.perftools.profilesB\014ProfileProto" + ) + _globals["_PROFILE"]._serialized_start = 38 + _globals["_PROFILE"]._serialized_end = 524 + _globals["_VALUETYPE"]._serialized_start = 526 + _globals["_VALUETYPE"]._serialized_end = 565 + _globals["_SAMPLE"]._serialized_start = 567 + _globals["_SAMPLE"]._serialized_end = 653 + _globals["_LABEL"]._serialized_start = 655 + _globals["_LABEL"]._serialized_end = 719 + _globals["_MAPPING"]._serialized_start = 722 + _globals["_MAPPING"]._serialized_end = 943 + 
_globals["_LOCATION"]._serialized_start = 945 + _globals["_LOCATION"]._serialized_end = 1063 + _globals["_LINE"]._serialized_start = 1065 + _globals["_LINE"]._serialized_end = 1122 + _globals["_FUNCTION"]._serialized_start = 1124 + _globals["_FUNCTION"]._serialized_end = 1219 +# @@protoc_insertion_point(module_scope) diff --git a/misc/python/materialize/visualize_pprof_profile/visualize_pprof_profile.py b/misc/python/materialize/visualize_pprof_profile/visualize_pprof_profile.py new file mode 100644 index 0000000000000..a7bb54cf78b53 --- /dev/null +++ b/misc/python/materialize/visualize_pprof_profile/visualize_pprof_profile.py @@ -0,0 +1,195 @@ +#!/usr/bin/env python3 + +# Copyright Materialize, Inc. and contributors. All rights reserved. +# +# Use of this software is governed by the Business Source License +# included in the LICENSE file at the root of this repository. +# +# As of the Change Date specified in that file, in accordance with +# the Business Source License, use of this software will be governed +# by the Apache License, Version 2.0. 
import argparse
import gzip
import subprocess
import zlib
from pathlib import Path

from materialize import ui
from materialize.ci_util.upload_debug_symbols_to_polarsignals import (
    fetch_debug_symbols_from_s3,
)
from materialize.visualize_pprof_profile.profile_pb2 import (
    Profile,  # type: ignore  # generated from profile.proto
)


def parse_args() -> argparse.Namespace:
    """Parse the command line: a required profile path and an optional port."""
    parser = argparse.ArgumentParser(
        description="Symbolize and view pprof profiles offline using debug symbols from S3"
    )
    parser.add_argument(
        "profile_path",
        type=Path,
        help="Path to the pprof profile file",
    )
    parser.add_argument(
        "--port",
        type=int,
        default=8080,
        help="Port to run the pprof web UI on (default: 8080)",
    )
    return parser.parse_args()


def get_build_id(pprof_path: Path) -> str:
    """Return the first build ID recorded in a gzip-compressed pprof profile.

    Each mapping's ``build_id`` field is an index into the profile's string
    table. Index 0 is treated as "unset", as in the original truthiness check.

    Raises:
        ui.UIError: if the file cannot be read as gzip data, or if no mapping
            carries a usable build ID.
    """
    try:
        with gzip.open(pprof_path, "rb") as f:
            data = f.read()
    except (OSError, EOFError, zlib.error) as e:
        raise ui.UIError(
            f"Failed to read {pprof_path} as a gzip-compressed pprof profile: {e}"
        ) from e

    profile = Profile()
    profile.ParseFromString(data)

    string_table = profile.string_table
    # Range-check every index: a corrupt profile could otherwise raise
    # IndexError, and a negative int64 would silently wrap around to the end
    # of the string table. Empty strings are not usable build IDs either.
    build_ids = [
        string_table[mapping.build_id]
        for mapping in profile.mapping
        if 0 < mapping.build_id < len(string_table)
        and string_table[mapping.build_id]
    ]

    if not build_ids:
        raise ui.UIError(
            f"No build ID found in {pprof_path}. Please check if the profile is valid."
        )
    # We assume that the first build ID is the one we've stored in S3.
    return build_ids[0]


def create_pprof_container(container_name: str) -> str:
    """Start a detached golang container that will later host pprof.

    Any existing container with the same name is force-removed first. The
    container runs ``tail -f /dev/null`` so it stays alive for later
    ``docker exec`` / ``docker cp`` calls.

    Returns:
        The container name that was passed in.

    Raises:
        ui.UIError: if ``docker run`` exits with a non-zero status.
    """
    try:
        # Remove existing container if it exists; failure here (e.g. no such
        # container) is expected and deliberately ignored.
        subprocess.run(["docker", "rm", "-f", container_name], check=False)

        command = [
            "docker",
            "run",
            "-d",  # Run in detached mode
            "-e",
            # Set the path to the binary and debug info used for symbolization
            "PPROF_BINARY_PATH=/tmp",
            "--name",
            container_name,
            "--network",
            "host",
            "golang:latest",  # Official Golang image which includes an outdated version of pprof
            "tail",
            "-f",
            "/dev/null",  # Keep container running
        ]
        subprocess.run(command, check=True)
        return container_name
    except subprocess.CalledProcessError as e:
        raise ui.UIError(f"Error creating docker container: {e}") from e


def _docker_exec(container_name: str, *args: str) -> None:
    """Run a command inside the container; raise CalledProcessError on failure."""
    subprocess.run(["docker", "exec", container_name, *args], check=True)


def run_pprof_ui(
    profile_path: Path,
    binary_path: Path,
    debug_info_path: Path,
    build_id: str,
    port: int,
) -> None:
    """Run the pprof web UI in a Docker container with the symbolized profile.

    Creates the ``pprof-viewer`` container, copies the profile, binary and
    debug info into it, installs graphviz and the latest pprof, and then runs
    ``pprof -http`` in the foreground until it exits. The container itself is
    not stopped or removed by this function.

    Raises:
        RuntimeError: if any docker command after container creation fails.
        ui.UIError: if the container cannot be created.
    """
    try:
        container_name = "pprof-viewer"

        # Create and start container
        create_pprof_container(container_name)

        # Create build_id directory in container first
        _docker_exec(container_name, "mkdir", "-p", f"/tmp/{build_id}")

        # Copy files into container
        for path, dest in [
            (profile_path, "/tmp/profile.pprof"),
            # We need this specific path and file format in order for pprof to read the binary/debug info
            (binary_path, f"/tmp/{build_id}/binary"),
            (debug_info_path, "/tmp/binary.debug"),
        ]:
            subprocess.run(
                ["docker", "cp", str(path), f"{container_name}:{dest}"], check=True
            )

        # Install graphviz in the container
        _docker_exec(container_name, "apt-get", "update")
        _docker_exec(container_name, "apt-get", "install", "-y", "graphviz")

        # Install pprof in the container
        # Although the official golang image includes a version of pprof,
        # there exists a bug where it cannot read the binary/debug info with error "build-id mismatch" and
        # will just show the profile as a flat profile. Thus we install the latest version of pprof.
        _docker_exec(
            container_name, "go", "install", "github.com/google/pprof@latest"
        )

        print(f"Starting pprof web UI in Docker on http://localhost:{port}")
        # Run the freshly installed pprof (not the one bundled with the image).
        _docker_exec(
            container_name,
            "/go/bin/pprof",
            "-http",
            f"localhost:{port}",
            "/tmp/profile.pprof",
        )

    except subprocess.CalledProcessError as e:
        raise RuntimeError(f"Failed to run pprof: {e}") from e


def main() -> None:
    """Entry point: resolve the build ID, fetch symbols, and serve the pprof UI."""
    args = parse_args()

    if not args.profile_path.exists():
        raise ui.UIError(f"Profile file not found: {args.profile_path}")

    # Get build ID from pprof file
    build_id = get_build_id(args.profile_path)
    print(f"Found profile for {args.profile_path} with build ID {build_id}")

    # Fetch debug symbols from S3
    binary_path, debug_path = fetch_debug_symbols_from_s3(build_id)
    binary_path = Path(binary_path)
    debug_path = Path(debug_path)
    try:
        # Run pprof UI
        run_pprof_ui(args.profile_path, binary_path, debug_path, build_id, args.port)
    finally:
        # Clean up the fetched files whether or not the UI ran successfully.
        binary_path.unlink(missing_ok=True)
        debug_path.unlink(missing_ok=True)


if __name__ == "__main__":
    main()