Skip to content

Commit 127aa99

Browse files
committed
Merge remote-tracking branch 'origin/master' into vandonr/process3
2 parents c18fc65 + 1b5e971 commit 127aa99

File tree

585 files changed

+27288
-5694
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

585 files changed

+27288
-5694
lines changed

.azure-pipelines/ultimate-pipeline.yml

Lines changed: 47 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -143,7 +143,7 @@ variables:
143143
DD_LOGGER_DD_TAGS: test.configuration.job:$(System.JobDisplayName)
144144
DD_LOGGER_ENABLED: true
145145
DD_COLLECTOR_CPU_USAGE: true
146-
ToolVersion: 3.35.0
146+
ToolVersion: 3.36.0
147147
# .NET SDK performance optimization variables
148148
DOTNET_SKIP_FIRST_TIME_EXPERIENCE: 1
149149
DOTNET_CLI_TELEMETRY_OPTOUT: 1
@@ -1710,12 +1710,25 @@ stages:
17101710
SampleName: $(IntegrationTestSampleName)
17111711

17121712
- script: tracer\build.cmd RunIntegrationTests RunWindowsRegressionTests -Framework $(framework) --code-coverage-enabled $(CodeCoverageEnabled)
1713-
displayName: Run integration tests
1713+
displayName: Run integration tests (Tracer)
1714+
condition: ne(variables['area'], 'ASM')
1715+
env:
1716+
DD_LOGGER_DD_API_KEY: $(ddApiKey)
1717+
enable_crash_dumps: true
1718+
Filter: $(IntegrationTestFilter)
1719+
SampleName: $(IntegrationTestSampleName)
1720+
Area: $(area)
1721+
1722+
- script: tracer\build.cmd RunIntegrationTests RunWindowsRegressionTests -Framework $(framework) --code-coverage-enabled $(CodeCoverageEnabled)
1723+
displayName: Run integration tests (ASM)
1724+
condition: eq(variables['area'], 'ASM')
1725+
retryCountOnTaskFailure: 2
17141726
env:
17151727
DD_LOGGER_DD_API_KEY: $(ddApiKey)
17161728
enable_crash_dumps: true
17171729
Filter: $(IntegrationTestFilter)
17181730
SampleName: $(IntegrationTestSampleName)
1731+
Area: $(area)
17191732

17201733
# TODO: Re-enable test-agent.windows once VM images are updated (hitting rate limits for docker)
17211734
# - script: docker compose -f docker-compose.windows.yml down
@@ -2272,7 +2285,26 @@ stages:
22722285
-e SampleName=$(IntegrationTestSampleName) \
22732286
-e Area=$(area) \
22742287
IntegrationTests
2275-
displayName: docker-compose run IntegrationTests
2288+
displayName: docker-compose run IntegrationTests (Tracer)
2289+
condition: ne(variables['area'], 'ASM')
2290+
env:
2291+
DD_LOGGER_DD_API_KEY: $(ddApiKey)
2292+
baseImage: $(baseImage) # for interpolation in the docker-compose file
2293+
2294+
- script: |
2295+
docker-compose -f docker-compose.yml -p $(DockerComposeProjectName) \
2296+
run --no-deps --rm \
2297+
-e baseImage=$(baseImage) \
2298+
-e framework=$(publishTargetFramework) \
2299+
-e CodeCoverageEnabled=$(CodeCoverageEnabled) \
2300+
-e IncludeTestsRequiringDocker=false \
2301+
-e Filter=$(IntegrationTestFilter) \
2302+
-e SampleName=$(IntegrationTestSampleName) \
2303+
-e Area=$(area) \
2304+
IntegrationTests
2305+
displayName: docker-compose run IntegrationTests (ASM)
2306+
condition: eq(variables['area'], 'ASM')
2307+
retryCountOnTaskFailure: 2
22762308
env:
22772309
DD_LOGGER_DD_API_KEY: $(ddApiKey)
22782310
baseImage: $(baseImage) # for interpolation in the docker-compose file
@@ -4511,32 +4543,32 @@ stages:
45114543
sampleName: Computer01
45124544
testName: cpu-walltime
45134545
poolName: ProfilerExecBenchAgent1
4514-
framework: net7.0
4546+
framework: net10.0
45154547
Exceptions:
45164548
sampleName: ExceptionGenerator
45174549
testName: exceptions
45184550
poolName: ProfilerExecBenchAgent2
4519-
framework: net7.0
4551+
framework: net10.0
45204552
Contention:
45214553
sampleName: Computer01
45224554
testName: contention
45234555
poolName: ProfilerExecBenchAgent3
4524-
framework: net7.0
4556+
framework: net10.0
45254557
Allocations:
45264558
sampleName: Computer01
45274559
testName: allocations
45284560
poolName: ProfilerExecBenchAgent4
4529-
framework: net7.0
4561+
framework: net10.0
45304562
LiveHeap:
45314563
sampleName: Computer01
45324564
testName: liveheap
45334565
poolName: ProfilerExecBenchAgent5
4534-
framework: net7.0
4566+
framework: net10.0
45354567
GarbageCollections:
45364568
sampleName: Computer01
45374569
testName: garbagecollections
45384570
poolName: ProfilerExecBenchAgent11
4539-
framework: net7.0
4571+
framework: net10.0
45404572
pool:
45414573
name: $(poolName)
45424574
timeoutInMinutes: 30 #default value
@@ -4605,32 +4637,32 @@ stages:
46054637
sampleName: Computer01
46064638
testName: cpu-walltime
46074639
poolName: ProfilerExecBenchAgent6
4608-
framework: net7.0
4640+
framework: net10.0
46094641
Exceptions:
46104642
sampleName: ExceptionGenerator
46114643
testName: exceptions
46124644
poolName: ProfilerExecBenchAgent7
4613-
framework: net7.0
4645+
framework: net10.0
46144646
Contention:
46154647
sampleName: Computer01
46164648
testName: contention
46174649
poolName: ProfilerExecBenchAgent8
4618-
framework: net7.0
4650+
framework: net10.0
46194651
Allocations:
46204652
sampleName: Computer01
46214653
testName: allocations
46224654
poolName: ProfilerExecBenchAgent9
4623-
framework: net7.0
4655+
framework: net10.0
46244656
LiveHeap:
46254657
sampleName: Computer01
46264658
testName: liveheap
46274659
poolName: ProfilerExecBenchAgent10
4628-
framework: net7.0
4660+
framework: net10.0
46294661
GarbageCollections:
46304662
sampleName: Computer01
46314663
testName: garbagecollections
46324664
poolName: ProfilerExecBenchAgent12
4633-
framework: net7.0
4665+
framework: net10.0
46344666
pool:
46354667
name: $(poolName)
46364668
timeoutInMinutes: 30 #default value

.claude/commands/analyze-crash.md

Lines changed: 204 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,204 @@
1+
# Stack Trace Crash Analysis for dd-trace-dotnet
2+
3+
You are analyzing a crash stack trace for the dd-trace-dotnet repository. Perform a comprehensive investigation to help engineers understand and triage the crash. Focus on demystifying the crashing thread and explaining how the crash occurred.
4+
5+
## Input Processing
6+
The user has provided a crash stack trace. Parse and analyze it systematically.
7+
8+
## Analysis Workflow
9+
10+
## GitHub Link Generation
11+
12+
When referencing files in the dd-trace-dotnet repository, always provide clickable GitHub links in addition to local paths:
13+
14+
**Format**:
15+
```
16+
[filename:line](https://github.com/DataDog/dd-trace-dotnet/blob/master/path/to/file#Lline)
17+
```
18+
19+
**Examples**:
20+
- Single line: `[cor_profiler.cpp:1430](https://github.com/DataDog/dd-trace-dotnet/blob/master/tracer/src/Datadog.Tracer.Native/cor_profiler.cpp#L1430)`
21+
- Line range: `[rejit_handler.cpp:263-296](https://github.com/DataDog/dd-trace-dotnet/blob/master/tracer/src/Datadog.Tracer.Native/rejit_handler.cpp#L263-L296)`
22+
23+
**When to use**:
24+
- All file:line references in Executive Summary
25+
- Stack Trace Classification table "Location" column (when file paths are available)
26+
- All code context section headings
27+
- Related Code section
28+
29+
**Path construction**:
30+
- Base URL: `https://github.com/DataDog/dd-trace-dotnet/blob/master/`
31+
- Append the repository-relative path (strip `C:\Users\...\dd-trace-dotnet\` or similar prefixes, and convert backslashes to forward slashes)
32+
- Add `#L{lineNumber}` for single line or `#L{start}-L{end}` for ranges
33+
- Example: Local path `C:\Users\...\dd-trace-dotnet\tracer\src\Datadog.Tracer.Native\cor_profiler.cpp:1430` becomes `[cor_profiler.cpp:1430](https://github.com/DataDog/dd-trace-dotnet/blob/master/tracer/src/Datadog.Tracer.Native/cor_profiler.cpp#L1430)`
34+
35+
### Phase 1: Parse & Classify Stack Frames
36+
37+
Extract all stack frames and classify each one:
38+
39+
**Classification Categories:**
40+
- **CLR Runtime**: Functions like `ReJitManager::`, `ProfToEEInterfaceImpl::`, `ICorProfiler*`, CLR DLL references (clr.dll, coreclr.dll, mscorwks.dll)
41+
- **dd-trace-dotnet Native**: Namespace patterns (`trace::`, `debugger::`, `fault_tolerant::`, `iast::`), or paths containing `Datadog.Tracer.Native`
42+
- **dd-trace-dotnet Managed**: References to `Datadog.Trace.dll!`
43+
- **External/Application**: Everything else (customer code, third-party libraries, framework code)
44+
45+
Create a classification table showing frame number, type, location, and brief description.
46+
47+
### Phase 2: Locate dd-trace-dotnet Code
48+
49+
For each dd-trace-dotnet frame that includes file path and line number:
50+
51+
1. **Extract and normalize path**: Remove build-specific prefixes
52+
- Strip: `c:\mnt\`, `D:\a\_work\1\s\`, `/home/runner/work/dd-trace-dotnet/`, etc.
53+
- Result should be relative to the repo root, e.g. `tracer/src/Datadog.Tracer.Native/{filename}`
54+
55+
2. **Find file in repository**:
56+
- First try direct path match
57+
- If not found, use Glob tool with pattern: `**/Datadog.Tracer.Native/**/{filename}`
58+
- If still not found, try just the filename: `**/{filename}`
59+
60+
3. **Read code with context**:
61+
- Find the function containing the crash line
62+
- Include 10-15 lines before the crash line
63+
- Mark or highlight the actual crash line
64+
- Include 5-10 lines after
65+
- Show enough context to understand what the code is doing
66+
67+
### Phase 3: Code Context Extraction
68+
69+
For each critical dd-trace-dotnet frame:
70+
1. Show the function signature
71+
2. Include surrounding code (as described in Phase 2, step 3)
72+
3. **Clearly mark the crash line** with a comment like `// >>> CRASH POINT <<<`
73+
4. Explain what this code does
74+
5. Explain why it crashed based on the evidence
75+
76+
Format each code section as:
77+
```
78+
### Frame X: {function_name} ([{file}:{line}](GitHub link))
79+
80+
​```cpp
81+
// {file}:{start_line}-{end_line}
82+
{code with crash line marked}
83+
​```
84+
85+
**Analysis**: {Explanation of what this code does and why it failed}
86+
```
87+
88+
### Phase 4: Reconstruct Crash Flow
89+
90+
Build a narrative explaining the execution flow leading to the crash. This is the primary goal of the analysis - to help engineers understand what happened:
91+
92+
1. **Entry point**: Where did execution start? (e.g., profiler callback, background thread loop)
93+
2. **Key operations**: What was the code trying to do?
94+
3. **Critical transitions**: Where did control flow between components?
95+
4. **Failure point**: Where and why did it crash?
96+
5. **Crash type**: Describe what type of crash this is (e.g., null pointer dereference, access violation, invalid module reference, race condition, etc.)
97+
98+
Write this as a clear, step-by-step narrative that someone unfamiliar with the code can follow. Focus on explaining HOW the crash happened based on the evidence in the stack trace and code, without prescribing a specific fix.
99+
100+
### Phase 5: Identify Related Code
101+
102+
Use Bash tool with git commands to find relevant context, focusing on commits associated with PRs:
103+
104+
1. Check recent commits to the affected files: `git log --oneline -10 {file}`
105+
2. Search for related changes: `git log --grep="crash" --grep="fix" --oneline -20` (use keywords relevant to the crash area)
106+
3. For each relevant commit, check if it's associated with a PR:
107+
- Look for PR numbers in commit messages (e.g., "(#1234)" or "PR #1234")
108+
- If found, construct PR link: `https://github.com/DataDog/dd-trace-dotnet/pull/{number}`
109+
4. Prioritize commits with PR associations - these have more context
110+
5. Look for similar code patterns in other files that might provide context
111+
112+
**Focus on commits with PR links** - PRs provide valuable context including descriptions, discussions, and rationale that individual commits lack.
113+
114+
## Output Format
115+
116+
Generate a well-formatted markdown document with these sections:
117+
118+
```markdown
119+
# Crash Analysis Report
120+
**Generated**: {ISO 8601 timestamp}
121+
122+
## Executive Summary
123+
{2-3 sentence summary of what crashed and where in the code. Focus on demystifying the crash location and describing what the crashing thread was doing.}
124+
125+
## Stack Trace Classification
126+
127+
### Crashed Thread: #{thread_number}
128+
129+
| # | Type | Location | Description |
130+
|---|------|----------|-------------|
131+
| 0 | {type} | {function/location} | {brief description} |
132+
| 1 | {type} | {function/location} | {brief description} |
133+
| ... | ... | ... | ... |
134+
135+
{If multiple threads provided, note other interesting threads but focus on crashed thread}
136+
137+
## Code Context
138+
139+
{For each critical dd-trace-dotnet frame, show code with analysis}
140+
141+
### Frame X: {function} ([{file}:{line}](GitHub link))
142+
143+
​```cpp
144+
{code snippet with crash line marked}
145+
​```
146+
147+
**Analysis**: {Explanation of what this code does and why it failed}
148+
149+
## Crash Flow Reconstruction
150+
151+
{Step-by-step narrative explaining the execution flow from start to crash point}
152+
153+
**Crash Type**: {Description of what type of crash this is - e.g., null pointer dereference, access violation, invalid module reference, race condition}
154+
155+
**How it happened**: {Clear explanation of the sequence of events that led to the crash based on the stack trace and code analysis}
156+
157+
## Related Code
158+
159+
**Relevant PRs and commits**:
160+
- [#{PR number}](https://github.com/DataDog/dd-trace-dotnet/pull/{PR number}): {PR title/description} - {why relevant}
161+
- [#{PR number}](https://github.com/DataDog/dd-trace-dotnet/pull/{PR number}): {PR title/description} - {why relevant}
162+
163+
{Only include commits without PR associations if they are particularly relevant}
164+
165+
**Related code locations**:
166+
- [{file}:{line}](GitHub link) - {description}
167+
- [{file}:{line}](GitHub link) - {description}
168+
169+
## Additional Context
170+
171+
{Any additional useful context about the application environment, runtime, features in use, or relevant background}
172+
173+
---
174+
*Analysis generated by Claude Code /analyze-crash command*
175+
*This analysis is intended to help understand and triage the crash. Engineers should review this analysis to determine if a code fix is needed.*
176+
```
177+
178+
## Output File Management
179+
180+
1. **Create output directory**:
181+
- On Windows: Use `powershell.exe -NoProfile -Command 'New-Item -ItemType Directory -Force -Path (Join-Path $env:USERPROFILE ".claude\analysis") | Select-Object -ExpandProperty FullName'`
182+
- On Linux/Mac: Use `mkdir -p ~/.claude/analysis && echo ~/.claude/analysis`
183+
- **IMPORTANT**: On Windows, you MUST use single quotes around the PowerShell command to prevent bash from interpreting `$env:USERPROFILE`
184+
2. **Generate filename**: Use format `crash-analysis-{YYYYMMDD-HHMMSS}.md` (e.g., `crash-analysis-20250316-143022.md`)
185+
3. **Save file**: Write the markdown analysis to the file
186+
4. **Return path**: Tell the user the full path where the analysis was saved
187+
188+
## Important Guidelines
189+
190+
- **Focus on triage and understanding**: The goal is to help engineers understand HOW the crash happened, not to prescribe specific fixes
191+
- **Describe crash types**: It's okay to identify what type of crash this is (e.g., null pointer, race condition, invalid reference), but don't match against fixed "patterns"
192+
- **End at explanation**: Phase 4 (Crash Flow Reconstruction) should provide a clear explanation of how the crash occurred. Engineers will then decide on fixes
193+
- **No fix suggestions**: Do not suggest code changes, patches, or specific implementation fixes. Focus on analysis only
194+
- **Handle path variations gracefully**: Stack traces from different build environments will have different path prefixes
195+
- **Continue with missing information**: If a file can't be located, note this but continue the analysis with available information
196+
- **Focus on the crashed thread**: If multiple threads are provided, focus primarily on the crashed thread but mention other relevant threads
197+
- **Be concise but thorough**: Provide enough detail to understand the issue without unnecessary verbosity
198+
- **Always include GitHub links**: Every file:line reference should have a clickable GitHub link to the master branch
199+
- **Prefer PR links over commits**: When git log finds relevant commits, prioritize those associated with PRs. Extract PR numbers from commit messages (e.g., "(#1234)") and link to the PR: `https://github.com/DataDog/dd-trace-dotnet/pull/{number}`. Only include standalone commit links if particularly relevant and not part of a PR
200+
- **Mark uncertainties**: If something is unclear or speculative, explicitly state this
201+
202+
## Now Analyze
203+
204+
Parse the stack trace provided by the user and follow the workflow above to generate a comprehensive crash analysis.

0 commit comments

Comments
 (0)