Skip to content

Commit 9d0cb3c

Browse files
authored
Merge pull request #1989 from tyrielv/tyrielv/fix-orphan-lock-pid-race
Fix PID-recycling race in orphan-lock detection
2 parents 7bd0c2d + abe5da6 commit 9d0cb3c

7 files changed

Lines changed: 230 additions & 10 deletions

File tree

GVFS/GVFS.Common/GVFSLock.cs

Lines changed: 61 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,20 @@ public bool TryAcquireLockForExternalRequestor(
4141

4242
existingExternalHolder = null;
4343

44+
// Capture the requestor's process start time so we can later distinguish the
45+
// genuine holder from an unrelated process that happens to be reusing the same
46+
// PID after the holder exits. If we cannot read the start time (e.g. permission
47+
// failure on OpenProcess for a different-integrity caller) we still accept the
48+
// lock and fall back to the legacy PID-only orphan check; record the fallback in
49+
// telemetry so we can spot if it becomes common.
50+
long? requestorStartTime = GVFSPlatform.Instance.TryGetActiveProcessStartTime(requestor.PID, out long startTime)
51+
? startTime
52+
: (long?)null;
53+
if (requestorStartTime == null)
54+
{
55+
metadata.Add("StartTimeUnavailable", true);
56+
}
57+
4458
try
4559
{
4660
lock (this.acquisitionLock)
@@ -65,7 +79,7 @@ public bool TryAcquireLockForExternalRequestor(
6579
metadata.Add("Result", "Accepted");
6680
eventLevel = EventLevel.Informational;
6781

68-
this.currentLockHolder.AcquireForExternalRequestor(requestor);
82+
this.currentLockHolder.AcquireForExternalRequestor(requestor, requestorStartTime);
6983
this.Stats = new ActiveGitCommandStats();
7084

7185
return true;
@@ -190,12 +204,14 @@ private bool IsLockAvailable(bool checkExternalHolderOnly, out NamedPipeMessages
190204
}
191205

192206
bool externalHolderTerminatedWithoutReleasingLock;
207+
string terminationReason;
193208
existingExternalHolder = this.currentLockHolder.GetExternalHolder(
194-
out externalHolderTerminatedWithoutReleasingLock);
209+
out externalHolderTerminatedWithoutReleasingLock,
210+
out terminationReason);
195211

196212
if (externalHolderTerminatedWithoutReleasingLock)
197213
{
198-
this.ReleaseLockForTerminatedProcess(existingExternalHolder.PID);
214+
this.ReleaseLockForTerminatedProcess(existingExternalHolder.PID, terminationReason);
199215
this.tracer.SetGitCommandSessionId(string.Empty);
200216
existingExternalHolder = null;
201217
}
@@ -204,11 +220,11 @@ private bool IsLockAvailable(bool checkExternalHolderOnly, out NamedPipeMessages
204220
}
205221
}
206222

207-
private bool ReleaseExternalLock(int pid, string eventName)
223+
private bool ReleaseExternalLock(int pid, string eventName, EventMetadata extraMetadata = null)
208224
{
209225
lock (this.acquisitionLock)
210226
{
211-
EventMetadata metadata = new EventMetadata();
227+
EventMetadata metadata = extraMetadata ?? new EventMetadata();
212228

213229
try
214230
{
@@ -251,9 +267,11 @@ private bool ReleaseExternalLock(int pid, string eventName)
251267
}
252268
}
253269

254-
private void ReleaseLockForTerminatedProcess(int pid)
270+
private void ReleaseLockForTerminatedProcess(int pid, string terminationReason)
255271
{
256-
this.ReleaseExternalLock(pid, "ExternalLockHolderExited");
272+
EventMetadata metadata = new EventMetadata();
273+
metadata.Add("ExternalHolderTerminationReason", terminationReason ?? "Unknown");
274+
this.ReleaseExternalLock(pid, "ExternalLockHolderExited", metadata);
257275
}
258276

259277
// The lock release event is a convenient place to record stats about things that happened while a git command was running,
@@ -383,6 +401,7 @@ public void AddStatsToTelemetry(EventMetadata metadata)
383401
private class LockHolder
384402
{
385403
private NamedPipeMessages.LockData externalLockHolder;
404+
private long? externalLockHolderStartTime;
386405

387406
public bool IsFree
388407
{
@@ -404,7 +423,7 @@ public void AcquireForGVFS()
404423
this.IsGVFS = true;
405424
}
406425

407-
public void AcquireForExternalRequestor(NamedPipeMessages.LockData externalLockHolder)
426+
public void AcquireForExternalRequestor(NamedPipeMessages.LockData externalLockHolder, long? startTime)
408427
{
409428
if (this.IsGVFS ||
410429
this.externalLockHolder != null)
@@ -413,27 +432,59 @@ public void AcquireForExternalRequestor(NamedPipeMessages.LockData externalLockH
413432
}
414433

415434
this.externalLockHolder = externalLockHolder;
435+
this.externalLockHolderStartTime = startTime;
416436
}
417437

418438
public void Release()
419439
{
420440
this.IsGVFS = false;
421441
this.externalLockHolder = null;
442+
this.externalLockHolderStartTime = null;
422443
}
423444

424445
public NamedPipeMessages.LockData GetExternalHolder()
425446
{
426447
return this.externalLockHolder;
427448
}
428449

429-
public NamedPipeMessages.LockData GetExternalHolder(out bool externalHolderTerminatedWithoutReleasingLock)
450+
public NamedPipeMessages.LockData GetExternalHolder(out bool externalHolderTerminatedWithoutReleasingLock, out string terminationReason)
430451
{
431452
externalHolderTerminatedWithoutReleasingLock = false;
453+
terminationReason = null;
432454

433455
if (this.externalLockHolder != null)
434456
{
435457
int pid = this.externalLockHolder.PID;
436-
externalHolderTerminatedWithoutReleasingLock = !GVFSPlatform.Instance.IsProcessActive(pid);
458+
459+
if (this.externalLockHolderStartTime is long capturedStartTime)
460+
{
461+
// Identity check: confirm the same process still owns this PID by comparing
462+
// the OS-supplied process start time we captured at acquisition with the
463+
// current one. A mismatch means the original holder exited and Windows
464+
// recycled the PID to a different process (the bug this code fixes).
465+
if (!GVFSPlatform.Instance.TryGetActiveProcessStartTime(pid, out long currentStartTime))
466+
{
467+
externalHolderTerminatedWithoutReleasingLock = true;
468+
terminationReason = "ProcessNotActive";
469+
}
470+
else if (currentStartTime != capturedStartTime)
471+
{
472+
externalHolderTerminatedWithoutReleasingLock = true;
473+
terminationReason = "PidRecycled";
474+
}
475+
}
476+
else
477+
{
478+
// Fallback for the rare case where we could not capture a start time at
479+
// acquisition time (e.g. cross-integrity OpenProcess denial). Use the
480+
// legacy PID-only liveness check, which is vulnerable to PID recycling
481+
// but matches pre-fix behavior.
482+
if (!GVFSPlatform.Instance.IsProcessActive(pid))
483+
{
484+
externalHolderTerminatedWithoutReleasingLock = true;
485+
terminationReason = "ProcessNotActive";
486+
}
487+
}
437488
}
438489

439490
return this.externalLockHolder;

GVFS/GVFS.Common/GVFSPlatform.cs

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,19 @@ public static void Register(GVFSPlatform platform)
6666
public abstract void PrepareProcessToRunInBackground();
6767

6868
public abstract bool IsProcessActive(int processId);
69+
70+
/// <summary>
71+
/// Returns true and writes an opaque, OS-supplied process-identity token (e.g. process
72+
/// creation time on Windows) when the process with the given PID is currently active.
73+
/// The token has no meaning beyond identity comparison: two calls for the same underlying
74+
/// process yield equal tokens, and a call after the OS has recycled the PID to a different
75+
/// process yields a different token. Returns false (and startTime = 0) if the process
76+
/// is no longer running, or if it could not be identified for any reason (e.g. permission
77+
/// failure). Callers must treat a false return as "no identity information available" and
78+
/// fall back to <see cref="IsProcessActive(int)"/> if they still need a liveness check.
79+
/// </summary>
80+
public abstract bool TryGetActiveProcessStartTime(int processId, out long startTime);
81+
6982
public abstract void IsServiceInstalledAndRunning(string name, out bool installed, out bool running);
7083
public abstract string GetNamedPipeName(string enlistmentRoot);
7184
public abstract string GetGVFSServiceNamedPipeName(string serviceName);

GVFS/GVFS.Common/NativeMethods.Shared.cs

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -158,6 +158,18 @@ public static extern SafeFileHandle OpenProcess(
158158
[return: MarshalAs(UnmanagedType.Bool)]
159159
public static extern bool GetExitCodeProcess(SafeFileHandle hProcess, out uint lpExitCode);
160160

161+
// GetProcessTimes writes four FILETIME values (each two DWORDs / 8 bytes).
162+
// We marshal each as `out long` because we only ever compare the raw 64-bit value
163+
// (creation time) for identity purposes; we never decode it as a date.
164+
[DllImport("kernel32.dll", SetLastError = true)]
165+
[return: MarshalAs(UnmanagedType.Bool)]
166+
public static extern bool GetProcessTimes(
167+
SafeFileHandle hProcess,
168+
out long lpCreationTime,
169+
out long lpExitTime,
170+
out long lpKernelTime,
171+
out long lpUserTime);
172+
161173
[DllImport("kernel32.dll", SetLastError = true, CharSet = CharSet.Unicode)]
162174
private static extern SafeFileHandle CreateFile(
163175
[In] string lpFileName,

GVFS/GVFS.Platform.Windows/WindowsPlatform.Shared.cs

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,44 @@ public static bool IsProcessActiveImplementation(int processId, bool tryGetProce
7070
}
7171
}
7272

73+
/// <summary>
74+
/// Returns true if a process with the given PID is currently active AND writes its
75+
/// creation timestamp (raw FILETIME, 100-ns ticks since 1601) to <paramref name="startTime"/>.
76+
/// The returned value is intended for identity comparison only -- two calls for the
77+
/// same underlying process (no PID reuse) always yield equal values; if the OS recycles
78+
/// a PID to a new process the value will differ. Returns false (and startTime = 0)
79+
/// if the process is gone, has terminated (but its kernel object lingers due to an
80+
/// outstanding handle elsewhere), or cannot be opened for QueryLimitedInformation.
81+
/// </summary>
82+
public static bool TryGetActiveProcessStartTimeImplementation(int processId, out long startTime)
83+
{
84+
startTime = 0;
85+
86+
using (SafeFileHandle process = NativeMethods.OpenProcess(NativeMethods.ProcessAccessFlags.QueryLimitedInformation, false, processId))
87+
{
88+
if (process.IsInvalid)
89+
{
90+
return false;
91+
}
92+
93+
// GetProcessTimes succeeds for terminated processes whose kernel object still
94+
// exists (e.g., an outstanding handle elsewhere). Confirm the process is still
95+
// running before trusting the creation time as an identity marker.
96+
if (!NativeMethods.GetExitCodeProcess(process, out uint exitCode) || exitCode != StillActive)
97+
{
98+
return false;
99+
}
100+
101+
if (!NativeMethods.GetProcessTimes(process, out long creationTime, out _, out _, out _))
102+
{
103+
return false;
104+
}
105+
106+
startTime = creationTime;
107+
return true;
108+
}
109+
}
110+
73111
public static string GetNamedPipeNameImplementation(string enlistmentRoot)
74112
{
75113
return "GVFS_" + enlistmentRoot.ToUpper().Replace(':', '_');

GVFS/GVFS.Platform.Windows/WindowsPlatform.cs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -205,6 +205,11 @@ public override bool IsProcessActive(int processId)
205205
return WindowsPlatform.IsProcessActiveImplementation(processId, tryGetProcessById: true);
206206
}
207207

208+
public override bool TryGetActiveProcessStartTime(int processId, out long startTime)
209+
{
210+
return WindowsPlatform.TryGetActiveProcessStartTimeImplementation(processId, out startTime);
211+
}
212+
208213
public override void IsServiceInstalledAndRunning(string name, out bool installed, out bool running)
209214
{
210215
ServiceController service = ServiceController.GetServices().FirstOrDefault(s => s.ServiceName.Equals(name, StringComparison.Ordinal));

GVFS/GVFS.UnitTests/Common/GVFSLockTests.cs

Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -182,6 +182,84 @@ public void TryAcquireLockForExternalRequestor_WhenExternalHolderTerminated()
182182
mockTracer.VerifyAll();
183183
}
184184

185+
/// <summary>
186+
/// Regression test for OrphanedGVFSLockIsCleanedUp flake: simulates the original lock holder
187+
/// exiting and the OS recycling its PID to a different, unrelated process before the orphan
188+
/// check fires. Identity is preserved via process start time, so the orphan must be detected
189+
/// even though the (now-different) process at the holder's PID still appears active.
190+
/// </summary>
191+
[TestCase]
192+
public void TryAcquireLockForExternalRequestor_WhenHolderPidRecycled()
193+
{
194+
Mock<ITracer> mockTracer = new Mock<ITracer>(MockBehavior.Strict);
195+
mockTracer.Setup(x => x.RelatedEvent(EventLevel.Informational, "TryAcquireLockExternal", It.IsAny<EventMetadata>()));
196+
mockTracer.Setup(x => x.RelatedEvent(EventLevel.Informational, "ExternalLockHolderExited", It.IsAny<EventMetadata>(), Keywords.Telemetry));
197+
mockTracer.Setup(x => x.SetGitCommandSessionId(string.Empty));
198+
MockPlatform mockPlatform = (MockPlatform)GVFSPlatform.Instance;
199+
200+
// Acquire as DefaultLockData.PID with an explicit start time so the new identity-check
201+
// path (not the legacy fallback) runs.
202+
mockPlatform.ActiveProcesses.Add(DefaultLockData.PID);
203+
mockPlatform.ProcessStartTimes[DefaultLockData.PID] = 1000;
204+
GVFSLock gvfsLock = new GVFSLock(mockTracer.Object);
205+
NamedPipeMessages.LockData existingExternalHolder;
206+
gvfsLock.TryAcquireLockForExternalRequestor(DefaultLockData, out existingExternalHolder).ShouldBeTrue();
207+
existingExternalHolder.ShouldBeNull();
208+
this.ValidateLockHeld(gvfsLock, DefaultLockData);
209+
210+
// Simulate PID recycling: the PID is still reported active (a different process now owns it),
211+
// but the start time differs from the one we captured at acquisition.
212+
mockPlatform.ProcessStartTimes[DefaultLockData.PID] = 2000;
213+
214+
NamedPipeMessages.LockData newLockData = new NamedPipeMessages.LockData(4321, false, false, "git new", "456");
215+
mockPlatform.ActiveProcesses.Add(newLockData.PID);
216+
mockPlatform.ProcessStartTimes[newLockData.PID] = 3000;
217+
gvfsLock.TryAcquireLockForExternalRequestor(newLockData, out existingExternalHolder).ShouldBeTrue();
218+
existingExternalHolder.ShouldBeNull();
219+
this.ValidateLockHeld(gvfsLock, newLockData);
220+
221+
mockPlatform.ActiveProcesses.Remove(DefaultLockData.PID);
222+
mockPlatform.ActiveProcesses.Remove(newLockData.PID);
223+
mockPlatform.ProcessStartTimes.Remove(DefaultLockData.PID);
224+
mockPlatform.ProcessStartTimes.Remove(newLockData.PID);
225+
mockTracer.VerifyAll();
226+
}
227+
228+
/// <summary>
229+
/// Inverse of the PID-recycle test: when start time still matches, the original holder
230+
/// must still be considered the active holder and a new acquisition must be denied.
231+
/// Guards against an over-eager orphan detector that would release any lock on every check.
232+
/// </summary>
233+
[TestCase]
234+
public void TryAcquireLockForExternalRequestor_WhenHolderStartTimeMatches()
235+
{
236+
Mock<ITracer> mockTracer = new Mock<ITracer>(MockBehavior.Strict);
237+
mockTracer.Setup(x => x.RelatedEvent(EventLevel.Informational, "TryAcquireLockExternal", It.IsAny<EventMetadata>()));
238+
mockTracer.Setup(x => x.RelatedEvent(EventLevel.Verbose, "TryAcquireLockExternal", It.IsAny<EventMetadata>()));
239+
MockPlatform mockPlatform = (MockPlatform)GVFSPlatform.Instance;
240+
241+
mockPlatform.ActiveProcesses.Add(DefaultLockData.PID);
242+
mockPlatform.ProcessStartTimes[DefaultLockData.PID] = 1000;
243+
GVFSLock gvfsLock = new GVFSLock(mockTracer.Object);
244+
NamedPipeMessages.LockData existingExternalHolder;
245+
gvfsLock.TryAcquireLockForExternalRequestor(DefaultLockData, out existingExternalHolder).ShouldBeTrue();
246+
this.ValidateLockHeld(gvfsLock, DefaultLockData);
247+
248+
// Start time is unchanged -- holder is genuinely still alive; new acquire must be denied.
249+
NamedPipeMessages.LockData newLockData = new NamedPipeMessages.LockData(4321, false, false, "git new", "456");
250+
mockPlatform.ActiveProcesses.Add(newLockData.PID);
251+
mockPlatform.ProcessStartTimes[newLockData.PID] = 3000;
252+
gvfsLock.TryAcquireLockForExternalRequestor(newLockData, out existingExternalHolder).ShouldBeFalse();
253+
this.ValidateExistingExternalHolder(DefaultLockData, existingExternalHolder);
254+
this.ValidateLockHeld(gvfsLock, DefaultLockData);
255+
256+
mockPlatform.ActiveProcesses.Remove(DefaultLockData.PID);
257+
mockPlatform.ActiveProcesses.Remove(newLockData.PID);
258+
mockPlatform.ProcessStartTimes.Remove(DefaultLockData.PID);
259+
mockPlatform.ProcessStartTimes.Remove(newLockData.PID);
260+
mockTracer.VerifyAll();
261+
}
262+
185263
private GVFSLock AcquireDefaultLock(MockPlatform mockPlatform, ITracer mockTracer)
186264
{
187265
GVFSLock gvfsLock = new GVFSLock(mockTracer);

GVFS/GVFS.UnitTests/Mock/Common/MockPlatform.cs

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,14 @@ public override bool SupportsSystemInstallLog
4545

4646
public HashSet<int> ActiveProcesses { get; } = new HashSet<int>();
4747

48+
/// <summary>
49+
/// Optional per-PID start-time overrides used by tests that exercise PID-reuse
50+
/// detection. When a PID is in <see cref="ActiveProcesses"/> but not in this
51+
/// dictionary, <see cref="TryGetActiveProcessStartTime"/> returns the PID itself
52+
/// as a synthetic non-zero start time.
53+
/// </summary>
54+
public Dictionary<int, long> ProcessStartTimes { get; } = new Dictionary<int, long>();
55+
4856
public override void ConfigureVisualStudio(string gitBinPath, ITracer tracer)
4957
{
5058
throw new NotSupportedException();
@@ -138,6 +146,21 @@ public override bool IsProcessActive(int processId)
138146
return this.ActiveProcesses.Contains(processId);
139147
}
140148

149+
public override bool TryGetActiveProcessStartTime(int processId, out long startTime)
150+
{
151+
if (this.ActiveProcesses.Contains(processId))
152+
{
153+
// If the test populated an explicit start time use that, otherwise fall back
154+
// to a deterministic non-zero value so test scenarios that don't care about
155+
// PID identity (the existing majority of unit tests) keep working.
156+
startTime = this.ProcessStartTimes.TryGetValue(processId, out long stored) ? stored : processId;
157+
return true;
158+
}
159+
160+
startTime = 0;
161+
return false;
162+
}
163+
141164
public override void IsServiceInstalledAndRunning(string name, out bool installed, out bool running)
142165
{
143166
throw new NotSupportedException();

0 commit comments

Comments
 (0)