Skip to content

Commit 37dd08b

Browse files
tyrielvCopilot
andcommitted
Fix PID-recycling race in orphan-lock detection
GVFS's orphan-lock check (GVFSLock.LockHolder.GetExternalHolder) only asked the OS "is there a process at this PID?" via IsProcessActive. If the original lock-holder exited and Windows recycled its PID to an unrelated process before the next git command arrived, GVFS saw a live process at that PID and concluded the lock was still held. The git command then either waited the full 300s timeout or printed a spurious "Waiting for '<holder>' to release the lock" message. This is a real product bug, not just a test issue. It surfaced as flakiness in the OrphanedGVFSLockIsCleanedUp functional test on busy CI agents (two failure modes: 300s timeout, and assertion failure with non-empty stderr), but the same race can affect users when a PID is quickly reassigned by the OS scheduler. The fix is process-identity tracking. When the lock is granted to an external requestor, capture that process's creation timestamp (GetProcessTimes, an opaque 100-ns FILETIME value) alongside its PID. On every orphan check, look up the current creation timestamp at that PID and compare. If the process is gone the lookup fails; if a different process is now at the PID the timestamps differ. Either way the lock is correctly recognized as orphaned. Implementation notes: * New abstract GVFSPlatform.TryGetActiveProcessStartTime returns the raw 64-bit creation time. It is documented as identity-only; the value is never decoded as a date. * The Windows implementation combines GetExitCodeProcess (STILL_ACTIVE check) and GetProcessTimes. GetProcessTimes alone is not sufficient because it still returns a creation time for terminated processes whose kernel object is kept alive by an outstanding handle elsewhere. * Acquisition does not fail if start-time capture fails. The existing IsProcessActiveImplementation has a Process.GetProcessById fallback for cross-integrity callers that OpenProcess(QueryLimitedInformation) cannot open, and we preserve the same compatibility surface here. A HasStartTime flag is stored alongside the PID; when it is false the orphan check falls back to the legacy IsProcessActive call. The fallback is recorded in telemetry (StartTimeUnavailable=true on the TryAcquireLockExternal event) so we can see if it ever becomes common in the field. * The ExternalLockHolderExited telemetry event now carries an ExternalHolderTerminationReason field (ProcessNotActive | PidRecycled | Unknown) to make future post-mortems straightforward. * The OrphanedGVFSLockIsCleanedUp functional test is unchanged. It polls Process.GetProcessById to detect when the lock holder has exited, then runs git status. With this product fix, git status succeeds cleanly even if the PID has already been recycled to an unrelated process by the time mount processes the request, so the test passes naturally without any test-side workaround. * Two new unit tests cover the new identity-check path: TryAcquireLockForExternalRequestor_WhenHolderPidRecycled (verifies that an orphan is detected when start times differ at the same PID) and TryAcquireLockForExternalRequestor_WhenHolderStartTimeMatches (the inverse: a still-alive holder is correctly seen as active and a new acquisition is denied). Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
1 parent c53aea6 commit 37dd08b

7 files changed

Lines changed: 231 additions & 10 deletions

File tree

GVFS/GVFS.Common/GVFSLock.cs

Lines changed: 62 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,18 @@ public bool TryAcquireLockForExternalRequestor(
4141

4242
existingExternalHolder = null;
4343

44+
// Capture the requestor's process start time so we can later distinguish the
45+
// genuine holder from an unrelated process that happens to be reusing the same
46+
// PID after the holder exits. If we cannot read the start time (e.g. permission
47+
// failure on OpenProcess for a different-integrity caller) we still accept the
48+
// lock and fall back to the legacy PID-only orphan check; record the fallback in
49+
// telemetry so we can spot if it becomes common.
50+
bool hasStartTime = GVFSPlatform.Instance.TryGetActiveProcessStartTime(requestor.PID, out long requestorStartTime);
51+
if (!hasStartTime)
52+
{
53+
metadata.Add("StartTimeUnavailable", true);
54+
}
55+
4456
try
4557
{
4658
lock (this.acquisitionLock)
@@ -65,7 +77,7 @@ public bool TryAcquireLockForExternalRequestor(
6577
metadata.Add("Result", "Accepted");
6678
eventLevel = EventLevel.Informational;
6779

68-
this.currentLockHolder.AcquireForExternalRequestor(requestor);
80+
this.currentLockHolder.AcquireForExternalRequestor(requestor, hasStartTime, requestorStartTime);
6981
this.Stats = new ActiveGitCommandStats();
7082

7183
return true;
@@ -190,12 +202,14 @@ private bool IsLockAvailable(bool checkExternalHolderOnly, out NamedPipeMessages
190202
}
191203

192204
bool externalHolderTerminatedWithoutReleasingLock;
205+
string terminationReason;
193206
existingExternalHolder = this.currentLockHolder.GetExternalHolder(
194-
out externalHolderTerminatedWithoutReleasingLock);
207+
out externalHolderTerminatedWithoutReleasingLock,
208+
out terminationReason);
195209

196210
if (externalHolderTerminatedWithoutReleasingLock)
197211
{
198-
this.ReleaseLockForTerminatedProcess(existingExternalHolder.PID);
212+
this.ReleaseLockForTerminatedProcess(existingExternalHolder.PID, terminationReason);
199213
this.tracer.SetGitCommandSessionId(string.Empty);
200214
existingExternalHolder = null;
201215
}
@@ -204,11 +218,11 @@ private bool IsLockAvailable(bool checkExternalHolderOnly, out NamedPipeMessages
204218
}
205219
}
206220

207-
private bool ReleaseExternalLock(int pid, string eventName)
221+
private bool ReleaseExternalLock(int pid, string eventName, EventMetadata extraMetadata = null)
208222
{
209223
lock (this.acquisitionLock)
210224
{
211-
EventMetadata metadata = new EventMetadata();
225+
EventMetadata metadata = extraMetadata ?? new EventMetadata();
212226

213227
try
214228
{
@@ -251,9 +265,11 @@ private bool ReleaseExternalLock(int pid, string eventName)
251265
}
252266
}
253267

254-
private void ReleaseLockForTerminatedProcess(int pid)
268+
private void ReleaseLockForTerminatedProcess(int pid, string terminationReason)
255269
{
256-
this.ReleaseExternalLock(pid, "ExternalLockHolderExited");
270+
EventMetadata metadata = new EventMetadata();
271+
metadata.Add("ExternalHolderTerminationReason", terminationReason ?? "Unknown");
272+
this.ReleaseExternalLock(pid, "ExternalLockHolderExited", metadata);
257273
}
258274

259275
// The lock release event is a convenient place to record stats about things that happened while a git command was running,
@@ -383,6 +399,8 @@ public void AddStatsToTelemetry(EventMetadata metadata)
383399
private class LockHolder
384400
{
385401
private NamedPipeMessages.LockData externalLockHolder;
402+
private bool hasExternalLockHolderStartTime;
403+
private long externalLockHolderStartTime;
386404

387405
public bool IsFree
388406
{
@@ -404,7 +422,7 @@ public void AcquireForGVFS()
404422
this.IsGVFS = true;
405423
}
406424

407-
public void AcquireForExternalRequestor(NamedPipeMessages.LockData externalLockHolder)
425+
public void AcquireForExternalRequestor(NamedPipeMessages.LockData externalLockHolder, bool hasStartTime, long startTime)
408426
{
409427
if (this.IsGVFS ||
410428
this.externalLockHolder != null)
@@ -413,27 +431,61 @@ public void AcquireForExternalRequestor(NamedPipeMessages.LockData externalLockH
413431
}
414432

415433
this.externalLockHolder = externalLockHolder;
434+
this.hasExternalLockHolderStartTime = hasStartTime;
435+
this.externalLockHolderStartTime = startTime;
416436
}
417437

418438
public void Release()
419439
{
420440
this.IsGVFS = false;
421441
this.externalLockHolder = null;
442+
this.hasExternalLockHolderStartTime = false;
443+
this.externalLockHolderStartTime = 0;
422444
}
423445

424446
public NamedPipeMessages.LockData GetExternalHolder()
425447
{
426448
return this.externalLockHolder;
427449
}
428450

429-
public NamedPipeMessages.LockData GetExternalHolder(out bool externalHolderTerminatedWithoutReleasingLock)
451+
public NamedPipeMessages.LockData GetExternalHolder(out bool externalHolderTerminatedWithoutReleasingLock, out string terminationReason)
430452
{
431453
externalHolderTerminatedWithoutReleasingLock = false;
454+
terminationReason = null;
432455

433456
if (this.externalLockHolder != null)
434457
{
435458
int pid = this.externalLockHolder.PID;
436-
externalHolderTerminatedWithoutReleasingLock = !GVFSPlatform.Instance.IsProcessActive(pid);
459+
460+
if (this.hasExternalLockHolderStartTime)
461+
{
462+
// Identity check: confirm the same process still owns this PID by comparing
463+
// the OS-supplied process start time we captured at acquisition with the
464+
// current one. A mismatch means the original holder exited and Windows
465+
// recycled the PID to a different process (the bug this code fixes).
466+
if (!GVFSPlatform.Instance.TryGetActiveProcessStartTime(pid, out long currentStartTime))
467+
{
468+
externalHolderTerminatedWithoutReleasingLock = true;
469+
terminationReason = "ProcessNotActive";
470+
}
471+
else if (currentStartTime != this.externalLockHolderStartTime)
472+
{
473+
externalHolderTerminatedWithoutReleasingLock = true;
474+
terminationReason = "PidRecycled";
475+
}
476+
}
477+
else
478+
{
479+
// Fallback for the rare case where we could not capture a start time at
480+
// acquisition time (e.g. cross-integrity OpenProcess denial). Use the
481+
// legacy PID-only liveness check, which is vulnerable to PID recycling
482+
// but matches pre-fix behavior.
483+
if (!GVFSPlatform.Instance.IsProcessActive(pid))
484+
{
485+
externalHolderTerminatedWithoutReleasingLock = true;
486+
terminationReason = "ProcessNotActive";
487+
}
488+
}
437489
}
438490

439491
return this.externalLockHolder;

GVFS/GVFS.Common/GVFSPlatform.cs

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,19 @@ public static void Register(GVFSPlatform platform)
6666
public abstract void PrepareProcessToRunInBackground();
6767

6868
public abstract bool IsProcessActive(int processId);
69+
70+
/// <summary>
71+
/// Returns true and writes an opaque, OS-supplied process-identity token (e.g. process
72+
/// creation time on Windows) when the process with the given PID is currently active.
73+
/// The token has no meaning beyond identity comparison: two calls for the same underlying
74+
/// process yield equal tokens, and a call after the OS has recycled the PID to a different
75+
/// process yields a different token. Returns false (and startTime = 0) if the process
76+
/// is no longer running, or if it could not be identified for any reason (e.g. permission
77+
/// failure). Callers must treat a false return as "no identity information available" and
78+
/// fall back to <see cref="IsProcessActive(int)"/> if they still need a liveness check.
79+
/// </summary>
80+
public abstract bool TryGetActiveProcessStartTime(int processId, out long startTime);
81+
6982
public abstract void IsServiceInstalledAndRunning(string name, out bool installed, out bool running);
7083
public abstract string GetNamedPipeName(string enlistmentRoot);
7184
public abstract string GetGVFSServiceNamedPipeName(string serviceName);

GVFS/GVFS.Common/NativeMethods.Shared.cs

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -158,6 +158,18 @@ public static extern SafeFileHandle OpenProcess(
158158
[return: MarshalAs(UnmanagedType.Bool)]
159159
public static extern bool GetExitCodeProcess(SafeFileHandle hProcess, out uint lpExitCode);
160160

161+
// GetProcessTimes writes four FILETIME values (each two DWORDs / 8 bytes).
162+
// We marshal each as `out long` because we only ever compare the raw 64-bit value
163+
// (creation time) for identity purposes; we never decode it as a date.
164+
[DllImport("kernel32.dll", SetLastError = true)]
165+
[return: MarshalAs(UnmanagedType.Bool)]
166+
public static extern bool GetProcessTimes(
167+
SafeFileHandle hProcess,
168+
out long lpCreationTime,
169+
out long lpExitTime,
170+
out long lpKernelTime,
171+
out long lpUserTime);
172+
161173
[DllImport("kernel32.dll", SetLastError = true, CharSet = CharSet.Unicode)]
162174
private static extern SafeFileHandle CreateFile(
163175
[In] string lpFileName,

GVFS/GVFS.Platform.Windows/WindowsPlatform.Shared.cs

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,44 @@ public static bool IsProcessActiveImplementation(int processId, bool tryGetProce
7070
}
7171
}
7272

73+
/// <summary>
74+
/// Returns true if a process with the given PID is currently active AND writes its
75+
/// creation timestamp (raw FILETIME, 100-ns ticks since 1601) to <paramref name="startTime"/>.
76+
/// The returned value is intended for identity comparison only -- two calls for the
77+
/// same underlying process (no PID reuse) always yield equal values; if the OS recycles
78+
/// a PID to a new process the value will differ. Returns false (and startTime = 0)
79+
/// if the process is gone, has terminated (but its kernel object lingers due to an
80+
/// outstanding handle elsewhere), or cannot be opened for QueryLimitedInformation.
81+
/// </summary>
82+
public static bool TryGetActiveProcessStartTimeImplementation(int processId, out long startTime)
83+
{
84+
startTime = 0;
85+
86+
using (SafeFileHandle process = NativeMethods.OpenProcess(NativeMethods.ProcessAccessFlags.QueryLimitedInformation, false, processId))
87+
{
88+
if (process.IsInvalid)
89+
{
90+
return false;
91+
}
92+
93+
// GetProcessTimes succeeds for terminated processes whose kernel object still
94+
// exists (e.g., an outstanding handle elsewhere). Confirm the process is still
95+
// running before trusting the creation time as an identity marker.
96+
if (!NativeMethods.GetExitCodeProcess(process, out uint exitCode) || exitCode != StillActive)
97+
{
98+
return false;
99+
}
100+
101+
if (!NativeMethods.GetProcessTimes(process, out long creationTime, out _, out _, out _))
102+
{
103+
return false;
104+
}
105+
106+
startTime = creationTime;
107+
return true;
108+
}
109+
}
110+
73111
public static string GetNamedPipeNameImplementation(string enlistmentRoot)
74112
{
75113
return "GVFS_" + enlistmentRoot.ToUpper().Replace(':', '_');

GVFS/GVFS.Platform.Windows/WindowsPlatform.cs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -205,6 +205,11 @@ public override bool IsProcessActive(int processId)
205205
return WindowsPlatform.IsProcessActiveImplementation(processId, tryGetProcessById: true);
206206
}
207207

208+
public override bool TryGetActiveProcessStartTime(int processId, out long startTime)
209+
{
210+
return WindowsPlatform.TryGetActiveProcessStartTimeImplementation(processId, out startTime);
211+
}
212+
208213
public override void IsServiceInstalledAndRunning(string name, out bool installed, out bool running)
209214
{
210215
ServiceController service = ServiceController.GetServices().FirstOrDefault(s => s.ServiceName.Equals(name, StringComparison.Ordinal));

GVFS/GVFS.UnitTests/Common/GVFSLockTests.cs

Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -182,6 +182,84 @@ public void TryAcquireLockForExternalRequestor_WhenExternalHolderTerminated()
182182
mockTracer.VerifyAll();
183183
}
184184

185+
/// <summary>
186+
/// Regression test for OrphanedGVFSLockIsCleanedUp flake: simulates the original lock holder
187+
/// exiting and the OS recycling its PID to a different, unrelated process before the orphan
188+
/// check fires. Identity is preserved via process start time, so the orphan must be detected
189+
/// even though the (now-different) process at the holder's PID still appears active.
190+
/// </summary>
191+
[TestCase]
192+
public void TryAcquireLockForExternalRequestor_WhenHolderPidRecycled()
193+
{
194+
Mock<ITracer> mockTracer = new Mock<ITracer>(MockBehavior.Strict);
195+
mockTracer.Setup(x => x.RelatedEvent(EventLevel.Informational, "TryAcquireLockExternal", It.IsAny<EventMetadata>()));
196+
mockTracer.Setup(x => x.RelatedEvent(EventLevel.Informational, "ExternalLockHolderExited", It.IsAny<EventMetadata>(), Keywords.Telemetry));
197+
mockTracer.Setup(x => x.SetGitCommandSessionId(string.Empty));
198+
MockPlatform mockPlatform = (MockPlatform)GVFSPlatform.Instance;
199+
200+
// Acquire as DefaultLockData.PID with an explicit start time so the new identity-check
201+
// path (not the legacy fallback) runs.
202+
mockPlatform.ActiveProcesses.Add(DefaultLockData.PID);
203+
mockPlatform.ProcessStartTimes[DefaultLockData.PID] = 1000;
204+
GVFSLock gvfsLock = new GVFSLock(mockTracer.Object);
205+
NamedPipeMessages.LockData existingExternalHolder;
206+
gvfsLock.TryAcquireLockForExternalRequestor(DefaultLockData, out existingExternalHolder).ShouldBeTrue();
207+
existingExternalHolder.ShouldBeNull();
208+
this.ValidateLockHeld(gvfsLock, DefaultLockData);
209+
210+
// Simulate PID recycling: the PID is still reported active (a different process now owns it),
211+
// but the start time differs from the one we captured at acquisition.
212+
mockPlatform.ProcessStartTimes[DefaultLockData.PID] = 2000;
213+
214+
NamedPipeMessages.LockData newLockData = new NamedPipeMessages.LockData(4321, false, false, "git new", "456");
215+
mockPlatform.ActiveProcesses.Add(newLockData.PID);
216+
mockPlatform.ProcessStartTimes[newLockData.PID] = 3000;
217+
gvfsLock.TryAcquireLockForExternalRequestor(newLockData, out existingExternalHolder).ShouldBeTrue();
218+
existingExternalHolder.ShouldBeNull();
219+
this.ValidateLockHeld(gvfsLock, newLockData);
220+
221+
mockPlatform.ActiveProcesses.Remove(DefaultLockData.PID);
222+
mockPlatform.ActiveProcesses.Remove(newLockData.PID);
223+
mockPlatform.ProcessStartTimes.Remove(DefaultLockData.PID);
224+
mockPlatform.ProcessStartTimes.Remove(newLockData.PID);
225+
mockTracer.VerifyAll();
226+
}
227+
228+
/// <summary>
229+
/// Inverse of the PID-recycle test: when start time still matches, the original holder
230+
/// must still be considered the active holder and a new acquisition must be denied.
231+
/// Guards against an over-eager orphan detector that would release any lock on every check.
232+
/// </summary>
233+
[TestCase]
234+
public void TryAcquireLockForExternalRequestor_WhenHolderStartTimeMatches()
235+
{
236+
Mock<ITracer> mockTracer = new Mock<ITracer>(MockBehavior.Strict);
237+
mockTracer.Setup(x => x.RelatedEvent(EventLevel.Informational, "TryAcquireLockExternal", It.IsAny<EventMetadata>()));
238+
mockTracer.Setup(x => x.RelatedEvent(EventLevel.Verbose, "TryAcquireLockExternal", It.IsAny<EventMetadata>()));
239+
MockPlatform mockPlatform = (MockPlatform)GVFSPlatform.Instance;
240+
241+
mockPlatform.ActiveProcesses.Add(DefaultLockData.PID);
242+
mockPlatform.ProcessStartTimes[DefaultLockData.PID] = 1000;
243+
GVFSLock gvfsLock = new GVFSLock(mockTracer.Object);
244+
NamedPipeMessages.LockData existingExternalHolder;
245+
gvfsLock.TryAcquireLockForExternalRequestor(DefaultLockData, out existingExternalHolder).ShouldBeTrue();
246+
this.ValidateLockHeld(gvfsLock, DefaultLockData);
247+
248+
// Start time is unchanged -- holder is genuinely still alive; new acquire must be denied.
249+
NamedPipeMessages.LockData newLockData = new NamedPipeMessages.LockData(4321, false, false, "git new", "456");
250+
mockPlatform.ActiveProcesses.Add(newLockData.PID);
251+
mockPlatform.ProcessStartTimes[newLockData.PID] = 3000;
252+
gvfsLock.TryAcquireLockForExternalRequestor(newLockData, out existingExternalHolder).ShouldBeFalse();
253+
this.ValidateExistingExternalHolder(DefaultLockData, existingExternalHolder);
254+
this.ValidateLockHeld(gvfsLock, DefaultLockData);
255+
256+
mockPlatform.ActiveProcesses.Remove(DefaultLockData.PID);
257+
mockPlatform.ActiveProcesses.Remove(newLockData.PID);
258+
mockPlatform.ProcessStartTimes.Remove(DefaultLockData.PID);
259+
mockPlatform.ProcessStartTimes.Remove(newLockData.PID);
260+
mockTracer.VerifyAll();
261+
}
262+
185263
private GVFSLock AcquireDefaultLock(MockPlatform mockPlatform, ITracer mockTracer)
186264
{
187265
GVFSLock gvfsLock = new GVFSLock(mockTracer);

0 commit comments

Comments
 (0)