2
0
mirror of https://github.com/esiur/esiur-dotnet.git synced 2026-06-13 14:38:43 +00:00

Deadlock tests

This commit is contained in:
2026-06-03 13:02:56 +03:00
parent 3dc36149b7
commit 2431166f25
25 changed files with 2160 additions and 157 deletions
+255 -11
View File
@@ -57,6 +57,27 @@ partial class EpConnection
KeyList<uint, WeakReference<EpResource>> _attachedResources = new KeyList<uint, WeakReference<EpResource>>();
KeyList<uint, WeakReference<EpResource>> _suspendedResources = new KeyList<uint, WeakReference<EpResource>>();
KeyList<uint, FetchRequestInfo<EpResource, uint>> _resourceRequests = new KeyList<uint, FetchRequestInfo<EpResource, uint>>();
// Wait-for graph for in-flight resource fetches: maps a resource id to the set of in-flight
// child resource ids its attachment is currently blocked on. Used to detect genuine cycles
// (e.g. two concurrent fetches A<->B) so a placeholder can break the deadlock, while
// independent/app-facing fetches of an in-flight resource simply wait for full attachment.
readonly Dictionary<uint, HashSet<uint>> _fetchBlockedOn = new Dictionary<uint, HashSet<uint>>();
/// <summary>
/// Strategy FetchResource uses for an in-flight resource. Defaults to the new wait + cycle
/// detection. Selectable for experimental evaluation (see <see cref="DeadlockResolutionMode"/>).
/// </summary>
public DeadlockResolutionMode DeadlockResolution { get; set; } = DeadlockResolutionMode.WaitWithCycleDetection;
// Per-connection diagnostics (free of the cross-connection contamination that the shared
// Global.Counters suffer from). Used by the deadlock experiments.
/// <summary>Number of resources fully attached on this connection (a monotonic progress signal).</summary>
public long AttachedResourceCount { get; private set; }
/// <summary>Number of wait-for-cycle breaks (placeholders returned to break a cycle) on this connection.</summary>
public long CycleBreakCount { get; private set; }
/// <summary>Number of placeholders returned where no genuine cycle existed (legacy resolver only).</summary>
public long UnnecessaryPlaceholderCount { get; private set; }
//KeyList<ulong, AsyncReply<RemoteTypeDef>> _typeDefsByIdRequests = new KeyList<ulong, AsyncReply<RemoteTypeDef>>();
//KeyList<string, AsyncReply<RemoteTypeDef>> _typeDefsByNameRequests = new KeyList<string, AsyncReply<RemoteTypeDef>>();
@@ -1969,6 +1990,11 @@ partial class EpConnection
req.Then(result =>
{
// The resource is being handed to the application: publish its fully-attached
// graph so that, if any dependency is only partially attached, it stays unpublished.
if (result is EpResource resource)
PublishGraph(resource);
rt.Trigger(result);
}).Error(ex => rt.TriggerError(ex));
@@ -2026,6 +2052,103 @@ partial class EpConnection
/// <returns>DistributedResource</returns>
///
//object fetchResourceLock = new object();
// Records that the attachment of `parent` is now blocked waiting on in-flight child `child`.
void AddFetchBlock(uint parent, uint child)
{
if (!_fetchBlockedOn.TryGetValue(parent, out var set))
_fetchBlockedOn[parent] = set = new HashSet<uint>();
set.Add(child);
}
// Removes a resource from the wait-for graph once it is attached or its fetch failed: it is
// no longer blocked on anything and no longer a pending child of anyone.
void ClearFetchNode(uint id)
{
_fetchBlockedOn.Remove(id);
foreach (var set in _fetchBlockedOn.Values)
set.Remove(id);
}
/// <summary>
/// Returns true if completing the fetch of <paramref name="id"/> by waiting for its in-flight
/// request would deadlock, i.e. the resource is (transitively) blocked on a resource that the
/// current request chain is itself building. In that case the caller should hand back the
/// placeholder to break the cycle instead of waiting.
/// </summary>
internal static bool HasWaitForCycle(uint id, uint[] requestSequence, IReadOnlyDictionary<uint, HashSet<uint>> blockedOn)
{
if (requestSequence == null || requestSequence.Length == 0)
return false;
var chain = new HashSet<uint>(requestSequence);
var visited = new HashSet<uint>();
var stack = new Stack<uint>();
stack.Push(id);
while (stack.Count > 0)
{
var current = stack.Pop();
if (!visited.Add(current))
continue;
if (!blockedOn.TryGetValue(current, out var children))
continue;
foreach (var child in children)
{
// Reaching a node that the current chain is attaching closes the cycle.
if (chain.Contains(child))
return true;
stack.Push(child);
}
}
return false;
}
/// <summary>
/// Publishes a fully-attached object graph to the application: every resource reachable from
/// <paramref name="root"/> is marked <see cref="ResourceStatus.Published"/>, but only if the
/// entire reachable graph is already attached. If any reachable resource is still being
/// attached (e.g. a placeholder handed out to break a cycle), the graph is left unpublished —
/// exactly the partially-attached delivery that the wait-by-default resolver prevents and the
/// legacy resolver does not.
/// </summary>
internal void PublishGraph(EpResource root)
{
if (root == null)
return;
var seen = new HashSet<uint>();
var reachable = new List<EpResource>();
var queue = new Queue<EpResource>();
queue.Enqueue(root);
var fullyAttached = true;
while (queue.Count > 0)
{
var node = queue.Dequeue();
if (node == null || !seen.Add(node.ResourceInstanceId))
continue;
reachable.Add(node);
if (node.Status != ResourceStatus.Attached)
{
fullyAttached = false;
continue; // do not traverse into a not-yet-attached node
}
foreach (var child in node.GetReferencedResources())
queue.Enqueue(child);
}
if (fullyAttached)
foreach (var node in reachable)
node.Publish();
}
public AsyncReply<EpResource> FetchResource(uint id, uint[] requestSequence)
{
//lock (fetchLock)
@@ -2044,27 +2167,64 @@ partial class EpConnection
var requestInfo = _resourceRequests[id];
// The resource that triggered this fetch (the tail of the chain), if any. Used to record
// wait-for edges and to tell graph-internal references from app-facing fetches (no chain).
uint? parent = requestSequence != null && requestSequence.Length > 0
? requestSequence[requestSequence.Length - 1]
: (uint?)null;
if (requestInfo != null)
{
if (resource != null && (requestSequence?.Contains(id) ?? false))
// Same dependency chain (A->B->A): the placeholder is an internal node of the graph
// currently being attached. The application only observes the chain's top-level reply,
// which fires after full attachment, so returning the not-yet-attached placeholder here
// is safe and breaks the reference cycle. NaiveWait skips this so that even same-chain
// cycles deadlock (used to demonstrate the protection is necessary).
if (DeadlockResolution != DeadlockResolutionMode.NaiveWait
&& resource != null && (requestSequence?.Contains(id) ?? false))
{
Global.Counters["EpResourceDeadLockSameChain"]++;
// dead lock avoidance for loop reference.
CycleBreakCount++;
return new AsyncReply<EpResource>(resource);
}
else if (resource != null && requestInfo.RequestSequence.Contains(id))
// Decide whether to break the wait by returning the placeholder:
// - Legacy: hand it to ANY cross-chain requester (over-eager; the bug under study).
// - WaitWithCycleDetection: only on a genuine wait-for cycle.
// - NaiveWait: never — always wait below (deadlocks on cycles).
var breakCycle = resource != null && DeadlockResolution switch
{
DeadlockResolutionMode.LegacyCrossChainPlaceholder => requestInfo.RequestSequence.Contains(id),
DeadlockResolutionMode.WaitWithCycleDetection => HasWaitForCycle(id, requestSequence, _fetchBlockedOn),
_ => false,
};
if (breakCycle)
{
Global.Counters["EpResourceDeadLockCrossChain"]++;
// dead lock avoidance for dependent reference.
CycleBreakCount++;
// Instrumentation: a placeholder handed out where there is no genuine wait-for cycle
// is an unnecessary, partial delivery — the new resolver would have waited for full
// attachment instead. This counts the legacy resolver's over-eager placeholders.
if (DeadlockResolution == DeadlockResolutionMode.LegacyCrossChainPlaceholder
&& !HasWaitForCycle(id, requestSequence, _fetchBlockedOn))
{
Global.Counters["EpResourceUnnecessaryPlaceholder"]++;
UnnecessaryPlaceholderCount++;
}
return new AsyncReply<EpResource>(resource);
}
else
{
Global.Counters["EpResourcePendingCacheHit"]++;
return requestInfo.Reply;
}
// Otherwise an independent or application-facing requester: wait for the in-flight
// attachment to complete fully rather than exposing a partially attached resource.
Global.Counters["EpResourcePendingCacheHit"]++;
if (parent != null)
AddFetchBlock(parent.Value, id);
return requestInfo.Reply;
}
else if (resource != null && !resource.ResourceSuspended)
else if (resource != null && resource.Status != ResourceStatus.Suspended)
{
// @REVIEW: this should never happen
Global.Log("DCON", LogType.Error, "Resource not moved to attached.");
@@ -2077,6 +2237,10 @@ partial class EpConnection
var reply = new AsyncReply<EpResource>();
_resourceRequests.Add(id, new FetchRequestInfo<EpResource, uint>(reply, newSequence));
// This fetch's parent now waits on `id` until it attaches.
if (parent != null)
AddFetchBlock(parent.Value, id);
SendRequest(EpPacketRequest.AttachResource, id)
.Then((result) =>
{
@@ -2113,12 +2277,19 @@ partial class EpConnection
var pvs = results as PropertyValue[];
dr._Attach(pvs);
// Progress signal: a resource has fully attached. Used by tests to
// distinguish a true deadlock (no progress while requests pend) from
// merely slow processing (these counters keep advancing).
Global.Counters["EpResourceAttached"]++;
AttachedResourceCount++;
_resourceRequests.Remove(id);
// move from needed to attached.
_neededResources.Remove(id);
_attachedResources[id] = new WeakReference<EpResource>(dr);
// attached: no longer part of the in-flight wait-for graph.
ClearFetchNode(id);
reply.Trigger(dr);
}).Error(ex => reply.TriggerError(ex));
}).Error(ex => { _resourceRequests.Remove(id); ClearFetchNode(id); reply.TriggerError(ex); });
};
if (typeDef == null)
@@ -2135,6 +2306,9 @@ partial class EpConnection
resource.ResourceDefinition = td;
typeDef = td;
// Register the placeholder before parsing properties so cyclic
// references in the graph can resolve back to this instance.
_neededResources[id] = resource;
Instance.Warehouse.Put(Instance.Link + "/" + id.ToString(), resource)
.Then(initResource)
.Error(ex => reply.TriggerError(ex));
@@ -2160,6 +2334,9 @@ partial class EpConnection
resource.ResourceDefinition = typeDef;
// Register the placeholder before parsing properties so cyclic
// references in the graph can resolve back to this instance.
_neededResources[id] = resource;
Instance.Warehouse.Put(this.Instance.Link + "/" + id.ToString(), resource)
.Then(initResource).Error((ex) => reply.TriggerError(ex));
}
@@ -2171,6 +2348,10 @@ partial class EpConnection
}).Error((ex) =>
{
// Failed to attach: drop the in-flight request and wait-for edges so a
// later retry is not blocked by a stale entry.
_resourceRequests.Remove(id);
ClearFetchNode(id);
reply.TriggerError(ex);
});
@@ -2187,6 +2368,69 @@ partial class EpConnection
/// <param name="id">Resource Id</param>
/// <returns>DistributedResource</returns>
///
/// <summary>
/// Re-attaches an already-known resource after reconnection using its last-known age. The peer
/// returns only the properties modified after <paramref name="age"/> (the delta), which are
/// merged into the existing instance instead of re-fetching everything. Falls back to a full
/// <see cref="FetchResource"/> if there is no prior state to merge into.
/// </summary>
public AsyncReply<EpResource> Reattach(uint id, ulong age, EpResource resource)
{
EpResource attachedResource = null;
_attachedResources[id]?.TryGetTarget(out attachedResource);
if (attachedResource != null)
return new AsyncReply<EpResource>(attachedResource);
var existing = _resourceRequests[id];
if (existing != null)
return existing.Reply;
var reply = new AsyncReply<EpResource>();
var sequence = new uint[] { id };
_resourceRequests.Add(id, new FetchRequestInfo<EpResource, uint>(reply, sequence));
SendRequest(EpPacketRequest.ReattachResource, id, age).Then(result =>
{
if (result == null)
{
_resourceRequests.Remove(id);
reply.TriggerError(new AsyncException(ErrorType.Management,
(ushort)ExceptionCode.ResourceNotFound, "Null response"));
return;
}
// typeId, age, link, hops, delta(index -> PropertyValue)
var args = (object[])result;
var deltaData = (byte[])args[4];
DataDeserializer.PropertyValueMapParserAsync(deltaData, 0, (uint)deltaData.Length, this, sequence)
.Then(delta =>
{
if (!resource._Reattach(delta))
{
// No prior state to merge into — perform a full attach instead.
_resourceRequests.Remove(id);
FetchResource(id, null).Then(r => reply.Trigger(r)).Error(ex => reply.TriggerError(ex));
return;
}
_resourceRequests.Remove(id);
_neededResources.Remove(id);
_attachedResources[id] = new WeakReference<EpResource>(resource);
ClearFetchNode(id);
reply.Trigger(resource);
})
.Error(ex => { _resourceRequests.Remove(id); ClearFetchNode(id); reply.TriggerError(ex); });
}).Error(ex =>
{
_resourceRequests.Remove(id);
ClearFetchNode(id);
reply.TriggerError(ex);
});
return reply;
}
//object fetchResourceLock = new object();
public AsyncReply<RemoteTypeDef> FetchTypeDef(ulong id, ulong[] requestSequence)
{