using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.IO;
using System.Linq;
using System.Threading.Tasks;
using Esiur.Core;
using Esiur.Misc;
using Esiur.Protocol;
using Esiur.Resource;
using Xunit.Abstractions;
namespace Esiur.Tests.Unit.Integration;
/// <summary>
/// Answers the methodological questions a deadlock-prevention experiment must address:
/// (a) the timeout / detection thresholds, justified against the measured completion-time
/// distribution;
/// (b) how a deadlock is detected as distinct from slow processing — via a progress (stall)
/// detector, validated by a NaiveWait resolver that genuinely deadlocks on cycles;
/// (c) that circular dependencies are actually present in the (randomly generated) request pool —
/// counted by static cycle detection (DFS) and by the resolver's cycle-break operations.
/// </summary>
[Collection("Integration")]
public class DeadlockDetectionTests
{
// xUnit output helper handed to this test class; assigned once in the constructor.
private readonly ITestOutputHelper _out;

// Stores the supplied output helper for use by the tests in this class.
public DeadlockDetectionTests(ITestOutputHelper output)
{
    _out = output;
}
// ---- detection thresholds (reported in the paper) --------------------------------------
// A run is a DEADLOCK if no resource attaches for StallMs while fetches are still pending; it is
// SLOW (not deadlock) if it is still making progress at HardTimeoutMs. StallMs is ~3 orders of
// magnitude above the observed completion time, so a stall is unambiguous.
private const int StallMs = 1_500;        // ms without a newly attached resource before Classify reports Deadlocked
private const int HardTimeoutMs = 15_000; // total elapsed ms after which Classify reports SlowTimeout
private const int PollMs = 25;            // ms between progress checks in Classify's polling loop

// Classification produced by Classify for a single run.
private enum Outcome
{
    Completed,   // every root fetch finished successfully
    Deadlocked,  // fetches still pending and no progress for StallMs
    SlowTimeout, // still progressing when HardTimeoutMs elapsed
    Faulted,     // at least one root fetch reported an error
}
// Reads the entry called `name` from the shared Global.Counters; yields 0 when there is no such entry.
static long Counter(string name)
{
    if (!Global.Counters.Contains(name))
    {
        return 0;
    }

    return Global.Counters[name];
}
/// <summary>
/// Starts an integration cluster that stores <paramref name="nodes"/> <c>Node</c> resources at
/// <c>sys/n{i}</c>, wires their <c>Links</c> from <paramref name="edges"/>, and then applies
/// <paramref name="mode"/> to the cluster's connection.
/// </summary>
/// <param name="nodes">Number of nodes to create; node ids run from 0 to <c>nodes - 1</c>.</param>
/// <param name="edges">Directed (from, to) pairs of node ids; materialized once before use.</param>
/// <param name="mode">Value assigned to the connection's <c>DeadlockResolution</c>.</param>
/// <returns>The started cluster, with the resolution mode already set on its connection.</returns>
static async Task<IntegrationCluster> StartGraph(int nodes, IEnumerable<(int from, int to)> edges, DeadlockResolutionMode mode)
{
    // Snapshot the edges so the sequence is enumerated exactly once, outside the setup callback.
    var edgeList = edges.ToArray();

    var cluster = await IntegrationCluster.StartAsync(async wh =>
    {
        var ns = new Node[nodes];
        for (var i = 0; i < nodes; i++)
        {
            ns[i] = new Node { Id = i };
            await wh.Put($"sys/n{i}", ns[i]);
        }

        // Links are assigned only after every node exists, so an edge may point at any node
        // (including an earlier one or the source itself), which is what allows cycles.
        foreach (var grp in edgeList.GroupBy(e => e.from))
        {
            ns[grp.Key].Links = grp.Select(e => ns[e.to]).ToArray();
        }
    });

    cluster.Connection.DeadlockResolution = mode;
    return cluster;
}
// Fires fetches for all roots and classifies the run using the progress (stall) detector.
// Uses per-connection counters (each run has a fresh connection) so progress and cycle-break
// measurements are free of cross-connection contamination from the shared Global.Counters.
/// <summary>
/// Issues a fetch for every root node on the cluster's connection and polls until the run can
/// be classified as completed, faulted, deadlocked (stalled) or slow.
/// </summary>
/// <param name="cluster">Cluster whose <c>Connection</c> is used for the fetches and counters.</param>
/// <param name="roots">Node ids to fetch; each is requested as <c>sys/n{id}</c>.</param>
/// <returns>
/// The outcome, the elapsed milliseconds measured from just after the fetches were issued, and
/// the connection's <c>CycleBreakCount</c> (reported only for <c>Completed</c>; 0 otherwise).
/// </returns>
async Task<(Outcome outcome, double ms, long cycleBreaks)> Classify(IntegrationCluster cluster, int[] roots)
{
    var connection = cluster.Connection;

    // Bridge each reply to a Task. The generic TaskCompletionSource<bool> is required because
    // TrySetResult is called with a value. RunContinuationsAsynchronously keeps the awaiting
    // code from running inline on whichever thread invokes the Then/Error callback.
    var tasks = roots.Select(r =>
    {
        var tcs = new TaskCompletionSource<bool>(TaskCreationOptions.RunContinuationsAsynchronously);
        connection.Get($"sys/n{r}")
            .Then(_ => tcs.TrySetResult(true))
            .Error(ex => tcs.TrySetException((Exception)ex));
        return tcs.Task;
    }).ToArray();

    var all = Task.WhenAll(tasks);
    var sw = Stopwatch.StartNew();
    var lastProgress = connection.AttachedResourceCount;
    var lastProgressMs = 0.0;

    while (true)
    {
        // Wake up as soon as everything is done, or after one poll interval at the latest.
        await Task.WhenAny(all, Task.Delay(PollMs));
        var elapsedMs = sw.Elapsed.TotalMilliseconds;

        if (all.IsCompletedSuccessfully)
        {
            return (Outcome.Completed, elapsedMs, connection.CycleBreakCount);
        }

        if (all.IsFaulted)
        {
            return (Outcome.Faulted, elapsedMs, 0);
        }

        // Any change in the attached-resource count counts as progress and resets the stall clock.
        var progress = connection.AttachedResourceCount;
        if (progress != lastProgress)
        {
            lastProgress = progress;
            lastProgressMs = elapsedMs;
        }

        // Pending, but no resource attached for the whole stall window: treat as a deadlock.
        if (elapsedMs - lastProgressMs >= StallMs)
        {
            return (Outcome.Deadlocked, elapsedMs, 0);
        }

        // Still progressing but not finished within the hard limit: slow, not deadlocked.
        if (elapsedMs >= HardTimeoutMs)
        {
            return (Outcome.SlowTimeout, elapsedMs, 0);
        }
    }
}
// ---- (b) deadlock is real and detectable, distinct from slow ----------------------------
public static IEnumerable