CSH medoid
27. 8. 2020 #kód
D version of primordial python implemntation of Ultra Fast Medoid Identification via Correlated Sequential Halving.
ulong medoidIndex(alias DistFunc, T, alias less = (a, b) => a < b)(T[] data, double armBudget) { assert(!data.empty, "must not be empty"); const n = data.length; const budget = armBudget * n; const numRounds = log2(n).to!ulong + 1; auto pullCount = 0; // number of times each arm is pulled auto estimates = new double[n]; estimates[] = 0.0; auto candidates = iota(0, n).array; double mean(R)(R r) { return (r.length == 0) ? 0.0 : r.sum / double(r.length); } /* Pulls the given arms pullsPerArm times. Updates the estimates */ auto pullArm(ulong[] arms, ulong pullsPerArm) { auto others = randomSample(iota(0, n), pullsPerArm).array; foreach (a; arms) { auto meanDist = mean(others.map!(o => DistFunc(data[a], data[o]))); estimates[a] = (estimates[a]*pullCount + meanDist*pullsPerArm) / double(pullCount + pullsPerArm); if (pullsPerArm == n) { estimates[a] = meanDist; } assert(!isNaN(meanDist) && !isNaN(estimates[a])); } pullCount += pullsPerArm; } while (candidates.length > 1) { auto pullsPerArm = clamp((budget / candidates.length / numRounds).to!ulong, 1, n); pullArm(candidates, pullsPerArm); auto lst = indexed(estimates, candidates); if (pullsPerArm == n) { // calculated exactly candidates = [candidates[lst.minIndex]]; break; } auto locs = new ulong[lst.length/2]; topNIndex!less(lst, locs); auto median = estimates[candidates[locs[$-1]]]; candidates = indexed(candidates, locs).array; } assert(candidates.length == 1); return candidates[0]; }