1 module dgraph.metric; 2 3 import std.algorithm, std.conv, std.range, std.traits; 4 5 import dgraph.graph; 6 7 /** 8 * Simple queue implementation for internal use. This will probably be removed 9 * once Phobos has an effective queue container. 10 * 11 * This is derived from Bearophile's $(LINK2 http://rosettacode.org/wiki/Queue/Usage#Faster_Version, 12 * RosettaCode example of a circular queue), which he has kindly agreed to allow 13 * to be $(LINK2 http://forum.dlang.org/post/mvmzvkjpbhazezlsydim@forum.dlang.org, 14 * used under the terms of the Boost licence). 15 */ 16 struct VertexQueue 17 { 18 private size_t length_, maxLength, head, tail; 19 private size_t[] queue; 20 21 this(size_t m) 22 { 23 maxLength = m; 24 queue = new size_t[maxLength]; 25 } 26 27 bool empty() @property const pure nothrow 28 { 29 return (length_ == 0); 30 } 31 32 size_t length() @property const pure nothrow 33 { 34 return length_; 35 } 36 37 void push(immutable size_t v) nothrow 38 { 39 assert(v < maxLength, "Vertex ID is too large!"); 40 queue[tail] = v; 41 tail = (tail + 1) % maxLength; 42 ++length_; 43 assert(length_ <= maxLength, "Length has exceeded total number of vertices!"); 44 } 45 46 auto front() @property const pure 47 { 48 assert(!this.empty, "Node queue is empty!"); 49 return queue[head]; 50 } 51 52 void pop() nothrow 53 { 54 head = (head + 1) % maxLength; 55 --length_; 56 } 57 } 58 59 unittest 60 { 61 auto q = VertexQueue(8); 62 q.push(3); 63 q.push(7); 64 q.push(4); 65 assert(q.front == 3); 66 q.pop(); 67 assert(q.front == 7); 68 q.pop(); 69 assert(q.front == 4); 70 q.push(5); 71 assert(q.front == 4); 72 q.pop(); 73 assert(q.front == 5); 74 q.pop(); 75 assert(q.empty); 76 q.push(2); 77 assert(q.front == 2); 78 q.pop(); 79 assert(q.empty); 80 } 81 82 /** 83 * Calculate betweenness centrality of vertices in a graph, using the algorithm 84 * developed by Ulrik Brandes (2001) J. Math. Sociol. 25(2): 163-177. 85 * 86 * The optional function parameter ignore allows the user to indicate 87 * individual vertices to ignore in the calculation. 88 */ 89 auto ref betweenness(T = double, Graph)(ref Graph g, bool[] ignore = null) 90 if (isFloatingPoint!T && isGraph!Graph) 91 { 92 T[] centrality = new T[g.vertexCount]; 93 return betweenness!(T, Graph)(g, centrality, ignore); 94 } 95 96 auto ref betweenness(T = double, Graph)(ref Graph g, ref T[] centrality, bool[] ignore = null) 97 { 98 centrality.length = g.vertexCount; 99 centrality[] = to!T(0); 100 size_t[] stack = new size_t[g.vertexCount]; 101 T[] sigma = new T[g.vertexCount]; 102 T[] delta = new T[g.vertexCount]; 103 long[] d = new long[g.vertexCount]; 104 auto q = VertexQueue(g.vertexCount); 105 Appender!(size_t[])[] p = new Appender!(size_t[])[g.vertexCount]; 106 107 sigma[] = to!T(0); 108 delta[] = to!T(0); 109 d[] = -1L; 110 111 foreach (immutable s; 0 .. g.vertexCount) 112 { 113 if (ignore && ignore[s]) 114 { 115 continue; 116 } 117 118 size_t stackLength = 0; 119 assert(q.empty); 120 sigma[s] = to!T(1); 121 d[s] = 0L; 122 q.push(s); 123 124 while (!q.empty) 125 { 126 size_t v = q.front; 127 q.pop(); 128 stack[stackLength] = v; 129 ++stackLength; 130 foreach (immutable w; g.neighboursOut(v)) 131 { 132 if (ignore && ignore[w]) 133 { 134 continue; 135 } 136 137 if (d[w] < 0L) 138 { 139 q.push(w); 140 d[w] = d[v] + 1L; 141 assert(sigma[w] == to!T(0)); 142 sigma[w] = sigma[v]; 143 p[w].clear; 144 p[w].put(v); 145 } 146 else if (d[w] > (d[v] + 1L)) 147 { 148 /* w has already been encountered, but we've 149 found a shorter path to the source. This 150 is probably only relevant to the weighted 151 case, but let's keep it here in order to 152 be ready for that update. */ 153 d[w] = d[v] + 1L; 154 sigma[w] = sigma[v]; 155 p[w].clear; 156 p[w].put(v); 157 } 158 else if (d[w] == (d[v] + 1L)) 159 { 160 sigma[w] += sigma[v]; 161 p[w].put(v); 162 } 163 } 164 } 165 166 while (stackLength > to!size_t(0)) 167 { 168 --stackLength; 169 auto w = stack[stackLength]; 170 foreach (immutable v; p[w].data) 171 { 172 delta[v] += ((sigma[v] / sigma[w]) * (to!T(1) + delta[w])); 173 } 174 if (w != s) 175 { 176 centrality[w] += delta[w]; 177 } 178 sigma[w] = to!T(0); 179 delta[w] = to!T(0); 180 d[w] = -1L; 181 } 182 } 183 184 static if (!g.directed) 185 { 186 centrality[] /= 2; 187 } 188 189 return centrality; 190 } 191 192 /** 193 * Calculate the size of the largest connected cluster in the graph. 194 * 195 * The function parameter ignore allows the user to specify individual 196 * vertices to ignore for the purposes of the calculation. 197 * 198 * This algorithm is a rather ad-hoc construction inspired by Brandes' 199 * algorithm for betweenness centrality. No claims are made for its 200 * performance or even correctness. 201 */ 202 size_t largestClusterSize(Graph)(ref Graph g, bool[] ignore = null) 203 if (isGraph!Graph) 204 { 205 long[] cluster = new long[g.vertexCount]; 206 cluster[] = -1L; 207 auto q = VertexQueue(g.vertexCount); 208 size_t largestCluster = 0; 209 210 foreach (immutable s; 0 .. g.vertexCount) 211 { 212 if (ignore && ignore[s]) 213 { 214 continue; 215 } 216 else if (cluster[s] < 0) 217 { 218 assert(q.empty); 219 cluster[s] = 0; 220 q.push(s); 221 size_t clusterSize = 1; 222 223 while (!q.empty) 224 { 225 size_t v = q.front; 226 q.pop(); 227 228 static if (g.directed) 229 { 230 auto allNeighbours = chain(g.neighboursIn(v), g.neighboursOut(v)); 231 } 232 else 233 { 234 auto allNeighbours = g.neighbours(v); 235 } 236 237 foreach (immutable w; allNeighbours) 238 { 239 if (ignore && ignore[w]) 240 { 241 continue; 242 } 243 else if (cluster[w] < 0) 244 { 245 q.push(w); 246 cluster[w] = clusterSize; 247 ++clusterSize; 248 } 249 } 250 } 251 252 largestCluster = max(largestCluster, clusterSize); 253 } 254 } 255 256 return largestCluster; 257 } 258 259 unittest 260 { 261 import std.stdio, std.typecons; 262 263 void clusterTest1(Graph)() 264 if (isGraph!Graph) 265 { 266 static if (is(Graph == class)) 267 { 268 auto g = new Graph; 269 } 270 else 271 { 272 auto g = Graph; 273 } 274 bool[] ignore = new bool[5]; 275 g.vertexCount = 5; 276 g.addEdge(0, 1); 277 g.addEdge(1, 2); 278 g.addEdge(3, 4); 279 size_t largest = largestClusterSize(g); 280 writeln("largest cluster size = ", largest); 281 assert(largestClusterSize(g) == largestClusterSize(g, ignore)); 282 283 ignore[0 .. 2] = true; 284 largest = largestClusterSize(g, ignore); 285 writeln("largest cluster size = ", largest); 286 } 287 288 void clusterTest2(Graph)() 289 if (isGraph!Graph) 290 { 291 static if (is(Graph == class)) 292 { 293 auto g = new Graph; 294 } 295 else 296 { 297 auto g = Graph; 298 } 299 bool[] ignore = new bool[100]; 300 g.vertexCount = 100; 301 foreach (immutable i; 0 .. 100) 302 { 303 foreach (immutable j; i .. 100) 304 { 305 g.addEdge(i, j); 306 } 307 } 308 writeln("largest cluster size = ", largestClusterSize(g)); 309 assert(largestClusterSize(g) == largestClusterSize(g, ignore)); 310 foreach (immutable i; 0 .. 100) 311 { 312 ignore[i] = true; 313 writeln(i, ": largest cluster size = ", largestClusterSize(g, ignore)); 314 } 315 } 316 317 void clusterTest3(Graph)(immutable size_t n) 318 if (isGraph!Graph) 319 { 320 static if (is(Graph == class)) 321 { 322 auto g = new Graph; 323 } 324 else 325 { 326 auto g = Graph; 327 } 328 bool[] ignore = new bool[n]; 329 g.vertexCount = n; 330 foreach (immutable i; 0 .. n - 1) 331 { 332 g.addEdge(i, i + 1); 333 } 334 writeln("largest cluster size = ", largestClusterSize(g)); 335 assert(largestClusterSize(g) == largestClusterSize(g, ignore)); 336 ignore[n / 4] = true; 337 writeln("largest cluster size = ", largestClusterSize(g, ignore)); 338 } 339 340 void clusterTest50(Graph)() 341 if (isGraph!Graph) 342 { 343 import std.random; 344 import dgraph.test.samplegraph50; 345 346 static if (is(Graph == class)) 347 { 348 auto g = new Graph; 349 } 350 else 351 { 352 auto g = Graph; 353 } 354 bool[] ignore = new bool[50]; 355 g.vertexCount = 50; 356 g.addEdge(sampleGraph50); 357 writeln("[[50]] largest cluster size = ", largestClusterSize(g, ignore)); 358 359 foreach (immutable i; 0 .. 10) 360 { 361 auto sample1 = randomSample(iota(50), 7, Random(100 * i * i)); 362 ignore[] = false; 363 foreach (immutable s; sample1) 364 { 365 ignore[s] = true; 366 } 367 writeln("[[50.", i, "]] largest cluster size = ", largestClusterSize(g, ignore)); 368 } 369 370 } 371 372 clusterTest1!(CachedEdgeList!false)(); 373 clusterTest2!(CachedEdgeList!false)(); 374 clusterTest1!(CachedEdgeList!true)(); 375 clusterTest2!(CachedEdgeList!true)(); 376 377 clusterTest50!(CachedEdgeList!false)(); 378 clusterTest50!(CachedEdgeList!true)(); 379 380 clusterTest3!(CachedEdgeList!false)(40); 381 clusterTest3!(CachedEdgeList!true)(40); 382 }