1 module dgraph.metric;
2 
3 import std.algorithm, std.conv, std.range, std.traits;
4 
5 import dgraph.graph;
6 
7 /**
8  * Simple queue implementation for internal use.  This will probably be removed
9  * once Phobos has an effective queue container.
10  *
11  * This is derived from Bearophile's $(LINK2 http://rosettacode.org/wiki/Queue/Usage#Faster_Version,
12  * RosettaCode example of a circular queue), which he has kindly agreed to allow
13  * to be $(LINK2 http://forum.dlang.org/post/mvmzvkjpbhazezlsydim@forum.dlang.org,
14  * used under the terms of the Boost licence).
15  */
16 struct VertexQueue
17 {
18     private size_t length_, maxLength, head, tail;
19     private size_t[] queue;
20 
21     this(size_t m)
22     {
23         maxLength = m;
24         queue = new size_t[maxLength];
25     }
26 
27     bool empty() @property const pure nothrow
28     {
29         return (length_ == 0);
30     }
31 
32     size_t length() @property const pure nothrow
33     {
34         return length_;
35     }
36 
37     void push(immutable size_t v) nothrow
38     {
39         assert(v < maxLength, "Vertex ID is too large!");
40         queue[tail] = v;
41         tail = (tail + 1) % maxLength;
42         ++length_;
43         assert(length_ <= maxLength, "Length has exceeded total number of vertices!");
44     }
45 
46     auto front() @property const pure
47     {
48         assert(!this.empty, "Node queue is empty!");
49         return queue[head];
50     }
51 
52     void pop() nothrow
53     {
54         head = (head + 1) % maxLength;
55         --length_;
56     }
57 }
58 
59 unittest
60 {
61     auto q = VertexQueue(8);
62     q.push(3);
63     q.push(7);
64     q.push(4);
65     assert(q.front == 3);
66     q.pop();
67     assert(q.front == 7);
68     q.pop();
69     assert(q.front == 4);
70     q.push(5);
71     assert(q.front == 4);
72     q.pop();
73     assert(q.front == 5);
74     q.pop();
75     assert(q.empty);
76     q.push(2);
77     assert(q.front == 2);
78     q.pop();
79     assert(q.empty);
80 }
81 
82 /**
83  * Calculate betweenness centrality of vertices in a graph, using the algorithm
84  * developed by Ulrik Brandes (2001) J. Math. Sociol. 25(2): 163-177.
85  *
86  * The optional function parameter ignore allows the user to indicate
87  * individual vertices to ignore in the calculation.
88  */
89 auto ref betweenness(T = double, Graph)(ref Graph g, bool[] ignore = null)
90     if (isFloatingPoint!T && isGraph!Graph)
91 {
92     T[] centrality = new T[g.vertexCount];
93     return betweenness!(T, Graph)(g, centrality, ignore);
94 }
95 
96 auto ref betweenness(T = double, Graph)(ref Graph g, ref T[] centrality, bool[] ignore = null)
97 {
98     centrality.length = g.vertexCount;
99     centrality[] = to!T(0);
100     size_t[] stack = new size_t[g.vertexCount];
101     T[] sigma = new T[g.vertexCount];
102     T[] delta = new T[g.vertexCount];
103     long[] d = new long[g.vertexCount];
104     auto q = VertexQueue(g.vertexCount);
105     Appender!(size_t[])[] p = new Appender!(size_t[])[g.vertexCount];
106 
107     sigma[] = to!T(0);
108     delta[] = to!T(0);
109     d[] = -1L;
110 
111     foreach (immutable s; 0 .. g.vertexCount)
112     {
113         if (ignore && ignore[s])
114         {
115             continue;
116         }
117 
118         size_t stackLength = 0;
119         assert(q.empty);
120         sigma[s] = to!T(1);
121         d[s] = 0L;
122         q.push(s);
123 
124         while (!q.empty)
125         {
126             size_t v = q.front;
127             q.pop();
128             stack[stackLength] = v;
129             ++stackLength;
130             foreach (immutable w; g.neighboursOut(v))
131             {
132                 if (ignore && ignore[w])
133                 {
134                     continue;
135                 }
136 
137                 if (d[w] < 0L)
138                 {
139                     q.push(w);
140                     d[w] = d[v] + 1L;
141                     assert(sigma[w] == to!T(0));
142                     sigma[w] = sigma[v];
143                     p[w].clear;
144                     p[w].put(v);
145                 }
146                 else if (d[w] > (d[v] + 1L))
147                 {
148                     /* w has already been encountered, but we've
149                        found a shorter path to the source.  This
150                        is probably only relevant to the weighted
151                        case, but let's keep it here in order to
152                        be ready for that update. */
153                     d[w] = d[v] + 1L;
154                     sigma[w] = sigma[v];
155                     p[w].clear;
156                     p[w].put(v);
157                 }
158                 else if (d[w] == (d[v] + 1L))
159                 {
160                     sigma[w] += sigma[v];
161                     p[w].put(v);
162                 }
163             }
164         }
165 
166         while (stackLength > to!size_t(0))
167         {
168             --stackLength;
169             auto w = stack[stackLength];
170             foreach (immutable v; p[w].data)
171             {
172                 delta[v] += ((sigma[v] / sigma[w]) * (to!T(1) + delta[w]));
173             }
174             if (w != s)
175             {
176                 centrality[w] += delta[w];
177             }
178             sigma[w] = to!T(0);
179             delta[w] = to!T(0);
180             d[w] = -1L;
181         }
182     }
183 
184     static if (!g.directed)
185     {
186         centrality[] /= 2;
187     }
188 
189     return centrality;
190 }
191 
192 /**
193  * Calculate the size of the largest connected cluster in the graph.
194  *
195  * The function parameter ignore allows the user to specify individual
196  * vertices to ignore for the purposes of the calculation.
197  *
198  * This algorithm is a rather ad-hoc construction inspired by Brandes'
199  * algorithm for betweenness centrality.  No claims are made for its
200  * performance or even correctness.
201  */
202 size_t largestClusterSize(Graph)(ref Graph g, bool[] ignore = null)
203     if (isGraph!Graph)
204 {
205     long[] cluster = new long[g.vertexCount];
206     cluster[] = -1L;
207     auto q = VertexQueue(g.vertexCount);
208     size_t largestCluster = 0;
209 
210     foreach (immutable s; 0 .. g.vertexCount)
211     {
212         if (ignore && ignore[s])
213         {
214             continue;
215         }
216         else if (cluster[s] < 0)
217         {
218             assert(q.empty);
219             cluster[s] = 0;
220             q.push(s);
221             size_t clusterSize = 1;
222 
223             while (!q.empty)
224             {
225                 size_t v = q.front;
226                 q.pop();
227 
228                 static if (g.directed)
229                 {
230                     auto allNeighbours = chain(g.neighboursIn(v), g.neighboursOut(v));
231                 }
232                 else
233                 {
234                     auto allNeighbours = g.neighbours(v);
235                 }
236 
237                 foreach (immutable w; allNeighbours)
238                 {
239                     if (ignore && ignore[w])
240                     {
241                         continue;
242                     }
243                     else if (cluster[w] < 0)
244                     {
245                        q.push(w);
246                        cluster[w] = clusterSize;
247                        ++clusterSize;
248                     }
249                 }
250             }
251 
252             largestCluster = max(largestCluster, clusterSize);
253         }
254     }
255 
256     return largestCluster;
257 }
258 
259 unittest
260 {
261     import std.stdio, std.typecons;
262 
263     void clusterTest1(Graph)()
264         if (isGraph!Graph)
265     {
266         static if (is(Graph == class))
267         {
268             auto g = new Graph;
269         }
270         else
271         {
272             auto g = Graph;
273         }
274         bool[] ignore = new bool[5];
275         g.vertexCount = 5;
276         g.addEdge(0, 1);
277         g.addEdge(1, 2);
278         g.addEdge(3, 4);
279         size_t largest = largestClusterSize(g);
280         writeln("largest cluster size = ", largest);
281         assert(largestClusterSize(g) == largestClusterSize(g, ignore));
282 
283         ignore[0 .. 2] = true;
284         largest = largestClusterSize(g, ignore);
285         writeln("largest cluster size = ", largest);
286     }
287 
288     void clusterTest2(Graph)()
289         if (isGraph!Graph)
290     {
291         static if (is(Graph == class))
292         {
293             auto g = new Graph;
294         }
295         else
296         {
297             auto g = Graph;
298         }
299         bool[] ignore = new bool[100];
300         g.vertexCount = 100;
301         foreach (immutable i; 0 .. 100)
302         {
303             foreach (immutable j; i .. 100)
304             {
305                 g.addEdge(i, j);
306             }
307         }
308         writeln("largest cluster size = ", largestClusterSize(g));
309         assert(largestClusterSize(g) == largestClusterSize(g, ignore));
310         foreach (immutable i; 0 .. 100)
311         {
312             ignore[i] = true;
313             writeln(i, ": largest cluster size = ", largestClusterSize(g, ignore));
314         }
315     }
316 
317     void clusterTest3(Graph)(immutable size_t n)
318         if (isGraph!Graph)
319     {
320         static if (is(Graph == class))
321         {
322             auto g = new Graph;
323         }
324         else
325         {
326             auto g = Graph;
327         }
328         bool[] ignore = new bool[n];
329         g.vertexCount = n;
330         foreach (immutable i; 0 .. n - 1)
331         {
332             g.addEdge(i, i + 1);
333         }
334         writeln("largest cluster size = ", largestClusterSize(g));
335         assert(largestClusterSize(g) == largestClusterSize(g, ignore));
336         ignore[n / 4] = true;
337         writeln("largest cluster size = ", largestClusterSize(g, ignore));
338     }
339 
340     void clusterTest50(Graph)()
341         if (isGraph!Graph)
342     {
343         import std.random;
344         import dgraph.test.samplegraph50;
345 
346         static if (is(Graph == class))
347         {
348             auto g = new Graph;
349         }
350         else
351         {
352             auto g = Graph;
353         }
354         bool[] ignore = new bool[50];
355         g.vertexCount = 50;
356         g.addEdge(sampleGraph50);
357         writeln("[[50]] largest cluster size = ", largestClusterSize(g, ignore));
358 
359         foreach (immutable i; 0 .. 10)
360         {
361             auto sample1 = randomSample(iota(50), 7, Random(100 * i * i));
362             ignore[] = false;
363             foreach (immutable s; sample1)
364             {
365                 ignore[s] = true;
366             }
367             writeln("[[50.", i, "]] largest cluster size = ", largestClusterSize(g, ignore));
368         }
369 
370     }
371 
372     clusterTest1!(CachedEdgeList!false)();
373     clusterTest2!(CachedEdgeList!false)();
374     clusterTest1!(CachedEdgeList!true)();
375     clusterTest2!(CachedEdgeList!true)();
376 
377     clusterTest50!(CachedEdgeList!false)();
378     clusterTest50!(CachedEdgeList!true)();
379 
380     clusterTest3!(CachedEdgeList!false)(40);
381     clusterTest3!(CachedEdgeList!true)(40);
382 }