-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathreal_data_test.cpp
More file actions
352 lines (284 loc) · 12.7 KB
/
real_data_test.cpp
File metadata and controls
352 lines (284 loc) · 12.7 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
#include "graph_operations.cpp"
#include <fstream>
#include <sstream>
#include <chrono>
#include <iomanip>
#include <cmath>
#include <windows.h>
#include <psapi.h>
using namespace std;
using namespace std::chrono;
// Function to get current memory usage in bytes
size_t getCurrentMemoryUsage() {
PROCESS_MEMORY_COUNTERS_EX pmc;
if (GetProcessMemoryInfo(GetCurrentProcess(), (PROCESS_MEMORY_COUNTERS*)&pmc, sizeof(pmc))) {
return pmc.WorkingSetSize;
}
return 0;
}
// Function to format memory size
string formatMemory(size_t bytes) {
if (bytes < 1024) {
return to_string(bytes) + " B";
} else if (bytes < 1024 * 1024) {
return to_string(bytes / 1024) + " KB";
} else {
double mb = bytes / (1024.0 * 1024.0);
char buffer[50];
sprintf(buffer, "%.2f MB", mb);
return string(buffer);
}
}
// ADJACENCY CRITERIA 1: Original Edges (Direct Connections)
// Loads the graph exactly as provided in the dataset.
// Adjacency: Two vertices u and v are adjacent if there exists an edge (u,v)
// in the original YouTube social network data.
Graph loadGraphFromFile(const string& filename) {
Graph g;
ifstream file(filename);
if (!file.is_open()) {
cerr << "Error: Could not open file " << filename << endl;
return g;
}
cout << "Loading graph from file: " << filename << endl;
string line;
int edgeCount = 0;
int lineCount = 0;
while (getline(file, line)) {
lineCount++;
// Skip comment lines
if (line.empty() || line[0] == '#') {
continue;
}
// Parse edge: fromNode toNode
istringstream iss(line);
int fromNode, toNode;
if (iss >> fromNode >> toNode) {
g.addEdge(fromNode, toNode);
edgeCount++;
// Progress indicator every 100,000 edges
if (edgeCount % 100000 == 0) {
cout << " Loaded " << edgeCount << " edges..." << endl;
}
}
}
file.close();
cout << "Graph loading complete!" << endl;
cout << " Total vertices: " << g.numVertices() << endl;
cout << " Total edges: " << g.numEdges() << endl;
return g;
}
// ADJACENCY CRITERIA 2: High-Activity Users Filter
// Adjacency: Two vertices u and v are adjacent if:
// 1. An edge (u,v) exists in the original graph, AND
// 2. Both u and v have degree ≥ minDegree in the original graph
Graph loadGraphFiltered(const string& filename, int minDegree) {
// First pass: load original graph to compute degrees
cout << "\n--- Adjacency Criteria 2: High-Activity Filter ---" << endl;
cout << "Minimum degree threshold: " << minDegree << endl;
Graph originalGraph = loadGraphFromFile(filename);
cout << "Computing degree distribution..." << endl;
// Count degrees
map<int, int> degrees;
for (int v : originalGraph.getVertices()) {
const auto& adjList = originalGraph.getAdjList();
if (adjList.find(v) != adjList.end()) {
degrees[v] = adjList.at(v).size();
}
}
// Create filtered graph
cout << "Creating filtered graph (keeping high-activity users)..." << endl;
Graph filteredGraph;
int filteredEdges = 0;
for (int u : originalGraph.getVertices()) {
const auto& adjList = originalGraph.getAdjList();
if (adjList.find(u) != adjList.end()) {
for (int v : adjList.at(u)) {
// Only add edge if both vertices meet the degree threshold
if (u < v && degrees[u] >= minDegree && degrees[v] >= minDegree) {
filteredGraph.addEdge(u, v);
filteredEdges++;
}
}
}
}
cout << "Filtered graph created!" << endl;
cout << " Vertices: " << filteredGraph.numVertices() << endl;
cout << " Edges: " << filteredGraph.numEdges() << endl;
return filteredGraph;
}
// ADJACENCY CRITERIA 3: Degree Similarity
// Adjacency: Two vertices u and v are adjacent if:
// 1. An edge (u,v) exists in the original graph, AND
// 2. The degrees of u and v are within a similarity threshold:
// |degree(u) - degree(v)| / max(degree(u), degree(v)) ≤ (1 - threshold)
// i.e., similarity ≥ threshold means degrees are similar
Graph loadGraphDegreeSimilarity(const string& filename, double similarityThreshold) {
// First pass: load original graph to compute degrees
cout << "\n--- Adjacency Criteria 3: Degree Similarity ---" << endl;
cout << "Similarity threshold: " << similarityThreshold << " (1.0 = identical degrees)" << endl;
Graph originalGraph = loadGraphFromFile(filename);
cout << "Computing degree distribution..." << endl;
// Count degrees
map<int, int> degrees;
for (int v : originalGraph.getVertices()) {
const auto& adjList = originalGraph.getAdjList();
if (adjList.find(v) != adjList.end()) {
degrees[v] = adjList.at(v).size();
} else {
degrees[v] = 0;
}
}
// Create similarity-based graph
cout << "Creating degree similarity graph..." << endl;
Graph similarityGraph;
int similarityEdges = 0;
for (int u : originalGraph.getVertices()) {
const auto& adjList = originalGraph.getAdjList();
if (adjList.find(u) != adjList.end()) {
for (int v : adjList.at(u)) {
if (u < v) { // Avoid duplicate edges
// Calculate degree similarity
int degU = degrees[u];
int degV = degrees[v];
double maxDeg = max(degU, degV);
if (maxDeg > 0) {
double similarity = 1.0 - (abs(degU - degV) / maxDeg);
// Add edge if similarity meets threshold
if (similarity >= similarityThreshold) {
similarityGraph.addEdge(u, v);
similarityEdges++;
}
}
}
}
}
}
cout << "Degree similarity graph created!" << endl;
cout << " Vertices: " << similarityGraph.numVertices() << endl;
cout << " Edges: " << similarityGraph.numEdges() << endl;
return similarityGraph;
}
void testGraphWithCriteria(const string& criteriaName, Graph& g, size_t memBefore, long long loadTime) {
cout << "\n========================================" << endl;
cout << "TESTING: " << criteriaName << endl;
cout << "========================================" << endl;
cout << "Vertices: " << g.numVertices() << ", Edges: " << g.numEdges() << endl;
cout << "Graph loading time: " << loadTime << " ms (" << (loadTime / 1000.0) << " seconds)" << endl;
size_t memAfterLoad = getCurrentMemoryUsage();
cout << "Memory after load: " << formatMemory(memAfterLoad) << endl;
cout << endl;
auto totalStart = high_resolution_clock::now();
// Test 1: Connected Components
cout << "Test 1: Connected Components (DFS)" << endl;
auto start = high_resolution_clock::now();
auto components = g.connected_components();
auto end = high_resolution_clock::now();
auto duration = duration_cast<milliseconds>(end - start);
cout << " Number of connected components: " << components.size() << endl;
// Show size of largest components
vector<int> componentSizes;
for (const auto& comp : components) {
componentSizes.push_back(comp.size());
}
sort(componentSizes.begin(), componentSizes.end(), greater<int>());
if (componentSizes.size() > 0) {
cout << " Largest component: " << componentSizes[0];
if (componentSizes.size() == 1) cout << " (entire graph connected)";
cout << endl;
}
cout << " Time: " << duration.count() << " ms (" << (duration.count() / 1000.0) << " sec)" << endl;
size_t memAfterComponents = getCurrentMemoryUsage();
cout << " Memory: " << formatMemory(memAfterComponents) << endl;
cout << endl;
// Test 2: Cycle Detection
cout << "Test 2: Cycle Detection (DFS)" << endl;
start = high_resolution_clock::now();
auto cycle = g.one_cycle();
end = high_resolution_clock::now();
duration = duration_cast<milliseconds>(end - start);
if (cycle.empty()) {
cout << " No cycle found" << endl;
} else {
cout << " Cycle found! Length: " << cycle.size() - 1 << endl;
}
cout << " Time: " << duration.count() << " ms" << endl;
size_t memAfterCycle = getCurrentMemoryUsage();
cout << " Memory: " << formatMemory(memAfterCycle) << endl;
cout << endl;
// Test 3: Shortest Paths (Dijkstra)
cout << "Test 3: Shortest Paths (Dijkstra)" << endl;
int sourceVertex = 0;
if (!components.empty() && !components.front().empty()) {
sourceVertex = components.front().front();
}
cout << " Computing shortest paths from vertex " << sourceVertex << "..." << endl;
start = high_resolution_clock::now();
auto paths = g.shortest_paths(sourceVertex);
end = high_resolution_clock::now();
duration = duration_cast<milliseconds>(end - start);
cout << " Reachable vertices: " << paths.size() << endl;
map<int, int> pathLengthDistribution;
int maxLength = 0;
for (const auto& p : paths) {
int length = p.second.size() - 1;
pathLengthDistribution[length]++;
if (length > maxLength) maxLength = length;
}
cout << " Maximum distance: " << maxLength << endl;
cout << " Time: " << duration.count() << " ms (" << (duration.count() / 1000.0) << " sec)" << endl;
size_t memAfterPaths = getCurrentMemoryUsage();
cout << " Memory: " << formatMemory(memAfterPaths) << endl;
cout << endl;
// Summary
auto totalEnd = high_resolution_clock::now();
auto totalDuration = duration_cast<milliseconds>(totalEnd - totalStart);
size_t peakMemory = max({memAfterLoad, memAfterComponents, memAfterCycle, memAfterPaths});
cout << "SUMMARY - " << criteriaName << endl;
cout << " Peak Memory: " << formatMemory(peakMemory) << " (+" << formatMemory(peakMemory - memBefore) << " from baseline)" << endl;
cout << " Total Test Time: " << totalDuration.count() << " ms (" << (totalDuration.count() / 1000.0) << " sec)" << endl;
cout << "========================================" << endl;
}
int main() {
cout << "========================================" << endl;
cout << "REAL-WORLD GRAPH ANALYSIS" << endl;
cout << "Dataset: YouTube Social Network (SNAP)" << endl;
cout << "========================================" << endl;
cout << endl;
cout << "Testing ALL THREE Adjacency Criteria:" << endl;
cout << "1. Original Edges - Direct connections from dataset" << endl;
cout << "2. High-Activity Filter - Edges between users with degree >= 10" << endl;
cout << "3. Degree Similarity - Edges between users with 70% degree similarity" << endl;
cout << endl;
size_t memBefore = getCurrentMemoryUsage();
cout << "Baseline Memory: " << formatMemory(memBefore) << endl;
cout << endl;
// CRITERIA 1: Original Edges
cout << "\n>>>>> LOADING CRITERIA 1: Original Edges <<<<<" << endl;
auto loadStart = high_resolution_clock::now();
Graph g1 = loadGraphFromFile("com-youtube.ungraph.txt");
auto loadEnd = high_resolution_clock::now();
auto loadDuration = duration_cast<milliseconds>(loadEnd - loadStart);
if (g1.numVertices() > 0) {
testGraphWithCriteria("CRITERIA 1: Original Edges", g1, memBefore, loadDuration.count());
}
// CRITERIA 2: High-Activity Filter
cout << "\n>>>>> LOADING CRITERIA 2: High-Activity Filter <<<<<" << endl;
loadStart = high_resolution_clock::now();
Graph g2 = loadGraphFiltered("com-youtube.ungraph.txt", 10);
loadEnd = high_resolution_clock::now();
loadDuration = duration_cast<milliseconds>(loadEnd - loadStart);
if (g2.numVertices() > 0) {
testGraphWithCriteria("CRITERIA 2: High-Activity Filter (degree >= 10)", g2, memBefore, loadDuration.count());
}
// CRITERIA 3: Degree Similarity
cout << "\n>>>>> LOADING CRITERIA 3: Degree Similarity <<<<<" << endl;
loadStart = high_resolution_clock::now();
Graph g3 = loadGraphDegreeSimilarity("com-youtube.ungraph.txt", 0.7);
loadEnd = high_resolution_clock::now();
loadDuration = duration_cast<milliseconds>(loadEnd - loadStart);
if (g3.numVertices() > 0) {
testGraphWithCriteria("CRITERIA 3: Degree Similarity (70% threshold)", g3, memBefore, loadDuration.count());
}
return 0;
}