-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathweb-graph-workshop-wac2025.bib
295 lines (265 loc) · 12.7 KB
/
web-graph-workshop-wac2025.bib
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
% Conference paper (WWW '03), typed as InProceedings so that the venue is
% carried by booktitle. The bare DOI is the one embedded in the URL.
@InProceedings{AbiteboulPredaCobena:2003:OPIC,
  author = "Serge Abiteboul and Mihai Preda and Gregory Cobena",
  booktitle = "Proceedings of the 12th International Conference on World Wide Web",
  doi = "10.1145/775152.775192",
  pages = "280--290",
  series = "WWW '03",
  title = "Adaptive on-line page importance computation",
  URL = "https://dx.doi.org/10.1145/775152.775192",
  year = "2003",
}
% Online video (YouTube link) by Paolo Boldi.
@misc{Boldi:2013:centrality-measures,
  author = {Boldi, Paolo},
  title  = {A modern view of centrality measures},
  year   = {2013},
  url    = {https://www.youtube.com/watch?v=cnGJtGP4gL4},
}
% Proceedings paper: the entry carries a booktitle and no journal, so it is
% typed as InProceedings. The page range uses the BibTeX double hyphen.
@InProceedings{BoldiVigna:2004:WebGraphFrameworkI,
  address = "New York, NY, USA",
  author = "Paolo Boldi and Sebastiano Vigna",
  booktitle = "Proceedings of the 13th International Conference on World Wide Web",
  doi = "10.1145/988672.988752",
  ISBN = "1-58113-844-X",
  keywords = "web graph, compression",
  location = "New York, NY, USA",
  numpages = "8",
  pages = "595--602",
  publisher = "Association for Computing Machinery",
  series = "WWW '04",
  title = "The WebGraph framework {I}: Compression techniques",
  URL = "https://doi.org/10.1145/988672.988752",
  year = "2004",
}
% arXiv preprint listed under CoRR. The eprint/archiveprefix pair exposes the
% arXiv identifier to styles that understand it; journal/volume are kept for
% styles that do not.
@Article{BoldiVigna:2013:AxiomsCentrality,
  archiveprefix = "arXiv",
  author = "Paolo Boldi and Sebastiano Vigna",
  eprint = "1308.2140",
  journal = "CoRR",
  pdf = "https://arxiv.org/pdf/1308.2140.pdf",
  title = "Axioms for Centrality",
  URL = "https://arxiv.org/abs/1308.2140",
  volume = "abs/1308.2140",
  year = "2013",
}
% Workshop paper: the venue is a conference proceedings, so it belongs in
% booktitle of an InProceedings entry rather than in journal.
@InProceedings{BoldiVigna:2013:HyperBall,
  author = "Paolo Boldi and Sebastiano Vigna",
  booktitle = "2013 IEEE 13th International Conference on Data Mining Workshops",
  pages = "621--628",
  pdf = "http://vigna.di.unimi.it/ftp/papers/HyperBall.pdf",
  title = "In-Core Computation of Geometric Centralities with HyperBall: {A} Hundred Billion Nodes and Beyond",
  URL = "https://vigna.di.unimi.it/papers.php#BoVHB",
  year = "2013",
}
% Chapter in a Springer proceedings volume. The bare DOI is the one embedded
% in the URL. NOTE(review): the citation key contains a non-ASCII character;
% it is kept unchanged because documents cite it, but classic 8-bit BibTeX
% toolchains may not handle it.
@InProceedings{cc:AlbyJäschke:2022:top-websites,
  address = "Cham",
  author = "Alby, Tom and Jäschke, Robert",
  booktitle = "Linking Theory and Practice of Digital Libraries",
  cc-author-affiliation = "Humboldt-Universität zu Berlin, Berlin, Germany",
  cc-class = "web-science, domain-ranking",
  cc-dataset-used = "hyperlinkgraph/cc-main-2021-feb-apr-may/hostgraph",
  doi = "10.1007/978-3-031-16802-4_2",
  editor = "Silvello, Gianmaria and Corcho, Oscar and Manghi, Paolo and Di Nunzio, Giorgio Maria and Golub,
    Koraljka and Ferro, Nicola and Poggi, Antonella",
  ISBN = "978-3-031-16802-4",
  pages = "11--25",
  publisher = "Springer International Publishing",
  title = "Analyzing the Web: Are Top Websites Lists a Good Choice for Research?",
  URL = "https://link.springer.com/chapter/10.1007/978-3-031-16802-4_2",
  year = "2022",
}
% Journal article; the note field records its "Just Accepted" status.
@article{cc:CarragherWilliamsCarley:2024:Misinformation-resilient-search-rankings,
  author                = {Carragher, Peter and Williams, Evan M. and Carley, Kathleen M.},
  title                 = {Misinformation Resilient Search Rankings with Webgraph-based Interventions},
  journal               = {ACM Trans. Intell. Syst. Technol.},
  year                  = {2024},
  month                 = jun,
  note                  = {Just Accepted},
  publisher             = {Association for Computing Machinery},
  address               = {New York, NY, USA},
  issn                  = {2157-6904},
  doi                   = {10.1145/3670410},
  url                   = {https://doi.org/10.1145/3670410},
  keywords              = {search engine optimization, misinformation, website reliability, pagerank},
  cc-author-affiliation = {Carnegie Mellon University, USA},
  cc-class              = {web-science/hyperlinkgraph, misinformation, disinformation, domain-ranking},
}
% arXiv preprint: no journal is recorded, so the entry is typed as Misc. The
% eprint field holds the bare identifier and archiveprefix names the archive.
@Misc{cc:Funel:2018:analysis-web-graph,
  archiveprefix = "arXiv",
  author = "Funel, Agostino",
  cc-author-affiliation = "ENEA, Italy",
  cc-class = "web-science/hyperlinkgraph",
  cc-dataset-used = "hyperlinkgraph/cc-main-2017-aug-sep-oct/hostgraph,
    hyperlinkgraph/cc-main-2017-aug-sep-oct/domaingraph",
  eprint = "1802.05435",
  title = "Analysis of the Web Graph Aggregated by Host and Pay-Level Domain",
  URL = "https://arxiv.org/abs/1802.05435",
  year = "2018",
}
% Conference paper. The bare DOI is the one embedded in the ACM URL.
@InProceedings{cc:LehmbergMeuselBizer:2014:Graph-structure-aggregated-by-pay-level-domain,
  author = "Oliver Lehmberg and Robert Meusel and Christian Bizer",
  booktitle = "Web Science Conference",
  cc-author-affiliation = "University of Mannheim, Germany",
  cc-class = "web-science/hyperlinkgraph",
  doi = "10.1145/2615569.2615674",
  title = "Graph structure in the web: aggregated by pay-level domain",
  URL = "https://dl.acm.org/doi/10.1145/2615569.2615674",
  year = "2014",
}
% Web post; url2 is a second link to the same post's source on GitHub
% (non-standard field, ignored by standard styles).
@misc{cc:McSherry:2015:bigger-data-same-laptop,
  author                  = {Frank McSherry},
  title                   = {Bigger data; same laptop},
  year                    = {2015},
  url                     = {https://www.frankmcsherry.org/graph/scalability/cost/2015/02/04/COST2.html},
  url2                    = {https://github.com/frankmcsherry/blog/blob/master/posts/2015-02-04.md},
  cc-author-affiliation   = {ETH Zurich, Switzerland},
  cc-class                = {web-science/hyperlinkgraph, big data},
  cc-derived-dataset-used = {WDC-hyperlinkgraph},
}
% Web post; url2 is a second link to the same post's source on GitHub
% (non-standard field, ignored by standard styles).
@misc{cc:McSherry:2015:scalability-at-what-cost,
  author                = {Frank McSherry},
  title                 = {Scalability! {But} at what {COST}?},
  year                  = {2015},
  url                   = {https://www.frankmcsherry.org/graph/scalability/cost/2015/01/15/COST.html},
  url2                  = {https://github.com/frankmcsherry/blog/blob/master/posts/2015-01-15.md},
  cc-author-affiliation = {ETH Zurich, Switzerland},
  cc-class              = {web-science/hyperlinkgraph, big data},
}
% No journal is recorded, so the entry is typed as InProceedings.
% NOTE(review): booktitle is given from memory of the published version
% (WWW 2014 companion proceedings) -- confirm against the publisher record.
% The affiliation text is repaired from a broken line wrap and the title dash
% is written in LaTeX form.
@InProceedings{cc:MeuselVignaLehmbergBizer:2014:Graph-structure-in-the-web-revisited,
  author = "Robert Meusel and Sebastiano Vigna and Oliver Lehmberg and Christian Bizer",
  booktitle = "Proceedings of the 23rd International Conference on World Wide Web",
  cc-author-affiliation = "Data and Web Science Group - University of Mannheim, Germany; Laboratory for Web
    Algorithmics - Università degli Studi di Milano, Italy",
  cc-class = "web-science/hyperlinkgraph",
  title = "Graph Structure in the Web --- Revisited",
  URL = "http://vigna.di.unimi.it/ftp/papers/GraphStructureRevisited.pdf",
  year = "2014",
}
% Journal article. The doi field holds the bare identifier (no resolver
% prefix), the empty ISSN field is dropped, and the title dash is written in
% LaTeX form.
@Article{cc:MeuselVignaLehmbergBizer:2015:web-graph-structure-aggregation-levels,
  author = "Robert Meusel and Sebastiano Vigna and Oliver Lehmberg and Christian Bizer",
  cc-author-affiliation = "University of Mannheim, Germany; Università degli Studi di Milano, Italy",
  cc-class = "web-science/hyperlinkgraph",
  doi = "10.1561/106.00000003",
  journal = "The Journal of Web Science",
  number = "1",
  pages = "33--47",
  title = "The Graph Structure in the Web -- Analyzed on Different Aggregation Levels",
  URL = "https://pdfs.semanticscholar.org/b5d5/88298e6845b4bfd40ea779ce21e628239ef3.pdf",
  volume = "1",
  year = "2015",
}
% Conference paper (series WWW '23); address is the publisher's city and
% location is the conference venue.
@inproceedings{cc:NourinTranJiangBockEtAl:2023:Measuring-turkmenistans-internet-censorship,
  author                = {Nourin, Sadia and Tran, Van and Jiang, Xi and Bock, Kevin and Feamster, Nick and Hoang, Nguyen Phong and Levin, Dave},
  title                 = {Measuring and Evading Turkmenistan’s Internet Censorship: {A} Case Study in Large-Scale Measurements of a Low-Penetration Country},
  booktitle             = {Proceedings of the ACM Web Conference 2023},
  series                = {WWW '23},
  year                  = {2023},
  pages                 = {1969--1979},
  numpages              = {11},
  publisher             = {Association for Computing Machinery},
  address               = {New York, NY, USA},
  location              = {Austin, TX, USA},
  isbn                  = {978-1-4503-9416-1},
  doi                   = {10.1145/3543507.3583189},
  url                   = {https://doi.org/10.1145/3543507.3583189},
  pdf                   = {https://dl.acm.org/doi/pdf/10.1145/3543507.3583189},
  keywords              = {Censorship Measurement, Web Filtering, Turkmenistan},
  cc-author-affiliation = {University of Maryland, USA; University of Chicago, USA},
  cc-class              = {web-filtering, internet-censorship},
  cc-dataset-used       = {hyperlinkgraph},
}
% arXiv preprint. The eprint/archiveprefix pair records the arXiv identifier
% that is embedded in the URL.
@Misc{cc:SquarcinaTempestaVeroneseCalzavaraEtAl:2020:related-domain-attacks,
  archiveprefix = "arXiv",
  author = "Marco Squarcina and Mauro Tempesta and Lorenzo Veronese and Stefano Calzavara and Matteo Maffei",
  cc-author-affiliation = "TU Wien, Austria; Università Ca’ Foscari Venezia, Italy",
  cc-class = "computer-security/internet-security, related-domain attacks",
  cc-dataset-used = "hyperlinkgraph/cc-main-2020-feb-mar-may/hostgraph",
  eprint = "2012.01946",
  pdf = "https://arxiv.org/pdf/2012.01946.pdf",
  title = "Can {I} take your subdomain? Exploring related-domain attacks in the modern web",
  URL = "https://arxiv.org/abs/2012.01946",
  year = "2020",
}
% Web page (commoncrawl.org blog); no author or year is recorded.
@misc{cc:web-graphs-oct-nov-dec-2024,
  title = {Host- and Domain-Level Web Graphs October, November, December 2024},
  url   = {https://commoncrawl.org/blog/host--and-domain-level-web-graphs-october-november-and-december-2024},
}
% Web page (commoncrawl.github.io); no author or year is recorded.
@misc{cc:webgraph-statistics,
  title = {Web Graphs Statistics},
  url   = {https://commoncrawl.github.io/cc-webgraph-statistics/},
}
% Web page (commoncrawl.org); no author or year is recorded.
@misc{cc:webgraphs,
  title = {Web Graphs},
  url   = {https://commoncrawl.org/web-graphs},
}
% Web page, cited through a web.archive.org snapshot URL.
@misc{commonsearch:2017:web-graph-page-rank,
  title = {Common Search: Our first public datasets: Host-level WebGraph and PageRank},
  url   = {https://web.archive.org/web/20170729110709/https://about.commonsearch.org/2016/07/our-first-public-datasets-host-level-webgraph-and-pagerank/},
}
% Conference paper (series WWW '24). The page range uses the BibTeX double
% hyphen instead of a Unicode en dash.
@InProceedings{FontanaVignaZacchiroli:2024:WebGraphNextGeneration,
  address = "New York, NY, USA",
  author = "Fontana, Tommaso and Vigna, Sebastiano and Zacchiroli, Stefano",
  booktitle = "Companion Proceedings of the ACM on Web Conference 2024",
  doi = "10.1145/3589335.3651581",
  ISBN = "9798400701726",
  keywords = "big data, compression, graphs, java, rust, social networks, web graphs",
  location = "Singapore, Singapore",
  numpages = "4",
  pages = "686--689",
  publisher = "Association for Computing Machinery",
  series = "WWW '24",
  title = "WebGraph: The Next Generation (Is in Rust)",
  URL = "https://doi.org/10.1145/3589335.3651581",
  year = "2024",
}
% Web page (forbes.com); no author or year is recorded.
@misc{Forbes-top-colleges,
  title = {Forbes America's Top Colleges List 2025 - Best {US} Universities Ranked},
  url   = {https://www.forbes.com/top-colleges/},
}
% Source file in a GitHub repository. The title is derived from the
% repository and file path in the URL so the reference does not render as a
% bare link.
@Misc{git:commonsearch:cosr-back,
  title = "commonsearch/cosr-back: spark/jobs/pagerank.py",
  URL = "https://github.com/commonsearch/cosr-back/blob/master/spark/jobs/pagerank.py",
}
% Journal article identified by an article number (articleno) rather than a
% page range.
@article{LeeLeonardWangLoguinov:2009:IRLbot,
  author     = {Lee, Hsin-Tsang and Leonard, Derek and Wang, Xiaoming and Loguinov, Dmitri},
  title      = {{IRL}bot: Scaling to 6 Billion Pages and Beyond},
  journal    = {ACM Trans. Web},
  volume     = {3},
  number     = {3},
  articleno  = {8},
  numpages   = {34},
  year       = {2009},
  month      = jul,
  issue_date = {June 2009},
  publisher  = {Association for Computing Machinery},
  address    = {New York, NY, USA},
  issn       = {1559-1131},
  doi        = {10.1145/1541822.1541823},
  url        = {https://doi.org/10.1145/1541822.1541823},
  pdf        = {https://irl.cs.tamu.edu/people/hsin-tsang/papers/www2008.pdf},
  keywords   = {large scale, crawling, IRLbot},
}
% Web page (topuniversities.com); no author or year is recorded.
@misc{QS-world-universities-USA,
  title = {{QS} World University Rankings: The top 100 universities in the {USA}},
  url   = {https://www.topuniversities.com/where-to-study/north-america/united-states/ranked-top-100-us-universities},
}
% Web page (sitemaps.org protocol page); no author or year is recorded.
@misc{sitemapsorg,
  title = {sitemaps.org},
  url   = {https://www.sitemaps.org/protocol.html},
}
% Web page (webdatacommons.org); no author or year is recorded.
@misc{web-data-commons:cc-www-ranking,
  title = {The Common Crawl {WWW} Ranking},
  url   = {http://wwwranking.webdatacommons.org/},
}
% Web page (webdatacommons.org); no author is recorded.
@misc{web-data-commons:hyperlinkgraph,
  title = {Web Data Commons - Hyperlink Graphs},
  year  = {2013},
  url   = {https://webdatacommons.org/hyperlinkgraph/index.html},
}
% Wikipedia article; no year or access date is recorded.
@misc{wikipedia:PageRank,
  title = {PageRank},
  url   = {https://en.wikipedia.org/wiki/PageRank},
}
% Wikipedia article; no year or access date is recorded.
@misc{wikipedia:Reverse_domain_name_notation,
  title = {Reverse domain name notation},
  url   = {https://en.wikipedia.org/wiki/Reverse_domain_name_notation},
}
% Wikipedia article; no year or access date is recorded.
@misc{wikipedia:Webgraph,
  title = {Webgraph},
  url   = {https://en.wikipedia.org/w/index.php?title=Webgraph},
}
% Book rather than a proceedings paper (no booktitle is recorded), so it is
% typed as Book with its publisher.
% NOTE(review): publisher is given from memory -- confirm against the
% publisher record.
@Book{WittenGoriNumerico:2006:Web-dragons,
  author = "Ian H. Witten and Marco Gori and Teresa Numerico",
  publisher = "Morgan Kaufmann",
  title = "Web Dragons: Inside the Myths of Search Engine Technology",
  URL = "https://api.semanticscholar.org/CorpusID:1192963",
  year = "2006",
}