You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
author = {Coppa, Emilio and Sokolowski, Daniel and Salvaneschi, Guido},
5
-
title = {Hybrid Fuzzing of Infrastructure as Code Programs (Short Paper)},
8
+
title = {Hybrid Fuzzing of Infrastructure as Code Programs},
9
+
booktitle = {Proceedings of the 34th ACM SIGSOFT International Symposium on Software Testing and Analysis},
10
+
series = {ISSTA Companion '25},
6
11
year = {2025},
7
-
isbn = {9798400714740},
8
-
publisher = {Association for Computing Machinery},
12
+
numpages = {6},
13
+
pages = {92--97},
14
+
location = {Trondheim, Norway},
15
+
publisher = {ACM},
9
16
address = {New York, NY, USA},
10
-
url = {https://doi.org/10.1145/3713081.3731721},
17
+
isbn = {9798400714740},
11
18
doi = {10.1145/3713081.3731721},
12
-
abstract = {Infrastructure as Code (IaC) has become a cornerstone of modern cloud and system deployment, enabling automated and repeatable infrastructure provisioning. However, ensuring the correctness of IaC programs remains challenging due to their complexity and dynamic nature. In particular, IaC programs can exhibit different behaviors depending on the state of the resources they manage. Since these resources are deployed on external providers, accounting for their possible states is difficult, making the testing phase particularly challenging. This paper presents HIT, a novel unit-testing framework for IaC programs that effectively tests IaC code using relevant resource states. HIT combines fuzzing and concolic execution, two effective yet previously unexplored techniques for IaC code. Our experiments confirm that HIT achieves better code coverage than state-of-the-art approaches.},
13
-
booktitle = {Proceedings of the 34th ACM SIGSOFT International Symposium on Software Testing and Analysis},
14
-
pages = {92–97},
15
-
numpages = {6},
16
19
keywords = {fuzzing, infrastructure as code, symbolic execution, DevOps},
17
-
location = {Clarion Hotel Trondheim, Trondheim, Norway},
title={{TerraDS}: A Dataset for Terraform HCL Programs},
26
-
author={Bühler, Christoph and Spielmann, David and Meier, Roland and Salvaneschi, Guido},
27
-
booktitle={Proceedings of the IEEE/ACM 22nd International Conference on Mining Software Repositories (MSR)},
28
-
series={MSR '25},
29
-
year={2025},
30
-
month=apr,
31
-
numpages={5},
32
-
location={Ottawa, Canada},
33
-
publisher={IEEE},
34
-
keywords={Cloud Computing, Configuration Management, Open Source Software, Static Analysis},
35
-
abstract={Infrastructure as Code (IaC) aims to automate infrastructure management by enabling the definition of infrastructure configurations in programs, rather than manually configuring hardware or cloud resources. Terraform is one of the most widely used IaC tools, gaining significant traction in recent years, as highlighted by its large and active user community and widespread adoption in both open-source and enterprise environments. Terraform’s code is written in the HashiCorp Configuration Language (HCL), which defines the infrastructure in a declarative manner. Despite the widespread adoption of Terraform, there is no large-scale dataset available for researchers to study IaC Terraform programs systematically. To address this gap, we present TerraDS, the first dataset of publicly available Terraform programs written in HCL. TerraDS contains the HCL code and the metadata of 67,360 open source repositories with permissive open-source licenses. The dataset includes 279,344 Terraform modules with 1,773,991 registered resources, all compiled into a reusable archive (~335 MB).}
20
+
abstract = {Infrastructure as Code (IaC) has become a cornerstone of modern cloud and system deployment, enabling automated and repeatable infrastructure provisioning. However, ensuring the correctness of IaC programs remains challenging due to their complexity and dynamic nature. In particular, IaC programs can exhibit different behaviors depending on the state of the resources they manage. Since these resources are deployed on external providers, accounting for their possible states is difficult, making the testing phase particularly challenging. This paper presents HIT, a novel unit-testing framework for IaC programs that effectively tests IaC code using relevant resource states. HIT combines fuzzing and concolic execution, two effective yet previously unexplored techniques for IaC code. Our experiments confirm that HIT achieves better code coverage than state-of-the-art approaches.},
abstract={E-graphs are a data structure to compactly represent a program space and reason about equality of program terms. E-graphs have been successfully applied to a number of domains, including program optimization and automated theorem proving. In many applications, however, it is necessary to reason about disequality of terms as well as equality. While disequality reasoning can be encoded, direct support for disequalities increases performance and simplifies the metatheory.\par In this paper, we develop a framework independent of a specific implementation to formally reason about e-graphs. For the first time, we prove the equivalence of e-graphs to the reflexive, symmetric, transitive, and congruent closure of the equivalence relation they are expected to encode. We use these results to present the first formalization of an extension of e-graphs that directly supports disequalities and prove an analytical result about their superior efficiency compared to embedding techniques that are commonly used in SMT solvers and automated verifiers. We further profile an SMT solver and find that it spends a measurable amount of time handling disequalities.\par We implement our approach in an extension to egg, a popular e-graph Rust library. We evaluate our solution in an SMT solver and an automated theorem prover using standard benchmarks. The results indicate that direct support for disequalities outperforms other encodings based on equality embedding, confirming the results obtained analytically.},
45
+
abstract = {E-graphs are a data structure to compactly represent a program space and reason about equality of program terms. E-graphs have been successfully applied to a number of domains, including program optimization and automated theorem proving. In many applications, however, it is necessary to reason about disequality of terms as well as equality. While disequality reasoning can be encoded, direct support for disequalities increases performance and simplifies the metatheory.\par In this paper, we develop a framework independent of a specific implementation to formally reason about e-graphs. For the first time, we prove the equivalence of e-graphs to the reflexive, symmetric, transitive, and congruent closure of the equivalence relation they are expected to encode. We use these results to present the first formalization of an extension of e-graphs that directly supports disequalities and prove an analytical result about their superior efficiency compared to embedding techniques that are commonly used in SMT solvers and automated verifiers. We further profile an SMT solver and find that it spends a measurable amount of time handling disequalities.\par We implement our approach in an extension to egg, a popular e-graph Rust library. We evaluate our solution in an SMT solver and an automated theorem prover using standard benchmarks. The results indicate that direct support for disequalities outperforms other encodings based on equality embedding, confirming the results obtained analytically.},
61
46
}
62
47
48
+
@inproceedings{Bühler:2025:TerraDS,
49
+
acronym = {MSR},
50
+
file = {papers/2025_terra-ds-hcl-dataset.pdf},
51
+
author = {Bühler, Christoph and Spielmann, David and Meier, Roland and Salvaneschi, Guido},
52
+
title = {{TerraDS}: A Dataset for {Terraform} {HCL} Programs},
53
+
booktitle = {Proceedings of the IEEE/ACM 22nd International Conference on Mining Software Repositories},
abstract = {Infrastructure as Code (IaC) aims to automate infrastructure management by enabling the definition of infrastructure configurations in programs, rather than manually configuring hardware or cloud resources. Terraform is one of the most widely used IaC tools, gaining significant traction in recent years, as highlighted by its large and active user community and widespread adoption in both open-source and enterprise environments. Terraform’s code is written in the HashiCorp Configuration Language (HCL), which defines the infrastructure in a declarative manner. Despite the widespread adoption of Terraform, there is no large-scale dataset available for researchers to study IaC Terraform programs systematically. To address this gap, we present TerraDS, the first dataset of publicly available Terraform programs written in HCL. TerraDS contains the HCL code and the metadata of 67,360 open source repositories with permissive open-source licenses. The dataset includes 279,344 Terraform modules with 1,773,991 registered resources, all compiled into a reusable archive (~335 MB).},
author = {Köhler, Mirko and Zakhour, George and Weisenburger, Pascal and Salvaneschi, Guido},
83
88
title = {Consistent Local-First Software: Enforcing Safety and Invariants for Local-First Applications},
84
89
year = {2024},
85
90
month = sep,
91
+
issue_date = {January 2025},
86
92
journal = {IEEE Transactions on Software Engineering},
87
-
numpages = {12},
93
+
volume = {51},
94
+
number = {1},
95
+
numpages = {13},
96
+
pages = {53--65},
97
+
publisher = {IEEE},
98
+
address = {Piscataway, NJ, USA},
88
99
issn = {0098-5589},
89
100
doi = {10.1109/TSE.2024.3477723},
90
101
abstract = {Local-first software embraces data replication as a means to achieve scalability and offline availability. A crucial ingredient of local-first software are mergeable data types, like conflict-free replicated data types (CRDTs), which feature eventual consistency by enabling processes to access data locally and later merge it with other replicas in an asynchronous manner. Notably, the merging process needs to adhere to application constraints for correctness. Ensuring such application-level invariants poses a challenge, as developers must reason about the replicated program state and resort to manual synchronization of specific application components to enforce the invariant.\par This paper introduces ConLoc (Consistent Local-First Software), a novel system designed to automatically enforce safety and maintain invariants in local-first applications. ConLoc effectively addresses the issue of preserving invariants in the execution of programs with replicated data types, including CRDTs. Our approach is able to verify the correctness of many CRDTs examined in the literature and in implementations, such the ones used in the Riak database. ConLoc ensures that applications are automatically synchronized correctly, resulting in substantial latency and throughput improvements when compared to sequential execution, while upholding the same set of invariants.},
0 commit comments