Skip to content

Commit 46ebca5

Browse files
authored
Add heterogeneous doctemplate (#31)
* Add heterogeneous doctemplate * Remove comments
1 parent 7709c1d commit 46ebca5

2 files changed

Lines changed: 312 additions & 0 deletions

File tree

src/main/java/utils/docgen/DocumentGenerator.java

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
import utils.key.SimpleKey;
1919
import utils.val.Cars;
2020
import utils.val.Hotel;
21+
import utils.val.HeterogeneousHotel;
2122
import utils.val.MiniCars;
2223
import utils.val.NimbusM;
2324
import utils.val.NimbusP;
@@ -147,6 +148,8 @@ else if (valClass.equals(NimbusM.class.getSimpleName()))
147148
this.valInstance = NimbusM.class;
148149
else if (valClass.equals(Hotel.class.getSimpleName()))
149150
this.valInstance = Hotel.class;
151+
else if (valClass.equals(HeterogeneousHotel.class.getSimpleName()))
152+
this.valInstance = HeterogeneousHotel.class;
150153
else if (valClass.equals(Cars.class.getSimpleName()))
151154
this.valInstance = Cars.class;
152155
else if (valClass.equals(MiniCars.class.getSimpleName()))
Lines changed: 309 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,309 @@
1+
/*
2+
Primitive type: BOOLEAN, STRING, BIGINT, or DOUBLE
3+
Special type: NULL or MISSING
4+
Composite type: OBJECT, ARRAY, or MULTISET
5+
*/
6+
7+
package utils.val;
8+
9+
import java.nio.ByteBuffer;
10+
import java.nio.ByteOrder;
11+
import java.time.LocalDateTime;
12+
import java.time.format.DateTimeFormatter;
13+
import java.time.temporal.ChronoUnit;
14+
import java.util.ArrayList;
15+
import java.util.Arrays;
16+
import java.util.Base64;
17+
import java.util.HashMap;
18+
import java.util.List;
19+
import java.util.Map;
20+
import java.util.Random;
21+
22+
import com.couchbase.client.java.json.JsonArray;
23+
import com.couchbase.client.java.json.JsonObject;
24+
import com.github.javafaker.Faker;
25+
26+
import utils.docgen.WorkLoadSettings;
27+
28+
public class HeterogeneousHotel {
29+
Faker faker;
30+
private Random random;
31+
private ArrayList<String> addresses = new ArrayList<String>();
32+
private ArrayList<String> city = new ArrayList<String>();
33+
private ArrayList<String> country = new ArrayList<String>();
34+
private List<String> htypes = Arrays.asList("Inn", "Hostel", "Place", "Center", "Hotel", "Motel", "Suites");
35+
private ArrayList<String> emails = new ArrayList<String>();
36+
private ArrayList<ArrayList<String>> likes = new ArrayList<ArrayList<String>>();
37+
// private ArrayList<String> names = new ArrayList<String>();
38+
private ArrayList<String> url = new ArrayList<String>();
39+
private ArrayList<ArrayList<JsonObject>> reviews = new ArrayList<ArrayList<JsonObject>>();
40+
private int mutate;
41+
private List<String> mutate_field_list = new ArrayList<>();
42+
43+
public WorkLoadSettings ws;
44+
private float[] flt_buf;
45+
private int flt_buf_length;
46+
47+
private double heterogeneity = 1;
48+
private int hotelRoundRobinIndex = 0;
49+
50+
public HeterogeneousHotel() {
51+
super();
52+
}
53+
54+
public HeterogeneousHotel(WorkLoadSettings ws) {
55+
super();
56+
this.ws = ws;
57+
this.random = new Random();
58+
this.random.setSeed(ws.keyPrefix.hashCode());
59+
faker = new Faker(random);
60+
for (int index = 0; index < 4096; index++) {
61+
addresses.add(faker.address().streetAddress());
62+
city.add(faker.address().city());
63+
country.add(faker.address().country());
64+
String fn = faker.name().firstName();
65+
String ln = faker.name().lastName();
66+
// names.add(faker.name().fullName());
67+
emails.add(fn + '.' + ln + "@heterogeneoushotels.com");
68+
country.add(faker.address().country());
69+
70+
ArrayList<String> temp = new ArrayList<String>();
71+
int numLikes = this.random.nextInt(10);
72+
for (int n = 0; n <= numLikes; n++) {
73+
temp.add(faker.name().fullName());
74+
}
75+
this.likes.add(temp);
76+
url.add(faker.internet().url());
77+
this.setReviewsArray();
78+
}
79+
this.flt_buf = new float[1024 * 1024];
80+
81+
for (int index = 0; index < 1024 * 1024; index++) {
82+
float x = this.random.nextFloat();
83+
this.flt_buf[index] = x;
84+
}
85+
this.flt_buf_length = this.flt_buf.length;
86+
}
87+
88+
public void setReviewsArray() {
89+
int numReviews = this.random.nextInt(10);
90+
LocalDateTime now = LocalDateTime.now();
91+
ArrayList<JsonObject> temp = new ArrayList<JsonObject>();
92+
for (int n = 0; n <= numReviews; n++) {
93+
JsonObject review = JsonObject.create();
94+
review.put("author", faker.name().fullName());
95+
review.put("date",
96+
now.plus(n, ChronoUnit.WEEKS).format(DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss")));
97+
JsonObject ratings = JsonObject.create();
98+
ratings.put("Check in / front desk", this.random.nextInt(5));
99+
ratings.put("Cleanliness", this.random.nextInt(5));
100+
ratings.put("Overall", this.random.nextInt(5));
101+
ratings.put("Rooms", this.random.nextInt(5));
102+
ratings.put("Value", this.random.nextInt(5));
103+
review.put("ratings", ratings);
104+
temp.add(review);
105+
}
106+
this.reviews.add(temp);
107+
}
108+
109+
public static byte[] floatsToBytes(float[] floats) {
110+
byte bytes[] = new byte[Float.BYTES * floats.length];
111+
ByteBuffer.wrap(bytes).order(ByteOrder.LITTLE_ENDIAN).asFloatBuffer().put(floats);
112+
return bytes;
113+
}
114+
115+
public static String convertToBase64Bytes(float[] floats) {
116+
return Base64.getEncoder().encodeToString(floatsToBytes(floats));
117+
}
118+
119+
private float[] get_float_array(int length, Random random_obj) {
120+
int _slice = random_obj.nextInt(this.flt_buf_length - length);
121+
// return flt_buf_al.subList(_slice, _slice + length);
122+
return Arrays.copyOfRange(this.flt_buf, _slice, _slice + length);
123+
}
124+
125+
private Object getPrice() {
126+
double stringProbability = 0.5 * this.heterogeneity;
127+
Object price = 500 + this.random.nextInt(1500); // between 500 and 1500
128+
if (this.random.nextDouble() < stringProbability) {
129+
price = String.valueOf(price);
130+
}
131+
return price;
132+
}
133+
134+
private Object getName() {
135+
double dictProbability = 0.5 * this.heterogeneity;
136+
String fn = faker.name().firstName();
137+
String ln = faker.name().lastName();
138+
139+
Object name = fn + " " + ln;
140+
if (this.random.nextDouble() < dictProbability) {
141+
Map<String, Object> nameMap = new HashMap<>();
142+
nameMap.put("firstname", fn);
143+
nameMap.put("lastname", ln);
144+
name = nameMap;
145+
}
146+
return name;
147+
}
148+
149+
private Object getEmail() {
150+
double dictProbability = (1.0 / 3.0) * this.heterogeneity; // Smoothly scales from 0 to 1/3
151+
152+
String email = faker.internet().emailAddress();
153+
154+
double randomValue = this.random.nextDouble();
155+
if (this.heterogeneity == 0.0 || randomValue < 1 - 2 * dictProbability) {
156+
// Always string (if heterogeneity == 0) or chosen as string
157+
return email;
158+
} else if (randomValue < 1 - dictProbability) {
159+
// Return null
160+
return null;
161+
} else {
162+
// Indicate *missing* by returning a special marker
163+
return "MISSING_FIELD"; // Assuming you have a static final Object MISSING_FIELD
164+
}
165+
}
166+
167+
private Object getAvgRating() {
168+
double dictProbability = (1.0 / 3.0) * this.heterogeneity; // Smoothly scales from 0 to 1/3
169+
170+
double rating = this.random.nextDouble() * 5.0; // Example: 0.0 to 5.0
171+
172+
double randomValue = this.random.nextDouble();
173+
if (this.heterogeneity == 0.0 || randomValue < 1 - 2 * dictProbability) {
174+
// Always double (if heterogeneity == 0) or chosen as double
175+
return rating;
176+
} else if (randomValue < 1 - dictProbability) {
177+
// Return string version of double
178+
return String.format("%.2f", rating);
179+
} else {
180+
// Return null
181+
return null;
182+
}
183+
}
184+
185+
private Object getOverallRating() {
186+
int[] possibleRatings = {1, 2, 3, 4, 5};
187+
int index = hotelRoundRobinIndex % 5;
188+
hotelRoundRobinIndex++;
189+
190+
return possibleRatings[index];
191+
}
192+
193+
private Object freeParking() {
194+
double dictProbability = (1.0 / 3.0) * this.heterogeneity;
195+
boolean boolValue = this.random.nextBoolean();
196+
197+
double randomValue = this.random.nextDouble();
198+
if (this.heterogeneity == 0.0 || randomValue < 1 - 2 * dictProbability) {
199+
return boolValue;
200+
} else if (randomValue < 1 - dictProbability) {
201+
return null;
202+
} else {
203+
return "MISSING_FIELD";
204+
}
205+
}
206+
207+
private Object publicLikes() {
208+
int outerSize = 5 + this.random.nextInt(4); // random(5,8)
209+
210+
List<Object> outerList = new ArrayList<>();
211+
for (int i = 0; i < outerSize; i++) {
212+
if (this.heterogeneity == 1.0 && this.random.nextBoolean()) {
213+
// Nested list of strings (one level deep)
214+
int innerSize = 2 + this.random.nextInt(3); // random(2,4) for variety
215+
List<String> innerList = new ArrayList<>();
216+
for (int j = 0; j < innerSize; j++) {
217+
innerList.add(faker.lorem().word());
218+
}
219+
outerList.add(innerList);
220+
} else {
221+
// Simple string
222+
outerList.add(faker.lorem().word());
223+
}
224+
}
225+
return outerList;
226+
}
227+
228+
229+
public JsonObject next(String key) {
230+
this.random = new Random();
231+
JsonObject jsonObject = JsonObject.create();
232+
this.random.setSeed((key).hashCode());
233+
if (this.ws.mutated > 0) {
234+
this.random.setSeed((key + Integer.toString(this.ws.mutated)).hashCode());
235+
}
236+
int index = this.random.nextInt(4096);
237+
jsonObject.put("free_breakfast", this.random.nextBoolean());
238+
Object freeParking = freeParking();
239+
if (freeParking != "MISSING_FIELD") {
240+
jsonObject.put("free_parking", freeParking);
241+
}
242+
jsonObject.put("phone", faker.phoneNumber().phoneNumber());
243+
jsonObject.put("name", getName());
244+
jsonObject.put("price", getPrice());
245+
jsonObject.put("avg_rating", getAvgRating());
246+
jsonObject.put("overall_rating", getOverallRating());
247+
jsonObject.put("address", this.addresses.get(index));
248+
jsonObject.put("city", this.city.get(index));
249+
jsonObject.put("country", this.country.get(index));
250+
Object email = getEmail();
251+
if (email != "MISSING_FIELD") {
252+
jsonObject.put("email", email);
253+
}
254+
jsonObject.put("public_likes", publicLikes());
255+
jsonObject.put("reviews", this.reviews.get(index));
256+
jsonObject.put("type", this.htypes.get(index % htypes.size()));
257+
jsonObject.put("url", this.url.get(index));
258+
jsonObject.put("mutate", this.mutate);
259+
if (this.ws.mutated > 0 && !this.ws.mutate_field.isEmpty()) {
260+
this.random.setSeed((key).hashCode());
261+
index = this.random.nextInt(4096);
262+
mutate_field_list = Arrays.asList(this.ws.mutate_field.split(","));
263+
if (!mutate_field_list.contains("address"))
264+
jsonObject.put("address", this.addresses.get(index));
265+
if (!mutate_field_list.contains("city"))
266+
jsonObject.put("city", this.city.get(index));
267+
if (!mutate_field_list.contains("country"))
268+
jsonObject.put("country", this.country.get(index));
269+
if (!mutate_field_list.contains("email"))
270+
jsonObject.put("email", getEmail());
271+
if (!mutate_field_list.contains("name"))
272+
jsonObject.put("name", getName());
273+
if (!mutate_field_list.contains("public_likes"))
274+
jsonObject.put("public_likes", this.likes.get(index));
275+
if (!mutate_field_list.contains("reviews"))
276+
jsonObject.put("reviews", this.reviews.get(index));
277+
if (!mutate_field_list.contains("type"))
278+
jsonObject.put("type", this.htypes.get(index % htypes.size()));
279+
if (!mutate_field_list.contains("url"))
280+
jsonObject.put("url", this.url.get(index));
281+
if (!mutate_field_list.contains("free_breakfast"))
282+
jsonObject.put("free_breakfast", this.random.nextBoolean());
283+
if (!mutate_field_list.contains("free_parking"))
284+
jsonObject.put("free_parking", this.random.nextBoolean());
285+
if (!mutate_field_list.contains("phone"))
286+
jsonObject.put("phone", faker.phoneNumber().phoneNumber());
287+
if (!mutate_field_list.contains("price"))
288+
jsonObject.put("price", getPrice());
289+
if (!mutate_field_list.contains("avg_rating"))
290+
jsonObject.put("avg_rating", this.random.nextFloat() * 5);
291+
}
292+
jsonObject.put("mutate", this.ws.mutated);
293+
294+
if (this.ws.mockVector) {
295+
float[] vector = null;
296+
vector = this.get_float_array(this.ws.dim, this.random);
297+
if (this.ws.base64)
298+
jsonObject.put("vector", convertToBase64Bytes(vector));
299+
else {
300+
JsonArray floatVector = JsonArray.create();
301+
for (int i = 0; i < vector.length; i++) {
302+
floatVector.add(Float.valueOf(vector[i]));
303+
}
304+
jsonObject.put("vector", floatVector);
305+
}
306+
}
307+
return jsonObject;
308+
}
309+
}

0 commit comments

Comments
 (0)