Skip to content

Commit 8c17a0b

Browse files
committed
[ADD] Add files for release 4.2.2
1 parent eb9391b commit 8c17a0b

19 files changed

+498
-157
lines changed

AUTHORS

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,3 +19,4 @@ Alberto Ottimo, contributor
1919
Matteo Della Bartola, contributor
2020
Simone Frassinelli, contributor
2121
Yuriy Rymarchuk, contributor
22+
Andrea Filippi, contributor

tests/join_tests/join_common.hpp

Lines changed: 0 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -42,17 +42,6 @@ struct tuple_t
4242
int64_t value;
4343
};
4444

45-
template<>
46-
struct std::hash<tuple_t>
47-
{
48-
size_t operator()(const tuple_t &t) const
49-
{
50-
size_t h1 = std::hash<int64_t>()(t.value);
51-
size_t h2 = std::hash<size_t>()(t.key);
52-
return h1 ^ h2;
53-
}
54-
};
55-
5645
struct res_t
5746
{
5847
size_t key;

wf/interval_join.hpp

Lines changed: 12 additions & 104 deletions
Original file line numberDiff line numberDiff line change
@@ -78,22 +78,16 @@ class IJoin_Replica: public Basic_Replica
7878
using iterator_t = typename container_t::iterator; // iterator type for accessing wrapped tuples in the archive
7979
using compare_func_t = std::function<bool(const wrapper_t &, const uint64_t &)>; // function type to compare wrapped tuple to an uint64
8080

81-
/**
82-
* @brief Structure to store statistics about an archive.
83-
*/
84-
struct Archive_Stats
81+
struct Archive_Stats // structure to store statistics about an archive
8582
{
86-
// Total size of the archive.
87-
size_t size;
83+
size_t size; // total size of the archive
84+
uint64_t size_count; // number of times the size of the archive has been recorded
8885

89-
// Number of times the size of the archive has been recorded
90-
uint64_t size_count;
91-
92-
// Default constructor for Archive_Stats
86+
// Constructor
9387
Archive_Stats():
9488
size(0),
9589
size_count(0) {}
96-
90+
9791
// Records the size of the archive
9892
void recordSize(uint64_t _size)
9993
{
@@ -114,13 +108,15 @@ class IJoin_Replica: public Basic_Replica
114108
{
115109
JoinArchive<tuple_t, compare_func_t> archiveA; // archive of stream A tuples of this key
116110
JoinArchive<tuple_t, compare_func_t> archiveB; // archive of stream B tuples of this key
117-
Archive_Stats archive_metrics;
111+
Archive_Stats archive_metrics; // archive of statistics for this key
112+
uint64_t partitioning_counter; // counter used in DP mode to establish which replica will save the given tuple
118113

119114
// Constructor
120115
Key_Descriptor(compare_func_t _compare_func):
121116
archiveA(_compare_func),
122117
archiveB(_compare_func),
123-
archive_metrics(Archive_Stats()) {}
118+
archive_metrics(Archive_Stats()),
119+
partitioning_counter(0) {}
124120

125121
// recordSize method
126122
void recordSize()
@@ -139,21 +135,6 @@ class IJoin_Replica: public Basic_Replica
139135
size_t id_inner; // id_inner value
140136
size_t num_inner; // num_inner value
141137

142-
// Calculates the FNV-1a hash value for the given key
143-
const size_t fnv1a_hash(const void* key,
144-
const size_t len = sizeof(uint64_t))
145-
{
146-
const char* data = (char *)key;
147-
const size_t prime = 0x1000193;
148-
size_t hash = 0x811c9dc5;
149-
for(int i = 0; i<len; i++) {
150-
uint8_t value = data[i];
151-
hash = hash ^ value;
152-
hash *= prime;
153-
}
154-
return hash;
155-
}
156-
157138
// Checks if the given Join_Stream_t is Stream A
158139
bool isStreamA(Join_Stream_t stream) const
159140
{
@@ -334,21 +315,9 @@ class IJoin_Replica: public Basic_Replica
334315
insertIntoBuffer(key_d, wrapper_t(_tuple, _timestamp), _tag);
335316
}
336317
else if (joinMode == Join_Mode_t::DP) {
337-
if constexpr(if_defined_hash<tuple_t>) {
338-
// compute the hash index of the tuple given a defined hash function specialization for the tuple_t
339-
size_t hash = std::hash<tuple_t>()(_tuple);
340-
size_t hash_idx = (hash % num_inner);
341-
if (hash_idx == id_inner) {
342-
insertIntoBuffer(key_d, wrapper_t(_tuple, _timestamp), _tag);
343-
}
344-
}
345-
else {
346-
// compute the hash index of the tuple using FNV-1a hash function using the timestamp
347-
size_t hash = fnv1a_hash(&_timestamp);
348-
size_t hash_idx = (hash % num_inner);
349-
if (hash_idx == id_inner) {
350-
insertIntoBuffer(key_d, wrapper_t(_tuple, _timestamp), _tag);
351-
}
318+
key_d.partitioning_counter++;
319+
if (key_d.partitioning_counter % num_inner == id_inner) {
320+
insertIntoBuffer(key_d, wrapper_t(_tuple, _timestamp), _tag);
352321
}
353322
}
354323
if (this->execution_mode == Execution_Mode_t::DEFAULT) {
@@ -363,17 +332,6 @@ class IJoin_Replica: public Basic_Replica
363332
last_time = _timestamp;
364333
}
365334
}
366-
#if defined (WF_JOIN_MEASUREMENT)
367-
// Measure the size of the archives every 200ms
368-
uint64_t delta = (current_time_nsecs() - last_measured_size_time) / 1e06; //ms
369-
if (delta >= 200) {
370-
for (auto &k: keyMap) {
371-
Key_Descriptor &key_m_d = (k.second);
372-
(key_m_d.recordSize());
373-
}
374-
last_measured_size_time = current_time_nsecs();
375-
}
376-
#endif
377335
}
378336

379337
double getArchiveMeanSize() const
@@ -576,61 +534,11 @@ class Interval_Join: public Basic_Operator
576534
// Destructor
577535
~Interval_Join() override
578536
{
579-
#if defined(WF_JOIN_MEASUREMENT)
580-
if (this->isTerminated()) {
581-
printArchivePerKeyStats();
582-
}
583-
#endif
584537
for (auto *r: replicas) { // delete all the replicas
585538
delete r;
586539
}
587540
}
588541

589-
/**
590-
* @brief Print statistics of the archive per key.
591-
*
592-
* This function calculates and prints various statistics about the archive per key.
593-
* It calculates the mean archive size for each replica and checks the distribution
594-
* of the archive sizes and determines if it is balanced or not.
595-
*
596-
* @note This function assumes that the `replicas` vector is already populated with
597-
* valid replica objects.
598-
*
599-
* @note The balance check is determined based on the coefficient of variation (cv) value.
600-
* If the cv is less than 20, it is considered balanced and marked with a checkmark,
601-
* otherwise it is considered unbalanced and marked with a cross.
602-
*
603-
* @note The function uses the `std::cout` stream to print the statistics.
604-
*/
605-
void printArchivePerKeyStats()
606-
{
607-
std::cout << "***" << std::endl;
608-
std::cout << "Archive Stats: " << std::endl;
609-
uint64_t num_replicas = replicas.size();
610-
double acc_mean = 0.0;
611-
int i = 0;
612-
for (auto *r: replicas) {
613-
auto mean = r->getArchiveMeanSize();
614-
std::cout << (i+1) << " Replica mean -> " << mean << std::endl;
615-
acc_mean += mean;
616-
i++;
617-
}
618-
double mean_size = acc_mean / num_replicas;
619-
double size_in_mb = mean_size * sizeof(tuple_t) / 1024;
620-
std::cout << "Global Mean Archive Size -> " << mean_size << " | " << size_in_mb << " KB" << std::endl;
621-
// Check distribution
622-
double variance = 0;
623-
for (auto *r: replicas) {
624-
variance += std::pow(r->getArchiveMeanSize() - mean_size, 2);
625-
}
626-
variance /= num_replicas;
627-
double cv = variance != 0 ? sqrt(variance) / mean_size * 100 : 0.0; //coefficient of variation
628-
std::string check_balance = cv < 20 ? "" : "";
629-
std::cout << std::fixed << std::setprecision(2);
630-
std::cout << "Variance -> " << variance << " | Coefficient of variation -> " << cv << "| Balanced Distribution ->" << check_balance << std::endl;
631-
std::cout << "***" << std::endl;
632-
}
633-
634542
/**
635543
* \brief Get the type of the Interval Join as a string
636544
* \return type of the Interval Join

wf/persistent/builders_rocksdb.hpp

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/**************************************************************************************
2-
* Copyright (c) 2024- Gabriele Mencagli and Simone Frassinelli
2+
* Copyright (c) 2019- Gabriele Mencagli and Simone Frassinelli
33
*
44
* This file is part of WindFlow.
55
*
@@ -1283,6 +1283,7 @@ class P_Keyed_Windows_Builder: public P_Basic_Builder<P_Keyed_Windows_Builder, w
12831283
uint64_t slide_len=0; // slide length in number of tuples or in time units
12841284
uint64_t lateness=0; // lateness in time units
12851285
Win_Type_t winType=Win_Type_t::CB; // window type (CB or TB)
1286+
size_t cacheCapacity = 0; // capacity of the internal cache
12861287

12871288
public:
12881289
/**
@@ -1386,6 +1387,7 @@ class P_Keyed_Windows_Builder: public P_Basic_Builder<P_Keyed_Windows_Builder, w
13861387
new_builder.slide_len = slide_len;
13871388
new_builder.lateness = lateness;
13881389
new_builder.winType = winType;
1390+
new_builder.cacheCapacity = cacheCapacity;
13891391
return new_builder;
13901392
}
13911393

@@ -1451,6 +1453,18 @@ class P_Keyed_Windows_Builder: public P_Basic_Builder<P_Keyed_Windows_Builder, w
14511453
return *this;
14521454
}
14531455

1456+
/**
1457+
* \brief Set the cache capacity
1458+
*
1459+
* \param _cacheCapacity number of keys to be cached by each replica of this operator
1460+
* \return a reference to the builder object
1461+
*/
1462+
auto &withCacheCapacity(size_t _cacheCapacity)
1463+
{
1464+
cacheCapacity = _cacheCapacity;
1465+
return *this;
1466+
}
1467+
14541468
/**
14551469
* \brief Create the P_Keyed_Windows
14561470
*
@@ -1505,7 +1519,8 @@ class P_Keyed_Windows_Builder: public P_Basic_Builder<P_Keyed_Windows_Builder, w
15051519
win_len,
15061520
slide_len,
15071521
lateness,
1508-
winType);
1522+
winType,
1523+
cacheCapacity);
15091524
}
15101525
};
15111526

wf/persistent/cache/cache.hpp

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
/**************************************************************************************
2+
* Copyright (c) 2019- Gabriele Mencagli and Andrea Filippi
3+
*
4+
* This file is part of WindFlow.
5+
*
6+
* WindFlow is free software dual licensed under the GNU LGPL or MIT License.
7+
* You can redistribute it and/or modify it under the terms of the
8+
* * GNU Lesser General Public License as published by
9+
* the Free Software Foundation, either version 3 of the License, or
10+
* (at your option) any later version
11+
* OR
12+
* * MIT License: https://github.com/ParaGroup/WindFlow/blob/master/LICENSE.MIT
13+
*
14+
* WindFlow is distributed in the hope that it will be useful,
15+
* but WITHOUT ANY WARRANTY; without even the implied warranty of
16+
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17+
* GNU Lesser General Public License for more details.
18+
* You should have received a copy of the GNU Lesser General Public License and
19+
* the MIT License along with WindFlow. If not, see <http://www.gnu.org/licenses/>
20+
* and <http://opensource.org/licenses/MIT/>.
21+
**************************************************************************************
22+
*/
23+
24+
/**
25+
* @file cache.hpp
26+
* @author Gabriele Mencagli and Andrea Filippi
27+
*
28+
* @brief class defining the general interface of a cache used by the
29+
* P_Keyed_Windows with non-incremental processing
30+
*
31+
* @section Cache (Description)
32+
*
33+
* Abstract class of a cache used by the P_Keyed_Windows operator with non-incremental
34+
* processing functions.
35+
*/
36+
37+
#ifndef CACHE_H
38+
#define CACHE_H
39+
40+
#include<optional>
41+
42+
namespace wf {
43+
44+
// class Cache
45+
template<typename key_t, typename value_t>
46+
class Cache
47+
{
48+
public:
49+
// Destructor
50+
virtual ~Cache() = default;
51+
52+
// Insert a new value associated with key in the cache
53+
virtual void put(const key_t &_key, const value_t &_value) = 0;
54+
55+
// Read the value associated with a key (returns std::nullopt otherwise)
56+
virtual std::optional<value_t> get(const key_t &_key) = 0;
57+
58+
// Remove a key entry from the cache
59+
virtual void remove(const key_t &_key) = 0;
60+
61+
// Empty the cache
62+
virtual void clear() = 0;
63+
64+
// Check if a key is present in the cache
65+
virtual bool exists(const key_t &_key) const = 0;
66+
67+
// Get the number of elements in the cache
68+
virtual size_t size() const = 0;
69+
70+
// Get the maximum capacity of the cache
71+
virtual size_t capacity() const = 0;
72+
};
73+
74+
} // namespace wf
75+
76+
#endif

0 commit comments

Comments
 (0)