Skip to content

Commit 83d4346

Browse files
committed
it just works
1 parent fb9e52b commit 83d4346

File tree

3 files changed

+106
-69
lines changed

3 files changed

+106
-69
lines changed
Lines changed: 88 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -1,96 +1,128 @@
1-
use std::{io::{SeekFrom, Write}, os::unix::fs::OpenOptionsExt};
1+
use std::{alloc::{self, Layout}, io::{Read, Seek, SeekFrom, Write}, os::unix::fs::OpenOptionsExt};
22

3-
use bytes::{BufMut, BytesMut};
3+
use bytes::{BufMut, Bytes, BytesMut};
44
use iggy::error::IggyError;
55
use tracing::warn;
6-
use tokio::{fs::OpenOptions, io::{AsyncReadExt, AsyncSeekExt, AsyncWriteExt, BufReader}};
6+
use tokio::{fs::OpenOptions, io::{AsyncReadExt, AsyncSeekExt, AsyncWriteExt, BufReader}, task::spawn_blocking};
77
use crate::streaming::batching::message_batch::{RetainedMessageBatch, RETAINED_BATCH_OVERHEAD};
88

99
/// Stateless handle for segment log I/O that bypasses the page cache.
///
/// Its methods open the log file with `O_DIRECT` on every call, so the
/// struct itself carries no fields.
#[derive(Debug, Default)]
1010
pub struct DirectIOStorage {
1111
}
1212

1313
impl DirectIOStorage {
14-
pub async fn read_batches(&self, file_path: &str, start_position: u64, end_offset: u64) -> Result<Vec<RetainedMessageBatch>, IggyError> {
15-
let file = OpenOptions::new().read(true).custom_flags(libc::O_DIRECT).open(file_path).await?;
16-
warn!("start_position: {}", start_position);
17-
18-
let sector_size = 4096;
14+
pub async fn read_batches(&self, file_path: &str, start_position: u64, end_position: u64) -> Result<Vec<RetainedMessageBatch>, IggyError> {
15+
//let mut file = OpenOptions::new().read(true).custom_flags(libc::O_DIRECT).open(file_path).await?;
16+
let mut file = std::fs::File::options().read(true).custom_flags(libc::O_DIRECT).open(file_path)?;
17+
file.seek(SeekFrom::Start(start_position))?;
1918
let mut batches = Vec::new();
20-
let file_size = file.metadata().await?.len();
19+
let file_size = file.metadata()?.len();
2120
if file_size == 0 {
22-
warn!("file_size is 0");
2321
return Ok(batches);
2422
}
23+
// Aloc the buf
24+
let buf_size = if start_position == end_position {
25+
file_size - start_position
26+
} else {
27+
end_position - start_position
28+
};
29+
let sector_size = 4096;
30+
let alignment = buf_size % sector_size;
31+
assert!(alignment == 0);
32+
33+
let layout = Layout::from_size_align(buf_size as _, sector_size as _).unwrap();
34+
let ptr = unsafe { alloc::alloc(layout) };
35+
// Not sure if this is required
36+
unsafe { std::ptr::write_bytes(ptr, 0, buf_size as _) };
37+
let mut bytes = unsafe {Vec::from_raw_parts(ptr, buf_size as _, buf_size as _)};
38+
let result = spawn_blocking(move || {
39+
if let Err(e) = file.read_exact(&mut bytes) {
40+
warn!("error reading batch: {}", e);
41+
}
42+
Self::serialize_batches(bytes, &mut batches);
43+
Ok(batches)
44+
}).await.unwrap();
45+
result
46+
}
2547

26-
let mut reader = BufReader::with_capacity(4096 * 1000, file);
27-
reader
28-
.seek(SeekFrom::Start(start_position as u64))
29-
.await?;
48+
fn serialize_batches(bytes: Vec<u8>, batches: &mut Vec<RetainedMessageBatch>) {
49+
let len = bytes.len();
50+
let mut read_bytes = 0;
51+
let sector_size = 4096;
3052

31-
let mut read_bytes = start_position as u64;
32-
let mut last_batch_to_read = false;
33-
while !last_batch_to_read {
34-
let Ok(batch_base_offset) = reader.read_u64_le().await else {
35-
break;
36-
};
37-
let batch_length = reader
38-
.read_u32_le()
39-
.await
40-
.map_err(|_| IggyError::CannotReadBatchLength)?;
41-
let last_offset_delta = reader
42-
.read_u32_le()
43-
.await
44-
.map_err(|_| IggyError::CannotReadLastOffsetDelta)?;
45-
let max_timestamp = reader
46-
.read_u64_le()
47-
.await
48-
.map_err(|_| IggyError::CannotReadMaxTimestamp)?;
53+
while read_bytes < len {
54+
// Read batch_base_offset
55+
let batch_base_offset = u64::from_le_bytes(
56+
bytes[read_bytes..read_bytes + 8]
57+
.try_into()
58+
.expect("Failed to read batch_base_offset"),
59+
);
60+
read_bytes += 8;
4961

50-
let last_offset = batch_base_offset + (last_offset_delta as u64);
62+
// Read batch_length
63+
let batch_length = u32::from_le_bytes(
64+
bytes[read_bytes..read_bytes + 4]
65+
.try_into()
66+
.expect("Failed to read batch_length"),
67+
);
68+
read_bytes += 4;
69+
70+
// Read last_offset_delta
71+
let last_offset_delta = u32::from_le_bytes(
72+
bytes[read_bytes..read_bytes + 4]
73+
.try_into()
74+
.expect("Failed to read last_offset_delta"),
75+
);
76+
read_bytes += 4;
77+
78+
// Read max_timestamp
79+
let max_timestamp = u64::from_le_bytes(
80+
bytes[read_bytes..read_bytes + 8]
81+
.try_into()
82+
.expect("Failed to read max_timestamp"),
83+
);
84+
read_bytes += 8;
85+
86+
// Calculate last_offset and other values
5187
let total_batch_size = batch_length + RETAINED_BATCH_OVERHEAD;
5288
let sectors = total_batch_size.div_ceil(sector_size);
5389
let adjusted_size = sector_size * sectors;
54-
warn!("adjusted_size: {}", adjusted_size);
5590
let diff = adjusted_size - total_batch_size;
5691

92+
// Read payload
5793
let payload_len = batch_length as usize;
58-
let mut payload = BytesMut::with_capacity(payload_len);
59-
payload.put_bytes(0, payload_len);
60-
if let Err(error) = reader.read_exact(&mut payload).await {
94+
let payload_start = read_bytes;
95+
let payload_end = read_bytes + payload_len;
96+
if payload_end > len {
6197
warn!(
62-
"Cannot read batch payload for batch with base offset: {batch_base_offset}, last offset delta: {last_offset_delta}, max timestamp: {max_timestamp}, batch length: {batch_length} and payload length: {payload_len}.\nProbably OS hasn't flushed the data yet, try setting `enforce_fsync = true` for partition configuration if this issue occurs again.\n{error}",
98+
"Cannot read batch payload for batch with base offset: {batch_base_offset}, last offset delta: {last_offset_delta}, max timestamp: {max_timestamp}, batch length: {batch_length} and payload length: {payload_len}.\nProbably OS hasn't flushed the data yet, try setting `enforce_fsync = true` for partition configuration if this issue occurs again."
6399
);
64100
break;
65101
}
66-
// TEMP
67-
let mut temp = BytesMut::with_capacity(diff as _);
68-
temp.put_bytes(0, diff as _);
69-
if let Err(e) = reader.read_exact(&mut temp).await {
70-
warn!("lol error reading padding");
71-
}
72-
73-
read_bytes += 8 + 4 + 4 + 8 + payload_len as u64;
74-
last_batch_to_read = read_bytes >= file_size || last_offset == end_offset;
75102

103+
// Ergh....
104+
let payload = Bytes::copy_from_slice(&bytes[payload_start..payload_end]);
105+
read_bytes = payload_end + diff as usize;
76106
let batch = RetainedMessageBatch::new(
77107
batch_base_offset,
78108
last_offset_delta,
79109
max_timestamp,
80110
batch_length,
81-
payload.freeze(),
111+
payload,
82112
);
83113
batches.push(batch);
84114
}
85-
Ok(batches)
86115
}
87116

88-
pub async fn write_batches(&self, file_path: &str, bytes: &[u8]) -> Result<u32, IggyError> {
89-
//let mut std_file = std::fs::File::options().append(true).custom_flags(libc::O_DIRECT).open(file_path)?;
90-
let mut file = OpenOptions::new().append(true).custom_flags(libc::O_DIRECT).open(file_path).await?;
91-
if let Err(e) = file.write_all(bytes).await {
92-
warn!("error writing: {}", e);
93-
}
94-
Ok(bytes.len() as _)
117+
pub async fn write_batches(&self, file_path: &str, bytes: Vec<u8>) -> Result<u32, IggyError> {
118+
let mut std_file = std::fs::File::options().append(true).custom_flags(libc::O_DIRECT).open(file_path)?;
119+
//let mut file = OpenOptions::new().append(true).custom_flags(libc::O_DIRECT).open(file_path).await?;
120+
let size = bytes.len() as _;
121+
spawn_blocking(move || {
122+
if let Err(e) = std_file.write_all(&bytes) {
123+
warn!("error writing: {}", e);
124+
}
125+
}).await.unwrap();
126+
Ok(size)
95127
}
96128
}

server/src/streaming/segments/index.rs

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
use crate::streaming::segments::segment::Segment;
22
use iggy::error::IggyError;
33
use iggy::error::IggyError::InvalidOffset;
4+
use tracing::warn;
45

56
#[derive(Debug, Eq, Clone, Copy, Default)]
67
pub struct Index {
@@ -32,10 +33,19 @@ impl Segment {
3233
let ending_offset_idx = binary_search_index(indices, end_offset);
3334

3435
match (starting_offset_idx, ending_offset_idx) {
35-
(Some(starting_offset_idx), Some(ending_offset_idx)) => Ok(IndexRange {
36-
start: indices[starting_offset_idx],
37-
end: indices[ending_offset_idx],
38-
}),
36+
(Some(starting_offset_idx), Some(ending_offset_idx)) =>
37+
{
38+
// UGLY AS FOOOOOOOOOOOOOK, but will deal with it later on.
39+
let end_idx = if ending_offset_idx == indices.len() - 1 {
40+
ending_offset_idx
41+
} else {
42+
ending_offset_idx + 1
43+
};
44+
Ok(IndexRange {
45+
start: indices[starting_offset_idx],
46+
end: indices[end_idx],
47+
})
48+
},
3949
(Some(starting_offset_idx), None) => Ok(IndexRange {
4050
start: indices[starting_offset_idx],
4151
end: *indices.last().unwrap(),

server/src/streaming/segments/messages.rs

Lines changed: 4 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -167,21 +167,16 @@ impl Segment {
167167
let messages_count = (start_offset + end_offset) as usize;
168168
let path = self.log_path.as_str();
169169
let start_position = index_range.start.position;
170-
let end_offset = index_range.end.offset as u64 + self.start_offset;
170+
let end_position = index_range.end.position;
171171
let batch = self
172172
.direct_io_storage
173-
.read_batches(path, start_position as _, end_offset)
173+
.read_batches(path, start_position as _, end_position as _)
174174
.await?;
175-
error!("batches_count: {}", batch.len());
176175
let messages = batch
177176
.iter()
178-
.to_messages();
179-
/*
180177
.to_messages_with_filter(messages_count, &|msg| {
181178
msg.offset >= start_offset && msg.offset <= end_offset
182179
});
183-
*/
184-
error!("messages len: {}", messages.len());
185180
trace!(
186181
"Loaded {} messages from disk, segment start offset: {}, end offset: {}.",
187182
messages.len(),
@@ -247,7 +242,7 @@ impl Segment {
247242
}
248243

249244
pub async fn persist_messages(&mut self, fsync: bool) -> Result<usize, IggyError> {
250-
let sector_size = 512;
245+
let sector_size = 4096;
251246
let storage = self.direct_io_storage.clone();
252247
let index_storage = self.storage.segment.clone();
253248
if self.unsaved_messages.is_none() {
@@ -285,7 +280,7 @@ impl Segment {
285280
let mut bytes = unsafe {Vec::from_raw_parts(ptr, adjusted_size as _, adjusted_size as _)};
286281
let diff = bytes.len() as u32 - batch_size;
287282
batch.extend2(&mut bytes);
288-
let saved_bytes = storage.write_batches(self.log_path.as_str(), &bytes).await?;
283+
let saved_bytes = storage.write_batches(self.log_path.as_str(), bytes).await?;
289284
index_storage.save_index(&self.index_path, index).await?;
290285
self.last_index_position += adjusted_size;
291286
let size_increment = RETAINED_BATCH_OVERHEAD + diff;

0 commit comments

Comments
 (0)