|
1 |
| -use std::{io::{SeekFrom, Write}, os::unix::fs::OpenOptionsExt}; |
| 1 | +use std::{alloc::{self, Layout}, io::{Read, Seek, SeekFrom, Write}, os::unix::fs::OpenOptionsExt}; |
2 | 2 |
|
3 |
| -use bytes::{BufMut, BytesMut}; |
| 3 | +use bytes::{BufMut, Bytes, BytesMut}; |
4 | 4 | use iggy::error::IggyError;
|
5 | 5 | use tracing::warn;
|
6 |
| -use tokio::{fs::OpenOptions, io::{AsyncReadExt, AsyncSeekExt, AsyncWriteExt, BufReader}}; |
| 6 | +use tokio::{fs::OpenOptions, io::{AsyncReadExt, AsyncSeekExt, AsyncWriteExt, BufReader}, task::spawn_blocking}; |
7 | 7 | use crate::streaming::batching::message_batch::{RetainedMessageBatch, RETAINED_BATCH_OVERHEAD};
|
8 | 8 |
|
/// Stateless handle whose methods read and write message batches on files
/// opened with `O_DIRECT`.
///
/// The type has no fields, so `DirectIOStorage::default()` and
/// `DirectIOStorage {}` are equivalent ways to obtain one.
#[derive(Debug, Default)]
pub struct DirectIOStorage {}
|
12 | 12 |
|
13 | 13 | impl DirectIOStorage {
|
14 |
| - pub async fn read_batches(&self, file_path: &str, start_position: u64, end_offset: u64) -> Result<Vec<RetainedMessageBatch>, IggyError> { |
15 |
| - let file = OpenOptions::new().read(true).custom_flags(libc::O_DIRECT).open(file_path).await?; |
16 |
| - warn!("start_position: {}", start_position); |
17 |
| - |
18 |
| - let sector_size = 4096; |
| 14 | + pub async fn read_batches(&self, file_path: &str, start_position: u64, end_position: u64) -> Result<Vec<RetainedMessageBatch>, IggyError> { |
| 15 | + //let mut file = OpenOptions::new().read(true).custom_flags(libc::O_DIRECT).open(file_path).await?; |
| 16 | + let mut file = std::fs::File::options().read(true).custom_flags(libc::O_DIRECT).open(file_path)?; |
| 17 | + file.seek(SeekFrom::Start(start_position))?; |
19 | 18 | let mut batches = Vec::new();
|
20 |
| - let file_size = file.metadata().await?.len(); |
| 19 | + let file_size = file.metadata()?.len(); |
21 | 20 | if file_size == 0 {
|
22 |
| - warn!("file_size is 0"); |
23 | 21 | return Ok(batches);
|
24 | 22 | }
|
| 23 | + // Aloc the buf |
| 24 | + let buf_size = if start_position == end_position { |
| 25 | + file_size - start_position |
| 26 | + } else { |
| 27 | + end_position - start_position |
| 28 | + }; |
| 29 | + let sector_size = 4096; |
| 30 | + let alignment = buf_size % sector_size; |
| 31 | + assert!(alignment == 0); |
| 32 | + |
| 33 | + let layout = Layout::from_size_align(buf_size as _, sector_size as _).unwrap(); |
| 34 | + let ptr = unsafe { alloc::alloc(layout) }; |
| 35 | + // Not sure if this is required |
| 36 | + unsafe { std::ptr::write_bytes(ptr, 0, buf_size as _) }; |
| 37 | + let mut bytes = unsafe {Vec::from_raw_parts(ptr, buf_size as _, buf_size as _)}; |
| 38 | + let result = spawn_blocking(move || { |
| 39 | + if let Err(e) = file.read_exact(&mut bytes) { |
| 40 | + warn!("error reading batch: {}", e); |
| 41 | + } |
| 42 | + Self::serialize_batches(bytes, &mut batches); |
| 43 | + Ok(batches) |
| 44 | + }).await.unwrap(); |
| 45 | + result |
| 46 | + } |
25 | 47 |
|
26 |
| - let mut reader = BufReader::with_capacity(4096 * 1000, file); |
27 |
| - reader |
28 |
| - .seek(SeekFrom::Start(start_position as u64)) |
29 |
| - .await?; |
| 48 | + fn serialize_batches(bytes: Vec<u8>, batches: &mut Vec<RetainedMessageBatch>) { |
| 49 | + let len = bytes.len(); |
| 50 | + let mut read_bytes = 0; |
| 51 | + let sector_size = 4096; |
30 | 52 |
|
31 |
| - let mut read_bytes = start_position as u64; |
32 |
| - let mut last_batch_to_read = false; |
33 |
| - while !last_batch_to_read { |
34 |
| - let Ok(batch_base_offset) = reader.read_u64_le().await else { |
35 |
| - break; |
36 |
| - }; |
37 |
| - let batch_length = reader |
38 |
| - .read_u32_le() |
39 |
| - .await |
40 |
| - .map_err(|_| IggyError::CannotReadBatchLength)?; |
41 |
| - let last_offset_delta = reader |
42 |
| - .read_u32_le() |
43 |
| - .await |
44 |
| - .map_err(|_| IggyError::CannotReadLastOffsetDelta)?; |
45 |
| - let max_timestamp = reader |
46 |
| - .read_u64_le() |
47 |
| - .await |
48 |
| - .map_err(|_| IggyError::CannotReadMaxTimestamp)?; |
| 53 | + while read_bytes < len { |
| 54 | + // Read batch_base_offset |
| 55 | + let batch_base_offset = u64::from_le_bytes( |
| 56 | + bytes[read_bytes..read_bytes + 8] |
| 57 | + .try_into() |
| 58 | + .expect("Failed to read batch_base_offset"), |
| 59 | + ); |
| 60 | + read_bytes += 8; |
49 | 61 |
|
50 |
| - let last_offset = batch_base_offset + (last_offset_delta as u64); |
| 62 | + // Read batch_length |
| 63 | + let batch_length = u32::from_le_bytes( |
| 64 | + bytes[read_bytes..read_bytes + 4] |
| 65 | + .try_into() |
| 66 | + .expect("Failed to read batch_length"), |
| 67 | + ); |
| 68 | + read_bytes += 4; |
| 69 | + |
| 70 | + // Read last_offset_delta |
| 71 | + let last_offset_delta = u32::from_le_bytes( |
| 72 | + bytes[read_bytes..read_bytes + 4] |
| 73 | + .try_into() |
| 74 | + .expect("Failed to read last_offset_delta"), |
| 75 | + ); |
| 76 | + read_bytes += 4; |
| 77 | + |
| 78 | + // Read max_timestamp |
| 79 | + let max_timestamp = u64::from_le_bytes( |
| 80 | + bytes[read_bytes..read_bytes + 8] |
| 81 | + .try_into() |
| 82 | + .expect("Failed to read max_timestamp"), |
| 83 | + ); |
| 84 | + read_bytes += 8; |
| 85 | + |
| 86 | + // Calculate last_offset and other values |
51 | 87 | let total_batch_size = batch_length + RETAINED_BATCH_OVERHEAD;
|
52 | 88 | let sectors = total_batch_size.div_ceil(sector_size);
|
53 | 89 | let adjusted_size = sector_size * sectors;
|
54 |
| - warn!("adjusted_size: {}", adjusted_size); |
55 | 90 | let diff = adjusted_size - total_batch_size;
|
56 | 91 |
|
| 92 | + // Read payload |
57 | 93 | let payload_len = batch_length as usize;
|
58 |
| - let mut payload = BytesMut::with_capacity(payload_len); |
59 |
| - payload.put_bytes(0, payload_len); |
60 |
| - if let Err(error) = reader.read_exact(&mut payload).await { |
| 94 | + let payload_start = read_bytes; |
| 95 | + let payload_end = read_bytes + payload_len; |
| 96 | + if payload_end > len { |
61 | 97 | warn!(
|
62 |
| - "Cannot read batch payload for batch with base offset: {batch_base_offset}, last offset delta: {last_offset_delta}, max timestamp: {max_timestamp}, batch length: {batch_length} and payload length: {payload_len}.\nProbably OS hasn't flushed the data yet, try setting `enforce_fsync = true` for partition configuration if this issue occurs again.\n{error}", |
| 98 | + "Cannot read batch payload for batch with base offset: {batch_base_offset}, last offset delta: {last_offset_delta}, max timestamp: {max_timestamp}, batch length: {batch_length} and payload length: {payload_len}.\nProbably OS hasn't flushed the data yet, try setting `enforce_fsync = true` for partition configuration if this issue occurs again." |
63 | 99 | );
|
64 | 100 | break;
|
65 | 101 | }
|
66 |
| - // TEMP |
67 |
| - let mut temp = BytesMut::with_capacity(diff as _); |
68 |
| - temp.put_bytes(0, diff as _); |
69 |
| - if let Err(e) = reader.read_exact(&mut temp).await { |
70 |
| - warn!("lol error reading padding"); |
71 |
| - } |
72 |
| - |
73 |
| - read_bytes += 8 + 4 + 4 + 8 + payload_len as u64; |
74 |
| - last_batch_to_read = read_bytes >= file_size || last_offset == end_offset; |
75 | 102 |
|
| 103 | + // Ergh.... |
| 104 | + let payload = Bytes::copy_from_slice(&bytes[payload_start..payload_end]); |
| 105 | + read_bytes = payload_end + diff as usize; |
76 | 106 | let batch = RetainedMessageBatch::new(
|
77 | 107 | batch_base_offset,
|
78 | 108 | last_offset_delta,
|
79 | 109 | max_timestamp,
|
80 | 110 | batch_length,
|
81 |
| - payload.freeze(), |
| 111 | + payload, |
82 | 112 | );
|
83 | 113 | batches.push(batch);
|
84 | 114 | }
|
85 |
| - Ok(batches) |
86 | 115 | }
|
87 | 116 |
|
88 |
| - pub async fn write_batches(&self, file_path: &str, bytes: &[u8]) -> Result<u32, IggyError> { |
89 |
| - //let mut std_file = std::fs::File::options().append(true).custom_flags(libc::O_DIRECT).open(file_path)?; |
90 |
| - let mut file = OpenOptions::new().append(true).custom_flags(libc::O_DIRECT).open(file_path).await?; |
91 |
| - if let Err(e) = file.write_all(bytes).await { |
92 |
| - warn!("error writing: {}", e); |
93 |
| - } |
94 |
| - Ok(bytes.len() as _) |
| 117 | + pub async fn write_batches(&self, file_path: &str, bytes: Vec<u8>) -> Result<u32, IggyError> { |
| 118 | + let mut std_file = std::fs::File::options().append(true).custom_flags(libc::O_DIRECT).open(file_path)?; |
| 119 | + //let mut file = OpenOptions::new().append(true).custom_flags(libc::O_DIRECT).open(file_path).await?; |
| 120 | + let size = bytes.len() as _; |
| 121 | + spawn_blocking(move || { |
| 122 | + if let Err(e) = std_file.write_all(&bytes) { |
| 123 | + warn!("error writing: {}", e); |
| 124 | + } |
| 125 | + }).await.unwrap(); |
| 126 | + Ok(size) |
95 | 127 | }
|
96 | 128 | }
|
0 commit comments