-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtmp.txt
More file actions
377 lines (340 loc) · 13.6 KB
/
tmp.txt
File metadata and controls
377 lines (340 loc) · 13.6 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
#include "disk_manager.h"
/*
* DiskManager — raw I/O abstraction over a single database file.
*
* Responsibilities:
* - Translates logical page_id values into byte offsets:
* offset = page_id * PAGE_SIZE
* and issues pread/pwrite syscalls of exactly PAGE_SIZE bytes.
* - Owns the free-page list. allocatePage() prefers reusing a deallocated
* page over extending the file; otherwise it bumps next_page_id_ and
* truncates/extends the file.
* - Serialises concurrent I/O if you add threads later (single fd_ today,
* add a mutex when needed).
*
* What DiskManager deliberately does NOT do:
* - It does not cache anything. Every call is a real syscall.
* BufferPoolManager is the cache; DiskManager is the backing store.
* - It does not know what is inside a page (no header parsing, no slot logic).
* - It does not manage transactions or logs.
*
* File layout on disk:
* [DB header page — page_id 0] (optional: stores free-list metadata)
* [page 1][page 2]...[page N]
* Each page is exactly PAGE_SIZE bytes, no gaps, no padding.
*
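* Design decision — free page tracking:
* The original plan was an in-memory free_pages_ set that is persisted into
* page 0 only on a clean shutdown (a crash would lose it and leak disk pages
* unless the WAL recovery phase reconstructs it). The code in this file
* instead keeps the free list on disk: GlobalMetadata::freelist_head (stored
* in page 0) points to a chain of Freelist_Page pages, each holding ids of
* free pages. Crash-consistency of that chain is still an open decision.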
*/
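/*
 * Internal data (declared in disk_manager.h):
 *   int fd_;                          // descriptor of the database file
 *   GlobalMetadata global_metadata_;  // in-memory copy of the header page (page 0)
 */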
/*
 * Opens (or creates) the database file at file_path.
 *
 * The file is opened read/write and is never truncated, so reopening an
 * existing database keeps its contents.
 *  - Empty (newly created) file: a fresh header is built in memory and
 *    written to page 0 via UpdateMetadata().
 *  - Existing file: the header is read from offset 0 into global_metadata_
 *    and validated, so allocation resumes from the stored next_page_id.
 *
 * Throws std::system_error (a std::runtime_error) when open/fstat/pread
 * fail, and std::runtime_error when the header is short or corrupt. The
 * descriptor is closed before any exception leaves the constructor, because
 * the destructor does not run for a partially constructed object.
 */
DiskManager::DiskManager(const std::string& file_path){
    // Flags are combined with bitwise OR. O_RDWR is required because the
    // header is read back below and pages are read later; O_TRUNC is
    // deliberately absent so an existing database is not wiped on open.
    const int fd = open(file_path.c_str(), O_RDWR | O_CREAT, 0644);
    if(fd == -1){
        throw std::system_error(errno, std::generic_category(), "Error opening file");
    }
    fd_ = fd;

    try{
        struct stat file_info;
        if(fstat(fd_, &file_info) == -1){
            throw std::system_error(errno, std::generic_category(), "Could not stat file");
        }

        if(file_info.st_size == 0){
            // Brand-new database: start from an all-zero header so the
            // padding bytes written to disk are deterministic.
            global_metadata_ = GlobalMetadata{};
            global_metadata_.magic_number = MAGIC_NUMBER;
            // NOTE(review): db_version stays 0; no version constant is visible here — confirm.
            // NOTE(review): root_page_id is assumed to mean "no root yet" when invalid — confirm.
            global_metadata_.root_page_id = INVALID_PAGE_ID;
            global_metadata_.freelist_head = INVALID_PAGE_ID;
            // Page 0 is the header itself, so the first allocatable id is 1.
            global_metadata_.next_page_id = 1;
            UpdateMetadata();
        }else{
            // pread at offset 0: independent of the descriptor's file position.
            const ssize_t bytes_read = pread(fd_, &global_metadata_, sizeof(GlobalMetadata), 0);
            if(bytes_read == -1){
                throw std::system_error(errno, std::generic_category(), "Could not read from file");
            }
            if(static_cast<size_t>(bytes_read) < sizeof(GlobalMetadata)){
                throw std::runtime_error("Could not read entire global header from file");
            }
            if(global_metadata_.magic_number != MAGIC_NUMBER){ //uint32_t
                throw std::runtime_error("Magic number from global header wrong");
            }
            // The header occupies page 0, so a valid high-water mark is at least 1.
            if(global_metadata_.next_page_id < 1){
                throw std::runtime_error("Global header corrupt: next_page_id out of range");
            }
        }
    }catch(...){
        close(fd_);
        fd_ = -1;
        throw;
    }
}
/*
 * Persists the in-memory header one last time and closes the file
 * descriptor. Does NOT flush any caller-held buffers — BufferPoolManager
 * must flush all dirty pages before destroying this.
 *
 * Never throws: UpdateMetadata() can raise on an I/O failure, and an
 * exception escaping a destructor terminates the program. The failure is
 * reported on stderr instead, and the descriptor is closed regardless.
 */
DiskManager::~DiskManager(){
    try{
        UpdateMetadata();
    }catch(const std::exception& e){
        std::cerr << "DiskManager: failed to persist metadata on shutdown: " << e.what() << '\n';
    }catch(...){
        std::cerr << "DiskManager: failed to persist metadata on shutdown\n";
    }
    close(fd_);
}
/*
 * Writes exactly PAGE_SIZE bytes from `data` to the file at:
 *   offset = page_id * PAGE_SIZE
 * Uses pwrite(), so the descriptor's shared file position is not involved.
 * Caller must ensure `data` points to at least PAGE_SIZE bytes.
 *
 * Does NOT call fsync — durability of data pages is left to the caller
 * (the design assigns it to LogManager; still an open question).
 *
 * Throws std::runtime_error for INVALID_PAGE_ID or a partial write, and
 * std::system_error when pwrite() itself fails.
 */
void DiskManager::writePage(page_id_t page_id, const char* data){
    // Reject the sentinel up front: depending on how it is encoded it could
    // otherwise turn into a huge offset and a giant sparse file.
    if(page_id == INVALID_PAGE_ID){
        throw std::runtime_error("Page write out of allowed range");
    }

    // Widen BEFORE multiplying so the product is computed in off_t and
    // cannot overflow the narrower page_id_t / PAGE_SIZE arithmetic.
    const off_t offset = static_cast<off_t>(page_id) * static_cast<off_t>(PAGE_SIZE);
    const ssize_t bytes_written = pwrite(fd_, data, PAGE_SIZE, offset);

    if(bytes_written == -1){
        throw std::system_error(errno, std::generic_category(), "Critical: pwrite failed on page " + std::to_string(page_id));
    }
    if(bytes_written != static_cast<ssize_t>(PAGE_SIZE)){
        throw std::runtime_error("Partial write occured: wrote " + std::to_string(bytes_written) + " of " + std::to_string(PAGE_SIZE) + " on page " + std::to_string(page_id));
    }
}
/*
 * Reads exactly PAGE_SIZE bytes from offset (page_id * PAGE_SIZE) into
 * `data` using pread(). `data` must point to a buffer of at least
 * PAGE_SIZE bytes.
 *
 * Throws std::runtime_error when page_id is INVALID_PAGE_ID or is at/above
 * the high-water mark (global_metadata_.next_page_id), when the read hits
 * EOF, or when fewer than PAGE_SIZE bytes come back; throws
 * std::system_error when pread() itself fails.
 */
void DiskManager::readPage(page_id_t page_id, char* data){
    if(page_id == INVALID_PAGE_ID || page_id >= global_metadata_.next_page_id){
        throw std::runtime_error("Page read out of allowed range");
    }

    // Widen BEFORE multiplying so the offset is computed in off_t.
    const off_t offset = static_cast<off_t>(page_id) * static_cast<off_t>(PAGE_SIZE);
    const ssize_t bytes_read = pread(fd_, data, PAGE_SIZE, offset);

    if(bytes_read == -1){
        throw std::system_error(errno, std::generic_category(), "Critical: pread failed on page " + std::to_string(page_id));
    }
    if(bytes_read == 0){
        // The id was handed out but the file has no bytes at that offset.
        throw std::runtime_error("Read past EOF");
    }
    if(bytes_read != static_cast<ssize_t>(PAGE_SIZE)){
        throw std::runtime_error("Partial read: Page is likely truncated or corrupted");
    }
}
/*
 * Returns a page_id for a writable page.
 *
 * Allocation order:
 *   1. No free list (freelist_head == INVALID_PAGE_ID): hand out
 *      next_page_id and bump the high-water mark. The file is not extended
 *      here; it grows when the page is first written.
 *   2. Head freelist page still lists ids: pop the last id and write the
 *      shrunken freelist page back.
 *   3. Head freelist page lists no ids: unlink it (the header now points at
 *      its successor) and hand out the freelist page itself.
 *
 * Does NOT zero-initialise the page bytes — callers must treat the contents
 * as undefined and initialise before use.
 *
 * Reads the freelist page through a local buffer because there is no
 * buffer pool yet; refactor when BufferPoolManager exists.
 *
 * Throws std::system_error / std::runtime_error on I/O failure (via
 * HandleReadError, HandleWriteError, UpdateMetadata) and std::runtime_error
 * when the freelist page's id count exceeds its capacity.
 */
page_id_t DiskManager::allocatePage(){
    static_assert(sizeof(Freelist_Page) <= PAGE_SIZE, "Freelist_Page must fit in one page");

    const page_id_t head_id = global_metadata_.freelist_head;
    if(head_id == INVALID_PAGE_ID){
        // No freelist: mint a brand-new id.
        const page_id_t new_id = global_metadata_.next_page_id++;
        UpdateMetadata();
        return new_id;
    }

    // Aligned so that viewing the bytes as a Freelist_Page is not a misaligned access.
    alignas(Freelist_Page) uint8_t buffer[PAGE_SIZE];
    const off_t head_offset = static_cast<off_t>(head_id) * static_cast<off_t>(PAGE_SIZE);
    const ssize_t r = pread(fd_, buffer, PAGE_SIZE, head_offset);
    HandleReadError(r, head_id);
    auto* f_page = reinterpret_cast<Freelist_Page*>(buffer);

    if(f_page->current_id_count == 0){
        // Checked BEFORE popping: decrementing a zero count would index out
        // of bounds. An empty head is itself a free page, so recycle it and
        // advance the persisted head to the next freelist page.
        global_metadata_.freelist_head = f_page->next_freelist_page;
        UpdateMetadata();
        return head_id;
    }

    if(f_page->current_id_count > Freelist_Page::MAX_FREE_IDS){
        throw std::runtime_error("Corrupt freelist page " + std::to_string(head_id) + ": id count exceeds capacity");
    }

    // Pop the most recently freed id and persist the shrunken page. The head
    // stays in place even if it is now empty; the next call recycles it.
    const page_id_t recycled_id = f_page->free_page_ids[--f_page->current_id_count];
    const ssize_t w = pwrite(fd_, buffer, PAGE_SIZE, head_offset);
    HandleWriteError(w, head_id);
    // global_metadata_ did not change on this path, so the header is not rewritten.
    return recycled_id;
}
/*
 * Marks page_id as free for future reuse by recording it in the on-disk
 * free list.
 *   - Head freelist page has room: append page_id to it.
 *   - No head yet, or the head is full: page_id itself is rewritten as an
 *     empty freelist page that links to the old head, and becomes the new
 *     head recorded in the header.
 *
 * Does NOT zero the bytes of a page that is merely listed; the page is
 * simply available for reallocation. Caller must ensure no live references
 * remain before calling this.
 *
 * Throws std::runtime_error when page_id is the header page (0),
 * INVALID_PAGE_ID, or at/above the high-water mark, or when the head
 * freelist page is corrupt; I/O failures surface through HandleReadError,
 * HandleWriteError and UpdateMetadata. Double frees are not detected.
 */
void DiskManager::deallocatePage(page_id_t page_id){
    static_assert(sizeof(Freelist_Page) <= PAGE_SIZE, "Freelist_Page must fit in one page");

    // Freeing page 0 would let the header be overwritten by a freelist page.
    if(page_id == 0 || page_id == INVALID_PAGE_ID || page_id >= global_metadata_.next_page_id){
        throw std::runtime_error("Cannot deallocate page " + std::to_string(page_id) + ": not an allocated data page");
    }

    const page_id_t head_id = global_metadata_.freelist_head;

    if(head_id != INVALID_PAGE_ID){
        // Read in the current head.
        alignas(Freelist_Page) uint8_t head_buffer[PAGE_SIZE];
        const off_t head_offset = static_cast<off_t>(head_id) * static_cast<off_t>(PAGE_SIZE);
        const ssize_t r = pread(fd_, head_buffer, PAGE_SIZE, head_offset);
        HandleReadError(r, head_id);
        auto* head = reinterpret_cast<Freelist_Page*>(head_buffer);

        if(head->current_id_count > Freelist_Page::MAX_FREE_IDS){
            throw std::runtime_error("Corrupt freelist page " + std::to_string(head_id) + ": id count exceeds capacity");
        }

        if(head->current_id_count < Freelist_Page::MAX_FREE_IDS){
            // Head has room - just add the deallocated page to it.
            head->free_page_ids[head->current_id_count++] = page_id;
            const ssize_t w = pwrite(fd_, head_buffer, PAGE_SIZE, head_offset);
            HandleWriteError(w, head_id);
            return;
        }
        // Head is full: fall through and make page_id the new head.
    }

    // Zero-filled so no uninitialised stack bytes reach the disk.
    alignas(Freelist_Page) uint8_t new_head_buffer[PAGE_SIZE] = {};
    auto* new_head = reinterpret_cast<Freelist_Page*>(new_head_buffer);
    new_head->current_id_count = 0;
    new_head->next_freelist_page = head_id; // INVALID_PAGE_ID when there was no head

    // Write the new head page BEFORE publishing it in the header.
    const off_t new_head_offset = static_cast<off_t>(page_id) * static_cast<off_t>(PAGE_SIZE);
    const ssize_t w = pwrite(fd_, new_head_buffer, PAGE_SIZE, new_head_offset);
    HandleWriteError(w, page_id);

    global_metadata_.freelist_head = page_id;
    UpdateMetadata();
}
/*
 * Reports how many page ids have been handed out so far, counting pages
 * that were later freed but not yet reused. Handy for tests and benchmarks.
 */
page_id_t DiskManager::getPageCount() const{
    // Ids [0, next_page_id - 1] are allocated, so the next id doubles as the count.
    const page_id_t allocated_pages = global_metadata_.next_page_id;
    return allocated_pages;
}
/*
 * Validates the result of a page-sized pwrite() and, when the write was
 * complete, flushes it to stable storage with fsync().
 *
 * bytes_written: return value of the pwrite() call being checked. Call this
 *                immediately after pwrite() so errno still belongs to it.
 * page_id:       page the write targeted; used only in error messages.
 *
 * Throws std::system_error when pwrite() or fsync() failed, and
 * std::runtime_error on a partial write.
 */
void DiskManager::HandleWriteError(ssize_t bytes_written, page_id_t page_id){
    if(bytes_written == -1){
        throw std::system_error(errno, std::generic_category(), "Critical pwrite failure on Page " + std::to_string(page_id));
    }
    if(bytes_written != static_cast<ssize_t>(PAGE_SIZE)){
        // Partial write - the "Torn Page" scenario
        throw std::runtime_error("Partial write on Page " + std::to_string(page_id) + ": Wrote " + std::to_string(bytes_written) + " bytes.");
    }
    // A failed fsync means the data may never reach the disk; surface it
    // instead of reporting the write as successful.
    if(fsync(fd_) == -1){
        throw std::system_error(errno, std::generic_category(), "Critical fsync failure after writing Page " + std::to_string(page_id));
    }
}
/*
 * Validates the result of a page-sized pread().
 *
 * bytes_read: return value of the pread() call being checked.
 * page_id:    page the read targeted; used only in error messages.
 *
 * Returns normally when exactly PAGE_SIZE bytes were read. Throws
 * std::system_error when pread() reported -1, and std::runtime_error for
 * any other byte count (a short read).
 */
void DiskManager::HandleReadError(ssize_t bytes_read, page_id_t page_id) {
    const bool complete_read = (bytes_read == PAGE_SIZE);
    if (!complete_read) {
        const bool syscall_failed = (bytes_read == -1);
        if (!syscall_failed) {
            // Fewer bytes than a page came back (likely hit EOF prematurely).
            throw std::runtime_error("Incomplete read on Page " + std::to_string(page_id) +
                                     ": Got " + std::to_string(bytes_read) + " bytes.");
        }
        throw std::system_error(errno, std::generic_category(),
                                "Critical pread failure on Page " + std::to_string(page_id));
    }
}
#pragma once
#include "common/types.h"
#include "common/config.h"
#include "storage/freelist_page.h"
#include <string>
#include <unordered_set>
#include <iostream>
#include <fstream>
#include <filesystem>
#include <fcntl.h> //open()
#include <unistd.h> //close()
#include <sys/stat.h> //mode constants
#include <system_error>
namespace fs = std::filesystem;
using std::cout;
using std::cerr;
using std::endl;
using std::ofstream;
/*
* DiskManager — raw I/O abstraction over a single database file.
*
* Responsibilities:
* - Translates logical page_id values into byte offsets:
* offset = page_id * PAGE_SIZE
* and issues pread/pwrite syscalls of exactly PAGE_SIZE bytes.
* - Owns the free-page list. allocatePage() prefers reusing a deallocated
* page over extending the file; otherwise it bumps next_page_id_ and
* truncates/extends the file.
* - Serialises concurrent I/O if you add threads later (single fd_ today,
* add a mutex when needed).
*
* What DiskManager deliberately does NOT do:
* - It does not cache anything. Every call is a real syscall.
* BufferPoolManager is the cache; DiskManager is the backing store.
* - It does not know what is inside a page (no header parsing, no slot logic).
* - It does not manage transactions or logs.
*
* File layout on disk:
* [DB header page — page_id 0] (optional: stores free-list metadata)
* [page 1][page 2]...[page N]
* Each page is exactly PAGE_SIZE bytes, no gaps, no padding.
*
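* Design decision — free page tracking:
* The original plan was an in-memory free_pages_ set that is persisted into
* page 0 only on a clean shutdown (a crash would lose it and leak disk pages
* unless the WAL recovery phase reconstructs it). The implementation instead
* keeps the free list on disk: GlobalMetadata::freelist_head (stored in
* page 0) points to a chain of Freelist_Page pages, each holding ids of
* free pages. Crash-consistency of that chain is still an open decision.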
*/
// On-disk database header. DiskManager keeps one copy in memory and writes
// all PAGE_SIZE bytes of it to file offset 0 (page 0) in UpdateMetadata().
// The field order and sizes are the file format: do not reorder or resize.
struct alignas(PAGE_SIZE) GlobalMetadata {
// Idea: add a "dirty" flag so the header is flushed lazily rather than on
// every change, since rewriting it each time is expensive.
// Checked against MAGIC_NUMBER when an existing file is opened.
uint32_t magic_number;
// NOTE(review): not read or written by the DiskManager code shown — presumably a format version; confirm.
uint32_t db_version;
// NOTE(review): not used by the DiskManager code shown — presumably the root of an index structure; confirm.
page_id_t root_page_id;
page_id_t freelist_head; //head of the freelist page chain; INVALID_PAGE_ID when there is none
page_id_t next_page_id; //the "high water mark": next never-used page id; reads at or above it are rejected
// Bytes taken by the fields above, assuming the compiler inserts no padding
// between them; the static_assert after the struct fails if that is wrong.
static constexpr size_t FIXED_SIZE = sizeof(uint32_t) * 2 + sizeof(page_id_t) * 3;
// Fills the rest of the page so the struct is exactly one page long.
uint8_t unused_padding[PAGE_SIZE - FIXED_SIZE];
};
// The header is transferred with page-sized pwrite/read calls, so it must be exactly one page.
static_assert(sizeof(GlobalMetadata) == PAGE_SIZE, "GlobalMetadata is not exactly PAGE_SIZE!");
class DiskManager {
public:
    /*
     * Opens (or creates) the database file at file_path.
     * On creation, initialises the file with a header page at offset 0.
     * On open, reads the header (including next_page_id) so allocation
     * resumes where it left off. Throws std::runtime_error if the file
     * cannot be opened or if the header is corrupt.
     */
    explicit DiskManager(const std::string& file_path);

    /*
     * Writes the header back and closes the file descriptor. Does NOT flush
     * any caller-held buffers — BufferPoolManager must flush all dirty pages
     * before destroying this.
     */
    ~DiskManager();

    // The object owns fd_ and closes it in the destructor; a copy would close
    // the same descriptor twice. Declaring these also suppresses implicit moves.
    DiskManager(const DiskManager&) = delete;
    DiskManager& operator=(const DiskManager&) = delete;

    /*
     * Writes exactly PAGE_SIZE bytes from `data` to the file at:
     *   offset = page_id * PAGE_SIZE
     * Uses pwrite(), so the descriptor's shared file position is not involved.
     * Caller must ensure `data` points to at least PAGE_SIZE bytes.
     * Throws on I/O error. Does NOT call fsync — durability is LogManager's job.
     */
    void writePage(page_id_t page_id, const char* data);

    /*
     * Reads exactly PAGE_SIZE bytes from offset (page_id * PAGE_SIZE) into
     * `data`. Uses pread(). `data` must point to a PAGE_SIZE buffer.
     * Throws on I/O error or if page_id >= next_page_id.
     */
    void readPage(page_id_t page_id, char* data);

    /*
     * Returns a page_id for a writable page.
     * Allocation order:
     *   1. Reuse a page from the on-disk free list (the chain starting at
     *      global_metadata_.freelist_head) when one exists.
     *   2. Otherwise, return next_page_id++.
     * Does NOT zero-initialise the page bytes — caller must treat the
     * contents as undefined and initialise before writing.
     */
    page_id_t allocatePage();

    /*
     * Marks page_id as free for future reuse by recording it in the on-disk
     * free list. Does NOT zero the bytes on disk; the page is simply
     * available for reallocation. Caller must ensure no live references
     * remain before calling this (BufferPoolManager must have evicted the
     * page first).
     */
    void deallocatePage(page_id_t page_id);

    /*
     * Returns the number of pages currently allocated (including free pages
     * that haven't been reclaimed yet). Useful for testing and benchmarking.
     */
    page_id_t getPageCount() const;

private:
    // Descriptor of the database file; -1 until the constructor opens it.
    int fd_{-1};
    // In-memory copy of the header stored in page 0.
    GlobalMetadata global_metadata_{};

    // Writes global_metadata_ into page 0 (master).
    // Defined inline in the header; it is private, so only DiskManager
    // members can call it.
    void UpdateMetadata(){
        const ssize_t w = pwrite(fd_, &global_metadata_, PAGE_SIZE, 0);
        // Throws on a failed or partial write; on success HandleWriteError()
        // itself fsyncs fd_, so no second fsync is issued here.
        HandleWriteError(w, 0);
    }

    // Checks the result of a page-sized pwrite(): throws on failure or a
    // partial write, and fsyncs fd_ when the write was complete.
    void HandleWriteError(ssize_t bytes_written, page_id_t page_id);
    // Checks the result of a page-sized pread(): throws on failure or a short read.
    void HandleReadError(ssize_t bytes_read, page_id_t page_id);
};