Skip to content

Commit 8e83d60

Browse files
committed
feat: add seek and tell to FS.Handle
This PR adds `IO.FS.Handle.seek` and `IO.FS.Handle.tell` functions, to allow positioning the file cursor and querying its location respectively.
1 parent 5bd331e commit 8e83d60

File tree

3 files changed

+232
-0
lines changed

3 files changed

+232
-0
lines changed

src/Init/System/IO.lean

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ prelude
99
public import Init.System.IOError
1010
public import Init.System.FilePath
1111
public import Init.Data.Ord.UInt
12+
public import Init.Data.SInt
1213
import Init.Data.String.TakeDrop
1314
import Init.Data.String.Search
1415

@@ -829,6 +830,85 @@ cursor includes buffered writes. However, buffered writes followed by `IO.FS.Han
829830
`IO.FS.Handle.flush` before truncating.
830831
-/
831832
@[extern "lean_io_prim_handle_truncate"] opaque truncate (h : @& Handle) : IO Unit
833+
834+
/--
835+
Specifies the reference point for a file seek operation.
836+
837+
**Operating System Specifis:**
838+
* Windows: [`_fseeki64](https://learn.microsoft.com/en-us/cpp/c-runtime-library/reference/fseek-fseeki64?view=msvc-170)
839+
* Linux: [`fseeko`](https://linux.die.net/man/3/fseeko)
840+
-/
841+
inductive SeekMode where
842+
/--
843+
Seek from the beginning of the file.
844+
845+
The read/write cursor is positioned relative to the beginning of the file.
846+
An offset of zero is the first byte of the file. Offsets should be
847+
non-negative.
848+
849+
* `fseeko` / `_fseeki64` value: `SEEK_SET`
850+
-/
851+
| start
852+
/--
853+
Seek from the current file position.
854+
855+
The read/write cursor is positioned relative to the current file position.
856+
An offset of zero is the current byte of the file. Negative offsets are
857+
permitted.
858+
859+
* `fseek` / `_fseeki64` value: `SEEK_CUR`
860+
-/
861+
| cur
862+
/--
863+
Seek from the end of the file.
864+
865+
The read/write cursor is positioned relative to the end of the file. An
866+
offset of zero is one byte past the end of the file, while an offset of
867+
(-1) is the is the last byte of the file. Positive offsets will move
868+
past the end of the file.
869+
870+
* `fseek` / `_fseeki64` value: `SEEK_END`
871+
-/
872+
| end
873+
deriving Repr, BEq, Inhabited
874+
875+
/--
876+
Moves the read/write cursor to a specific position (FFI definition).
877+
878+
The FFI definition of `seek` requires `(offset : UInt64)` due to FFI
879+
limitations. However, in reality `(offset : Int64)` is the correct type.
880+
This function avoids that confusion.
881+
-/
882+
@[extern "lean_io_prim_handle_seek"]
883+
private opaque primSeek (h : @& Handle) (mode : SeekMode) (offset : UInt64) : IO Unit
884+
885+
/--
886+
Moves the read/write cursor to a specific position in the file.
887+
888+
The `offset` is a signed 64-bit value in bytes. It is interpreted relative to:
889+
* the beginning of the file if `mode = .start`,
890+
* the current position if `mode = .cur`,
891+
* the end of the file if `mode = .end`.
892+
893+
**Operating System Specifis:**
894+
* Windows: [`_fseeki64`](https://learn.microsoft.com/en-us/cpp/c-runtime-library/reference/fseek-fseeki64?view=msvc-170)
895+
* Linux: [`fseeko`](https://linux.die.net/man/3/fseeko)
896+
-/
897+
@[inline] def seek (h : Handle) (mode : SeekMode) (offset : Int64) : IO Unit :=
898+
primSeek h mode offset.toUInt64
899+
900+
/--
901+
Returns the current position of the read/write cursor in the file.
902+
903+
The position is measured in bytes from the beginning of the file.
904+
905+
**Operating System Specifis:**
906+
* Windows: [`_ftelli64`](https://learn.microsoft.com/en-us/cpp/c-runtime-library/reference/ftell-ftelli64?view=msvc-170)
907+
* Linux: [`ftello`](https://linux.die.net/man/3/ftello)
908+
-/
909+
@[extern "lean_io_prim_handle_tell"]
910+
opaque tell (h : @& Handle) : IO UInt64
911+
832912
/--
833913
Reads up to the given number of bytes from the handle. If the returned array is empty, an
834914
end-of-file marker (EOF) has been reached.

src/runtime/io.cpp

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -580,6 +580,61 @@ extern "C" LEAN_EXPORT obj_res lean_io_prim_handle_truncate(b_obj_arg h) {
580580
}
581581
}
582582

583+
/* Handle.seek : (@& Handle) → SeekMode → UInt64 → IO Unit
584+
*
585+
* FFI note: the offset is passed as a uint64_t, but it is really an
586+
* int64_t two's-complement bit pattern.
587+
*/
588+
extern "C" LEAN_EXPORT obj_res lean_io_prim_handle_seek(b_obj_arg h, uint8_t mode, uint64_t offset) {
589+
FILE * fp = io_get_handle(h);
590+
591+
// Decode the mode value indicating the seek reference point.
592+
int whence;
593+
switch (mode) {
594+
case 0: whence = SEEK_SET; break; // SeekMode.start
595+
case 1: whence = SEEK_CUR; break; // SeekMode.cur
596+
case 2: whence = SEEK_END; break; // SeekMode.end
597+
default: return io_result_mk_error("invalid seek mode");
598+
}
599+
600+
// The offset is passed unsigned, to overcome FFI limitations, but
601+
// we need to reinterpret it as signed, assuming 2s complement.
602+
int64_t signedOffset = (int64_t)offset;
603+
604+
#ifdef LEAN_WINDOWS
605+
// NOTE: We do not use std::fseek here because, on modern Windows
606+
// systems, std::fseek accepts a 32-bit long, which would
607+
// limit us to ~2GiB files.
608+
int ret = _fseeki64(fp, (__int64)signedOffset, whence);
609+
#else
610+
int ret = fseeko(fp, (off_t)signedOffset, whence);
611+
#endif
612+
613+
if (ret == 0) {
614+
return io_result_mk_ok(box(0));
615+
} else {
616+
return io_result_mk_error(decode_io_error(errno, nullptr));
617+
}
618+
}
619+
620+
/* Handle.tell : (@& Handle) → IO UInt64 */
621+
extern "C" LEAN_EXPORT obj_res lean_io_prim_handle_tell(b_obj_arg h) {
622+
FILE * fp = io_get_handle(h);
623+
#ifdef LEAN_WINDOWS
624+
// NOTE: We do not use std::ftell here because, on modern Windows
625+
// systems, std::ftell accepts a 32-bit long, which would
626+
// limit us to ~2GiB files.
627+
int64_t pos = _ftelli64(fp);
628+
#else
629+
off_t pos = ftello(fp);
630+
#endif
631+
if (pos >= 0) {
632+
return io_result_mk_ok(lean_box_uint64((uint64_t)pos));
633+
} else {
634+
return io_result_mk_error(decode_io_error(errno, nullptr));
635+
}
636+
}
637+
583638
/* Handle.read : (@& Handle) → USize → IO ByteArray */
584639
extern "C" LEAN_EXPORT obj_res lean_io_prim_handle_read(b_obj_arg h, usize nbytes) {
585640
FILE * fp = io_get_handle(h);

tests/lean/run/ioHandleSeek.lean

Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,97 @@
1+
/-!
2+
# File Seeking and Position Tests
3+
4+
These are tests for
5+
- `IO.FS.Handle.seek`
6+
- `IO.FS.Handle.tell`
7+
8+
These provide random access to file contents via the POSIX `fseek` and
9+
`ftell` functions. We assume that the underlying POSIX implementation
10+
is correct.
11+
-/
12+
13+
/--
14+
Run `action` on a temporary file containing the text `"Seek Test"`.
15+
16+
This function:
17+
* creates a temporary directory
18+
* writes `"Seek Test"` to a temporary file within the directory
19+
* re-opens the file in read mode and passes its handle to `action`
20+
21+
The temporary directory and file are cleaned up automatically when
22+
`action` finishes.
23+
-/
24+
private def withSeekTestFile (action : IO.FS.Handle → IO α) : IO α :=
25+
IO.FS.withTempDir fun tmpDir => do
26+
-- Create the file in write mode
27+
let fileName := tmpDir / "seek_test_file.txt"
28+
let contents := "Seek Test"
29+
IO.FS.withFile fileName .write fun h => h.putStr contents
30+
31+
-- Open the file in read mode to run the action
32+
IO.FS.withFile fileName .read action
33+
34+
/--
35+
Test that, on opening a file, the initial position is zero.
36+
-/
37+
def testTell0 : IO Unit :=
38+
withSeekTestFile fun h => do
39+
let pos0 ← h.tell
40+
assert! (pos0 == 0)
41+
42+
#eval testTell0
43+
44+
/--
45+
Test seeking relative to the start location.
46+
-/
47+
def testSeekStart : IO Unit :=
48+
withSeekTestFile fun h => do
49+
-- Go to byte 5, which is 'T'
50+
h.seek .start 5
51+
let pos5 ← h.tell
52+
let byteArr ← h.read 1
53+
assert! (pos5 == 5)
54+
assert! (byteArr == "T".toUTF8)
55+
56+
#eval testSeekStart
57+
58+
/--
59+
Test seeking relative to the end location.
60+
-/
61+
def testSeekEnd : IO Unit :=
62+
withSeekTestFile fun h => do
63+
-- Go to byte -4 from the end, which is 'T'
64+
h.seek .end (-4)
65+
let pos5 ← h.tell
66+
let byteArr ← h.read 1
67+
assert! (pos5 == 5)
68+
assert! (byteArr == "T".toUTF8)
69+
70+
#eval testSeekEnd
71+
72+
/--
73+
Test seeking relative to the current location.
74+
75+
Both positive and negative offsets are tested.
76+
-/
77+
def testSeekCur : IO Unit :=
78+
withSeekTestFile fun h => do
79+
-- Go to byte 5 from the current location, which is 'T'
80+
h.seek .cur 5
81+
let pos5 ← h.tell
82+
let byteArr5 ← h.read 1
83+
assert! (pos5 == 5)
84+
assert! (byteArr5 == "T".toUTF8)
85+
86+
-- After reading a byte, the position is advanced by 1.
87+
let pos6 ← h.tell
88+
assert! (pos6 == 6)
89+
90+
-- Go to byte -3 from the current location, which is 'k'
91+
h.seek .cur (-3)
92+
let pos3 ← h.tell
93+
let byteArr3 ← h.read 1
94+
assert! (pos3 == 3)
95+
assert! (byteArr3 == "k".toUTF8)
96+
97+
#eval testSeekCur

0 commit comments

Comments
 (0)