Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
81 changes: 81 additions & 0 deletions darshan-runtime/lib/darshan-posix.c
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,8 @@ DARSHAN_FORWARD_DECL(creat64, int, (const char* path, mode_t mode));
DARSHAN_FORWARD_DECL(dup, int, (int oldfd));
DARSHAN_FORWARD_DECL(dup2, int, (int oldfd, int newfd));
DARSHAN_FORWARD_DECL(dup3, int, (int oldfd, int newfd, int flags));
DARSHAN_FORWARD_DECL(fcntl, int, (int, int, ...));
DARSHAN_FORWARD_DECL(fcntl64, int, (int, int, ...));
DARSHAN_FORWARD_DECL(fileno, int, (FILE *stream));
DARSHAN_FORWARD_DECL(mkstemp, int, (char *template));
DARSHAN_FORWARD_DECL(mkostemp, int, (char *template, int flags));
Expand Down Expand Up @@ -787,6 +789,85 @@ int DARSHAN_DECL(dup3)(int oldfd, int newfd, int flags)
return(ret);
}

/* Wrapping fcntl is a little strange for two related reasons:
 * - the call can do a lot of different things depending on 'cmd'
 * - only some of those commands take a third argument
 *
 * The documentation has some worrying notes about calling va_arg when no
 * third (variable) argument was passed, but we observed glibc doing exactly
 * that, and our testing seems to indicate it is ok to do so.
 *
 * So we always grab the variable argument, even if it is not there, and
 * always pass whatever we get to the real fcntl. Then we figure out whether
 * the command is one we should log, and update our stats accordingly.
 */

/*
 * fcntl() wrapper.
 *
 * Forwards (fd, cmd, third argument) to the real fcntl, timing the call.
 * When the call succeeds and 'cmd' is F_DUPFD or F_DUPFD_CLOEXEC, the record
 * reference registered for 'fd' is looked up in posix_runtime->fd_hash and
 * handed, together with the new descriptor and the two timestamps, to
 * POSIX_RECORD_REFOPEN with the POSIX_DUPS counter.
 *
 * The third argument is always read as a pointer-sized value and passed
 * through unchanged, whether or not the caller supplied one.
 *
 * Returns the value produced by the real fcntl.
 */
int DARSHAN_DECL(fcntl)(int fd, int cmd, ...)
{
    /* NOTE(review): the POSIX_* macros are defined outside this view and may
     * refer to locals by name, so ret/rec_ref/tm1/tm2 keep their names --
     * confirm before renaming them */
    int ret;
    double tm1, tm2;
    struct posix_file_record_ref *rec_ref;
    va_list ap;
    void *opt_arg;

    MAP_OR_FAIL(fcntl);

    /* pull out the (possibly absent) third argument so it can be forwarded */
    va_start(ap, cmd);
    opt_arg = va_arg(ap, void *);
    va_end(ap);

    tm1 = POSIX_WTIME();
    ret = __real_fcntl(fd, cmd, opt_arg);
    tm2 = POSIX_WTIME();

    /* failed calls, and commands that do not duplicate a descriptor, are
     * passed through without touching any record */
    if (ret < 0 || (cmd != F_DUPFD && cmd != F_DUPFD_CLOEXEC)) {
        return ret;
    }

    /* some code (e.g. python) prefers (portability? functionality?) to
     * duplicate the file descriptor via fcntl instead of dup/dup2/dup3 */
    POSIX_PRE_RECORD();
    rec_ref = darshan_lookup_record_ref(posix_runtime->fd_hash,
                                        &fd, sizeof(fd));
    POSIX_RECORD_REFOPEN(ret, rec_ref, tm1, tm2, POSIX_DUPS);
    POSIX_POST_RECORD();

    return ret;
}

/*
 * fcntl64() wrapper.
 *
 * Forwards (fd, cmd, third argument) to the real fcntl64, timing the call.
 * When the call succeeds and 'cmd' is F_DUPFD or F_DUPFD_CLOEXEC, the record
 * reference registered for 'fd' is looked up in posix_runtime->fd_hash and
 * handed, together with the new descriptor and the two timestamps, to
 * POSIX_RECORD_REFOPEN with the POSIX_DUPS counter.
 *
 * The third argument is always read as a pointer-sized value and passed
 * through unchanged, whether or not the caller supplied one.
 *
 * Returns the value produced by the real fcntl64.
 */
int DARSHAN_DECL(fcntl64)(int fd, int cmd, ...)
{
    /* NOTE(review): the POSIX_* macros are defined outside this view and may
     * refer to locals by name, so ret/rec_ref/tm1/tm2 keep their names --
     * confirm before renaming them */
    int ret;
    double tm1, tm2;
    struct posix_file_record_ref *rec_ref;
    va_list ap;
    void *opt_arg;

    MAP_OR_FAIL(fcntl64);

    /* pull out the (possibly absent) third argument so it can be forwarded */
    va_start(ap, cmd);
    opt_arg = va_arg(ap, void *);
    va_end(ap);

    tm1 = POSIX_WTIME();
    ret = __real_fcntl64(fd, cmd, opt_arg);
    tm2 = POSIX_WTIME();

    /* failed calls, and commands that do not duplicate a descriptor, are
     * passed through without touching any record */
    if (ret < 0 || (cmd != F_DUPFD && cmd != F_DUPFD_CLOEXEC)) {
        return ret;
    }

    /* This duplicates the recording code in the fcntl wrapper. An attempt to
     * break it out into a separate 'fcntl_common' routine failed: the
     * POSIX_PRE_RECORD() macro could not find the __darshan_disabled
     * variable, and weird "failed to seek" errors showed up. It may have to
     * become a macro instead. */
    POSIX_PRE_RECORD();
    rec_ref = darshan_lookup_record_ref(posix_runtime->fd_hash,
                                        &fd, sizeof(fd));
    POSIX_RECORD_REFOPEN(ret, rec_ref, tm1, tm2, POSIX_DUPS);
    POSIX_POST_RECORD();

    return ret;
}
int DARSHAN_DECL(fileno)(FILE *stream)
{
int ret;
Expand Down
2 changes: 2 additions & 0 deletions darshan-runtime/share/ld-opts/darshan-posix-ld-opts
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@
--wrap=dup
--wrap=dup2
--wrap=dup3
--wrap=fcntl
--wrap=fcntl64
--wrap=mkstemp
--wrap=mkostemp
--wrap=mkstemps
Expand Down
15 changes: 15 additions & 0 deletions darshan-test/python_mpi_scripts/runtime_prog_issue_1072.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
#!/usr/bin/env python3

from mpi4py import MPI
import numpy as np
import os

def main():
    """Save a 50-element integer array to an .npy file, then delete the file."""
    # NOTE(review): the communicator is never used afterwards; presumably it is
    # referenced only so the script counts as an MPI program -- confirm.
    comm = MPI.COMM_WORLD
    npy_path = 'single_array.npy'
    values = np.arange(50)
    np.save(npy_path, values)
    os.remove(npy_path)


if __name__ == "__main__":
main()
45 changes: 45 additions & 0 deletions darshan-test/python_runtime_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -231,3 +231,48 @@ def test_forked_process_mpi(tmpdir):
# regression test for gh-786, mpi version
darshan_install_path = os.environ.get("DARSHAN_INSTALL_PATH")
do_forked_process_test(tmpdir, darshan_install_path)

def test_os_dup(tmpdir):
    """Run the numpy save/remove reproducer under Darshan and load its log.

    numpy calls Python's os.dup(), which in turn duplicates a file descriptor
    via fcntl. The reproducer script is launched through mpirun with the
    Darshan library preloaded; the test then checks that exactly one Darshan
    log was produced and opens it restricted to the .npy file the script
    writes.

    Reads DARSHAN_ROOT_PATH, DARSHAN_INSTALL_PATH and HDF5_LIB from the
    environment.
    """
    n_ranks = 1
    root_path = os.environ.get("DARSHAN_ROOT_PATH")
    darshan_install_path = os.environ.get("DARSHAN_INSTALL_PATH")
    # The reproducer for this test is the script that saves and removes
    # 'single_array.npy' -- the same file name filtered on below.
    test_script_path = os.path.join(root_path,
                                    "darshan-test",
                                    "python_mpi_scripts",
                                    "runtime_prog_issue_1072.py")
    darshan_lib_path = os.path.join(darshan_install_path,
                                    "lib",
                                    "libdarshan.so")
    hdf5_lib_path = os.environ.get("HDF5_LIB")

    with tmpdir.as_cwd():
        cwd = os.getcwd()
        subprocess.check_output(["mpirun",
                                 "--allow-run-as-root",
                                 "-n",
                                 f"{n_ranks}",
                                 "-x",
                                 f"LD_PRELOAD={darshan_lib_path}:{hdf5_lib_path}",
                                 "-x",
                                 f"DARSHAN_LOGPATH={cwd}",
                                 "python",
                                 f"{test_script_path}"])

        log_file_list = glob.glob("*.darshan")
        # only a single log file should be generated
        # by darshan
        assert len(log_file_list) == 1
        path_to_log = os.path.join(cwd, log_file_list[0])
        # numpy will read a bunch of python files but we only care about the
        # numpy file
        target_filename = "single_array.npy"
        report = darshan.DarshanReport(path_to_log, filter_patterns=[target_filename], filter_mode='include')
        # common stuff done. Real check for the "dup via fcntl" issue

        io_module = "POSIX"
        key = "POSIX_BYTES_WRITTEN"
        # TODO: the byte-count check below is still disabled; enable it once
        # the expected value has been confirmed.
        #value = report.records[io_module].to_dict()[0]["counters"][key]
        #print(f"key '{key}' has value of '{value}'")
        #assert value == 528
Loading