Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions builtin/func_hay.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,13 +47,15 @@ def _Call(self, path):

# TODO: need to close the file!
try:
f = self.fd_state.Open(path)
f, fd = self.fd_state.Open(path, persistent=True)
except (IOError, OSError) as e:
msg = posix.strerror(e.errno)
raise error.Expr("Couldn't open %r: %s" % (path, msg), call_loc)

arena = self.parse_ctx.arena
line_reader = reader.FileLineReader(f, arena)
file_line_reader = reader.FileLineReader(f, arena)
line_reader = file_line_reader
self.fd_state.SetCallback(fd, file_line_reader)

parse_opts = state.MakeYshParseOpts()
# Note: runtime needs these options and totally different memory
Expand Down
9 changes: 6 additions & 3 deletions builtin/meta_oils.py
Original file line number Diff line number Diff line change
Expand Up @@ -194,16 +194,19 @@ def LoadEmbeddedFile(self, embed_path, blame_loc):
def _LoadDiskFile(self, fs_path, blame_loc):
# type: (str, loc_t) -> Tuple[mylib.LineReader, cmd_parse.CommandParser]
try:
# Shell can't use descriptors 3-9
f = self.fd_state.Open(fs_path)
# The file being sourced needs to be open across several commands
# that can potentially move its fd, so open it persistently
f, fd = self.fd_state.Open(fs_path, persistent=True)
except (IOError, OSError) as e:
self.errfmt.Print_(
'%s %r failed: %s' %
(self.builtin_name, fs_path, pyutil.strerror(e)),
blame_loc=blame_loc)
return None, None

line_reader = reader.FileLineReader(f, self.arena)
file_line_reader = reader.FileLineReader(f, self.arena)
line_reader = file_line_reader
self.fd_state.SetCallback(fd, file_line_reader)
c_parser = self.parse_ctx.MakeOshParser(line_reader)
return f, c_parser

Expand Down
7 changes: 5 additions & 2 deletions core/main_loop.py
Original file line number Diff line number Diff line change
Expand Up @@ -459,13 +459,16 @@ def EvalFile(
ok: whether processing should continue
"""
try:
f = fd_state.Open(fs_path)
f, fd = fd_state.Open(fs_path, persistent=True)
except (IOError, OSError) as e:
print_stderr("%s: Couldn't open %r for --eval: %s" %
(lang, fs_path, posix.strerror(e.errno)))
return False, -1

line_reader = reader.FileLineReader(f, cmd_ev.arena)
file_line_reader = reader.FileLineReader(f, cmd_ev.arena)
line_reader = file_line_reader
fd_state.SetCallback(fd, file_line_reader)

c_parser = parse_ctx.MakeOshParser(line_reader)

# TODO:
Expand Down
74 changes: 54 additions & 20 deletions core/process.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,17 +71,20 @@
from builtin import trap_osh
from core import optview
from core.util import _DebugFile
from frontend import reader
from osh.cmd_eval import CommandEvaluator

NO_FD = -1

# Minimum file descriptor that the shell can use. Other descriptors can be
# directly used by user programs, e.g. exec 9>&1
# directly used by user programs, e.g. exec 99>&1
#
# Oils uses 100 because users are allowed TWO digits in frontend/lexer_def.py.
# This is a compromise between bash (unlimited, but requires crazy
# bookkeeping), and dash/zsh (10) and mksh (24)
_SHELL_MIN_FD = 100
# Oils uses 10 as a compromise - we shift internal fds when they're requested
# by users so that we don't need an explicit divide between user-usable and
# internal file descriptors (users are currently allowed to use fds up to 99),
# but file descriptors below 10 are requested by scripts so often that it
# doesn't make much sense taking them up only to shift fds later.
_SHELL_MIN_FD = 10

# Style for 'jobs' builtin
STYLE_DEFAULT = 0
Expand Down Expand Up @@ -211,11 +214,16 @@ def __init__(
self.tracer = tracer
self.waiter = waiter
self.exec_opts = exec_opts
self.persistent = {} # type: Dict[int, bool]
self.callbacks = {} # type: Dict[int, reader.FileLineReader]
self.c_mode = {} # type: Dict[int, str]

def Open(self, path):
# type: (str) -> mylib.LineReader
def Open(self, path, persistent=False):
# type: (str, bool) -> Tuple[mylib.LineReader, int]
"""Opens a path for read, but moves it out of the reserved 3-9 fd
range.
range. If the opened file is going to be "persistent" then a callback
has to be provided in SetCallback(), which will be invoked when the
file descriptor is moved when requested by the user.

Returns:
A tuple of (Python file object, its file descriptor number). The caller is responsible for Close().
Expand All @@ -224,31 +232,41 @@ def Open(self, path):
IOError or OSError if the path can't be found. (This is Python-induced wart)
"""
fd_mode = O_RDONLY
f = self._Open(path, 'r', fd_mode)
f, fd = self._Open(path, 'r', fd_mode, persistent)

# Hacky downcast
return cast('mylib.LineReader', f)
return (cast('mylib.LineReader', f), fd)

# used for util.DebugFile
def OpenForWrite(self, path):
# type: (str) -> mylib.Writer
def OpenForWrite(self, path, persistent=False):
# type: (str, bool) -> mylib.Writer
fd_mode = O_CREAT | O_RDWR
f = self._Open(path, 'w', fd_mode)
f, _ = self._Open(path, 'w', fd_mode, persistent)

# Hacky downcast
return cast('mylib.Writer', f)

def _Open(self, path, c_mode, fd_mode):
# type: (str, str, int) -> IO[str]
def _Open(self, path, c_mode, fd_mode, persistent):
# type: (str, str, int, bool) -> Tuple[IO[str], int]
fd = posix.open(path, fd_mode, 0o666) # may raise OSError

# Immediately move it to a new location
new_fd = SaveFd(fd)
posix.close(fd)
self.persistent[new_fd] = persistent
self.c_mode[new_fd] = c_mode

# Return a Python file handle
f = posix.fdopen(new_fd, c_mode) # may raise IOError
return f
return (f, new_fd)

def _IsPersistent(self, fd):
    # type: (int) -> bool
    """Tell whether fd is currently marked persistent.

    A descriptor with no entry in self.persistent is treated as
    non-persistent; otherwise the recorded flag is returned.
    """
    if fd not in self.persistent:
        return False
    return self.persistent[fd]

# Register the FileLineReader to notify when descriptor `f` is relocated.
# _PushSave() looks the reader up by fd number and calls its ReplaceFd() with
# a file object wrapping the new descriptor.
# NOTE(review): despite its name, `f` is an int fd (see the type comment), not
# a file object. _PushSave() calls self.callbacks.get(fd).ReplaceFd(...)
# without a None check, so every persistent Open() must be followed by a
# SetCallback() for the returned fd.
def SetCallback(self, f, callback):
# type: (int, reader.FileLineReader) -> None
self.callbacks[f] = callback

def _WriteFdToMem(self, fd_name, fd):
# type: (str, int) -> None
Expand Down Expand Up @@ -282,13 +300,29 @@ def _PushSave(self, fd):
if e.errno != EBADF:
raise
if ok:
if self._IsPersistent(fd):
# Invoke the callback with the new Python file descriptor
f = cast('mylib.LineReader',
posix.fdopen(new_fd, self.c_mode[fd]))
self.callbacks.get(fd).ReplaceFd(f)

# Move persistent status and callback to the new fd
mylib.dict_erase(self.persistent, fd)
self.persistent[new_fd] = True
self.callbacks[new_fd] = self.callbacks[fd]
mylib.dict_erase(self.callbacks, fd)
self.c_mode[new_fd] = self.c_mode[fd]
mylib.dict_erase(self.c_mode, fd)
# No need to push the _RedirFrame - we are not restoring this
# fd to its previous value
else:
self.cur_frame.saved.append(_RedirFrame(new_fd, fd, True))
posix.close(fd)
self.cur_frame.saved.append(_RedirFrame(new_fd, fd, True))
else:
# if we got EBADF, we still need to close the original on Pop()
self._PushClose(fd)

return ok
return (ok and not self._IsPersistent(new_fd))

def _PushDup(self, fd1, blame_loc):
# type: (int, redir_loc_t) -> int
Expand Down Expand Up @@ -726,7 +760,7 @@ def _Exec(self, argv0_path, argv, argv0_loc, environ, should_retry):
if len(self.hijack_shebang):
opened = True
try:
f = self.fd_state.Open(argv0_path)
f, _ = self.fd_state.Open(argv0_path)
except (IOError, OSError) as e:
opened = False

Expand Down Expand Up @@ -2093,7 +2127,7 @@ def WaitForOne(self, waitpid_options=0):
NoChildren -- ECHILD - no more
| Exited(int pid) -- process done - call job_list.PopStatus() for status
# do we also we want ExitedWithSignal() ?
| Stopped(int pid)
| Stopped(int pid)
| Interrupted(int sig_num) -- may or may not retry
| UntrappedSigwinch -- ignored

Expand Down
13 changes: 9 additions & 4 deletions core/shell.py
Original file line number Diff line number Diff line change
Expand Up @@ -151,15 +151,18 @@ def SourceStartupFile(
# Bash also has --login.

try:
f = fd_state.Open(rc_path)
f, fd = fd_state.Open(rc_path, persistent=True)
except (IOError, OSError) as e:
# TODO: Could warn about nonexistent explicit --rcfile?
if e.errno != ENOENT:
raise # Goes to top level. Handle this better?
return

arena = parse_ctx.arena
rc_line_reader = reader.FileLineReader(f, arena)
file_line_reader = reader.FileLineReader(f, arena)
rc_line_reader = file_line_reader
fd_state.SetCallback(fd, file_line_reader)

rc_c_parser = parse_ctx.MakeOshParser(rc_line_reader)

with alloc.ctx_SourceCode(arena, source.MainFile(rc_path)):
Expand Down Expand Up @@ -1068,12 +1071,14 @@ def Main(
else:
src = source.MainFile(script_name)
try:
f = fd_state.Open(script_name)
f, fd = fd_state.Open(script_name, persistent=True)
except (IOError, OSError) as e:
print_stderr("%s: Couldn't open %r: %s" %
(lang, script_name, posix.strerror(e.errno)))
return 1
line_reader = reader.FileLineReader(f, arena)
file_line_reader = reader.FileLineReader(f, arena)
line_reader = file_line_reader
fd_state.SetCallback(fd, file_line_reader)

# Pretend it came from somewhere else
if flag.location_str is not None:
Expand Down
6 changes: 6 additions & 0 deletions frontend/reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,12 @@ def __init__(self, f, arena):
self.f = f
self.last_line_hint = False

def ReplaceFd(self, fd):
# type: (mylib.LineReader) -> None
tell = self.f.tell()
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hm I actually don't see why this works? Why do we need tell() and seek() ?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Essentially - we want to restore the position we've read the previous file up to.

The position that dup() preserves doesn't cover this use case. It's a bit confusing: what gets preserved automatically is the OS-level file offset, which reflects how far Python has read ahead into its internal buffer, not how much we have actually consumed from the file. So without restoring our position explicitly, we lose most of the file, because as far as the OS is concerned we've already read it to the end:

>>> import posix
>>> import fcntl
>>> fd = posix.open('test.osh', posix.O_RDONLY, 0o666)
>>> f = posix.fdopen(fd)
>>> f.readline()
'# first line\n'
>>> f.readline()
'# second line\n'
>>> f.tell()
27
>>> new_fd = fcntl.fcntl(fd, fcntl.F_DUPFD, 25)
>>> new_fd
25
>>> g = posix.fdopen(new_fd)
>>> g.tell()
3229
>>> g.readline()
'' <<< (should be '# third line', instead it's EOF)

(it's a bit different in the C++ implementation, but it's the same principle)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah OK, that is interesting, but based on experience, I'm a bit "scared" of doing this

Actually one thing that became apparent to me is that shells PREDATE libc, and they don't use libc I/O -- they use POSIX I/O

Oils is unusual in that it uses libc I/O (which has buffering), and I might want to change that

that is

  • libc: FILE* fwrite, fread, fseek, ftell
  • posix: read, write, lseek, fcntl

i.e. it might be a bit bad that we are mixing fcntl and fread/fwrite -- most shells don't do that

There can be buffering bugs


That said, this is a great change to explore the problem

I'm glad you were able to fix the test so quickly, and also understand mycpp so quickly! (even with some unfortunate warts)


I think we should look into the busybox ash algorithm, as mentioned on Zulip

A funny thing is that it's basically the dash source concatenated into a big 13K line file

https://github.com/brgl/busybox/blob/master/shell/ash.c

(btw one thing I did once is just upload this whole file into Claude, and then ask it questions ... it does hallucinate and get confused of course, but I think it was net helpful at least a few times ... I have to be real skeptical)

fd.seek(tell)
self.f = fd

def _GetLine(self):
# type: () -> Optional[str]
line = self.f.readline()
Expand Down
8 changes: 8 additions & 0 deletions mycpp/gc_mylib.cc
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,14 @@ LineReader* open(BigStr* path) {
return reinterpret_cast<LineReader*>(Alloc<CFile>(f));
}

// Reposition the underlying FILE* to the absolute byte offset `offset`
// (SEEK_SET, i.e. measured from the start of the file).
// NOTE(review): the fseek() return value is ignored, so a failed seek (for
// example on a non-seekable stream) goes unnoticed -- confirm that is intended.
void CFile::seek(int offset) {
fseek(f_, offset, SEEK_SET);
}

// Return the current position of the underlying FILE* as reported by ftell().
// NOTE(review): ftell() returns long, and -1 on error; the value is narrowed
// to int here and the error case is not distinguished -- confirm that very
// large files and non-seekable streams are not a concern for callers.
int CFile::tell() {
return ftell(f_);
}

BigStr* CFile::readline() {
char* line = nullptr;
size_t allocated_size = 0; // unused
Expand Down
16 changes: 16 additions & 0 deletions mycpp/gc_mylib.h
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,8 @@ class File {

// Reader
virtual BigStr* readline() = 0;
virtual void seek(int offset) = 0;
virtual int tell() = 0;

// Both
virtual bool isatty() = 0;
Expand All @@ -161,6 +163,8 @@ class CFile : public File {

// Reader
BigStr* readline() override;
void seek(int offset) override;
int tell() override;

// Both
bool isatty() override;
Expand Down Expand Up @@ -213,6 +217,12 @@ class BufLineReader : public LineReader {
}
virtual void close() {
}
// Set the cursor pos_ to `offset`. No bounds check is performed here.
// NOTE(review): presumably pos_ indexes into s_ -- confirm callers only pass
// an offset previously obtained from tell().
void seek(int offset) {
pos_ = offset;
}
// Return the current cursor pos_, suitable for passing back to seek().
int tell() {
return pos_;
}

BigStr* s_;
int pos_;
Expand Down Expand Up @@ -248,6 +258,12 @@ class Writer : public File {
BigStr* readline() override {
CHECK(false); // should not happen
}
// seek() is declared in the Reader section of File; it is not meaningful for
// a Writer, so reaching this override trips CHECK(false).
void seek(int offset) override {
CHECK(false);  // should not happen
}
// tell() is declared in the Reader section of File; it is not meaningful for
// a Writer, so reaching this override trips CHECK(false).
// NOTE(review): there is no return statement after CHECK(false) in this
// int-returning function -- this relies on CHECK not returning; confirm.
int tell() override {
CHECK(false);  // should not happen
}

static constexpr ObjHeader obj_header() {
return ObjHeader::ClassFixed(field_mask(), sizeof(Writer));
Expand Down
12 changes: 12 additions & 0 deletions mycpp/mylib.py
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,18 @@ def isatty(self):
# type: () -> bool
raise NotImplementedError()

# Interface stub: report the current read position as an int, in a form that
# can later be handed to seek(). This base implementation always raises
# NotImplementedError.
def tell(self):
# type: () -> int
raise NotImplementedError()

# Interface stub: move the read position to `offset` (a value obtained from
# tell()). This base implementation always raises NotImplementedError.
def seek(self, offset):
# type: (int) -> None
raise NotImplementedError()

# Interface stub returning an int; this base implementation always raises
# NotImplementedError.
# NOTE(review): presumably the underlying OS file descriptor number -- no
# caller is visible in this change; confirm it is needed.
def fileno(self):
# type: () -> int
raise NotImplementedError()


if TYPE_CHECKING:

Expand Down
13 changes: 13 additions & 0 deletions spec/builtin-eval-source.test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -348,6 +348,19 @@ echo status=$?
## stdout: status=1
## OK dash/zsh/mksh stdout: status=0

#### source'd fd should be shifted when requested by the script
cd $TMP
cat >out.osh <<EOF
exec 10>out
echo hello>&10
cat out
EOF
. ./out.osh
## stdout: hello
## N-I dash/zsh status: 127
## N-I dash/zsh STDOUT:
## END

#### sourcing along PATH should ignore directories

mkdir -p _tmp/shell
Expand Down
4 changes: 2 additions & 2 deletions spec/builtin-process.test.sh
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
## compare_shells: dash bash mksh zsh
## oils_failures_allowed: 2
## oils_cpp_failures_allowed: 3
## oils_failures_allowed: 1
## oils_cpp_failures_allowed: 2
# case #24 with ulimit -f 1 is different under C++ for some reason - could be due to the python2
# interpreter and SIGXFSZ

Expand Down
11 changes: 11 additions & 0 deletions spec/interactive.test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -323,6 +323,17 @@ $SH --rcfile myrc -i -c 'show-shell-state main'
# comparisons.
# The --details flag is useful

#### rc file's fd is moved out of the way when requested by the user
cat >$TMP/oshrc <<EOF
exec 10>out
echo hello>&10
cat out
EOF
$SH --rcfile $TMP/oshrc -i -c 'echo hello'
## STDOUT:
hello
hello
## END

#### HISTFILE is written in interactive shell

Expand Down
Loading