Skip to content

Commit a2ed83a

Browse files
fdmanana authored and bufflig committed
Fix efile_drv crash when using async thread pool
When using the async thread pool and compressed files, when an efile driver port instance is shut down, the efile_drv stop callback closes the file descriptor (actually a gzFile instance). This is dangerous if, at the same time, an async thread is performing an operation against the file: for example calling invoke_read(), which can result in a segmentation fault, or calling invoke_close(), which closes the gzFile twice and in turn causes 2 consecutive calls to driver_free() against the same gzFile instance (resulting in later unexpected crashes, in erl_bestfit_alloc.c for example).

The following test program makes the emulator crash when using the async thread pool:

    -module(t2).
    -export([t/1]).

    t(N) ->
        file:delete("foo.bar"),
        % Use of 'compressed' option, for creating/writing the file,
        % is irrelevant. It only matters when opening it later for
        % reads - a non-compressed file open with the 'compressed'
        % option goes through an internal gzFile handle (instead of
        % a plain integer fd), just like a compressed file.
        %{ok, Fd} = file:open("foo.bar", [raw, write, binary]),
        {ok, Fd} = file:open("foo.bar", [raw, write, binary, compressed]),
        ok = file:write(Fd, <<"qwerty">>),
        ok = file:close(Fd),
        Pid = spawn_link(fun() ->
            process_flag(trap_exit, true),
            loop(N)
        end),
        Ref = erlang:monitor(process, Pid),
        receive
            {'DOWN', Ref, _, _, _} ->
                ok
        end.

    loop(0) ->
        ok;
    loop(N) ->
        Server = self(),
        Pid = spawn(fun() ->
            {ok, Fd} = file:open("foo.bar", [read, raw, binary, compressed]),
            Server ! continue,
            % Comment the file:read/2 call to make the file:close/1 call much
            % more likely to crash or end up causing efile_drv to close twice
            % the fd (gzFile), which will make the emulator crash later in the
            % best fit allocator (erl_bestfit_alloc.c).
            _ = file:read(Fd, 5),
            file:close(Fd)
        end),
        receive continue -> ok end,
        exit(Pid, shutdown),
        loop(N - 1).

Running this test when using the async thread pool:

    shell> erl +A 4
    Erlang R15B03 (erts-5.9.3.1) [source] [64-bit] [smp:4:4] [async-threads:4] [hipe] [kernel-poll:false]

    Eshell V5.9.3.1 (abort with ^G)
    1> c(t2).
    {ok,t2}
    2> t2:t(500000).
    Segmentation fault (core dumped)

When not using the async thread pool, there are no issues:

    shell> erl
    Erlang R15B03 (erts-5.9.3.1) [source] [64-bit] [smp:4:4] [async-threads:0] [hipe] [kernel-poll:false]

    Eshell V5.9.3.1 (abort with ^G)
    1> c(t2).
    {ok,t2}
    2> t2:t(500000).
    ok
    3>

An example stack trace when the crash happens because there's an ongoing read operation is:

    Thread 1 (Thread 0x7f021cf2c700 (LWP 10687)):
    #0 updatewindow (strm=0x2691bf8, out=5) at zlib/inflate.c:338
    #1 0x00000000005a2ba0 in inflate (strm=0x2691bf8, flush=0) at zlib/inflate.c:1141
    #2 0x000000000055c46a in erts_gzread (file=0x2691bf8, buf=0x7f0215b29e80, len=5) at drivers/common/gzio.c:523
    #3 0x00000000005849ef in invoke_read (data=0x26b2228) at drivers/common/efile_drv.c:1114
    #4 0x000000000050adcb in async_main (arg=0x7f021bf5cf40) at beam/erl_async.c:488
    #5 0x00000000005c21a0 in thr_wrapper (vtwd=0x7fff69c6ff10) at pthread/ethread.c:106
    #6 0x00007f021c573e9a in start_thread () from /lib/x86_64-linux-gnu/libpthread.so.0
    #7 0x00007f021c097cbd in clone () from /lib/x86_64-linux-gnu/libc.so.6
    #8 0x0000000000000000 in ?? ()

And when there's an ongoing close operation when the driver is stopped:

    Thread 1 (Thread 0x7fe5f5654700 (LWP 747)):
    #0 0x0000000000459b64 in bf_unlink_free_block (block=0x10b2a70, allctr=<optimized out>, flags=<optimized out>) at beam/erl_bestfit_alloc.c:792
    #1 bf_unlink_free_block (flags=0, block=0x10b2a70, allctr=0x873380) at beam/erl_bestfit_alloc.c:822
    #2 bf_get_free_block (allctr=0x873380, size=<optimized out>, cand_blk=<optimized out>, cand_size=<optimized out>, flags=0) at beam/erl_bestfit_alloc.c:869
    #3 0x000000000044f0dd in mbc_alloc_block (alcu_flgsp=<synthetic pointer>, blk_szp=<synthetic pointer>, size=200, allctr=0x873380) at beam/erl_alloc_util.c:1198
    #4 mbc_alloc (allctr=0x873380, size=200) at beam/erl_alloc_util.c:1345
    #5 0x000000000045449b in do_erts_alcu_alloc (size=200, extra=0x873380, type=165) at beam/erl_alloc_util.c:3442
    #6 erts_alcu_alloc_thr_pref (type=165, extra=<optimized out>, size=192) at beam/erl_alloc_util.c:3520
    #7 0x000000000055c0bf in gz_open (mode=0x5d98b2 "rb", path=0x1103418 "foo.bar") at drivers/common/gzio.c:164
    #8 erts_gzopen (path=0x1103418 "foo.bar", mode=0x5d98b2 "rb") at drivers/common/gzio.c:307
    #9 0x0000000000584e47 in invoke_open (data=0x1103330) at drivers/common/efile_drv.c:1857
    #10 0x000000000050adcb in async_main (arg=0x7fe5f698af80) at beam/erl_async.c:488
1 parent 8b3bcc5 commit a2ed83a

File tree

1 file changed

+55
-2
lines changed

1 file changed

+55
-2
lines changed

lib/kernel/test/file_SUITE.erl

+55-2
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,8 @@
6060
-export([ read_not_really_compressed/1,
6161
read_compressed_cooked/1, read_compressed_cooked_binary/1,
6262
read_cooked_tar_problem/1,
63-
write_compressed/1, compress_errors/1, catenated_gzips/1]).
63+
write_compressed/1, compress_errors/1, catenated_gzips/1,
64+
compress_async_crash/1]).
6465

6566
-export([ make_link/1, read_link_info_for_non_link/1, symlinks/1]).
6667

@@ -135,7 +136,8 @@ groups() ->
135136
{compression, [],
136137
[read_compressed_cooked, read_compressed_cooked_binary,
137138
read_cooked_tar_problem, read_not_really_compressed,
138-
write_compressed, compress_errors, catenated_gzips]},
139+
write_compressed, compress_errors, catenated_gzips,
140+
compress_async_crash]},
139141
{links, [],
140142
[make_link, read_link_info_for_non_link, symlinks]}].
141143

@@ -2312,6 +2314,57 @@ compress_errors(Config) when is_list(Config) ->
23122314

23132315
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
23142316

2317+
%% Test case: writes a small compressed file, then repeatedly opens it for
%% compressed reading in short-lived worker processes that are killed while
%% their file operations may still be in flight. The case passes if every
%% iteration of compress_async_crash_loop/3 returns ok and the file can be
%% deleted afterwards.
compress_async_crash(suite) -> [];
2318+
compress_async_crash(doc) -> [];
2319+
compress_async_crash(Config) when is_list(Config) ->
2320+
% NOTE(review): the scratch file is created under data_dir; Common Test
% suites usually write temporary files under priv_dir -- confirm that
% data_dir is writable wherever this suite runs.
?line DataDir = ?config(data_dir, Config),
2321+
?line Path = filename:join(DataDir, "test.gz"),
2322+
ExpectedData = <<"qwerty">>,
2323+
2324+
% The result of this delete is ignored, so a missing file is not an error.
?line _ = ?FILE_MODULE:delete(Path),
2325+
?line {ok, Fd} = ?FILE_MODULE:open(Path, [write, binary, compressed]),
2326+
?line ok = ?FILE_MODULE:write(Fd, ExpectedData),
2327+
?line ok = ?FILE_MODULE:close(Fd),
2328+
2329+
% Test that, when using the async thread pool, the emulator doesn't crash
2330+
% when the efile port driver is stopped while a compressed file operation
2331+
% is in progress (being carried out by an async thread).
2332+
?line ok = compress_async_crash_loop(10000, Path, ExpectedData),
2333+
?line ok = ?FILE_MODULE:delete(Path),
2334+
ok.
2335+
2336+
%% compress_async_crash_loop(N, Path, ExpectedData) -> ok
%%
%% Runs N iterations. Each iteration spawns a monitored worker that opens
%% Path with [read, compressed, raw, binary], tells the parent it has done
%% so, and then reads and closes the file. The parent sends the worker a
%% 'shutdown' exit signal as soon as it is notified, so the signal can
%% arrive while the worker's read or close is still running. The iteration
%% succeeds only if the worker's exit reason is exactly 'shutdown'.
%% Returns ok when N reaches 0; otherwise fails via test_server:fail/1 or
%% a badmatch.
compress_async_crash_loop(0, _Path, _ExpectedData) ->
2337+
ok;
2338+
compress_async_crash_loop(N, Path, ExpectedData) ->
2339+
Parent = self(),
2340+
{Pid, Ref} = spawn_monitor(
2341+
fun() ->
2342+
?line {ok, Fd} = ?FILE_MODULE:open(
2343+
Path, [read, compressed, raw, binary]),
2344+
Len = byte_size(ExpectedData),
2345+
% Notify the parent before reading, so its exit signal can arrive
% during the read/close calls below.
Parent ! {self(), continue},
2346+
?line {ok, ExpectedData} = ?FILE_MODULE:read(Fd, Len),
2347+
?line ok = ?FILE_MODULE:close(Fd),
2348+
% Nothing visible here sends 'foobar'; the worker just waits to be killed.
receive foobar -> ok end
2349+
end),
2350+
receive
2351+
{Pid, continue} ->
2352+
% The worker has the file open: kill it and require reason 'shutdown'.
exit(Pid, shutdown),
2353+
receive
2354+
{'DOWN', Ref, _, _, Reason} ->
2355+
?line shutdown = Reason
2356+
end;
2357+
% The worker died before sending 'continue' (e.g. the open failed).
{'DOWN', Ref, _, _, Reason2} ->
2358+
test_server:fail({worker_exited, Reason2})
2359+
% No message from the worker within 60 seconds: clean up and fail.
after 60000 ->
2360+
exit(Pid, shutdown),
2361+
erlang:demonitor(Ref, [flush]),
2362+
test_server:fail(worker_timeout)
2363+
end,
2364+
compress_async_crash_loop(N - 1, Path, ExpectedData).
2365+
2366+
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
2367+
23152368
altname(doc) ->
23162369
"Test the file:altname/1 function";
23172370
altname(suite) ->

0 commit comments

Comments
 (0)