Skip to content

Commit eb0f56b

Browse files
committed
Tar.extract_file: read the content of individual file(s),
that match a predicate, from the tarball archive.
1 parent 37766a2 commit eb0f56b

File tree

4 files changed

+131
-1
lines changed

4 files changed

+131
-1
lines changed

Project.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
name = "Tar"
22
uuid = "a4e569a6-e804-4fa4-b0f3-eef7a1d5b13e"
33
authors = ["Stefan Karpinski <[email protected]>"]
4-
version = "1.9.0"
4+
version = "1.10.0"
55

66
[deps]
77
ArgTools = "0dad84c5-d112-42e6-8d28-ef12dabb789f"

README.md

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,24 @@ will also not be copied and will instead be skipped. By default, `extract` will
9898
detect whether symlinks can be created in `dir` or not and will automatically
9999
copy symlinks if they cannot be created.
100100

101+
### `Tar.extract_file`
102+
103+
```jl
104+
extract_file(predicate::Function, tarball, out) -> Vector{Header}
105+
extract_file(predicate::AbstractString, tarball, out) -> Header
106+
```
107+
* `predicate :: Union{Function, AbstractString}`
108+
* `tarball :: Union{AbstractString, AbstractCmd, IO}`
109+
* `out :: Union{AbstractString, AbstractCmd, IO}`
110+
111+
Read file(s) matching the predicate from `tarball` and write to `out`.
112+
Return the [`Header`](@ref)s of the matching files.
113+
114+
If `predicate::Function` it should take a `Header` as the only input
115+
argument and return `true`/`false`.
116+
If `predicate::String` it is interpreted as a path relative to the
117+
tarball root and must only match a single entry.
118+
101119
### Tar.list
102120

103121
```jl

src/extract.jl

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -590,3 +590,46 @@ function read_data(
590590
r < n && error("premature end of tar file")
591591
return view(buf, 1:size)
592592
end
593+
594+
595+
"""
596+
extract_file(predicate::Function, tarball, out) -> Vector{Header}
597+
extract_file(predicate::AbstractString, tarball, out) -> Header
598+
599+
Accepted argument types:
600+
* `predicate :: Union{AbstractString, Function}`
601+
* `tarball   :: Union{AbstractString, AbstractCmd, IO}`
601+
* `out       :: Union{AbstractString, AbstractCmd, IO}`
603+
604+
Read file(s) matching the predicate from `tarball` and write to `out`.
605+
Return the [`Header`](@ref)s of the matching files.
606+
607+
If `predicate::Function` it should take a `Header` as the only input
608+
argument and return `true`/`false`.
609+
If `predicate::String` it is interpreted as a path relative to the
610+
tarball root and must only match a single entry.
611+
"""
612+
function extract_file(predicate::Function, tarball::ArgRead, out::ArgWrite)::Vector{Header}
613+
headers = Header[]
614+
arg_read(tarball) do tar; arg_write(out) do io
615+
read_tarball(predicate, tar) do hdr, _
616+
if hdr.type == :file # TODO: read symlinks??
617+
push!(headers, hdr)
618+
read_data(tar, io, size=hdr.size)
619+
end
620+
end
621+
end end
622+
return headers
623+
end
624+
function extract_file(predicate::AbstractString, tarball::ArgRead, out::ArgWrite)::Header
625+
parts = filter!(x -> x != ".", splitpath(predicate))
626+
headers = extract_file(tarball, out) do hdr
627+
hdr_parts = filter!(x -> x != ".", splitpath(hdr.path))
628+
hdr.type == :file && parts == hdr_parts
629+
end
630+
if length(headers) != 1
631+
s = length(headers) == 0 ? "no" : "multiple"
632+
throw(ArgumentError("$s files in the tarball matches the filename $predicate"))
633+
end
634+
return headers[1]
635+
end

test/runtests.jl

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -601,6 +601,75 @@ end
601601
end
602602
end
603603

604+
@testset "API: extract_file" begin
605+
mktempdir() do dir
606+
open(joinpath(dir, "file.txt"), "w") do io
607+
write(io, "file at the root")
608+
end
609+
dir2 = mkdir(joinpath(dir, "directory"))
610+
open(joinpath(dir2, "file2.txt"), "w") do io
611+
write(io, "file in directory")
612+
end
613+
tarball = Tar.create(dir)
614+
615+
for tar in (()->tarball, ()->open(tarball))
616+
## predicate::String
617+
io = IOBuffer()
618+
for pred in ("file.txt", "./file.txt")
619+
hdr = Tar.extract_file(pred, tar(), io)
620+
@test hdr.path == "file.txt"
621+
@test hdr.size == 16
622+
@test String(take!(io)) == "file at the root"
623+
end
624+
625+
for pred in ("directory/file2.txt", "./directory/file2.txt")
626+
hdr = Tar.extract_file(pred, tar(), io)
627+
@test hdr.path == "directory/file2.txt"
628+
@test hdr.size == 17
629+
@test String(take!(io)) == "file in directory"
630+
end
631+
632+
@test_throws ArgumentError("no files in the tarball matches the filename nope") Tar.extract_file("nope", tar(), io)
633+
634+
# predicate::Function
635+
hdrs = Tar.extract_file(tar(), io) do hdr
636+
hdr.path == "file.txt"
637+
end
638+
@test length(hdrs) == 1
639+
@test hdrs[1].path == "file.txt"
640+
@test hdrs[1].size == 16
641+
@test String(take!(io)) == "file at the root"
642+
643+
hdrs = Tar.extract_file(hdr -> true, tar(), io)
644+
@test length(hdrs) == 2
645+
str = String(take!(io))
646+
@test occursin("file at the root", str)
647+
@test occursin("file in directory", str)
648+
@test sum(h.size for h in hdrs) == sizeof(str)
649+
650+
hdrs = Tar.extract_file(hdr -> true, tar(), io)
651+
@test length(hdrs) == 2
652+
str = String(take!(io))
653+
@test occursin("file at the root", str)
654+
@test occursin("file in directory", str)
655+
@test sum(h.size for h in hdrs) == sizeof(str)
656+
657+
hdrs = Tar.extract_file(hdr -> false, tar(), io)
658+
@test length(hdrs) == 0
659+
@test sizeof(String(take!(io))) == 0
660+
661+
# Non-IO output
662+
mktempdir() do tmpd
663+
o = joinpath(tmpd, "out.data")
664+
Tar.extract_file("file.txt", tar(), o)
665+
@test read(o, String) == "file at the root"
666+
Tar.extract_file(hdr -> hdr.path == "file.txt", tar(), o)
667+
@test read(o, String) == "file at the root"
668+
end
669+
end
670+
end
671+
end
672+
604673
@testset "API: rewrite" begin
605674
# reference standard tarball
606675
reference, hash₁ = make_test_tarball()

0 commit comments

Comments
 (0)