Skip to content

Commit

Permalink
indexdir uses md5
Browse files Browse the repository at this point in the history
  • Loading branch information
nicholas-leonard committed Feb 10, 2015
1 parent 994b158 commit 33daf63
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 15 deletions.
35 changes: 21 additions & 14 deletions indexdir.lua
Original file line number Diff line number Diff line change
Expand Up @@ -3,16 +3,10 @@
-- e.g. fileList = torch.indexdir("/path/to/files/", 'png', true)
-- index the directory by creating a CharTensor of file paths.
-- returns an object which can be used to efficiently list files in dir
function paths.indexdir(path, extensionList, use_cache)
function paths.indexdir(pathList, extensionList, use_cache)
extensionList = extensionList or {'jpg', 'png','JPG','PNG','JPEG', 'ppm', 'PPM', 'bmp', 'BMP'}
extensionList = (torch.type(extensionList) == 'string') and {extensionList} or extensionList

-- repository name makes cache file unique
local findFile = path:gsub('/', '-th-') .. '---' .. table.concat(extensionList) .. '.txt'
findFile = paths.concat(paths.dirname(os.tmpname()), findFile)
-- find the image path names
local fileList = torch.CharTensor() -- path to each image in dataset

-- define command-line tools, try your best to maintain OSX compatibility
local wc = 'wc'
local cut = 'cut'
Expand All @@ -22,19 +16,32 @@ function paths.indexdir(path, extensionList, use_cache)
cut = 'gcut'
find = 'gfind'
end

local fileList = torch.CharTensor() -- path to each image in dataset
pathList = (torch.type(pathList) == 'string') and {pathList} or pathList
-- repository name makes cache file unique
local unique = table.concat(paths)..table.concat(extensionList)
-- use hash to make string shorter
local findFile = torch.md5.sumhexa(unique)
findFile = paths.concat(paths.dirname(os.tmpname()), findFile)

if not (use_cache and paths.filep(findFile)) then
if paths.filep(findFile) then
os.execute("rm "..findFile)
end

if not (use_cache and paths.filep(findFile)) then
-- Options for the GNU find command
local findOptions = ' -iname "*.' .. extensionList[1] .. '"'
for i=2,#extensionList do
findOptions = findOptions .. ' -o -iname "*.' .. extensionList[i] .. '"'
end

-- run "find" on each class directory, and concatenate all
-- those filenames into a single file containing all file paths
local command = find .. ' "' .. path .. '" ' .. findOptions .. ' > "' .. findFile .. '"'

os.execute(command)

for i, path in ipairs(pathList) do
-- run "find" on each directory, and concatenate all
-- those filenames into a single file containing all file paths
local command = find .. ' "' .. path .. '" ' .. findOptions .. ' >> "' .. findFile .. '"'
os.execute(command)
end
end

-- load the large concatenated list of file paths to fileList
Expand Down
2 changes: 1 addition & 1 deletion init.lua
Original file line number Diff line number Diff line change
Expand Up @@ -14,4 +14,4 @@ torch.include('torchx', 'indexdir.lua')

torch.include('torchx', 'test.lua')

md5 = require'torchx.md5'
torch.md5 = require 'torchx.md5'

0 comments on commit 33daf63

Please sign in to comment.