Adds image extraction code
abhshkdz committed Apr 27, 2017
1 parent 3c0cad7 commit bf0ba7d
Showing 4 changed files with 137 additions and 3 deletions.
7 changes: 6 additions & 1 deletion README.md
@@ -25,7 +25,12 @@ If you just want to run the model on your own images, download links to pretrain

### Extract image features

-TODO
+Since we don't finetune the CNN, training is significantly faster if image features are pre-extracted. We use image features from VGG-19. The model can be downloaded and features extracted using:
+
+```
+sh scripts/download_vgg19.sh
+th prepro_img.lua -image_root /path/to/coco/images/ -gpuid 0
+```
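
With the default options this writes `data/img_train.h5` and `data/img_test.h5`, containing one 512x14x14 VGG-19 feature map per unique image under the datasets `/images_train` and `/images_test`. A minimal sanity check after extraction (a sketch, assuming the default output paths and dataset names) could be:

```lua
-- Sketch: check the extracted train features (default paths and dataset names).
require 'hdf5'
local h5 = hdf5.open('data/img_train.h5', 'r')
-- read only the first image's feature map rather than loading the whole file
local feat = h5:read('/images_train'):partial({1, 1}, {1, 512}, {1, 14}, {1, 14})
print(feat:size())  -- expected: 1 x 512 x 14 x 14
h5:close()
```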

### Preprocess VQA dataset

4 changes: 2 additions & 2 deletions prepro.py
@@ -248,8 +248,8 @@ def main(params):
parser.add_argument('--input_test_json', required=True, help='input json file to process into hdf5')
parser.add_argument('--num_ans', required=True, type=int, help='number of top answers for the final classifications.')

-parser.add_argument('--output_json', default='params.json', help='output json file')
-parser.add_argument('--output_h5', default='qa.h5', help='output h5 file')
+parser.add_argument('--output_json', default='data/params.json', help='output json file')
+parser.add_argument('--output_h5', default='data/qa.h5', help='output h5 file')

# options
parser.add_argument('--max_length', default=26, type=int, help='max length of a caption, in number of words. captions longer than this get clipped.')
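
The new `data/` defaults match `prepro_img.lua` below, whose `-input_json` option defaults to `data/params.json`.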
123 changes: 123 additions & 0 deletions prepro_img.lua
@@ -0,0 +1,123 @@
require 'nn'
require 'xlua'
require 'math'
require 'hdf5'
require 'image'
require 'loadcaffe'
cjson = require('cjson')

-------------------------------------------------------------------------------
-- Input arguments and options
-------------------------------------------------------------------------------
cmd = torch.CmdLine()
cmd:text()
cmd:text('Options')
cmd:option('-input_json','data/params.json','path to the json file containing vocab and answers')
cmd:option('-image_root','/path/to/coco/images/','path to the image root')
cmd:option('-cnn_proto', 'models/vgg19/VGG_ILSVRC_19_layers_deploy.prototxt', 'path to the cnn prototxt')
cmd:option('-cnn_model', 'models/vgg19/VGG_ILSVRC_19_layers.caffemodel', 'path to the cnn model')
cmd:option('-batch_size', 20, 'batch_size')

cmd:option('-out_name_train', 'data/img_train.h5', 'output name')
cmd:option('-out_name_test', 'data/img_test.h5', 'output name')
cmd:option('-gpuid', 0, 'which gpu to use. -1 = use CPU')
cmd:option('-backend', 'nn', 'nn|cudnn')

opt = cmd:parse(arg)
print(opt)

net = loadcaffe.load(opt.cnn_proto, opt.cnn_model, opt.backend)
-- Strip the classifier layers so the truncated network ends at the last pooling
-- layer and outputs a 512x14x14 feature map for a 448x448 input.
for i = 1, 9 do
  net:remove()
end
print(net)

if opt.gpuid >= 0 then
  require 'cutorch'
  require 'cunn'
  cutorch.setDevice(opt.gpuid + 1)
  net = net:cuda()
end
net:evaluate()

-- Load an image, resize to 448x448, force 3 channels, convert RGB -> BGR,
-- and subtract the VGG mean pixel (Caffe-style preprocessing).
function loadim(imname)
  local im = image.load(imname)
  im = image.scale(im, 448, 448)
  if im:size(1) == 1 then
    -- grayscale: replicate the single channel three times
    local im3 = torch.cat(im, im, 1)
    im = torch.cat(im3, im, 1)
  elseif im:size(1) == 4 then
    -- RGBA: drop the alpha channel
    im = im[{{1, 3}, {}, {}}]
  end
  im = im * 255
  local im2 = im:clone()
  im2[{{3}, {}, {}}] = im[{{1}, {}, {}}] - 123.68
  im2[{{2}, {}, {}}] = im[{{2}, {}, {}}] - 116.779
  im2[{{1}, {}, {}}] = im[{{3}, {}, {}}] - 103.939
  return im2
end

local image_root = opt.image_root

local file = io.open(opt.input_json, 'r')
local text = file:read()
file:close()
json_file = cjson.decode(text)

-- Build full paths to the unique train/test images listed in the input json.
local train_list = {}
for i, imname in pairs(json_file['unique_img_train']) do
  table.insert(train_list, image_root .. imname)
end

local test_list = {}
for i, imname in pairs(json_file['unique_img_test']) do
  table.insert(test_list, image_root .. imname)
end

local batch_size = opt.batch_size

-- Extract features for the training images in batches.
local sz = #train_list
local feat_train = torch.FloatTensor(sz, 512, 14, 14)
print(string.format('processing %d images...', sz))
for i = 1, sz, batch_size do
  xlua.progress(i, sz)
  local r = math.min(sz, i + batch_size - 1)
  local ims = torch.DoubleTensor(r - i + 1, 3, 448, 448)
  for j = 1, r - i + 1 do
    ims[j] = loadim(train_list[i + j - 1])
  end
  if opt.gpuid >= 0 then
    ims = ims:cuda()
  end
  net:forward(ims)
  -- net.output is (batch x 512 x 14 x 14); copy this batch into the right rows
  feat_train[{{i, r}, {}}] = net.output:float()
  collectgarbage()
end

local train_h5_file = hdf5.open(opt.out_name_train, 'w')
train_h5_file:write('/images_train', feat_train)
train_h5_file:close()

print(string.format('wrote %s', opt.out_name_train))

-- Extract features for the test images in batches.
local sz = #test_list
local feat_test = torch.FloatTensor(sz, 512, 14, 14)
print(string.format('processing %d images...', sz))
for i = 1, sz, batch_size do
  xlua.progress(i, sz)
  local r = math.min(sz, i + batch_size - 1)
  local ims = torch.DoubleTensor(r - i + 1, 3, 448, 448)
  for j = 1, r - i + 1 do
    ims[j] = loadim(test_list[i + j - 1])
  end
  if opt.gpuid >= 0 then
    ims = ims:cuda()
  end
  net:forward(ims)
  feat_test[{{i, r}, {}}] = net.output:float()
  collectgarbage()
end

local test_h5_file = hdf5.open(opt.out_name_test, 'w')
test_h5_file:write('/images_test', feat_test)
test_h5_file:close()
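
The rows of `/images_train` and `/images_test` are written in the order the input json lists `unique_img_train` and `unique_img_test`, so downstream code can look up an image's features by its index in those lists. A minimal sketch of such a lookup, assuming the default paths from above:

```lua
-- Sketch: fetch the feature map of the k-th unique training image.
-- Assumes data/params.json and data/img_train.h5 were produced with the defaults above.
require 'hdf5'
local cjson = require 'cjson'

local f = io.open('data/params.json', 'r')
local json_file = cjson.decode(f:read('*a'))
f:close()

local h5 = hdf5.open('data/img_train.h5', 'r')
local k = 1  -- row index, aligned with json_file['unique_img_train']
local feat = h5:read('/images_train'):partial({k, k}, {1, 512}, {1, 14}, {1, 14})
print(json_file['unique_img_train'][k], feat:size())
h5:close()
```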

6 changes: 6 additions & 0 deletions scripts/download_vgg19.sh
@@ -0,0 +1,6 @@
#!/bin/sh
mkdir -p models/vgg19
cd models/vgg19
wget https://gist.githubusercontent.com/ksimonyan/3785162f95cd2d5fee77/raw/f43eeefc869d646b449aa6ce66f87bf987a1c9b5/VGG_ILSVRC_19_layers_deploy.prototxt
wget http://www.robots.ox.ac.uk/~vgg/software/very_deep/caffe/VGG_ILSVRC_19_layers.caffemodel
cd ../..
