Initial commit

brownvc · Feb 8, 2022 · b5d0614 · b5d0614
commit b5d0614
Show file tree

Hide file tree

Showing 14 changed files with 2,609 additions and 0 deletions.
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,8 @@
+.DS_Store
+**/__pycache__/
+env/
+*.pyc
+sample_images/
+midas-net.pt
+bg-images/
+**/.vscode/
diff --git a/README.md b/README.md
@@ -0,0 +1,64 @@
+### GaussiGAN - Interactive Flask App
+
+An interactive Flask app for the paper [GaussiGAN: Controllable Image Synthesis with 3D Gaussians from Unposed Silhouettes](https://arxiv.org/abs/2106.13215)
+
+[[Project Page]](https://visual.cs.brown.edu/projects/gaussigan-webpage/) [[Paper]](https://arxiv.org/ftp/arxiv/papers/2106/2106.13215.pdf)  [[Main GitHub]](https://github.com/AlamiMejjati/GaussiGAN)
+
+<img src="docs/interaction_4x.gif" width="800"/>
+
+
+## Setup
+
+#### Environment
+
+A conda environment YAML (tested on Ubuntu 20.04.3 LTS) is provided. To use it, first run `conda env create -f environment.yml`, then activate it by running `conda activate gaussigan_flask`.
+
+If building your own environment, the following packages must be installed:
+
+- Python 3.7
+- Numpy
+- TensorFlow 1.15
+- Tensorpack
+- OpenCV
+- Flask
+- Pillow
+
+GPU is not a requirement for running the application, and the CPU-only installation of TensorFlow will suffice.
+
+#### Model weights
+
+Model weights can be downloaded from the "Releases" page of this GitHub repo. Place each unzipped directory in `gaussigan_gui/static/models/` without modifying the unzipped directory's name. The app will search this `models/` directory for weights and list them in the "Select model" drop-down list in the GUI.
+
+## Usage
+
+### Starting the Flask app
+
+Within your environment, and from the base directory of the repo, run `flask run`. This will print a message that includes the line:
+
+```
+* Running on <URL> (Press CTRL+C to quit)
+```
+
+Enter the printed URL into your browser to open the app.
+
+### Interaction
+
+On the left-hand side of the GUI, there are two interaction modes which the user may switch between using the switch button element:
+
+##### "Draw bounding box" mode
+
+- Click and drag on the background image to draw a bounding box.
+- Click and drag within a drawn bounding box to translate it.
+- Double-click the background image to draw a bounding box that maximally fits the center of the background image.
+- Double-click within a drawn bounding box to erase it.
+
+##### "Edit Gaussians" mode
+
+- Click and drag Gaussians to translate them along an xy-plane parallel to the camera plane.
+- Hold <kbd>Shift</kbd> while dragging a Gaussian up and down to translate it along the z-axis relative to the camera (i.e., its distance from the camera).
+- Hold <kbd>Alt</kbd> or <kbd>Option</kbd> to enable arcball rotation of Gaussians.
+- Hold <kbd>1</kbd>, <kbd>2</kbd>, or <kbd>3</kbd> while dragging a Gaussian to modify its scale along its three principal axes respectively. In other words, these modes allow for scaling of the eigenvalues in our eigendecomposition of the Gaussian covariance matrix.
+
+##### Other key press interactions:
+
+- Press <kbd>Enter</kbd> or <kbd>Return</kbd> to generate the mask and texture of the current Gaussian configuration.
diff --git a/app.py b/app.py
@@ -0,0 +1,2 @@
+from gaussigan_gui import app
+app.run()
diff --git a/docs/interaction_4x.gif b/docs/interaction_4x.gif
diff --git a/environment.yml b/environment.yml
@@ -0,0 +1,13 @@
+name: gaussigan_flask
+channels:
+  - hcc
+  - defaults
+dependencies:
+  - python=3.7.11
+  - pip=21.2.2
+  - flask=1.1.2
+  - pillow=8.4.0
+  - tensorflow=1.15.0
+  - hcc::tensorpack=0.11
+  - pip:
+    - opencv-python
diff --git a/gaussigan_gui/__init__.py b/gaussigan_gui/__init__.py
@@ -0,0 +1,213 @@
+"""
+Initialization for Flask app.
+"""
+
+import time
+import os
+import glob
+
+import numpy as np
+from flask import Flask, render_template, request, jsonify, \
+        redirect, url_for, send_from_directory
+from PIL import Image
+import gaussigan_gui.model_runner as model_runner
+import base64
+from io import BytesIO
+import cv2
+
+app = Flask(__name__)
+
+@app.route('/')
+def main():
+    """
+    Setup the webpage.
+    """
+
+    img_shape = 256
+    bg_img_path = "../static/defaults/bg.jpg"
+    bg_img = model_runner.bg_img
+    if bg_img is None:
+      bg_img = np.array(Image.open("gaussigan_gui/static/defaults/bg.jpg"))
+      bg_img = model_runner.update_bg_image(bg_img, 500, 400)
+    bg_img_width = bg_img.shape[1]
+    bg_img_height = bg_img.shape[0]
+    model_list = glob.glob("gaussigan_gui/static/models/*/")
+
+    num_gaussians = model_runner.nb_landmarks
+    if num_gaussians is None:
+      num_gaussians = model_runner.load_model(model_list[0])
+
+    for i, model in enumerate(model_list):
+        model_list[i] = model.split("/")[-2]
+
+    model_list_string = "["
+    for i, model in enumerate(model_list):
+        model_list_string += "\""
+        model_list_string += model
+        model_list_string += "\""
+        if i != len(model_list) - 1:
+            model_list_string += ", "
+    model_list_string += "]"
+
+    return render_template(
+        'interactive.html.jinja',
+        img_shape=img_shape,
+        bg_img_height=str(bg_img_height),
+        bg_img_width=str(bg_img_width),
+        bg_img_path=bg_img_path,
+        num_gaussians=num_gaussians,
+        model_list=model_list)
+
+
+@app.route('/api/run_gaussians', methods=['POST'])
+def run_gaussians():
+    """
+    Sample Gaussian landmark parameters.
+    """
+
+    mu3d, sigma3d, theta3d = model_runner.sample_l()
+
+    return jsonify({
+        "mu3d": mu3d.tolist(),
+        "sigma3d": sigma3d.tolist(),
+        "theta3d": theta3d
+    })
+
+
+@app.route('/api/run_mask', methods=['POST'])
+def run_mask():
+    """
+    Run generative model to create mask image.
+    """
+
+    request_content = request.get_json()
+    mu2d = np.array(request_content["gaussian_mu2ds"])
+    sigma2d = np.array(request_content["gaussian_sigma2ds"])
+    mask = model_runner.generate_mask(mu2d, sigma2d)
+    return jsonify({"mask_img": encode_img(mask)});
+
+
+@app.route('/api/sample_z', methods=['POST'])
+def sample_z():
+    """
+    Run generative model to create texture image.
+    """
+
+    request_content = request.get_json()
+    bbx_region = np.array(request_content["bbx_region"])
+    mu2d = np.array(request_content["gaussian_mu2ds"])
+    sigma2d = np.array(request_content["gaussian_sigma2ds"])
+
+    model_runner.sample_z()
+
+    texture_img = model_runner.generate_texture(
+            bbx_region, mu2d, sigma2d)
+
+    return jsonify({"texture_img": encode_img(texture_img)})
+
+
+@app.route('/api/run_texture', methods=['POST'])
+def run_texture():
+    """
+    Run generative model to create texture image.
+    """
+
+    request_content = request.get_json()
+    bbx_region = np.array(request_content["bbx_region"])
+    mu2d = np.array(request_content["gaussian_mu2ds"])
+    sigma2d = np.array(request_content["gaussian_sigma2ds"])
+
+    texture_img = model_runner.generate_texture(
+            bbx_region, mu2d, sigma2d)
+
+    return jsonify({"texture_img": encode_img(texture_img)})
+
+
+@app.route('/api/run_mask_and_texture', methods=['POST'])
+def run_mask_and_texture():
+    """
+    Run generative models to create mask and texture images.
+    """
+
+    request_content = request.get_json()
+    bbx_region = np.array(request_content["bbx_region"])
+    mu2d = np.array(request_content["gaussian_mu2ds"])
+    sigma2d = np.array(request_content["gaussian_sigma2ds"])
+
+    mask, texture = \
+        model_runner.generate_mask_and_texture(bbx_region, mu2d, sigma2d)
+
+    return jsonify({
+        "mask_img": encode_img(mask),
+        "texture_img": encode_img(texture)
+    })
+
+@app.route('/api/run_all', methods=['POST'])
+def run_all():
+    """
+    Sample Gaussians and generate mask/texture images.
+    """
+
+    request_content = request.get_json()
+    bbx_region = np.array(request_content["bbx_region"])
+
+    return_vals = model_runner.generate_all(bbx_region)
+
+    return jsonify({
+        "mu3d": return_vals[0].tolist(),
+        "sigma3d": return_vals[1].tolist(),
+        "theta3d": return_vals[2],
+        "mask_img": encode_img(return_vals[3]),
+        "texture_img": encode_img(return_vals[4])
+    })
+
+@app.route('/api/update_bg', methods=['POST'])
+def update_bg():
+    image_file = request.files['bg_file'].read()
+    np_file = np.fromstring(image_file, np.uint8)
+    img = cv2.imdecode(np_file, cv2.IMREAD_COLOR)
+    img = img[..., ::-1]
+
+    resized_img = model_runner.update_bg_image(img, 500, 400)
+
+    return jsonify({"resized_img": encode_img(resized_img)})
+
+@app.route('/api/get_bg', methods=['GET'])
+def get_bg():
+    return jsonify({"bg_img": encode_img(model_runner.get_bg_image())})
+
+@app.route('/api/load_model', methods=['POST'])
+def load_model():
+    model_name = request.get_json()['model_name']
+
+    model_path = os.path.join("gaussigan_gui/static/models/", model_name)
+    if os.path.isdir(model_path):
+        num_gaussians = model_runner.load_model("gaussigan_gui/static/models/" + model_name)
+    else:
+        return jsonify({"successful": False})
+
+    return jsonify({"successful": True, "num_gaussians": num_gaussians})
+
+@app.route('/display/<filename>', methods=['GET'])
+def display_image(filename):
+    return redirect(url_for('static', filename=filename), code=301)
+
+
+@app.route('/favicon.ico')
+def favicon():
+    return send_from_directory(
+        os.path.join(app.root_path, 'static'),
+        'defaults/favicon.ico')
+
+
+def encode_img(img):
+    """
+    Encode numpy image to base64.
+    """
+
+    if np.max(img) <= 1.:
+        img *= 255.
+    gauss_maps_img = Image.fromarray(img.astype(np.uint8))
+    buffer = BytesIO()
+    gauss_maps_img.save(buffer, format="PNG")
+    return base64.b64encode(buffer.getvalue()).decode('utf-8')
diff --git a/gaussigan_gui/dataloader.py b/gaussigan_gui/dataloader.py
@@ -0,0 +1,73 @@
+import cv2
+from tensorpack import *
+import numpy as np
+import os
+import pickle
+import random
+
+
+def load_mask(mask_file, shape):
+    """
+    Args:
+        files (list): list of file paths.
+        channel (int): 1 or 3. Will convert grayscale to RGB images if channel==3.
+            Will produce (h, w, 1) array if channel==1.
+        resize (tuple): int or (h, w) tuple. If given, resize the image.
+    """
+    m = cv2.imread(mask_file, cv2.IMREAD_GRAYSCALE)
+    box = find_bbx(m)
+    m = m[box[0]:box[2], box[1]:box[3]]
+    maskj = cv2.resize(m, (shape, shape))
+    maskj = np.expand_dims(maskj, axis=-1)
+
+    return maskj[None, :, :, :]
+
+def find_bbx(maskj):
+
+    maskj = np.expand_dims(maskj, axis=-1)
+    box = np.array([0, 0, 0, 0])
+
+    # Compute Bbx coordinates
+    margin = 3
+    xs = np.nonzero(np.sum(maskj, axis=0))[0]
+    ys = np.nonzero(np.sum(maskj, axis=1))[0]
+    box[1] = xs.min() - margin
+    box[3] = xs.max() + margin
+    box[0] = 0
+    box[2] = maskj.shape[0]
+
+    if box[0] < 0: box[0] = 0
+    if box[1] < 0: box[1] = 0
+
+    h = box[2] - box[0]
+    w = box[3] - box[1]
+    if h < w:
+        diff = w - h
+        half = int(diff / 2)
+        box[0] -= half
+        if box[0] < 0:
+            box[2] -= box[0]
+            box[0] = 0
+        else:
+            box[2] += diff - half
+
+        if box[2] > maskj.shape[0]:
+            box[2] = maskj.shape[0]
+    else:
+        diff = h - w
+        half = int(diff / 2)
+        box[1] -= half
+        if box[1] < 0:
+            box[3] -= box[1]
+            box[1] = 0
+        else:
+            box[3] += diff - half
+        if box[3] > maskj.shape[1]:
+            box[3] = maskj.shape[1]
+
+    if box[3] == box[1]:
+        box[3] += 1
+    if box[0] == box[2]:
+        box[2] += 1
+
+    return box