Add support for Windows. #61

Open · wants to merge 3 commits into base: master
3 changes: 2 additions & 1 deletion .gitignore
@@ -6,4 +6,5 @@ python/build
python/dist
*.egg-info
*.pkl
log*/
log*/
*.bat
2 changes: 1 addition & 1 deletion README.md
@@ -13,7 +13,7 @@ JNeRF is an NeRF benchmark based on [Jittor](https://github.com/Jittor/jittor).
## Install
JNeRF environment requirements:

* System: **Linux**(e.g. Ubuntu/CentOS/Arch), **macOS**, or **Windows Subsystem of Linux (WSL)**
* System: **Linux**(e.g. Ubuntu/CentOS/Arch), **macOS**, **Windows**, or **Windows Subsystem of Linux (WSL)**
* Python version >= 3.7
* CPU compiler (require at least one of the following)
* g++ (>=5.4.0)
@@ -61,7 +61,10 @@ def __init__(self, hash_func_header, aabb_scale=1, n_pos_dims=3, n_features_per_
self.m_grid_gradient = jt.empty([m_n_params], self.grad_type)
self.m_stochastic_interpolation = 0
header_path = os.path.join(os.path.dirname(__file__), 'op_header')
proj_options[f"FLAGS: -I{header_path}"]=1
if sys.platform == "linux":
proj_options[f"FLAGS: -I{header_path}"]=1
else:
proj_options[f'FLAGS: -I"{header_path}"']=1

def execute(self, x,m_grid):
self.num_elements=x.shape[0]
@@ -87,7 +90,7 @@ def execute(self, x,m_grid):
const uint32_t blocks = div_round_up(num_elements, threads.x);
extract_position<float,N_POS_DIMS><<<blocks, threads, 0, stream>>>(
num_elements,
{{in1_p,in1_shape1}},
{{in1_p,(size_t)in1_shape1}},
m_positions_p
);
static constexpr uint32_t N_THREADS_HASHGRID = 512;
@@ -115,7 +118,7 @@ def execute(self, x,m_grid):
const dim3 threads_transpose = {{ {self.m_n_levels}, 8, 1 }};
const uint32_t blocks_transpose = div_round_up(num_elements, threads_transpose.y);

PitchedPtr<grad_t> outputs{{ out0_p,out0_shape1 }};
PitchedPtr<grad_t> outputs{{ out0_p,(size_t)out0_shape1 }};

transpose_encoded_position<vector_t<grad_t,N_FEATURES_PER_LEVEL>><<<blocks_transpose, threads_transpose, 0, stream>>>(
num_elements,
@@ -149,7 +152,7 @@ def grad(self, grad_x):
const unsigned int N_FEATURES_PER_LEVEL={self.N_FEATURES_PER_LEVEL};
cudaStream_t stream=0;
const dim3 threads_transpose ={{ {self.m_n_levels} , 8, 1}};
PitchedPtr<grad_t> dL_dy{{ in2_p,in2_shape1 }};
PitchedPtr<grad_t> dL_dy{{ in2_p,(size_t)in2_shape1 }};
cudaMemsetAsync(out0_p, 0, out0->size);
const uint32_t blocks_transpose = div_round_up(num_elements, threads_transpose.y);
transpose_gradients<vector_t<grad_t, N_FEATURES_PER_LEVEL>><<<blocks_transpose, threads_transpose, 0, stream>>>(
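Note on the recurring `proj_options` hunks in this PR: the only change is how the `-I` include flag is built. On Windows the header path is wrapped in quotes, presumably so directories containing spaces (and backslashes) survive being spliced into the compiler command line, while the Linux branch keeps the original unquoted form. A minimal sketch of the pattern, using a hypothetical helper name (`include_flag`) that does not appear in the PR:

```python
import sys

def include_flag(path: str) -> str:
    """Build the -I part of a Jittor FLAGS entry.

    Hypothetical helper, not part of the PR: it only factors out the
    pattern the PR repeats inline in each encoder/sampler __init__.
    On Windows the path is quoted so directories with spaces survive
    the compiler command line; on Linux the bare path is passed through.
    """
    if sys.platform == "linux":
        return f"-I{path}"
    return f'-I"{path}"'

# Hypothetical usage, mirroring the modified files:
# proj_options[f"FLAGS: {include_flag(header_path)}"] = 1
```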
@@ -2,6 +2,7 @@
import jittor as jt
from jittor import Function
import numpy as np
import sys
from jnerf.ops.code_ops.global_vars import global_headers,proj_options
from jnerf.utils.config import get_cfg
from jnerf.utils.registry import ENCODERS
@@ -20,7 +21,10 @@ def __init__(self) :
else:
self.grad_type='float32'
header_path = os.path.join(os.path.dirname(__file__), 'op_header')
proj_options[f"FLAGS: -I{header_path}"]=1
if sys.platform == "linux":
proj_options[f"FLAGS: -I{header_path}"]=1
else:
proj_options[f'FLAGS: -I"{header_path}"']=1
self.out_dim=self.m_n_padded_output_dims

def execute(self,x) :
@@ -37,8 +41,8 @@ def execute(self,x) :

cudaStream_t stream=0;

PitchedPtr<const float> inputs={{in0_p,in0_shape1}};
PitchedPtr<grad_t> outputs={{out_p,out_shape1}};
PitchedPtr<const float> inputs={{in0_p,(size_t)in0_shape1}};
PitchedPtr<grad_t> outputs={{out_p,(size_t)out_shape1}};
float* dy_dx = nullptr;
linear_kernel(kernel_sh<grad_t>, 0, stream,
num_elements,
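Note on the `(size_t)` casts in the CUDA snippets above: Jittor's generated shape accessors (`in0_shape1`, `out_shape1`, ...) are signed integers, and they are passed to `PitchedPtr` through a braced initializer whose stride parameter is a size type in the tiny-cuda-nn-style headers bundled under `op_header`. MSVC treats that implicit signed-to-unsigned narrowing inside a braced initializer as an error, which the Linux toolchain appears to have tolerated, so the cast spells the conversion out without changing behaviour. A small before/after sketch (an illustration, not code from the PR):

```python
# Before: braced init narrows a signed Jittor shape value into an unsigned
# stride, which MSVC rejects when compiling the generated op.
before = "PitchedPtr<const float> inputs = {in0_p, in0_shape1};"

# After: the conversion is explicit, so the same generated CUDA compiles
# with both toolchains.
after = "PitchedPtr<const float> inputs = {in0_p, (size_t)in0_shape1};"
```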
@@ -91,7 +91,10 @@ def __init__(self, update_den_freq=16, update_block_size=5000000):
self.dataset_ray_data = False # whether the dataset contains ray information

header_path = os.path.join(os.path.dirname(__file__), 'op_header')
proj_options[f"FLAGS: -I{header_path}"]=1
if sys.platform == "linux":
proj_options[f"FLAGS: -I{header_path}"]=1
else:
proj_options[f'FLAGS: -I"{header_path}"']=1

self.density_grad_header = f"""
inline constexpr __device__ uint32_t NERF_GRIDSIZE() {{ return {self.NERF_GRIDSIZE}; }} // size of the density/occupancy grid.
50 changes: 38 additions & 12 deletions contrib/mipnerf/python/jnerf/ops/code_ops/global_vars.py
@@ -1,27 +1,53 @@
import os
import sys
import jittor as jt
jt.flags.use_cuda = 1

global_headers = """
export_gloabl = ''
import_global = 'extern'
if sys.platform == "win32":
export_gloabl = '__declspec(dllexport)'
import_global = '__declspec(dllimport) extern'

global_headers = f"""
#include "pcg32.h"
namespace jittor {
extern int global_var1;
extern pcg32 rng;
}
namespace jittor {{
EXTERN_LIB int global_var1;
EXTERN_LIB pcg32 rng;
}}
"""

global_src = """
namespace jittor {
int global_var1 = 123;
pcg32 rng{1337};
}
global_decl_headers = f"""
#include "pcg32.h"
namespace jittor {{
{export_gloabl} int global_var1;
{export_gloabl} pcg32 rng;
}}
"""

global_src = f"""
#include "pcg32.h"
namespace jittor {{
{export_gloabl} int global_var1 = 123;
{export_gloabl} pcg32 rng{{1337}};
}}
"""

proj_path = os.path.join(os.path.dirname(__file__), '..', 'op_include')
proj_options = { f"FLAGS: -I{proj_path}/eigen -I{proj_path}/include -I{proj_path}/pcg32 -I{proj_path}/../op_header -DGLOBAL_VAR --extended-lambda --expt-relaxed-constexpr": 1 }
if sys.platform == "linux":
proj_options = { f"FLAGS: -I{proj_path}/eigen -I{proj_path}/include -I{proj_path}/pcg32 -I{proj_path}/../op_header -DGLOBAL_VAR --extended-lambda --expt-relaxed-constexpr": 1 }
else:
proj_options = { f'FLAGS: -I"{proj_path}/eigen" -I"{proj_path}/include" -I"{proj_path}/pcg32" -I"{proj_path}/../op_header" -DGLOBAL_VAR --extended-lambda --expt-relaxed-constexpr': 1 }

jt.profiler.start()
gv = jt.code([1], int,
cuda_header=global_headers+global_src,
cuda_header=global_src,
cuda_src="""
""")
gv.compile_options = proj_options
gv.sync()
jt.profiler.stop()

if os.name == "nt":
dll_name = jt.profiler.report()[-1][-10].replace(".cc", "")
proj_options[f'FLAGS: -l{dll_name} '] = 1
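
Note on the `global_vars.py` rewrite above: on Linux a plain `extern` declaration is enough for `global_var1` and `rng` to be shared across the separately compiled ops, but on Windows each op is built into its own DLL, so the defining module has to mark the symbols `__declspec(dllexport)` and every consumer needs `__declspec(dllimport)` (the PR leans on Jittor's `EXTERN_LIB` macro for the consuming side). The profiler block at the end appears to recover the name of the module that was just compiled so that later ops can link against the DLL that actually defines the globals via `-l{dll_name}`. A condensed sketch of the qualifier selection, spelled out explicitly for clarity rather than copied from the PR:

```python
import sys

# Hypothetical illustration of the platform split; the PR builds full C++
# header strings around these qualifiers instead of printing them.
if sys.platform == "win32":
    defining_qualifier = "__declspec(dllexport)"          # symbol is exported from its own DLL
    consuming_qualifier = "__declspec(dllimport) extern"  # other DLLs import it
else:
    defining_qualifier = ""         # a plain definition is visible process-wide on Linux
    consuming_qualifier = "extern"  # ordinary external declaration

definition = f"{defining_qualifier} int global_var1 = 123;".lstrip()
declaration = f"{consuming_qualifier} int global_var1;".lstrip()
print(definition)
print(declaration)
```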
2 changes: 1 addition & 1 deletion python/jnerf/__init__.py
@@ -5,7 +5,7 @@
dirname = os.path.dirname(__file__)
LOG.i(f"JNeRF({__version__}) at {dirname}")
import sys
assert sys.platform == "linux", "Windows/MacOS is not supported yet, everyone is welcome to contribute to this"
assert sys.platform == "linux" or sys.platform == "win32" # "MacOS is not supported yet, everyone is welcome to contribute to this"

sp_char = ' "\''
for char in sp_char:
2 changes: 1 addition & 1 deletion python/jnerf/dataset/dataset.py
@@ -113,7 +113,7 @@ def load_data(self,root_dir=None):
matrix=np.array(frame['transform_matrix'],np.float32)[:-1, :]
self.transforms_gpu.append(
self.matrix_nerf2ngp(matrix, self.scale, self.offset))

self.resolution=[self.W,self.H]
self.resolution_gpu=jt.array(self.resolution)
metadata=np.empty([11],np.float32)
@@ -61,7 +61,10 @@ def __init__(self, hash_func_header, aabb_scale=1, n_pos_dims=3, n_features_per_
self.m_grid_gradient = jt.empty([m_n_params], self.grad_type)
self.m_stochastic_interpolation = 0
header_path = os.path.join(os.path.dirname(__file__), 'op_header')
proj_options[f"FLAGS: -I{header_path}"]=1
if sys.platform == "linux":
proj_options[f"FLAGS: -I{header_path}"]=1
else:
proj_options[f'FLAGS: -I"{header_path}"']=1

def execute(self, x,m_grid):
self.num_elements=x.shape[0]
@@ -87,7 +90,7 @@ def execute(self, x,m_grid):
const uint32_t blocks = div_round_up(num_elements, threads.x);
extract_position<float,N_POS_DIMS><<<blocks, threads, 0, stream>>>(
num_elements,
{{in1_p,in1_shape1}},
{{in1_p,(size_t)in1_shape1}},
m_positions_p
);
static constexpr uint32_t N_THREADS_HASHGRID = 512;
@@ -115,7 +118,7 @@ def execute(self, x,m_grid):
const dim3 threads_transpose = {{ {self.m_n_levels}, 8, 1 }};
const uint32_t blocks_transpose = div_round_up(num_elements, threads_transpose.y);

PitchedPtr<grad_t> outputs{{ out0_p,out0_shape1 }};
PitchedPtr<grad_t> outputs{{ out0_p,(size_t)out0_shape1 }};

transpose_encoded_position<vector_t<grad_t,N_FEATURES_PER_LEVEL>><<<blocks_transpose, threads_transpose, 0, stream>>>(
num_elements,
@@ -149,7 +152,7 @@ def grad(self, grad_x):
const unsigned int N_FEATURES_PER_LEVEL={self.N_FEATURES_PER_LEVEL};
cudaStream_t stream=0;
const dim3 threads_transpose ={{ {self.m_n_levels} , 8, 1}};
PitchedPtr<grad_t> dL_dy{{ in2_p,in2_shape1 }};
PitchedPtr<grad_t> dL_dy{{ in2_p,(size_t)in2_shape1 }};
cudaMemsetAsync(out0_p, 0, out0->size);
const uint32_t blocks_transpose = div_round_up(num_elements, threads_transpose.y);
transpose_gradients<vector_t<grad_t, N_FEATURES_PER_LEVEL>><<<blocks_transpose, threads_transpose, 0, stream>>>(
10 changes: 7 additions & 3 deletions python/jnerf/models/position_encoders/sh_encoder/sh_encoder.py
@@ -2,6 +2,7 @@
import jittor as jt
from jittor import Function
import numpy as np
import sys
from jnerf.ops.code_ops.global_vars import global_headers,proj_options
from jnerf.utils.config import get_cfg
from jnerf.utils.registry import ENCODERS
@@ -20,7 +21,10 @@ def __init__(self) :
else:
self.grad_type='float32'
header_path = os.path.join(os.path.dirname(__file__), 'op_header')
proj_options[f"FLAGS: -I{header_path}"]=1
if sys.platform == "linux":
proj_options[f"FLAGS: -I{header_path}"]=1
else:
proj_options[f'FLAGS: -I"{header_path}"']=1
self.out_dim=self.m_n_padded_output_dims

def execute(self,x) :
@@ -37,8 +41,8 @@ def execute(self,x) :

cudaStream_t stream=0;

PitchedPtr<const float> inputs={{in0_p,in0_shape1}};
PitchedPtr<grad_t> outputs={{out_p,out_shape1}};
PitchedPtr<const float> inputs={{in0_p,(size_t)in0_shape1}};
PitchedPtr<grad_t> outputs={{out_p,(size_t)out_shape1}};
float* dy_dx = nullptr;
linear_kernel(kernel_sh<grad_t>, 0, stream,
num_elements,
@@ -1,5 +1,6 @@
import os
import jittor as jt
import sys
from jittor import nn
from .ema_grid_samples_nerf import ema_grid_samples_nerf
from .generate_grid_samples_nerf_nonuniform import generate_grid_samples_nerf_nonuniform
@@ -91,7 +92,10 @@ def __init__(self, update_den_freq=16, update_block_size=5000000):
self.dataset_ray_data = False # whether the dataset contains ray information

header_path = os.path.join(os.path.dirname(__file__), 'op_header')
proj_options[f"FLAGS: -I{header_path}"]=1
if sys.platform == "linux":
proj_options[f"FLAGS: -I{header_path}"]=1
else:
proj_options[f'FLAGS: -I"{header_path}"']=1

self.density_grad_header = f"""
inline constexpr __device__ uint32_t NERF_GRIDSIZE() {{ return {self.NERF_GRIDSIZE}; }} // size of the density/occupancy grid.
@@ -46,11 +46,8 @@ def execute(self, density_grid, n_elements, density_grid_ema_step, max_cascade,


""")
# print(proj_options)
output[0].compile_options = proj_options
output[0].sync()
output[1].sync()
return output




8 changes: 4 additions & 4 deletions python/jnerf/ops/code_ops/fully_fused_mlp.py
@@ -21,8 +21,8 @@ def __init__(self, weights, check_mid="0", output_activation="Activation::None")
self.width = 0
self.output_intermediate = None
con_weights = []
self.code_path = pathlib.Path(__file__+"/../op_header").resolve()
self.so_name = os.path.join(pathlib.Path(__file__+"/../op_header").resolve(), "fully_fused_mlp_function.o")
self.code_path = pathlib.Path(__file__+"/../").resolve()
self.so_name = os.path.join(pathlib.Path(__file__+"/../op_header").resolve(), "fully_fused_mlp_function.cc")
for i in range(len(weights)):
if i == 0:
self.weight_shape0 = weights[0].shape[0]
@@ -81,7 +81,7 @@ def execute(self, a, con_weights):
else:
self.padded_input = self.input
self.outputs, self.output_intermediate = jt.code([(self.padded_input.shape[0], 16), (self.padded_input.shape[0] * (len(self.weights) - 1), self.width)], [a.dtype, a.dtype], [self.padded_input, con_weights], cuda_header=cuda_header, cuda_src=cuda_src)
self.outputs.compile_options = {f"FLAGS: -I{self.code_path} -Xlinker {self.so_name} ":1}
self.outputs.compile_options = {f"FLAGS: -I{self.code_path}":1}
self.con_weights = con_weights
return self.outputs[:self.input.shape[0]]

@@ -115,7 +115,7 @@ def grad(self, grads):
);
'''
output, grad_temps = jt.code([(self.padded_input.shape[0], self.input.shape[1]), ((len(self.weights)-1) * self.padded_input.shape[0], self.width)], [self.input.dtype, self.input.dtype], [grads.transpose(), self.con_weights, self.output_intermediate], cuda_header=cuda_header, cuda_src=cuda_src)
output.compile_options = {f"FLAGS: -I{self.code_path} -Xlinker {self.so_name} ":1}
output.compile_options = {f"FLAGS: -I{self.code_path}":1}
if self.check_mid == "1":
self.grad_temps = grad_temps
if not need_last:
Expand Down
Loading