
Commit 8d64d98

committed
jit_prepare for array op
1 parent 6bc4a85 commit 8d64d98

14 files changed: +65 -64 lines changed

python/jittor/__init__.py (+1 -1)

@@ -7,7 +7,7 @@
 # This file is subject to the terms and conditions defined in
 # file 'LICENSE.txt', which is part of this source code package.
 # ***************************************************************
-__version__ = '1.2.0.0'
+__version__ = '1.2.0.1'
 from . import lock
 with lock.lock_scope():
     from . import compiler

python/jittor/nn.py (+27 -44)

@@ -342,62 +342,45 @@ def execute(self, x):
 
 class BatchNorm1d(Module):
     def __init__(self, num_features, eps=1e-5, momentum=0.1, affine=None, is_train=True, sync=True):
-        assert affine == None
         self.sync = sync
         self.num_features = num_features
         self.is_train = is_train
         self.eps = eps
         self.momentum = momentum
-        self.weight = init.constant((num_features,), "float32", 1.0)
-        self.bias = init.constant((num_features,), "float32", 0.0)
+        self.affine = affine
+        if affine:
+            self.weight = init.constant((num_features,), "float32", 1.0)
+            self.bias = init.constant((num_features,), "float32", 0.0)
         self.running_mean = init.constant((num_features,), "float32", 0.0).stop_grad()
         self.running_var = init.constant((num_features,), "float32", 1.0).stop_grad()
 
     def execute(self, x):
         if len(x.shape) == 3:
-            if self.is_train:
-                xmean = jt.mean(x, dims=[0, 2], keepdims=1)
-                x2mean = jt.mean(x*x, dims=[0, 2], keepdims=1)
-
-                if self.sync and jt.in_mpi:
-                    xmean = xmean.mpi_all_reduce("mean")
-                    x2mean = x2mean.mpi_all_reduce("mean")
-
-                xvar = x2mean-xmean*xmean
-                norm_x = (x-xmean)/jt.sqrt(xvar+self.eps)
-                self.running_mean.update(self.running_mean +
-                    (xmean.sum([0, 2])-self.running_mean)*self.momentum)
-                self.running_var.update(self.running_var +
-                    (xvar.sum([0, 2])-self.running_var)*self.momentum)
-            else:
-                running_mean = self.running_mean.broadcast(x, [0, 2])
-                running_var = self.running_var.broadcast(x, [0, 2])
-                norm_x = (x-running_mean)/jt.sqrt(running_var+self.eps)
-            w = self.weight.broadcast(x, [0, 2])
-            b = self.bias.broadcast(x, [0, 2])
-        else:
-            if self.is_train:
-                xmean = jt.mean(x, dims=[0], keepdims=1)
-                x2mean = jt.mean(x*x, dims=[0], keepdims=1)
-
-                if self.sync and jt.in_mpi:
-                    xmean = xmean.mpi_all_reduce("mean")
-                    x2mean = x2mean.mpi_all_reduce("mean")
-
-                xvar = x2mean-xmean*xmean
-                norm_x = (x-xmean)/jt.sqrt(xvar+self.eps)
-                self.running_mean.update(self.running_mean +
-                    (xmean.sum([0])-self.running_mean)*self.momentum)
-                self.running_var.update(self.running_var +
-                    (xvar.sum([0])-self.running_var)*self.momentum)
-            else:
-                running_mean = self.running_mean.broadcast(x, [0])
-                running_var = self.running_var.broadcast(x, [0])
-                norm_x = (x-running_mean)/jt.sqrt(running_var+self.eps)
-            w = self.weight.broadcast(x, [0])
-            b = self.bias.broadcast(x, [0])
+            dims = [0, 2]
+        else:
+            dims = [0]
+        if self.is_train:
+            xmean = jt.mean(x, dims=dims, keepdims=1)
+            x2mean = jt.mean(x*x, dims=dims, keepdims=1)
+
+            if self.sync and jt.in_mpi:
+                xmean = xmean.mpi_all_reduce("mean")
+                x2mean = x2mean.mpi_all_reduce("mean")
+
+            xvar = x2mean-xmean*xmean
+            norm_x = (x-xmean)/jt.sqrt(xvar+self.eps)
+            self.running_mean.update(self.running_mean +
+                (xmean.sum(dims)-self.running_mean)*self.momentum)
+            self.running_var.update(self.running_var +
+                (xvar.sum(dims)-self.running_var)*self.momentum)
+        else:
+            running_mean = self.running_mean.broadcast(x, dims)
+            running_var = self.running_var.broadcast(x, dims)
+            norm_x = (x-running_mean)/jt.sqrt(running_var+self.eps)
         if not self.affine:
             return norm_x
+        w = self.weight.broadcast(x, dims)
+        b = self.bias.broadcast(x, dims)
         return norm_x * w + b
 
 class InstanceNorm2d(Module):
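
A small usage sketch of the refactored BatchNorm1d above (an illustration written for this note, not code from the commit; the shapes and variable names are made up): with the default affine=None the layer now returns only the normalized tensor, while affine=True also creates and applies the learnable weight and bias.

# Usage sketch for the refactored BatchNorm1d (illustration only).
import jittor as jt
from jittor import nn

x2d = jt.random((16, 10))      # (N, C): normalized over dim [0]
x3d = jt.random((16, 10, 20))  # (N, C, L): normalized over dims [0, 2]

bn_plain  = nn.BatchNorm1d(10)               # affine=None: returns norm_x only
bn_affine = nn.BatchNorm1d(10, affine=True)  # affine=True: applies weight and bias

print(bn_plain(x2d).shape, bn_affine(x3d).shape)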

python/jittor/test/__main__.py (+6 -3)

@@ -6,6 +6,7 @@
 
 if __name__ == "__main__":
     import unittest, os
+    unittest.TestLoader.sortTestMethodsUsing = None
 
     suffix = "__main__.py"
     assert __file__.endswith(suffix)
@@ -22,17 +23,19 @@
     suite = unittest.TestSuite()
 
     for _, test_file in enumerate(test_files):
+        test_name = test_file.split(".")[0]
+        tests = unittest.defaultTestLoader.loadTestsFromName(
+            "jittor.test."+test_name)
+
         if not test_file.startswith("test_"):
            continue
         if _ < skip_l or _ > skip_r:
            continue
-        test_name = test_file.split(".")[0]
         if test_only and test_name not in test_only:
            continue
 
         print("Add Test", _, test_name)
-        suite.addTest(unittest.defaultTestLoader.loadTestsFromName(
-            "jittor.test."+test_name))
+        suite.addTest(tests)
 
     result = unittest.TextTestRunner(verbosity=3).run(suite)
     if len(result.errors) or len(result.failures):
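
For reference, a standalone sketch of the unittest pattern the loop above relies on (illustration only; the dotted module name below is just an example): tests are now loaded by name first, then selectively added to a suite and run.

# Minimal sketch of the unittest APIs used by __main__.py (illustration only).
import unittest

unittest.TestLoader.sortTestMethodsUsing = None  # disable the loader's sorting of method names

# Load a test module by dotted name, then add it to a suite and run it.
# "jittor.test.test_contrib" is just an example; any importable module
# containing TestCase subclasses works here.
tests = unittest.defaultTestLoader.loadTestsFromName("jittor.test.test_contrib")
suite = unittest.TestSuite()
suite.addTest(tests)

result = unittest.TextTestRunner(verbosity=3).run(suite)
print(len(result.errors), len(result.failures))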

python/jittor/test/test_affine_grid.py (+10 -4)

@@ -9,10 +9,18 @@
 from jittor.nn import affine_grid,grid_sample
 
 
+skip_this_test = False
+
+try:
+    jt.dirty_fix_pytorch_runtime_error()
+    import torch.nn.functional as F
+    import torch
+except:
+    skip_this_test = True
+
+@unittest.skipIf(skip_this_test, "No Torch found")
 class TestAffineGrid(unittest.TestCase):
     def test_affine_grid_2d(self):
-        import torch.nn.functional as F
-        import torch
         N = 8
         C = 3
         H = 256
@@ -37,8 +45,6 @@ def test_affine_grid_2d(self):
 
 
     def test_affine_grid_3d(self):
-        import torch.nn.functional as F
-        import torch
         N = 8
         C = 3
         D = 64
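
The guarded import above is a common pattern for optional test dependencies. A generic standalone sketch with placeholder names (the real test guards the torch import and calls jt.dirty_fix_pytorch_runtime_error() first):

# Skip a whole test class when an optional dependency is missing (placeholder names).
import unittest

skip_this_test = False
try:
    import numpy as np  # stand-in for an optional dependency such as torch
except ImportError:
    skip_this_test = True

@unittest.skipIf(skip_this_test, "optional dependency not found")
class TestWithOptionalDep(unittest.TestCase):
    def test_sum(self):
        self.assertEqual(int(np.arange(3).sum()), 3)

if __name__ == "__main__":
    unittest.main()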

python/jittor/test/test_concat_op.py (+3)

@@ -51,6 +51,8 @@ def check(tmp, dim=0):
         check([jt.array(np.array(range(5))).reshape((5,1)), jt.array(np.array(range(1))).reshape((1,1))])
         print('concat success...')
 
+
+    @unittest.skipIf(not jt.has_cuda, "No CUDA found")
     @jt.flag_scope(use_cuda = 1)
     def test_concat_perf(self):
         def check(dim, size, backward=False):
@@ -106,6 +108,7 @@ def check(dim, size, backward=False):
 
     '''
 
+    @unittest.skipIf(not jt.has_cuda, "No CUDA found")
     @jt.flag_scope(use_cuda = 1)
     def test_concat2_perf(self):
         def check(dim, size, backward=False):
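
The added decorator keeps the CUDA perf tests from failing on CPU-only machines. A minimal sketch of the same guard, with a placeholder test body:

# Minimal sketch of the CUDA guard used above (placeholder test body).
import unittest
import jittor as jt

class TestCudaGuard(unittest.TestCase):
    @unittest.skipIf(not jt.has_cuda, "No CUDA found")
    @jt.flag_scope(use_cuda=1)
    def test_runs_on_gpu(self):
        x = jt.random((4, 4))
        (x + 1).sync()  # executes with use_cuda forced on

if __name__ == "__main__":
    unittest.main()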

python/jittor/test/test_contrib.py (+1 -1)

@@ -20,7 +20,7 @@ def check(shape, dim, n):
                 arr2.append(jt.array(a))
             x = np.concatenate(tuple(arr1), dim)
             y = jt.contrib.concat(arr2, dim)
-            assert (x==y.data).all()
+            assert (x==y.data).all(), (x, y.data, arr1, arr2)
         check([2,3,4], 0, 2)
         check([2,3,4], 1, 3)
         check([2,3,4], 2, 4)

python/jittor/test/test_merge_single_array_op.py (+2 -2)

@@ -45,8 +45,8 @@ def test(shape, op1, op2):
             with jt.log_capture_scope(log_v=0, log_vprefix="fused_op.cc=100") as logs:
                 d__ = d.data
             logs = find_log_with_re(logs,
-                "Jit (fused )?op key (not )?found: \[opkey0:array\]\[opkey1")
-            assert(len(logs)==1)
+                "Jit (fused )?op key (not )?found: \[opkey0:array\[T:float32")
+            assert(len(logs)==1), logs
 
             a_ = a.data
             b_ = b.data

python/jittor/test/test_mkl_conv_op.py (+2 -2)

@@ -114,7 +114,7 @@ def test_backward(self):
         b = np.random.rand(o,i,h,w).astype(np.float32)
         da = np.random.rand(n,o,H,W).astype(np.float32)
         dx = jt.mkl_ops.mkl_conv_backward_x(b,da,H,W,1,1,1).data
-        dw = jt.mkl_ops.mkl_conv_backward_w(a,da,h,1,1,1).data
+        dw = jt.mkl_ops.mkl_conv_backward_w(a,da,h,w,1,1,1).data
         a_jt = jt.array(a)
         b_jt = jt.array(b)
 
@@ -160,7 +160,7 @@ def test_backward_nhwc_hwio(self):
         b = np.random.rand(h,w,i,o).astype(np.float32)
         da = np.random.rand(n,H,W,o).astype(np.float32)
         dx = jt.mkl_ops.mkl_conv_backward_x(b,da,H,W,1,1,1,xformat="acdb",wformat="hwio",yformat="acdb").data
-        dw = jt.mkl_ops.mkl_conv_backward_w(a,da,h,1,1,1,xformat="acdb",wformat="hwio",yformat="acdb").data
+        dw = jt.mkl_ops.mkl_conv_backward_w(a,da,h,w,1,1,1,xformat="acdb",wformat="hwio",yformat="acdb").data
         a_jt = jt.array(a)
         b_jt = jt.array(b)
 

python/jittor/test/test_mpi_batchnorm.py (+2 -2)

@@ -26,10 +26,10 @@ def __init__(self, num_features, eps=1e-5, momentum=0.1, affine=None, is_train=T
         self.is_train = is_train
         self.eps = eps
         self.momentum = momentum
-        self.running_mean = init.constant((num_features,), "float32", 0.0).stop_grad()
-        self.running_var = init.constant((num_features,), "float32", 1.0).stop_grad()
         self.weight = init.constant((num_features,), "float32", 1.0)
         self.bias = init.constant((num_features,), "float32", 0.0)
+        self.running_mean = init.constant((num_features,), "float32", 0.0).stop_grad()
+        self.running_var = init.constant((num_features,), "float32", 1.0).stop_grad()
 
     def execute(self, x, global_x):
         if self.is_train:

python/jittor/test/test_slice.py (+1 -1)

@@ -49,7 +49,7 @@ def check(shape, slices, i_to_vs="", i_to_o="", o_shape=""):
         # print(slices)
         x = jt.random(shape)
 
-        with jt.log_capture_scope(log_vprefix="getitem=1000") as logs:
+        with jt.log_capture_scope(log_vprefix="getitem=999") as logs:
             a = x.getitem(slices)
             a.sync()
         b = x.data[slices]

src/ops/array_op.cc (+5)

@@ -74,6 +74,11 @@ ArrayOp::ArrayOp(ArrayArgs&& args) {
     std::memcpy(allocation.ptr, args.ptr, output->size);
 }
 
+void ArrayOp::jit_prepare() {
+    if (output->flags.get(NodeFlags::_force_fuse))
+        add_jit_define("T", output->dtype());
+}
+
 void ArrayOp::run() {
 #ifdef HAS_CUDA
     if (allocation.allocator == &cuda_dual_allocator) {

src/ops/array_op.h (+1)

@@ -28,6 +28,7 @@ struct ArrayOp : Op {
 
     const char* name() const override { return "array"; }
     void run() override;
+    void jit_prepare() override;
 };
 
 } // jittor
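
ArrayOp::jit_prepare adds the dtype to the JIT key of a force-fused array op, which is why the regex in test_merge_single_array_op.py now expects "opkey0:array[T:float32". A rough way to look for the new key from Python (a sketch, not from the commit; whether the array op is actually fused is up to the executor, so the match count may be zero):

# Sketch: capture fused-op logs and look for the dtype-carrying array-op key.
import re
import numpy as np
import jittor as jt

a = jt.array(np.float32([1, 2, 3]))  # float32 array op
b = a * 2 + 1                        # element-wise ops that may be fused with it

with jt.log_capture_scope(log_v=0, log_vprefix="fused_op.cc=100") as logs:
    b.sync()                         # trigger compilation/execution inside the scope

matches = [l for l in logs if re.search(r"opkey0:array\[T:float32", str(l))]
print("dtype-carrying array-op keys:", len(matches))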

src/ops/getitem_op.cc (+1 -1)

@@ -358,7 +358,7 @@ void GetitemOp::infer_shape() {
     this->i_to_o = i_to_o.to_nano_vector();
     this->o_shape = o_shape.to_nano_vector();
 
-    LOGvvvv << "\ni_to_vs:" << i_to_vs
+    LOGV(999) << "\ni_to_vs:" << i_to_vs
         << "\ni_to_o:" << i_to_o
         << "\no_shape:" << o_shape;
 }

src/test/test_kernel_ir.cc (+3 -3)

@@ -49,11 +49,11 @@ JIT_TEST(kernel_ir) {
     })", true
     );
     string code = R"(//
-// scope: main(1),
+// scope: <cmath>(1), aaa(1), main(1),
 
-// C macro code:"#include <cmath>"
+// C macro code:"#include <cmath>" lvalue:"<cmath>"
 #include <cmath>
-// C macro code:"#define aaa bbb"
+// C macro code:"#define aaa bbb" lvalue:"aaa" rvalue:" bbb"
 #define aaa bbb
 // C code:"using namespace std;" raw:"1"
 using namespace std;
