initial code

yikangshen · Jul 25, 2018 · ac912ce · ac912ce
1 parent 6d3c94a
commit ac912ce
Show file tree

Hide file tree

Showing 23 changed files with 51,739 additions and 1 deletion.
diff --git a/.gitignore b/.gitignore
@@ -1,3 +1,4 @@
+# <<<<<<< HEAD  (unresolved merge-conflict marker; commented out so git does not read it as an ignore pattern)
 # Byte-compiled / optimized / DLL files
 __pycache__/
 *.py[cod]
@@ -8,6 +9,7 @@ __pycache__/
 
 # Distribution / packaging
 .Python
+env/
 build/
 develop-eggs/
 dist/
@@ -78,10 +80,119 @@ celerybeat-schedule
 # SageMath parsed files
 *.sage.py
 
-# Environments
+# dotenv
 .env
+
+# virtualenv
 .venv
+venv/
+ENV/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+
+#pycharm
+.idea/
+
+#pytorch
+*.pt
+# =======  (unresolved merge-conflict marker; commented out so git does not read it as an ignore pattern)
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
 env/
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+.hypothesis/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# pyenv
+.python-version
+
+# celery beat schedule file
+celerybeat-schedule
+
+# SageMath parsed files
+*.sage.py
+
+# dotenv
+.env
+
+# virtualenv
+.venv
 venv/
 ENV/
 
@@ -97,3 +208,10 @@ ENV/
 
 # mypy
 .mypy_cache/
+
+#pycharm
+.idea/
+
+#pytorch
+*.pt
+# >>>>>>> e6ee33014912d2cdc248ceaf7855ad53fd2edad5  (unresolved merge-conflict marker; commented out so git does not read it as an ignore pattern)
diff --git a/LSTMCell.py b/LSTMCell.py
@@ -0,0 +1,65 @@
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from torch.nn.modules.rnn import RNNCellBase
+
+
+class LayerNorm(nn.Module):
+    """Normalise the last axis of a tensor, then scale and shift it.
+
+    Every slice along the last axis is centred on its mean and divided by
+    its standard deviation plus ``eps``. The result is multiplied by the
+    learnable ``gamma`` and offset by the learnable ``beta``.
+    """
+
+    def __init__(self, features, eps=1e-6):
+        """Create the scale and shift parameters.
+
+        Args:
+            features: Length of the ``gamma`` and ``beta`` vectors.
+            eps: Constant added to the standard deviation before dividing.
+        """
+        super(LayerNorm, self).__init__()
+        # gamma starts at one and beta at zero.
+        self.gamma = nn.Parameter(torch.ones(features))
+        self.beta = nn.Parameter(torch.zeros(features))
+        self.eps = eps
+
+    def forward(self, x):
+        """Return ``gamma * (x - mean) / (std + eps) + beta`` over the last axis."""
+        centered = x - x.mean(-1, keepdim=True)
+        denom = x.std(-1, keepdim=True) + self.eps
+        return self.gamma * centered / denom + self.beta
+
+
+class LSTMCell(RNNCellBase):
+    """Layer-normalised LSTM-style cell with a cumulative-softmax input gate.
+
+    The input gate is a sigmoid gate multiplied elementwise by the running
+    sum (``cumsum``) of a softmax over the hidden units; the forget gate is
+    its complement, ``1 - ingate``.
+    """
+
+    def __init__(self, input_size, hidden_size, dropout=0):
+        """Build the projections, normalisation layers and input dropout.
+
+        Args:
+            input_size: Number of features in ``input``.
+            hidden_size: Number of features in the hidden and cell states.
+            dropout: Probability handed to ``nn.Dropout`` for the input.
+        """
+        # Initialise through nn.Module directly. In current PyTorch releases
+        # RNNCellBase.__init__ requires (input_size, hidden_size, bias,
+        # num_chunks), so a zero-argument super().__init__() raises TypeError.
+        # This call works on old and new releases and keeps RNNCellBase as
+        # the base class for isinstance checks.
+        nn.Module.__init__(self)
+        self.input_size = input_size
+        self.hidden_size = hidden_size
+
+        # Three chunks per projection: cell candidate, input gate, output gate.
+        self.ih = nn.Sequential(
+            nn.Linear(input_size, 3 * hidden_size, bias=True),
+            LayerNorm(3 * hidden_size),
+        )
+        self.hh = nn.Sequential(
+            nn.Linear(hidden_size, 3 * hidden_size, bias=True),
+            LayerNorm(3 * hidden_size),
+        )
+
+        self.c_norm = LayerNorm(hidden_size)
+        self.drop = nn.Dropout(dropout)
+
+        # Softmax distribution over hidden units, fed with [input, hx].
+        self.dst = nn.Sequential(
+            nn.Linear(hidden_size + input_size, hidden_size),
+            nn.Softmax(dim=-1),
+        )
+
+    def forward(self, input, hidden, rmask):
+        """Advance the cell by one step.
+
+        Args:
+            input: Step input whose last axis has ``input_size`` features.
+                The gates are split along dim 1, so a 2-D tensor is
+                assumed -- TODO confirm against the caller.
+            hidden: Tuple ``(hx, cx)`` of previous hidden and cell states.
+            rmask: Tensor multiplied elementwise into ``hx`` before it is
+                used (presumably a recurrent dropout mask -- confirm).
+
+        Returns:
+            Tuple ``(hy, cy, distance)``: the new hidden state, the new cell
+            state, and the cumulative softmax averaged over the hidden axis.
+        """
+        hx, cx = hidden
+
+        x = self.drop(input)
+        hx = hx * rmask
+        gates = self.ih(x) + self.hh(hx)
+
+        cell, ingate, outgate = gates.chunk(3, 1)
+
+        # Running sum of a softmax: non-decreasing along the hidden axis,
+        # ending at 1.
+        fgate = torch.cumsum(self.dst(torch.cat([x, hx], dim=-1)), dim=-1)
+        distance = fgate.sum(dim=-1) / self.hidden_size
+
+        # torch.sigmoid / torch.tanh replace the deprecated F.sigmoid / F.tanh.
+        ingate = torch.sigmoid(ingate) * fgate
+        forgetgate = 1 - ingate
+        cell = torch.tanh(cell)
+        outgate = torch.sigmoid(outgate)
+
+        cy = forgetgate * cx + ingate * cell
+        hy = outgate * torch.tanh(self.c_norm(cy))
+
+        return hy, cy, distance
+
+    def init_hidden(self, bsz):
+        """Return zero ``(hx, cx)`` states of shape ``(bsz, hidden_size)``.
+
+        The tensors take their dtype and device from the first parameter.
+        """
+        weight = next(self.parameters())
+        shape = (bsz, self.hidden_size)
+        return weight.new_zeros(shape), weight.new_zeros(shape)
diff --git a/LSTMCell_new.py b/LSTMCell_new.py
@@ -0,0 +1,70 @@
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from torch.nn.modules.rnn import RNNCellBase
+
+
+class LayerNorm(nn.Module):
+    """Normalise the last axis of a tensor, then scale and shift it.
+
+    Every slice along the last axis is centred on its mean and divided by
+    its standard deviation plus ``eps``. The result is multiplied by the
+    learnable ``gamma`` and offset by the learnable ``beta``.
+    """
+
+    def __init__(self, features, eps=1e-6):
+        """Create the scale and shift parameters.
+
+        Args:
+            features: Length of the ``gamma`` and ``beta`` vectors.
+            eps: Constant added to the standard deviation before dividing.
+        """
+        super(LayerNorm, self).__init__()
+        # gamma starts at one and beta at zero.
+        self.gamma = nn.Parameter(torch.ones(features))
+        self.beta = nn.Parameter(torch.zeros(features))
+        self.eps = eps
+
+    def forward(self, x):
+        """Return ``gamma * (x - mean) / (std + eps) + beta`` over the last axis."""
+        centered = x - x.mean(-1, keepdim=True)
+        denom = x.std(-1, keepdim=True) + self.eps
+        return self.gamma * centered / denom + self.beta
+
+
+class LSTMCell(RNNCellBase):
+    """Layer-normalised LSTM-style cell with cumulative-softmax gates.
+
+    The input gate is the running sum of a softmax over the hidden units and
+    the forget gate is one minus such a running sum. A separate sigmoid
+    update gate mixes the previous cell state with the new candidate.
+    """
+
+    def __init__(self, input_size, hidden_size, dropout=0):
+        """Build the projections, normalisation layers and input dropout.
+
+        Args:
+            input_size: Number of features in ``input``.
+            hidden_size: Number of features in the hidden and cell states.
+            dropout: Probability handed to ``nn.Dropout`` for the input.
+        """
+        # Initialise through nn.Module directly. In current PyTorch releases
+        # RNNCellBase.__init__ requires (input_size, hidden_size, bias,
+        # num_chunks), so a zero-argument super().__init__() raises TypeError.
+        # This call works on old and new releases and keeps RNNCellBase as
+        # the base class for isinstance checks.
+        nn.Module.__init__(self)
+        self.input_size = input_size
+        self.hidden_size = hidden_size
+
+        # Five chunks per projection: cell candidate, input gate, forget
+        # gate, update gate, output gate.
+        self.ih = nn.Sequential(
+            nn.Linear(input_size, 5 * hidden_size, bias=True),
+            LayerNorm(5 * hidden_size),
+        )
+        self.hh = nn.Sequential(
+            nn.Linear(hidden_size, 5 * hidden_size, bias=True),
+            LayerNorm(5 * hidden_size),
+        )
+
+        self.c_norm = LayerNorm(hidden_size)
+        self.drop = nn.Dropout(dropout)
+
+        # NOTE: forward() never calls self.dst. It is kept so the module's
+        # parameter list and state_dict keys stay unchanged.
+        self.dst = nn.Sequential(
+            nn.Linear(hidden_size + input_size, hidden_size),
+            nn.Softmax(dim=-1),
+        )
+
+    @staticmethod
+    def _cumsoftmax(x):
+        """Return the running sum along the last axis of ``softmax(x)``."""
+        return torch.cumsum(F.softmax(x, dim=-1), dim=-1)
+
+    def forward(self, input, hidden, rmask):
+        """Advance the cell by one step.
+
+        Args:
+            input: Step input whose last axis has ``input_size`` features.
+                The gates are split along dim 1, so a 2-D tensor is
+                assumed -- TODO confirm against the caller.
+            hidden: Tuple ``(hx, cx)`` of previous hidden and cell states.
+            rmask: Tensor multiplied elementwise into ``hx`` before it is
+                used (presumably a recurrent dropout mask -- confirm).
+
+        Returns:
+            Tuple ``(hy, cy, distance)``: the new hidden state, the new cell
+            state, and ``1 - forgetgate`` averaged over the hidden axis.
+        """
+        hx, cx = hidden
+
+        x = self.drop(input)
+        hx = hx * rmask
+        gates = self.ih(x) + self.hh(hx)
+
+        cell, ingate, forgetgate, updategate, outgate = gates.chunk(5, 1)
+
+        ingate = self._cumsoftmax(ingate)
+        forgetgate = 1. - self._cumsoftmax(forgetgate)
+        # torch.sigmoid / torch.tanh replace the deprecated F.sigmoid / F.tanh.
+        updategate = torch.sigmoid(updategate)
+        cell = torch.tanh(cell)
+        outgate = torch.sigmoid(outgate)
+
+        distance = (1. - forgetgate).sum(dim=-1) / self.hidden_size
+
+        cy = forgetgate * updategate * cx + ingate * (1. - updategate) * cell
+        hy = outgate * torch.tanh(self.c_norm(cy))
+
+        return hy, cy, distance
+
+    def init_hidden(self, bsz):
+        """Return zero ``(hx, cx)`` states of shape ``(bsz, hidden_size)``.
+
+        The tensors take their dtype and device from the first parameter.
+        """
+        weight = next(self.parameters())
+        shape = (bsz, self.hidden_size)
+        return weight.new_zeros(shape), weight.new_zeros(shape)
diff --git a/LSTMCell_normal.py b/LSTMCell_normal.py
@@ -0,0 +1,58 @@
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from torch.nn.modules.rnn import RNNCellBase
+
+
+class LayerNorm(nn.Module):
+    """Normalise the last axis of a tensor, then scale and shift it.
+
+    Every slice along the last axis is centred on its mean and divided by
+    its standard deviation plus ``eps``. The result is multiplied by the
+    learnable ``gamma`` and offset by the learnable ``beta``.
+    """
+
+    def __init__(self, features, eps=1e-6):
+        """Create the scale and shift parameters.
+
+        Args:
+            features: Length of the ``gamma`` and ``beta`` vectors.
+            eps: Constant added to the standard deviation before dividing.
+        """
+        super(LayerNorm, self).__init__()
+        # gamma starts at one and beta at zero.
+        self.gamma = nn.Parameter(torch.ones(features))
+        self.beta = nn.Parameter(torch.zeros(features))
+        self.eps = eps
+
+    def forward(self, x):
+        """Return ``gamma * (x - mean) / (std + eps) + beta`` over the last axis."""
+        centered = x - x.mean(-1, keepdim=True)
+        denom = x.std(-1, keepdim=True) + self.eps
+        return self.gamma * centered / denom + self.beta
+
+
+class LSTMCell(RNNCellBase):
+    """LSTM cell with layer-normalised projections and cell state.
+
+    Besides the new hidden and cell states, each step also returns a
+    per-row ``distance`` value derived from the forget-gate activations.
+    """
+
+    def __init__(self, input_size, hidden_size, dropout=0):
+        """Build the projections, normalisation layers and input dropout.
+
+        Args:
+            input_size: Number of features in ``input``.
+            hidden_size: Number of features in the hidden and cell states.
+            dropout: Probability handed to ``nn.Dropout`` for the input.
+        """
+        # Initialise through nn.Module directly. In current PyTorch releases
+        # RNNCellBase.__init__ requires (input_size, hidden_size, bias,
+        # num_chunks), so a zero-argument super().__init__() raises TypeError.
+        # This call works on old and new releases and keeps RNNCellBase as
+        # the base class for isinstance checks.
+        nn.Module.__init__(self)
+        self.input_size = input_size
+        self.hidden_size = hidden_size
+
+        # Four chunks per projection: cell candidate, input gate, forget
+        # gate, output gate.
+        self.ih = nn.Sequential(
+            nn.Linear(input_size, 4 * hidden_size, bias=True),
+            LayerNorm(4 * hidden_size),
+        )
+        self.hh = nn.Sequential(
+            nn.Linear(hidden_size, 4 * hidden_size, bias=True),
+            LayerNorm(4 * hidden_size),
+        )
+
+        self.c_norm = LayerNorm(hidden_size)
+        self.drop = nn.Dropout(dropout)
+
+    def forward(self, input, hidden, rmask):
+        """Advance the cell by one step.
+
+        Args:
+            input: Step input whose last axis has ``input_size`` features.
+                The gates are split along dim 1, so a 2-D tensor is
+                assumed -- TODO confirm against the caller.
+            hidden: Tuple ``(hx, cx)`` of previous hidden and cell states.
+            rmask: Tensor multiplied elementwise into ``hx`` before it is
+                used (presumably a recurrent dropout mask -- confirm).
+
+        Returns:
+            Tuple ``(hy, cy, distance)``: the new hidden state, the new cell
+            state, and the pre-sigmoid forget-gate values averaged over the
+            hidden axis.
+        """
+        hx, cx = hidden
+
+        x = self.drop(input)
+        hx = hx * rmask
+        gates = self.ih(x) + self.hh(hx)
+
+        cell, ingate, forgetgate, outgate = gates.chunk(4, 1)
+
+        # NOTE(review): distance is taken before the sigmoid is applied, so
+        # it is not bounded to [0, 1] -- confirm this is intended.
+        distance = forgetgate.sum(dim=-1) / self.hidden_size
+
+        # torch.sigmoid / torch.tanh replace the deprecated F.sigmoid / F.tanh.
+        ingate = torch.sigmoid(ingate)
+        forgetgate = torch.sigmoid(forgetgate)
+        cell = torch.tanh(cell)
+        outgate = torch.sigmoid(outgate)
+
+        cy = forgetgate * cx + ingate * cell
+        hy = outgate * torch.tanh(self.c_norm(cy))
+
+        return hy, cy, distance
+
+    def init_hidden(self, bsz):
+        """Return zero ``(hx, cx)`` states of shape ``(bsz, hidden_size)``.
+
+        The tensors take their dtype and device from the first parameter.
+        """
+        weight = next(self.parameters())
+        shape = (bsz, self.hidden_size)
+        return weight.new_zeros(shape), weight.new_zeros(shape)