This repository has been archived by the owner on Dec 16, 2022. It is now read-only.

[NOT FOR MERGING] Transfer learning bug #4209

Open
wants to merge 2 commits into base: main

Changes from all commits
1 change: 1 addition & 0 deletions allennlp/common/from_params.py
@@ -275,6 +275,7 @@ def pop_and_construct_arg(
f"The module from model at {archive_file} at path {module_path} "
f"was expected of type {annotation} but is of type {type(result)}"
)
print("_PRETRAINED FINISHED")
return result

popped_params = params.pop(name, default) if default != _NO_DEFAULT else params.pop(name)
4 changes: 4 additions & 0 deletions allennlp/data/vocabulary.py
@@ -277,6 +277,7 @@ def from_instances(
of what the other parameters do.
"""
logger.info("Fitting token dictionary from dataset.")
print("FROM INSTANCES")
padding_token = padding_token if padding_token is not None else DEFAULT_PADDING_TOKEN
oov_token = oov_token if oov_token is not None else DEFAULT_OOV_TOKEN
namespace_token_counts: Dict[str, Dict[str, int]] = defaultdict(lambda: defaultdict(int))
@@ -361,6 +362,7 @@ def from_files_and_instances(
"""
vocab = cls.from_files(directory, padding_token, oov_token)
logger.info("Fitting token dictionary from dataset.")
print("FROM FILES AND INSTANCES")
namespace_token_counts: Dict[str, Dict[str, int]] = defaultdict(lambda: defaultdict(int))
for instance in Tqdm.tqdm(instances):
instance.count_vocab_items(namespace_token_counts)
@@ -374,6 +376,7 @@ def from_files_and_instances(
tokens_to_add=tokens_to_add,
min_pretrained_embeddings=min_pretrained_embeddings,
)
print(f"\n\nVOCABULARY: {vocab}\n\n")
return vocab

@classmethod
@@ -446,6 +449,7 @@ def set_from_file(

def extend_from_instances(self, instances: Iterable["adi.Instance"]) -> None:
logger.info("Fitting token dictionary from dataset.")
print("EXTEND FROM INSTANCES")
namespace_token_counts: Dict[str, Dict[str, int]] = defaultdict(lambda: defaultdict(int))
for instance in Tqdm.tqdm(instances):
instance.count_vocab_items(namespace_token_counts)
1 change: 1 addition & 0 deletions allennlp/models/basic_classifier.py
@@ -92,6 +92,7 @@ def __init__(
self._accuracy = CategoricalAccuracy()
self._loss = torch.nn.CrossEntropyLoss()
initializer(self)
self.extend_embedder_vocab()

def forward( # type: ignore
self, tokens: TextFieldTensors, label: torch.IntTensor = None
8 changes: 8 additions & 0 deletions allennlp/modules/token_embedders/embedding.py
@@ -241,6 +241,7 @@ def extend_vocab(
to give a helpful error message when extend_vocab is implicitly called
by train or any other command.
"""
print("\n\nEXTENDING VOCAB\n\n")
# Caveat: For allennlp v0.8.1 and below, we weren't storing vocab_namespace as an attribute,
# knowing which is necessary at time of embedding vocab extension. So old archive models are
# currently unextendable.
@@ -255,6 +256,13 @@ def extend_vocab(
return

extended_num_embeddings = extended_vocab.get_vocab_size(vocab_namespace)
print(f"VOCAB: {extended_vocab}")
print(f"NAMESPACE: {vocab_namespace}")
print(f"EXTENDED_NUM_EMBEDDINGS: {extended_num_embeddings}")
print(f"NUM EMBEDDINGS: {self.num_embeddings}")
import traceback
for line in traceback.format_stack()[:-5]:
print(line)
if extended_num_embeddings == self.num_embeddings:
# It's already been extended. No need to initialize / read pretrained file in first place (no-op)
return
50 changes: 50 additions & 0 deletions allennlp/tests/fixtures/data/esnli_train.jsonl

Large diffs are not rendered by default.

50 changes: 50 additions & 0 deletions allennlp/tests/fixtures/data/movies_train.jsonl

Large diffs are not rendered by default.

72 changes: 72 additions & 0 deletions allennlp/tests/transfer_learning/fixtures/esnli.jsonnet
@@ -0,0 +1,72 @@
local embedding_dim = 100;
local seq_encoder = {
"type": "lstm",
"input_size": embedding_dim,
"hidden_size": embedding_dim,
"num_layers": 1,
"bidirectional": true
};

{
"dataset_reader": {
"type": "snli",
"token_indexers": {
"tokens": {
"type": "single_id",
"lowercase_tokens": true
}
}
},
"train_data_path": "allennlp/tests/fixtures/data/esnli_train.jsonl",
"validation_data_path": "allennlp/tests/fixtures/data/esnli_train.jsonl",
"model": {
"type": "esim",
"text_field_embedder": {
"token_embedders": {
"tokens": {
"type": "embedding",
"pretrained_file": "allennlp/tests/fixtures/embeddings/glove.6B.100d.sample.txt.gz", //"https://s3-us-west-2.amazonaws.com/allennlp/datasets/glove/glove.6B.100d.txt.gz",
"embedding_dim": embedding_dim,
"trainable": true
}
}
},
"encoder": seq_encoder,
"matrix_attention": {
"type": "dot_product"
},
"projection_feedforward": {
"input_dim": 8*embedding_dim,
"hidden_dims": embedding_dim,
"num_layers": 1,
"activations": "relu"
},
"inference_encoder": seq_encoder,
"output_feedforward": {
"input_dim": 8*embedding_dim,
"num_layers": 1,
"hidden_dims": embedding_dim,
"activations": "relu",
"dropout": 0.5
},
"output_logit": {
"input_dim": embedding_dim,
"num_layers": 1,
"hidden_dims": 3,
"activations": "linear"
},
},
"data_loader": {
"type": "default",
"batch_size": 10
},
"trainer": {
"num_epochs": 1,
"cuda_device": -1,
"validation_metric": "+accuracy",
"optimizer": {
"type": "adam",
"lr": 5e-4
},
}
}
52 changes: 52 additions & 0 deletions allennlp/tests/transfer_learning/fixtures/movies.jsonnet
@@ -0,0 +1,52 @@
local pretrained = function(module_path, frozen=false) {"_pretrained": {
"archive_file": std.extVar("ARCHIVE_PATH"),
"module_path": module_path,
"freeze": frozen
}};

{
"dataset_reader": {
"type": "text_classification_json",
"tokenizer": {
"type": "whitespace"
}
},

"train_data_path": "allennlp/tests/fixtures/data/movies_train.jsonl",
"validation_data_path": "allennlp/tests/fixtures/data/movies_train.jsonl",
"vocabulary": {
"type": "extend",
"directory": "/tmp/taskA/vocabulary"
},
"model": {
"type": "basic_classifier",
"text_field_embedder": pretrained("_text_field_embedder"),
"seq2seq_encoder": pretrained("_encoder"),
"seq2vec_encoder": {
"type": "boe",
"embedding_dim": 200
},
"feedforward": {
"input_dim": 200,
"num_layers": 2,
"hidden_dims": [200, 2],
"activations": ["relu", "linear"],
"dropout": [0.15, 0.0]
},
},
"data_loader": {
"type": "default",
"batch_size": 10
},
"trainer": {
"num_epochs": 1,
"patience": 5,
"cuda_device": -1,
"grad_norm": 40,
"validation_metric": "+accuracy",
"optimizer": {
"type": "adam",
"lr": 5e-3
},
}
}
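
Note: the pretrained helper above wraps a module path in a "_pretrained" spec, which pop_and_construct_arg (instrumented in from_params.py above) resolves by loading that submodule from the archived taskA model. With ARCHIVE_PATH set to /tmp/taskA, pretrained("_text_field_embedder") evaluates to roughly:

{
  "_pretrained": {
    "archive_file": "/tmp/taskA",
    "module_path": "_text_field_embedder",
    "freeze": false
  }
}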
Empty file.
22 changes: 22 additions & 0 deletions allennlp/tests/transfer_learning/models/transfer_learning_test.py
@@ -0,0 +1,22 @@
# pylint: disable=invalid-name,protected-access
import pathlib, json, os

from allennlp_models.nli import snli_reader
from allennlp.common.testing import ModelTestCase
from allennlp.common.testing.test_case import TEST_DIR
from allennlp.commands.train import train_model, train_model_from_file

os.environ['ARCHIVE_PATH'] = "/tmp/taskA"

class TransferLearningTest(ModelTestCase):
def setUp(self):
super().setUp()
self.set_up_model('allennlp/tests/transfer_learning/fixtures/esnli.jsonnet',
'allennlp/tests/fixtures/data/esnli_train.jsonl')

def test_taskA_end_to_end(self):
train_model_from_file("allennlp/tests/transfer_learning/fixtures/esnli.jsonnet", serialization_dir="/tmp/taskA", force=True)

def test_taskB_end_to_end(self):
train_model_from_file("allennlp/tests/transfer_learning/fixtures/movies.jsonnet", serialization_dir="/tmp/taskB", force=True)
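
Note: the two tests are order-dependent. test_taskA_end_to_end must run first so that /tmp/taskA (including /tmp/taskA/vocabulary, which movies.jsonnet extends) exists before test_taskB_end_to_end builds the classifier from the archived modules and hits the instrumented extend_vocab path.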