Skip to content

Commit

Permalink
Add Python checks pipeline (#7032)
Browse files Browse the repository at this point in the history
This change adds a new pipeline for checking Python code. Currently this pipeline only runs flake8.
flake8 is also run as part of the CMake project builds, but we can switch over completely to the new pipeline later.
The .flake8 config file was also updated to make it easier to run standalone (flake8 --config ./.flake8). In addition, some Python formatting issues were addressed in files that were not previously scanned.
  • Loading branch information
edgchen1 authored Aug 9, 2021
1 parent aaeb781 commit baf8c39
Show file tree
Hide file tree
Showing 15 changed files with 146 additions and 82 deletions.
23 changes: 19 additions & 4 deletions .flake8
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,22 @@ max-line-length = 120
per-file-ignores =
__init__.py:F401
format = [flake8 PEP8 ERROR] %(path)s:%(row)d:%(col)d: %(code)s %(text)s
# We generally exclude using cmake/flake8.cmake. If something needs to be excluded here,
# the exclude value(s) must each be on a new line; otherwise it doesn't work (at least on Windows).
# exclude =
# ./onnxruntime/core/flatbuffers/ort_flatbuffers_py
exclude =
# ignore default build directory
./build,
# ignore external dependency files
./cmake/external,
# TODO enable
./docs/python,
# ignore generated flatbuffers code
./onnxruntime/core/flatbuffers/ort_flatbuffers_py,
# TODO enable
./onnxruntime/core/providers/nuphar,
# TODO enable
./onnxruntime/python/tools,
# ignore test code for now
./onnxruntime/test,
# TODO enable
./orttraining,
# ignore server code for now
./server,
11 changes: 7 additions & 4 deletions cgmanifests/submodules/generate_submodule_cgmanifest.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,24 +15,25 @@

registrations = []

with open(os.path.join(REPO_DIR, 'tools', 'ci_build', 'github', 'linux', 'docker', 'Dockerfile.manylinux2014_cuda11'), "r") as f:
with open(os.path.join(REPO_DIR, 'tools', 'ci_build', 'github', 'linux', 'docker', 'Dockerfile.manylinux2014_cuda11'),
"r") as f:
for line in f:
if not line.strip():
package_name = None
package_filename = None
package_url = None
if package_filename is None:
m = re.match("RUN\s+export\s+(.+?)_ROOT=(\S+).*", line)
m = re.match(r"RUN\s+export\s+(.+?)_ROOT=(\S+).*", line)
if m is not None:
package_name = m.group(1)
package_filename = m.group(2)
else:
m = re.match("RUN\s+export\s+(.+?)_VERSION=(\S+).*", line)
m = re.match(r"RUN\s+export\s+(.+?)_VERSION=(\S+).*", line)
if m is not None:
package_name = m.group(1)
package_filename = m.group(2)
elif package_url is None:
m = re.match("(.+?)_DOWNLOAD_URL=(\S+)", line)
m = re.match(r"(.+?)_DOWNLOAD_URL=(\S+)", line)
if m is not None:
package_url = m.group(2)
if package_name == 'LIBXCRYPT':
Expand Down Expand Up @@ -60,9 +61,11 @@
package_filename = None
package_url = None


def normalize_path_separators(path):
    """Return *path* with every ``os.path.sep`` replaced by a forward slash.

    The path is split on the platform separator and re-joined with "/",
    so it comes back unchanged when the separator is already "/".
    """
    segments = path.split(os.path.sep)
    return "/".join(segments)


proc = subprocess.run(
["git", "submodule", "foreach", "--quiet", "--recursive", "{} {} $toplevel/$sm_path".format(
normalize_path_separators(sys.executable),
Expand Down
13 changes: 6 additions & 7 deletions csharp/testdata/test_input_BFLOAT16.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,27 +4,26 @@
import onnx
from onnx import helper
from onnx.helper import make_opsetid
from onnx import AttributeProto, TensorProto, GraphProto
from onnx import TensorProto

input_info = helper.make_tensor_value_info('input', TensorProto.BFLOAT16, [1, 5])
output_info = helper.make_tensor_value_info('output', TensorProto.BFLOAT16, [1, 5])

# Create a node (NodeProto) - This is based on Pad-11
node_def = helper.make_node(
'Identity', # node name
['input'], # inputs
['output'] # outputs
'Identity', # node name
['input'], # inputs
['output'] # outputs
)

graph_def = helper.make_graph(nodes=[node_def], name='test_types_BLOAT16',
inputs=[input_info], outputs=[output_info])
inputs=[input_info], outputs=[output_info])

model_def = helper.make_model(graph_def, producer_name='AIInfra',
opset_imports=[make_opsetid('', 13)])
opset_imports=[make_opsetid('', 13)])

onnx.checker.check_model(model_def)
onnx.helper.strip_doc_string(model_def)
final_model = onnx.shape_inference.infer_shapes(model_def)
onnx.checker.check_model(final_model)
onnx.save(final_model, 'test_types_BFLOAT16.onnx')

18 changes: 9 additions & 9 deletions csharp/testdata/test_input_FLOAT16.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,26 +4,26 @@
import onnx
from onnx import helper
from onnx.helper import make_opsetid
from onnx import AttributeProto, TensorProto, GraphProto
from onnx import TensorProto

input_info = helper.make_tensor_value_info('input', TensorProto.FLOAT16, [1, 5])
output_info = helper.make_tensor_value_info('output', TensorProto.FLOAT16, [1, 5])

# Create a node (NodeProto) - This is based on Pad-11
node_def = helper.make_node(
'Slice', # node name
['input'], # inputs
['output'], # outputs
axes=[0,1], # attributes
ends=[1,5],
starts=[0,0]
'Slice', # node name
['input'], # inputs
['output'], # outputs
axes=[0, 1], # attributes
ends=[1, 5],
starts=[0, 0]
)

graph_def = helper.make_graph(nodes=[node_def], name='test_input_FLOAT16',
inputs=[input_info], outputs=[output_info])
inputs=[input_info], outputs=[output_info])

model_def = helper.make_model(graph_def, producer_name='AIInfra',
opset_imports=[make_opsetid('', 7)])
opset_imports=[make_opsetid('', 7)])

onnx.checker.check_model(model_def)
onnx.helper.strip_doc_string(model_def)
Expand Down
5 changes: 4 additions & 1 deletion samples/python/training/orttrainer/mnist/ort_mnist.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from torchvision import datasets, transforms

import onnxruntime
from onnxruntime.training import ORTTrainer, ORTTrainerOptions, optim, checkpoint
from onnxruntime.training import ORTTrainer, ORTTrainerOptions, optim


# Pytorch model
Expand All @@ -34,9 +34,11 @@ def mnist_model_description():
'outputs': [('loss', [], True),
('probability', ['batch', 10])]}


def my_loss(x, target):
    """Return the negative log-likelihood loss of ``x`` against ``target``.

    ``x`` is first passed through log-softmax along dimension 1, and the
    result is handed to ``F.nll_loss`` with its default settings.
    """
    log_probabilities = F.log_softmax(x, dim=1)
    return F.nll_loss(log_probabilities, target)


# Helpers
def train(log_interval, trainer, device, train_loader, epoch, train_steps):
for batch_idx, (data, target) in enumerate(train_loader):
Expand Down Expand Up @@ -151,5 +153,6 @@ def main():
if args.save_path:
torch.save(model.state_dict(), os.path.join(args.save_path, "mnist_cnn.pt"))


if __name__ == '__main__':
main()
2 changes: 2 additions & 0 deletions samples/python/training/orttrainer/mnist/pytorch_mnist.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ def my_loss(x, target, is_train=True):
else:
return F.nll_loss(F.log_softmax(x, dim=1), target, reduction='sum')


# Helpers
def train(args, model, device, train_loader, optimizer, epoch):
model.train()
Expand Down Expand Up @@ -127,5 +128,6 @@ def main():
if args.save_path:
torch.save(model.state_dict(), os.path.join(args.save_path, "mnist_cnn.pt"))


if __name__ == '__main__':
main()
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import argparse
import math
import torch
import onnxruntime

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,14 +30,14 @@ def legacy_transformer_model_description(bptt=35, batch_size=20, ntokens=28785):
label_desc = Legacy_IODescription('label', [bptt * batch_size])
loss_desc = Legacy_IODescription('loss', [])
predictions_desc = Legacy_IODescription('predictions', [bptt, batch_size, ntokens])
return Legacy_ModelDescription([input_desc, label_desc],[loss_desc, predictions_desc]),\
Legacy_IODescription('__learning_rate', [1])
return (Legacy_ModelDescription([input_desc, label_desc], [loss_desc, predictions_desc]),
Legacy_IODescription('__learning_rate', [1]))


def legacy_transformer_model_description_dynamic_axes(ntokens=28785):
input_desc = Legacy_IODescription('input1', ['bptt', 'batch_size'])
label_desc = Legacy_IODescription('label', ['bptt_x_batch_size'])
loss_desc = Legacy_IODescription('loss', [])
predictions_desc = Legacy_IODescription('predictions', ['bptt', 'batch_size', ntokens])
return Legacy_ModelDescription([input_desc, label_desc],[loss_desc, predictions_desc]),\
Legacy_IODescription('__learning_rate', [1])
return (Legacy_ModelDescription([input_desc, label_desc], [loss_desc, predictions_desc]),
Legacy_IODescription('__learning_rate', [1]))
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import math
import torch
import torch.nn as nn
import torch.nn.functional as F


class TransformerModel(nn.Module):
Expand Down
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
import argparse
import math
import torch
import torch.nn as nn
import torch.nn.functional as F

from utils import prepare_data, get_batch
from pt_model import TransformerModel
Expand Down
11 changes: 6 additions & 5 deletions samples/python/training/orttrainer/pytorch_transformer/utils.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
import io
import os
import torch
import torchtext
from torchtext.utils import download_from_url, extract_archive
from torchtext.data.utils import get_tokenizer
from torchtext.vocab import build_vocab_from_iterator


def batchify(data, bsz, device):
# Divide the dataset into bsz parts.
nbatch = data.size(0) // bsz
Expand Down Expand Up @@ -36,15 +36,16 @@ def prepare_data(device='cpu', train_batch_size=20, eval_batch_size=20, data_dir
extract_path = os.path.join(data_dir, 'extracted')
os.makedirs(extract_path, exist_ok=True)

test_filepath, valid_filepath, train_filepath = extract_archive(download_from_url(url, root=download_path), to_path=extract_path)
test_filepath, valid_filepath, train_filepath = extract_archive(download_from_url(url, root=download_path),
to_path=extract_path)
tokenizer = get_tokenizer('basic_english')
vocab = build_vocab_from_iterator(map(tokenizer,
iter(io.open(train_filepath,
encoding="utf8"))))
iter(io.open(train_filepath,
encoding="utf8"))))

def data_process(raw_text_iter):
data = [torch.tensor([vocab[token] for token in tokenizer(item)],
dtype=torch.long) for item in raw_text_iter]
dtype=torch.long) for item in raw_text_iter]
return torch.cat(tuple(filter(lambda t: t.numel() > 0, data)))

train_data = data_process(iter(io.open(train_filepath, encoding="utf8")))
Expand Down
Loading

0 comments on commit baf8c39

Please sign in to comment.