Commit dd077c1f authored by vmasip

mnist async in use

parent 0f02c0f4
@@ -5,5 +5,5 @@
 #source get_pods_ip.sh
 #masterIP=(${Nodes[2]})
 echo "MasterIP is:" $MY_POD_IP
-cd pyeddl/third_party/compss_runtime/
-conda run --no-capture-output -n pyeddl_pycompss_env runcompss -d --lang=python --python_interpreter=python3 --project=/root/project.xml --resources=/root/resources.xml --master_name=$MY_POD_IP eddl_train_batch_compss_mnist_sync.py
\ No newline at end of file
+cd pyeddl
+conda run --no-capture-output -n pyeddl_pycompss_env runcompss -d --lang=python --python_interpreter=python3 --project=/root/project.xml --resources=/root/resources.xml --master_name=$MY_POD_IP eddl_train_batch_compss_mnist_async.py
\ No newline at end of file
"""\
TRAIN_BATCH example.
"""
import argparse
import platform
import sys
from timeit import default_timer as timer
import pyeddl.eddl as eddl
from pycompss.api.api import compss_wait_on
from pyeddl.tensor import Tensor as eddlT
import eddl_compss_distributed_api as compss_api
from cvars import *
from eddl_array import array
def main(args):
    print("E: ", platform.uname())
    # eddl.download_mnist()

    start_time = timer()

    num_workers = args.num_workers
    num_epochs = args.num_epochs
    workers_batch_size = args.workers_batch_size
    num_epochs_for_param_sync = args.num_epochs_for_param_sync
    max_num_async_epochs = args.max_num_async_epochs  # parsed but currently unused
    num_classes = 10

    # Model that works
    in_ = eddl.Input([784])
    layer = in_
    layer = eddl.ReLu(eddl.Dense(layer, 1024))
    layer = eddl.ReLu(eddl.Dense(layer, 1024))
    layer = eddl.ReLu(eddl.Dense(layer, 1024))
    out = eddl.Softmax(eddl.Dense(layer, num_classes))
    net = eddl.Model([in_], [out])

    # Build the model through the distributed COMPSs API
    compss_api.build(
        net,
        eddl.sgd(CVAR_SGD1, CVAR_SGD2),
        ["soft_cross_entropy"],
        ["categorical_accuracy"],
        eddl.CS_CPU(),
        True
    )
    eddl.summary(net)

    # Load the MNIST tensors and normalize pixel values to [0, 1]
    x_train = eddlT.load(CVAR_DATASET_PATH + "mnist_trX.bin")
    y_train = eddlT.load(CVAR_DATASET_PATH + "mnist_trY.bin")
    x_test = eddlT.load(CVAR_DATASET_PATH + "mnist_tsX.bin")
    y_test = eddlT.load(CVAR_DATASET_PATH + "mnist_tsY.bin")
    eddlT.div_(x_train, 255.0)
    eddlT.div_(x_test, 255.0)
    x_train, y_train = compss_wait_on(x_train, y_train)

    # Distribute data: one block of training samples per worker
    train_images_per_worker = int(eddlT.getShape(x_train)[0] / num_workers)
    x_train_dist = array(x_train, train_images_per_worker)
    y_train_dist = array(y_train, train_images_per_worker)

    # Model training
    print("Model training...")
    print("Number of epochs: ", num_epochs)
    print("Number of epochs for parameter synchronization: ", num_epochs_for_param_sync)

    compss_api.fit_async(
        net,
        x_train_dist,
        y_train_dist,
        num_workers,
        num_epochs_for_param_sync,
        num_epochs,
        workers_batch_size)

    end_time = timer()
    final_time = end_time - start_time
    print("Elapsed time: ", final_time, " seconds")

    # Model evaluation
    print("Evaluating model against test set")
    eddl.evaluate(net, [x_test], [y_test])
if __name__ == "__main__":
parser = argparse.ArgumentParser(description=__doc__)
parser.add_argument("--num_workers", type=int, metavar="INT", default=4)
parser.add_argument("--num_epochs", type=int, metavar="INT", default=1)
parser.add_argument("--num_epochs_for_param_sync", type=int, metavar="INT", default=1)
parser.add_argument("--max_num_async_epochs", type=int, metavar="INT", default=1)
parser.add_argument("--workers_batch_size", type=int, metavar="INT", default=250)
# parser.add_argument("--epochs", type=int, metavar="INT", default=4)
# parser.add_argument("--batch-size", type=int, metavar="INT", default=1000)
# parser.add_argument("--gpu", action="store_true")
main(parser.parse_args(sys.argv[1:]))
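Usage note: the launch script above runs the example with the defaults defined by argparse. If other values are needed, application flags should be appendable after the script name in the same runcompss call; an illustrative invocation (values are examples, same environment as the launch script assumed):

conda run --no-capture-output -n pyeddl_pycompss_env runcompss --lang=python --python_interpreter=python3 --project=/root/project.xml --resources=/root/resources.xml --master_name=$MY_POD_IP eddl_train_batch_compss_mnist_async.py --num_workers 4 --num_epochs 1 --workers_batch_size 250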