Commit 2359ddbe authored by salbiach

modified readme

parent 36ffb811
@@ -36,7 +36,8 @@ Run:
`bash configure_compss.sh`
Modify runcompss.sh to your needs
Modify runcompss.sh to your needs; for example, if some epochs should be performed asynchronously, add the following parameter to the runcompss call:
`--num_async_epochs=5`
Run:
@@ -119,16 +119,3 @@ class Eddl_Compss_Distributed:
        final_parameters = net_parametersToNumpy(eddl.get_parameters(model))
        return final_parameters

    @constraint(computing_units="${OMP_NUM_THREADS}")
    @task(accumulated_parameters=COMMUTATIVE, parameters_to_aggregate=IN, mult_factor=IN, target_direction=IN)
    def aggregate_parameters_async(self, accumulated_parameters, parameters_to_aggregate, mult_factor):
        # Fold this worker's parameters into the shared accumulator, layer by layer,
        # by pairwise averaging; COMMUTATIVE lets contributions be applied in any order.
        # Note: mult_factor is received but not used in this averaging.
        for i in range(0, len(accumulated_parameters)):
            for j in range(0, len(accumulated_parameters[i])):
                accumulated_parameters[i][j] = (
                    (accumulated_parameters[i][j] + parameters_to_aggregate[i][j]) / 2).astype(np.float32)
        return accumulated_parameters
\ No newline at end of file
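
As an aside, the following numpy-only toy sketch (hypothetical single-layer parameters and made-up values, not code from this repository) illustrates what the pairwise averaging above computes: each new contribution halves the current accumulator, so later contributions carry more weight, and `mult_factor` does not enter the result.

```python
import numpy as np

# accumulated_parameters: one layer with a single 2-element weight array (toy shape)
accumulated = [[np.zeros(2, dtype=np.float32)]]

# Two hypothetical worker contributions, arriving one after the other
contributions = [
    [[np.array([4.0, 8.0], dtype=np.float32)]],
    [[np.array([2.0, 2.0], dtype=np.float32)]],
]

for contribution in contributions:
    for i in range(len(accumulated)):
        for j in range(len(accumulated[i])):
            # Same pairwise average as aggregate_parameters_async
            accumulated[i][j] = ((accumulated[i][j] + contribution[i][j]) / 2).astype(np.float32)

print(accumulated[0][0])  # [2. 3.] == 0/4 + [4., 8.]/4 + [2., 2.]/2
```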
@@ -74,50 +74,3 @@ def train_batch(model, x_train, y_train, num_workers, num_epochs_for_param_sync,
    # Set the parameters of the model to the aggregated parameters
    eddl.set_parameters(model, net_parametersToTensor(final_weights))


def fit_async(model, x_train, y_train, num_workers, num_epochs_for_param_sync, max_num_async_epochs,
              workers_batch_size):
    global compss_object

    # Define the number of images corresponding to each computing unit
    num_total_samples = x_train.shape[0]
    num_images_per_worker = int(num_total_samples / num_workers)

    # Variable where parameters will be aggregated asynchronously
    accumulated_parameters = net_parametersToNumpy(eddl.get_parameters(model))

    # Define the parameters for each worker
    workers_parameters = [net_parametersToNumpy(eddl.get_parameters(model)) for i in range(0, num_workers)]

    x_blocks = [x[0] for x in paired_partition(x_train, y_train)]
    y_blocks = [x[1] for x in paired_partition(x_train, y_train)]

    # Until the maximum number of asynchronous epochs is reached
    for i in range(0, max_num_async_epochs):
        # Train and aggregate the parameters asynchronously for each distributed computing unit
        for j in range(0, num_workers):
            shuffled_x, shuffled_y = block_shuffle_async(
                x_blocks[j],
                y_blocks[j],
                workers_parameters[j])
            x_blocks[j], y_blocks[j] = [shuffled_x], [shuffled_y]

            workers_parameters[j] = compss_object.train_batch(
                x_blocks[j],
                y_blocks[j],
                workers_parameters[j],
                num_images_per_worker,
                num_epochs_for_param_sync,
                workers_batch_size)

            workers_parameters[j] = compss_object.aggregate_parameters_async(
                accumulated_parameters,
                workers_parameters[j],
                1 / num_workers)

    # Wait until every computing unit has aggregated its parameters
    accumulated_parameters = compss_wait_on(accumulated_parameters)

    # Set the model parameters to the aggregated parameters
    eddl.set_parameters(model, net_parametersToTensor(accumulated_parameters))
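
For context, here is a minimal, self-contained sketch of the COMMUTATIVE-accumulator pattern that `fit_async` relies on (hypothetical task names and illustrative sizes, not code from this repository; it assumes the COMPSs runtime and would be launched with runcompss): worker contributions are folded into a shared accumulator in whatever order tasks finish, and a single `compss_wait_on` at the end retrieves the aggregated value.

```python
import numpy as np
from pycompss.api.api import compss_wait_on
from pycompss.api.parameter import COMMUTATIVE, IN
from pycompss.api.task import task


@task(returns=1)
def train_on_block(seed):
    # Stand-in for a worker training step: produce some "trained" parameters
    rng = np.random.default_rng(seed)
    return rng.standard_normal(4).astype(np.float32)


@task(accumulator=COMMUTATIVE, contribution=IN)
def aggregate(accumulator, contribution):
    # Pairwise average, as in aggregate_parameters_async; COMMUTATIVE means
    # these updates may be applied in whatever order the results arrive
    accumulator[:] = ((accumulator + contribution) / 2).astype(np.float32)


def main():
    num_workers = 4
    accumulator = np.zeros(4, dtype=np.float32)

    for worker in range(num_workers):
        contribution = train_on_block(worker)  # asynchronous task
        aggregate(accumulator, contribution)   # folded in without a global barrier

    # Single synchronisation point, mirroring compss_wait_on(accumulated_parameters)
    accumulator = compss_wait_on(accumulator)
    print(accumulator)


if __name__ == "__main__":
    main()
```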