PPC-BSC / Software / Deep Health Compss

Commit 2359ddbe, authored Jan 25, 2022 by salbiach
modified readme
Parent: 36ffb811
3 changed files
Readme.md
...
...
@@ -36,7 +36,8 @@ Run:
`bash configure_compss.sh`
Modify runcompss.sh to your needs
Modify runcompss.sh to your needs; for example, if you want some epochs to be performed asynchronously, add the following parameter to the runcompss call:
`--num_async_epochs=5`
Run:
...
...
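For illustration only (this sketch is not part of the repository), here is how a launcher script might read the `--num_async_epochs` flag appended to the runcompss call; the presumed mapping to the `max_num_async_epochs` argument of `fit_async` in eddl_compss_distributed_api.py is an assumption.

```python
# Hypothetical launcher sketch (not from this repository): parse the flag that the
# README tells you to append to the runcompss call.
import argparse

parser = argparse.ArgumentParser()
parser.add_argument("--num_async_epochs", type=int, default=5,
                    help="number of asynchronous epochs to run (illustrative default)")
args, _ = parser.parse_known_args()

# Presumed mapping: this value would be passed as max_num_async_epochs to
# eddl_compss_distributed_api.fit_async (see the diff of that file below).
print("asynchronous epochs requested:", args.num_async_epochs)
```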
docker/pyeddl/eddl_compss_distributed.py
...
...
@@ -119,16 +119,3 @@ class Eddl_Compss_Distributed:
        final_parameters = net_parametersToNumpy(eddl.get_parameters(model))

        return final_parameters

    @constraint(computing_units="${OMP_NUM_THREADS}")
    @task(accumulated_parameters=COMMUTATIVE, parameters_to_aggregate=IN, mult_factor=IN, target_direction=IN)
    def aggregate_parameters_async(self, accumulated_parameters, parameters_to_aggregate, mult_factor):

        for i in range(0, len(accumulated_parameters)):
            for j in range(0, len(accumulated_parameters[i])):
                accumulated_parameters[i][j] = ((accumulated_parameters[i][j] + parameters_to_aggregate[i][j]) / 2).astype(np.float32)

        return accumulated_parameters
\ No newline at end of file
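For context, a standalone sketch of the running-average rule that the removed `aggregate_parameters_async` task applies, assuming plain NumPy arrays outside the COMPSs runtime; the nested-list layout imitating `net_parametersToNumpy` output is an assumption for illustration. The `COMMUTATIVE` direction on `accumulated_parameters` lets the COMPSs runtime apply these aggregation tasks in any order, which is what makes the asynchronous scheme possible.

```python
# Minimal NumPy-only illustration of the layer-by-layer averaging performed above;
# no COMPSs decorators or scheduling involved.
import numpy as np

accumulated = [[np.ones((2, 2), dtype=np.float32)]]         # running aggregate: one layer, one array
to_aggregate = [[np.full((2, 2), 3.0, dtype=np.float32)]]   # parameters arriving from one worker

for i in range(len(accumulated)):
    for j in range(len(accumulated[i])):
        accumulated[i][j] = ((accumulated[i][j] + to_aggregate[i][j]) / 2).astype(np.float32)

print(accumulated[0][0])  # each entry is now 2.0, the mean of 1.0 and 3.0
```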
docker/pyeddl/eddl_compss_distributed_api.py
...
...
@@ -74,50 +74,3 @@ def train_batch(model, x_train, y_train, num_workers, num_epochs_for_param_sync,
    # Set the parameters of the model to the aggregated parameters
    eddl.set_parameters(model, net_parametersToTensor(final_weights))


def fit_async(model, x_train, y_train, num_workers, num_epochs_for_param_sync, max_num_async_epochs, workers_batch_size):

    global compss_object

    # Define the number of images corresponding to each computing unit
    num_total_samples = x_train.shape[0]
    num_images_per_worker = int(num_total_samples / num_workers)

    # Variable where parameters will be aggregated asynchronously
    accumulated_parameters = net_parametersToNumpy(eddl.get_parameters(model))

    # Define the parameters for each worker
    workers_parameters = [net_parametersToNumpy(eddl.get_parameters(model)) for i in range(0, num_workers)]

    x_blocks = [x[0] for x in paired_partition(x_train, y_train)]
    y_blocks = [x[1] for x in paired_partition(x_train, y_train)]

    # Until the maximum number of asynchronous epochs is reached
    for i in range(0, max_num_async_epochs):

        # Train and aggregate the parameters asynchronously for each distributed computing unit
        for j in range(0, num_workers):
            shuffled_x, shuffled_y = block_shuffle_async(x_blocks[j], y_blocks[j], workers_parameters[j])
            x_blocks[j], y_blocks[j] = [shuffled_x], [shuffled_y]

            workers_parameters[j] = compss_object.train_batch(
                x_blocks[j], y_blocks[j], workers_parameters[j],
                num_images_per_worker, num_epochs_for_param_sync, workers_batch_size)

            workers_parameters[j] = compss_object.aggregate_parameters_async(
                accumulated_parameters, workers_parameters[j], 1 / num_workers)

    # Wait until every computing unit has aggregated its parameters
    accumulated_parameters = compss_wait_on(accumulated_parameters)

    # Set the model parameters to the aggregated parameters
    eddl.set_parameters(model, net_parametersToTensor(accumulated_parameters))
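For orientation, a hypothetical call sketch for `fit_async`. The pyeddl model, the data shapes, and the assumption that the module's COMPSs initialisation (which creates the global `compss_object`) has already run are all illustrative and not taken from this diff; the real pipeline may expect a different array or block type for `x_train` and `y_train`.

```python
# Hypothetical usage sketch: everything except the fit_async signature is an assumption.
import numpy as np
from pyeddl import eddl
import eddl_compss_distributed_api as compss_api

# Illustrative pyeddl model (MNIST-sized input, 10 classes).
in_ = eddl.Input([784])
out = eddl.Softmax(eddl.Dense(eddl.ReLu(eddl.Dense(in_, 128)), 10))
net = eddl.Model([in_], [out])
eddl.build(net, eddl.sgd(0.01), ["soft_cross_entropy"], ["categorical_accuracy"], eddl.CS_CPU())

# Illustrative training data.
x_train = np.random.rand(1024, 784).astype(np.float32)
y_train = np.eye(10, dtype=np.float32)[np.random.randint(0, 10, 1024)]

# Assumes the API module has already been initialised so that compss_object exists.
compss_api.fit_async(net, x_train, y_train,
                     num_workers=4,
                     num_epochs_for_param_sync=1,
                     max_num_async_epochs=5,
                     workers_batch_size=64)
```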