... | @@ -142,21 +142,21 @@ With B=16, T=1024, 1 rank/socket |
... | @@ -142,21 +142,21 @@ With B=16, T=1024, 1 rank/socket |
|
|
|
|
|
| Number of ranks | time/iteration | tokens/s | tokens/(s.cpus) |
|
|
| Number of ranks | time/iteration | tokens/s | tokens/(s.cpus) |
|
|
| ------ | ------ | ------ | ------ |
|
|
| ------ | ------ | ------ | ------ |
|
|
| 1 | 67000 ms | | |
|
|
| 1 | 67925 ms | 241 | 4.3 |
|
|
| 2 | 31739 ms | 516 | 4.6 |
|
|
| 2 | 31739 ms | 516 | 4.6 |
|
|
| 4 | 15084 ms | 1086 | 4.9 |
|
|
| 4 | 15084 ms | 1086 | 4.9 |
|
|
| 8 | 7624 ms | 2149 | 4.8 |
|
|
| 8 | 7624 ms | 2149 | 4.8 |
|
|
| 16 | 4084 ms | 4011 | 4.48 |
|
|
| 16 | 4084 ms | 4011 | 4.48 |
|
|
|
|
|
|
|
|
|
|
# Increasing the batch size per rank
|
|
# Increasing the batch size
|
|
|
|
|
|
4 ranks, 1 rank/socket, T=1024
|
|
4 ranks, 1 rank/socket, T=1024
|
|
|
|
|
|
| Batch size (B) | time/iteration | tokens/s | tokens/(s.cpus) |
|
|
| Batch size (B) | Batch size/rank | time/iteration | tokens/s | tokens/(s.cpus) |
|
|
| ------ | ------ | ------ | ------ |
|
|
| ------ | ------ | ------ | ------ | ------ |
|
|
| 4 | | | |
|
|
| 4 | 1 | 3984 ms | 1028 | 4.6 |
|
|
| 8 | | | |
|
|
| 8 | 2 | 7421 ms | 1104 | 4.9 |
|
|
| 16 | | | |
|
|
| 16 | 4 | 15084 ms | 1086 | 4.9 |
|
|
| 32 | | | |
|
|
| 32 | 8 | | | |
|
|
| 64 | | | |
|
| 64 | 16 | | | |