Commit c8a5bf8b authored by Gaurav Saxena

BioFVM with X decomposition - obtaining much better timings than the Z...

BioFVM with X decomposition, which obtains much better timings than the Z decomposition. Also remember to use the ppr notation (`--map-by ppr:...`) on the mpiexec line in the script file. The best results are obtained with 2 processes per node, i.e. one per NUMA region; the number of nodes can then be increased.
parent 975859a6
......@@ -778,7 +778,7 @@ void Microenvironment::simulate_bulk_sources_and_sinks( double dt )
void Microenvironment::simulate_cell_sources_and_sinks( std::vector<Basic_Agent*>& basic_agent_list , double dt )
{
//#pragma omp parallel for
#pragma omp parallel for
for( int i=0 ; i < basic_agent_list.size() ; i++ )
{
basic_agent_list[i]->simulate_secretion_and_uptake( this , dt );
......
This diff is collapsed.
......@@ -2,8 +2,15 @@ VERSION := 1.1.6
PROGRAM_NAME := make_lib
#For GCC
CC := mpic++
#For Intel
#CC := mpiicpc
#For TAU
#CC := tau_cxx.sh
# change this to your own CPU architecture.
# Here is a list for gcc 4.9.0
# https://gcc.gnu.org/onlinedocs/gcc-4.9.0/gcc/i386-and-x86-64-Options.html#i386-and-x86-64-Options
......@@ -24,7 +31,10 @@ ARCH := native # best auto-tuning
# ARCH := silvermont
# ARCH := nocona #64-bit pentium 4 or later
CFLAGS := -march=$(ARCH) -O3 -fomit-frame-pointer -mfpmath=both -fopenmp -m64 -std=c++11 -g
# The first CFLAGS line is for GCC; the second (commented out) is for the Intel compiler (icpc)
CFLAGS := -march=$(ARCH) -O2 -fomit-frame-pointer -mfpmath=both -fopenmp -m64 -std=c++11 -g
#CFLAGS := -march=$(ARCH) -O2 -fomit-frame-pointer -qopenmp -m64 -std=c++11 -g
BioFVM_OBJECTS := BioFVM_vector.o BioFVM_matlab.o BioFVM_utilities.o BioFVM_mesh.o \
BioFVM_microenvironment.o BioFVM_solvers.o BioFVM_basic_agent.o \
......
This diff is collapsed.
No preview for this file type
This diff is collapsed.
No preview for this file type
TIME FOR RESIZING MICROENVIRONMENT = 0.758827
TIME FOR GENERATING GAUSSIAN PROFILE = 0.00794553
TIME FOR WRITING INITIAL CONCENTRATION FILE = 0.34451
TIME FOR CREATING ALL BASIC AGENTS = 0.000949931
TIME FOR SIMULATING (SOURCES+SINKS+DIFFUSION) = 8.48919
TIME FOR WRITING FINAL FILE = 0.280464
TOTAL PROGRAM EXECUTION TIME = 9.88661
#!/bin/bash
#SBATCH -n 48
#SBATCH -t 00:15:00
#SBATCH --cpus-per-task=1
#SBATCH -o output-%j
#SBATCH -e error-%j
#SBATCH --x11=batch
# set application and parameters
paraprof
#jumpshot tau.slog2
#!/bin/bash
#SBATCH --nodes=1
#SBATCH --ntasks-per-node=8
#SBATCH --cpus-per-task=1
#SBATCH -t 00:05:00
#SBATCH --nodes=4
#SBATCH --ntasks-per-node=2
#SBATCH --cpus-per-task=24
#SBATCH -t 00:15:00
#SBATCH -o output-%j
#SBATCH -e error-%j
#SBATCH --exclusive
mpiexec ./examples/tutorial1
#export OMP_SCHEDULE=STATIC
export OMP_DISPLAY_ENV=true
export OMP_NUM_THREADS=24
export OMP_PROC_BIND=spread
#export OMP_PLACES="{0:1}:48:1"
export OMP_PLACES=threads
mpiexec --map-by ppr:1:socket:pe=24 --report-bindings ./examples/tutorial1
#mpiexec --map-by socket --bind-to core --report-bindings ./examples/tutorial1
#mpiexec --map-by node --bind-to none --report-bindings ./examples/tutorial1
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment