diff --git a/Makefile b/Makefile index cd6ddd8..269d167 100644 --- a/Makefile +++ b/Makefile @@ -1,8 +1,9 @@ -# Makefile for assignment 1, CPSC 521 +# Makefile for assignment 2, MPE version, CPSC 521 CC = mpicc CFLAGS = -Wall -Wextra -std=c99 -LDFLAGS = -lm +#LDFLAGS = -mpe=mpilog -lm -lmpe +LDFLAGS = -lm -lmpe TARGET = nbody SOURCES = $(wildcard *.c) diff --git a/nbody.c b/nbody.c index 067b4f6..fefe072 100644 --- a/nbody.c +++ b/nbody.c @@ -1,6 +1,7 @@ /* This program is written in C99. */ #include +#include #include #include #include // for memcpy @@ -60,6 +61,16 @@ void simulation_step(body_t *body_array, int granularity, int self, int bodies void send_body_array(body_t *body, int count, int dest, int tag); void recv_body_array(body_t *body, int count, int source, int tag); +enum state_t { + STATE_INIT_1, STATE_INIT_2, + STATE_COMPUTE_1, STATE_COMPUTE_2, + STATE_COMMUNICATE_1, STATE_COMMUNICATE_2, + STATE_LOCAL_1, STATE_LOCAL_2, + STATE_FINAL_1, STATE_FINAL_2, + STATES +}; +int state[STATES]; + int main(int argc, char *argv[]) { MPI_Init(&argc, &argv); #ifdef DO_TIMING @@ -70,6 +81,23 @@ int main(int argc, char *argv[]) { MPI_Comm_rank(MPI_COMM_WORLD, &rank); MPI_Comm_size(MPI_COMM_WORLD, &size); + MPE_Init_log(); + for(int i = 0; i < STATES; i ++) { + state[i] = MPE_Log_get_event_number(); + } + if(rank == 0) { + MPE_Describe_state(state[STATE_INIT_1], state[STATE_INIT_2], + "Initialize", "red"); + MPE_Describe_state(state[STATE_COMPUTE_1], state[STATE_COMPUTE_2], + "Shared compute", "blue"); + MPE_Describe_state(state[STATE_COMMUNICATE_1], state[STATE_COMMUNICATE_2], + "Communicate", "yellow"); + MPE_Describe_state(state[STATE_LOCAL_1], state[STATE_LOCAL_2], + "Local compute", "green"); + MPE_Describe_state(state[STATE_FINAL_1], state[STATE_FINAL_2], + "Finalize", "orange"); + } + if(argc != 4) { if(rank == 0) printf(USAGE, argv[0]); return 1; @@ -82,8 +110,10 @@ int main(int argc, char *argv[]) { data = parse_input(argv[3], size*granularity); } + MPE_Start_log(); setup_simulation(data, rounds, granularity, rank, size); free(data); // do nothing if NULL + MPE_Finish_log("cpilog"); } #ifdef DO_TIMING @@ -139,6 +169,7 @@ body_t *parse_input(const char *filename, int lines) { void setup_simulation(body_t *data, int rounds, int granularity, int self, int bodies) { + MPE_Log_event(state[STATE_INIT_1], 0, "start init"); body_t *body_array = malloc(granularity * sizeof(*body_array)); if(self == 0) { @@ -158,9 +189,11 @@ void setup_simulation(body_t *data, int rounds, int granularity, } free(temp_body); } + MPE_Log_event(state[STATE_INIT_2], 0, "end init"); perform_simulation(body_array, rounds, granularity, self, bodies); + MPE_Log_event(state[STATE_FINAL_1], 0, "start finalize"); if(self == 0) { for(int j = 0; j < granularity; j ++) { printf("%.10f %.10f %f\n", @@ -188,6 +221,7 @@ void setup_simulation(body_t *data, int rounds, int granularity, free(temp_body); } free(body_array); + MPE_Log_event(state[STATE_FINAL_2], 0, "end finalize"); } void perform_simulation(body_t *body_array, int rounds, int granularity, @@ -245,6 +279,7 @@ void simulation_step(body_t *body_array, int granularity, int self, int bodies continue; } + MPE_Log_event(state[STATE_COMMUNICATE_1], 0, "start send/recv"); body_t *other = malloc(granularity * sizeof(*other)); recv_body_array(other, granularity, prev, TAG_SIMULATE); #ifdef VISUALIZE_OUTPUT @@ -253,7 +288,9 @@ void simulation_step(body_t *body_array, int granularity, int self, int bodies } #endif if(p + 1 < bodies) send_body_array(other, granularity, next, TAG_SIMULATE); + MPE_Log_event(state[STATE_COMMUNICATE_2], 0, "end send/recv"); + MPE_Log_event(state[STATE_COMPUTE_1], 0, "start merge computation"); for(int a = 0; a < granularity; a ++) { for(int b = 0; b < granularity; b ++) { vector_t pos_vector; @@ -270,8 +307,10 @@ void simulation_step(body_t *body_array, int granularity, int self, int bodies } } free(other); + MPE_Log_event(state[STATE_COMPUTE_2], 0, "end merge computation"); } + MPE_Log_event(state[STATE_LOCAL_1], 0, "start local computation"); for(int a = 0; a < granularity; a ++) { for(int b = a + 1; b < granularity; b ++) { vector_t pos_vector; @@ -301,6 +340,7 @@ void simulation_step(body_t *body_array, int granularity, int self, int bodies body_array[a].pos.x, body_array[a].pos.y, body_array[a].velocity.x, body_array[a].velocity.y, body_array[a].mass); } + MPE_Log_event(state[STATE_LOCAL_2], 0, "end local computation"); free(force_x); free(force_y);