/*****************************************************************************\ * test7.2.prog.c - Test of basic PMI library functionality ***************************************************************************** * Copyright (C) 2005-2006 The Regents of the University of California. * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). * Written by Morris Jette <jette1@llnl.gov> * CODE-OCEC-09-009. All rights reserved. * * This file is part of Slurm, a resource management program. * For details, see <https://slurm.schedmd.com/>. * Please also read the included file: DISCLAIMER. * * Slurm is free software; you can redistribute it and/or modify it under * the terms of the GNU General Public License as published by the Free * Software Foundation; either version 2 of the License, or (at your option) * any later version. * * Slurm is distributed in the hope that it will be useful, but WITHOUT ANY * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more * details. * * You should have received a copy of the GNU General Public License along * with Slurm; if not, write to the Free Software Foundation, Inc., * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. \*****************************************************************************/ #include <stdio.h> #include <stdlib.h> #include <slurm/pmi.h> #include <sys/time.h> #if 0 /* Typical MPICH2 use */ # define BARRIER_CNT 0 # define PUTS_PER_BARRIER 0 #else /* Typical MVAPICH2 use * * Adjust job time limit and timeout in test7.2 as needed * for large values. */ # define BARRIER_CNT 4 # define PUTS_PER_BARRIER 0 #endif #define _DEBUG 0 #define OFFSET_1 1234 #define OFFSET_2 5678 int main (int argc, char **argv) { int i, j, rc; int nprocs, procid; int clique_size, *clique_ranks = NULL; char *jobid_ptr, *nprocs_ptr, *procid_ptr; int pmi_rank, pmi_size, kvs_name_len, key_len, val_len; PMI_BOOL initialized; char *key, *val, *kvs_name; struct timeval tv1, tv2; long delta_t; char tv_str[20]; gettimeofday(&tv1, NULL); /* Get process count and our id from environment variables */ jobid_ptr = getenv("SLURM_JOB_ID"); nprocs_ptr = getenv("SLURM_NPROCS"); procid_ptr = getenv("SLURM_PROCID"); if (jobid_ptr == NULL) { printf("WARNING: PMI test not run under Slurm\n"); nprocs = 1; procid = 0; } else if ((nprocs_ptr == NULL) || (procid_ptr == NULL)) { printf("FAILURE: Slurm environment variables not set\n"); exit(1); } else { nprocs = atoi(nprocs_ptr); procid = atoi(procid_ptr); } /* Validate process count and our id */ if ((nprocs < 1) || (nprocs > 9999)) { printf("FAILURE: Invalid nprocs %s\n", nprocs_ptr); exit(1); } if ((procid < 0) || (procid > 9999)) { printf("FAILURE: Invalid procid %s\n", procid_ptr); exit(1); } /* Get process count and size from PMI and validate */ if ((rc = PMI_Init(&i)) != PMI_SUCCESS) { printf("FAILURE: PMI_Init: %d\n", rc); exit(1); } initialized = PMI_FALSE; if ((rc = PMI_Initialized(&initialized)) != PMI_SUCCESS) { printf("FAILURE: PMI_Initialized: %d\n", rc); exit(1); } if (initialized != PMI_TRUE) { printf("FAILURE: PMI_Initialized returned false\n"); exit(1); } if ((rc = PMI_Get_rank(&pmi_rank)) != PMI_SUCCESS) { printf("FAILURE: PMI_Get_rank: %d\n", rc); exit(1); } #if _DEBUG printf("PMI_Get_rank = %d\n", pmi_rank); #endif if ((rc = PMI_Get_size(&pmi_size)) != PMI_SUCCESS) { printf("FAILURE: PMI_Get_size: %d, task %d\n", rc, pmi_rank); exit(1); } #if _DEBUG printf("PMI_Get_size = %d\n", pmi_size); #endif if (pmi_rank != procid) { printf("FAILURE: Rank(%d) != PROCID(%d)\n", pmi_rank, procid); exit(1); } if (pmi_size != nprocs) { printf("FAILURE: Size(%d) != NPROCS(%d), task %d\n", pmi_size, nprocs, pmi_rank); exit(1); } if ((rc = PMI_Get_clique_size(&clique_size)) != PMI_SUCCESS) { printf("FAILURE: PMI_Get_clique_size: %d, task %d\n", rc, pmi_rank); exit(1); } clique_ranks = malloc(sizeof(int) * clique_size); if ((rc = PMI_Get_clique_ranks(clique_ranks, clique_size)) != PMI_SUCCESS) { printf("FAILURE: PMI_Get_clique_ranks: %d, task %d\n", rc, pmi_rank); exit(1); } #if _DEBUG for (i=0; i<clique_size; i++) printf("PMI_Get_clique_ranks[%d]=%d\n", i, clique_ranks[i]); #endif free(clique_ranks); if ((rc = PMI_KVS_Get_name_length_max(&kvs_name_len)) != PMI_SUCCESS) { printf("FAILURE: PMI_KVS_Get_name_length_max: %d, task %d\n", rc, pmi_rank); exit(1); } #if _DEBUG printf("PMI_KVS_Get_name_length_max = %d\n", kvs_name_len); #endif kvs_name = malloc(kvs_name_len); if ((rc = PMI_KVS_Get_my_name(kvs_name, kvs_name_len)) != PMI_SUCCESS) { printf("FAILURE: PMI_KVS_Get_my_name: %d, task %d\n", rc, pmi_rank); exit(1); } #if _DEBUG printf("PMI_KVS_Get_my_name = %s\n", kvs_name); #endif if ((rc = PMI_KVS_Get_key_length_max(&key_len)) != PMI_SUCCESS) { printf("FAILURE: PMI_KVS_Get_key_length_max: %d, task %d\n", rc, pmi_rank); exit(1); } key = malloc(key_len); if ((rc = PMI_KVS_Get_value_length_max(&val_len)) != PMI_SUCCESS) { printf("FAILURE: PMI_KVS_Get_value_length_max: %d, task %d\n", rc, pmi_rank); exit(1); } #if _DEBUG printf("PMI_KVS_Get_value_length_max = %d\n", val_len); #endif val = malloc(val_len); /* Build and set some key=val pairs */ snprintf(key, key_len, "ATTR_1_%d", procid); snprintf(val, val_len, "A%d", procid+OFFSET_1); if ((rc = PMI_KVS_Put(kvs_name, key, val)) != PMI_SUCCESS) { printf("FAILURE: PMI_KVS_Put(%s,%s,%s): %d, task %d\n", kvs_name, key, val, rc, pmi_rank); exit(1); } #if _DEBUG printf("PMI_KVS_Put(%s,%s,%s)\n", kvs_name, key, val); #endif snprintf(key, key_len, "attr_2_%d", procid); snprintf(val, val_len, "B%d", procid+OFFSET_2); if ((rc = PMI_KVS_Put(kvs_name, key, val)) != PMI_SUCCESS) { printf("FAILURE: PMI_KVS_Put(%s,%s,%s): %d, task %d\n", kvs_name, key, val, rc, pmi_rank); exit(1); } #if _DEBUG printf("PMI_KVS_Put(%s,%s,%s)\n", kvs_name, key, val); #endif /* Sync KVS across all tasks */ if ((rc = PMI_KVS_Commit(kvs_name)) != PMI_SUCCESS) { printf("FAILURE: PMI_KVS_Commit: %d, task %d\n", rc, pmi_rank); exit(1); } #if _DEBUG printf("PMI_KVS_Commit completed\n"); #endif if ((rc = PMI_Barrier()) != PMI_SUCCESS) { printf("FAILURE: PMI_Barrier: %d, task %d\n", rc, pmi_rank); exit(1); } #if _DEBUG printf("PMI_Barrier completed\n"); #endif /* Now lets get all keypairs and validate */ for (i=0; i<pmi_size; i++) { snprintf(key, key_len, "ATTR_1_%d", i); if ((rc = PMI_KVS_Get(kvs_name, key, val, val_len)) != PMI_SUCCESS) { printf("FAILURE: PMI_KVS_Get(%s): %d, task %d\n", key, rc, pmi_rank); exit(1); } if ((val[0] != 'A') || ((atoi(&val[1])-OFFSET_1) != i)) { printf("FAILURE: Bad keypair %s=%s, task %d\n", key, val, pmi_rank); exit(1); } #if _DEBUG if ((pmi_size <= 8) && (pmi_rank == 0)) /* limit output */ printf("PMI_KVS_Get(%s,%s) %s\n", kvs_name, key, val); #endif snprintf(key, key_len, "attr_2_%d", i); if ((rc = PMI_KVS_Get(kvs_name, key, val, val_len)) != PMI_SUCCESS) { printf("FAILURE: PMI_KVS_Get(%s): %d, task %d\n", key, rc, pmi_rank); exit(1); } if ((val[0] != 'B') || ((atoi(&val[1])-OFFSET_2) != i)) { printf("FAILURE: Bad keypair %s=%s, task %d\n", key,val, pmi_rank); exit(1); } #if _DEBUG if ((pmi_size <= 8) && (pmi_rank == 1)) /* limit output */ printf("PMI_KVS_Get(%s,%s) %s\n", kvs_name, key, val); #endif } /* use iterator */ if ((rc = PMI_KVS_Iter_first(kvs_name, key, key_len, val, val_len)) != PMI_SUCCESS) { printf("FAILURE: PMI_KVS_iter_first: %d, task %d\n", rc, pmi_rank); exit(1); } for (i=0; ; i++) { if (key[0] == '\0') { if (i != (pmi_size * 2)) { printf("FAILURE: PMI_KVS_iter_next " "cycle count(%d, %d), task %d\n", i, pmi_size, pmi_rank); } break; } #if _DEBUG if ((pmi_size <= 8) && (pmi_rank == 1)) { /* limit output */ printf("PMI_KVS_Iter_next(%s,%d): %s=%s\n", kvs_name, i, key, val); } #endif if ((rc = PMI_KVS_Iter_next(kvs_name, key, key_len, val, val_len)) != PMI_SUCCESS) { printf("FAILURE: PMI_KVS_iter_next: %d, task %d\n", rc, pmi_rank); exit(1); } } /* Build some more key=val pairs */ snprintf(key, key_len, "ATTR_3_%d", procid); snprintf(val, val_len, "C%d", procid+OFFSET_1); if ((rc = PMI_KVS_Put(kvs_name, key, val)) != PMI_SUCCESS) { printf("FAILURE: PMI_KVS_Put(%s,%s,%s): %d, task %d\n", kvs_name, key, val, rc, pmi_rank); exit(1); } #if _DEBUG printf("PMI_KVS_Put(%s,%s,%s)\n", kvs_name, key, val); #endif snprintf(key, key_len, "attr_4_%d", procid); snprintf(val, val_len, "D%d", procid+OFFSET_2); if ((rc = PMI_KVS_Put(kvs_name, key, val)) != PMI_SUCCESS) { printf("FAILURE: PMI_KVS_Put(%s,%s,%s): %d, task %d\n", kvs_name, key, val, rc, pmi_rank); exit(1); } #if _DEBUG printf("PMI_KVS_Put(%s,%s,%s)\n", kvs_name, key, val); #endif /* Sync KVS across all tasks */ if ((rc = PMI_KVS_Commit(kvs_name)) != PMI_SUCCESS) { printf("FAILURE: PMI_KVS_Commit: %d, task %d\n", rc, pmi_rank); exit(1); } #if _DEBUG printf("PMI_KVS_Commit completed\n"); #endif if ((rc = PMI_Barrier()) != PMI_SUCCESS) { printf("FAILURE: PMI_Barrier: %d, task %d\n", rc, pmi_rank); exit(1); } #if _DEBUG printf("PMI_Barrier completed\n"); #endif /* Now lets get some keypairs and validate */ for (i=0; i<pmi_size; i++) { snprintf(key, key_len, "ATTR_1_%d", i); if ((rc = PMI_KVS_Get(kvs_name, key, val, val_len)) != PMI_SUCCESS) { printf("FAILURE: PMI_KVS_Get(%s): %d, task %d\n", key, rc, pmi_rank); exit(1); } if ((val[0] != 'A') || ((atoi(&val[1])-OFFSET_1) != i)) { printf("FAILURE: Bad keypair %s=%s, task %d\n", key, val, pmi_rank); exit(1); } #if _DEBUG if ((pmi_size <= 8) && (pmi_rank == 1)) /* limit output */ printf("PMI_KVS_Get(%s,%s) %s\n", kvs_name, key, val); #endif snprintf(key, key_len, "attr_4_%d", i); if ((rc = PMI_KVS_Get(kvs_name, key, val, val_len)) != PMI_SUCCESS) { printf("FAILURE: PMI_KVS_Get(%s): %d, task %d\n", key, rc, pmi_rank); exit(1); } if ((val[0] != 'D') || ((atoi(&val[1])-OFFSET_2) != i)) { printf("FAILURE: Bad keypair %s=%s, task %d\n", key,val, pmi_rank); exit(1); } #if _DEBUG if ((pmi_size <= 8) && (pmi_rank == 1)) /* limit output */ printf("PMI_KVS_Get(%s,%s) %s\n", kvs_name, key, val); #endif } /* Replicate the very heavy load that MVAPICH2 puts on PMI * This load exceeds that of MPICH2 by a very wide margin */ #if _DEBUG printf("Starting %d iterations each with %d PMI_KVS_Put and \n" " one each PMI_KVS_Commit and KVS_Barrier\n", BARRIER_CNT, PUTS_PER_BARRIER); fflush(stdout); #endif for (i=0; i<BARRIER_CNT; i++) { for (j=0; j<PUTS_PER_BARRIER; j++) { snprintf(key, key_len, "ATTR_%d_%d_%d", i, j, procid); snprintf(val, val_len, "C%d", procid+OFFSET_1); if ((rc = PMI_KVS_Put(kvs_name, key, val)) != PMI_SUCCESS) { printf("FAILURE: PMI_KVS_Put(%s,%s,%s): " "%d, task %d\n", kvs_name, key, val, rc, pmi_rank); exit(1); } } if ((rc= PMI_KVS_Commit(kvs_name)) != PMI_SUCCESS) { printf("FAILURE: PMI_KVS_Commit: %d, task %d\n", rc, pmi_rank); exit(1); } if ((rc = PMI_Barrier()) != PMI_SUCCESS) { printf("FAILURE: PMI_Barrier: %d, task %d\n", rc, pmi_rank); exit(1); } /* Don't bother with PMI_KVS_Get as those are all local * and do not put a real load on srun or the network */ } #if _DEBUG printf("Interative PMI calls successful\n"); #endif /* create new keyspace and test it */ if ((rc = PMI_KVS_Create(kvs_name, kvs_name_len)) != PMI_SUCCESS) { printf("FAILURE: PMI_KVS_Create: %d, task %d\n", rc, pmi_rank); exit(1); } #if _DEBUG printf("PMI_KVS_Create %s\n", kvs_name); #endif if ((rc = PMI_KVS_Put(kvs_name, "KVS_KEY", "KVS_VAL")) != PMI_SUCCESS) { printf("FAILURE: PMI_KVS_Put: %d, task %d\n", rc, pmi_rank); exit(1); } #if _DEBUG printf("PMI_KVS_Put(%s,KVS_KEY,KVS_VAL)\n", kvs_name); #endif if ((rc = PMI_KVS_Get(kvs_name, "KVS_KEY", val, val_len)) != PMI_SUCCESS) { printf("FAILURE: PMI_KVS_Get(%s, KVS_KEY): %d, task %d\n", kvs_name, rc, pmi_rank); exit(1); } #if _DEBUG printf("PMI_KVS_Get(%s,%s) %s\n", kvs_name, "KVS_KEY", val); #endif if ((rc = PMI_KVS_Destroy(kvs_name)) != PMI_SUCCESS) { printf("FAILURE: PMI_KVS_Destroy(%s): %d, task %d\n", kvs_name, rc, pmi_rank); exit(1); } if ((rc = PMI_KVS_Get(kvs_name, "KVS_KEY", val, val_len)) != PMI_ERR_INVALID_KVS) { printf("FAILURE: PMI_KVS_Get(%s, KVS_KEY): %d, task %d\n", kvs_name, rc, pmi_rank); exit(1); } if ((rc = PMI_Finalize()) != PMI_SUCCESS) { printf("FAILURE: PMI_Finalize: %d, task %d\n", rc, pmi_rank); exit(1); } if (_DEBUG || (pmi_rank < 4)) { gettimeofday(&tv2, NULL); delta_t = (tv2.tv_sec - tv1.tv_sec) * 1000000; delta_t += tv2.tv_usec - tv1.tv_usec; snprintf(tv_str, sizeof(tv_str), "usec=%ld", delta_t); printf("PMI test ran successfully, for task %d, %s\n", pmi_rank, tv_str); } if (pmi_rank == 0) { printf("NOTE: All failures reported, "); printf("but only first four successes reported\n"); } exit(0); }