2929#include < NdbSleep.h>
3030
3131#define PATH_SEPARATOR DIR_SEPARATOR
32+ #define TESTCASE_RETRIES_THRESHOLD_WARNING 5
3233
3334/* * Global variables */
3435static const char progname[] = " ndb_atrt" ;
@@ -325,161 +326,174 @@ int main(int argc, char **argv) {
325326 */
326327 g_logger.debug (" Entering main loop" );
327328 while (!feof (g_test_case_file)) {
328- /* *
329- * Do we need to restart ndb
330- */
331- if (restart) {
332- restart = false ;
333- g_logger.info (" (Re)starting server processes..." );
329+ atrt_testcase test_case;
330+ const int num_element_lines =
331+ read_test_case (g_test_case_file, test_case, lineno);
332+ if (num_element_lines == 0 ) {
333+ // Should be at end of file. Let while condition catch that.
334+ continue ;
335+ }
336+ if (num_element_lines < 0 ) {
337+ g_logger.critical (" Corrupt testcase at line %d (error %d)" , lineno,
338+ num_element_lines);
339+ goto cleanup;
340+ }
341+ g_logger.info (" #%d - %s" , test_no, test_case.m_name .c_str ());
334342
335- if (!stop_processes (g_config, ~0 )) {
336- g_logger.critical (" Failed to stop all processes" );
337- goto end;
343+ int result = 0 ;
344+ time_t elapsed;
345+ int testruns;
346+ int total_runs = 1 + test_case.m_max_retries ;
347+ for (testruns = 1 ; testruns <= total_runs; testruns++) {
348+ if (testruns > 1 ) {
349+ g_logger.info (" Retrying test #%d - '%s', attempt (%d/%d)" , test_no,
350+ test_case.m_name .c_str (), testruns - 1 ,
351+ test_case.m_max_retries );
338352 }
339353
340- g_logger.info (" Waiting for all processes to stop..." );
341- if (!wait_for_processes_to_stop (g_config, ~0 )) {
342- g_logger.critical (" Fail to stop all processes" );
343- goto end;
344- }
354+ /* *
355+ * Do we need to restart ndb
356+ */
357+ if (restart) {
358+ restart = false ;
359+ g_logger.info (" (Re)starting server processes..." );
360+
361+ if (!stop_processes (g_config, ~0 )) {
362+ g_logger.critical (" Failed to stop all processes" );
363+ goto end;
364+ }
345365
346- if (!setup_directories (g_config, 2 )) {
347- g_logger.critical (" Failed to setup directories" );
348- goto end;
349- }
366+ g_logger.info (" Waiting for all processes to stop..." );
367+ if (!wait_for_processes_to_stop (g_config, ~0 )) {
368+ g_logger.critical (" Fail to stop all processes" );
369+ goto end;
370+ }
350371
351- if (!setup_files (g_config, 2 , 1 )) {
352- g_logger.critical (" Failed to setup files " );
353- goto end;
354- }
372+ if (!setup_directories (g_config, 2 )) {
373+ g_logger.critical (" Failed to setup directories " );
374+ goto end;
375+ }
355376
356- if (!setup_hosts (g_config)) {
357- g_logger.critical (" Failed to setup hosts " );
358- goto end;
359- }
377+ if (!setup_files (g_config, 2 , 1 )) {
378+ g_logger.critical (" Failed to setup files " );
379+ goto end;
380+ }
360381
361- g_logger.debug (" Setup complete, starting servers" );
362- if (!start (g_config, p_ndb | p_servers)) {
363- g_logger.critical (" Failed to start server processes" );
364- g_logger.info (" Gathering logs and saving them as test %u" , test_no);
382+ if (!setup_hosts (g_config)) {
383+ g_logger.critical (" Failed to setup hosts" );
384+ goto end;
385+ }
386+
387+ g_logger.debug (" Setup complete, starting servers" );
388+ if (!start (g_config, p_ndb | p_servers)) {
389+ g_logger.critical (" Failed to start server processes" );
390+ g_logger.info (" Gathering logs and saving them as test %u" , test_no);
365391
366- int tmp;
367- if (!gather_result (g_config, &tmp)) {
368- g_logger.critical (" Failed to gather results" );
392+ int tmp;
393+ if (!gather_result (g_config, &tmp)) {
394+ g_logger.critical (" Failed to gather results" );
395+ goto cleanup;
396+ }
397+
398+ if (g_report_file != 0 ) {
399+ fprintf (g_report_file, " %s ; %d ; %d ; %d ; %d\n " , " start servers" ,
400+ test_no, ERR_FAILED_TO_START, 0 , 0 );
401+ fflush (g_report_file);
402+ }
403+
404+ BaseString resdir;
405+ resdir.assfmt (" result.%d" , test_no);
406+ remove_dir (resdir.c_str (), true );
407+
408+ if (rename (" result" , resdir.c_str ()) != 0 ) {
409+ g_logger.critical (" Failed to rename %s as %s" , " result" ,
410+ resdir.c_str ());
411+ goto cleanup;
412+ }
369413 goto cleanup;
370414 }
371415
372- if (g_report_file != 0 ) {
373- fprintf (g_report_file, " %s ; %d ; %d ; %d\n " , " start servers" ,
374- test_no, ERR_FAILED_TO_START, 0 );
375- fflush (g_report_file);
416+ if (!setup_db (g_config)) {
417+ g_logger.critical (" Failed to setup database" );
418+ goto cleanup;
376419 }
377420
378- BaseString resdir;
379- resdir.assfmt (" result.%d" , test_no);
380- remove_dir (resdir.c_str (), true );
421+ g_logger.info (" All servers start completed" );
422+ }
381423
382- if (rename (" result" , resdir.c_str ()) != 0 ) {
383- g_logger.critical (" Failed to rename %s as %s" , " result" ,
384- resdir.c_str ());
385- goto cleanup;
386- }
424+ // Assign processes to programs
425+ if (!setup_test_case (g_config, test_case)) {
426+ g_logger.critical (" Failed to setup test case" );
387427 goto cleanup;
388428 }
389429
390- if (!setup_db (g_config)) {
391- g_logger.critical (" Failed to setup database " );
430+ if (!start_processes (g_config, p_clients )) {
431+ g_logger.critical (" Failed to start client processes " );
392432 goto cleanup;
393433 }
394434
395- g_logger.info (" All servers start completed" );
396- }
435+ const time_t start = time (0 );
436+ time_t now = start;
437+ do {
438+ if (!update_status (g_config, atrt_process::AP_ALL)) {
439+ g_logger.critical (" Failed to get updated status for all processes" );
440+ goto cleanup;
441+ }
397442
398- // const int start_line = lineno;
399- atrt_testcase test_case;
400- const int num_element_lines =
401- read_test_case (g_test_case_file, test_case, lineno);
402- if (num_element_lines == 0 ) {
403- // Should be at end of file. Let while condition catch that.
404- continue ;
405- }
406- if (num_element_lines < 0 ) {
407- g_logger.critical (" Corrupt testcase at line %d (error %d)" , lineno,
408- num_element_lines);
409- goto cleanup;
410- }
411- g_logger.info (" #%d - %s" , test_no, test_case.m_name .c_str ());
443+ if ((result = check_ndb_or_servers_failures (g_config))) {
444+ break ;
445+ }
412446
413- // Assign processes to programs
414- if (!setup_test_case (g_config, test_case)) {
415- g_logger.critical (" Failed to setup test case" );
416- goto cleanup;
417- }
447+ if (!is_client_running (g_config)) {
448+ break ;
449+ }
418450
419- if (!start_processes (g_config, p_clients)) {
420- g_logger.critical (" Failed to start client processes" );
421- goto cleanup;
422- }
451+ if (!do_command (g_config)) {
452+ result = ERR_COMMAND_FAILED;
453+ g_logger.critical (" Failure on client command execution" );
454+ break ;
455+ }
423456
424- int result = 0 ;
457+ now = time (0 );
458+ if (now > (start + test_case.m_max_time )) {
459+ g_logger.debug (" Timed out" );
460+ result = ERR_MAX_TIME_ELAPSED;
461+ g_logger.info (" Timeout '%s' after %ld seconds" ,
462+ test_case.m_name .c_str (), test_case.m_max_time );
463+ break ;
464+ }
465+ NdbSleep_SecSleep (1 );
466+ } while (true );
425467
426- const time_t start = time (0 );
427- time_t now = start;
428- do {
429- if (!update_status (g_config, atrt_process::AP_ALL)) {
430- g_logger.critical (" Failed to get updated status for all processes" );
468+ elapsed = time (0 ) - start;
469+ if (!stop_processes (g_config, p_clients)) {
470+ g_logger.critical (" Failed to stop client processes" );
431471 goto cleanup;
432472 }
433473
434- if ((result = check_ndb_or_servers_failures (g_config))) {
435- break ;
436- }
437-
438- if (!is_client_running (g_config)) {
439- break ;
474+ if (!wait_for_processes_to_stop (g_config, p_clients)) {
475+ g_logger.critical (" Failed to stop client processes" );
476+ goto cleanup;
440477 }
441478
442- if (! do_command (g_config)) {
443- result = ERR_COMMAND_FAILED;
444- g_logger.critical (" Failure on client command execution " );
445- break ;
479+ int tmp, *rp = result ? &tmp : &result;
480+ if (! gather_result (g_config, rp)) {
481+ g_logger.critical (" Failed to gather result after test run " );
482+ goto end ;
446483 }
447484
448- now = time (0 );
449- if (now > (start + test_case.m_max_time )) {
450- g_logger.debug (" Timed out" );
451- result = ERR_MAX_TIME_ELAPSED;
452- g_logger.info (" Timeout '%s' after %ld seconds" ,
453- test_case.m_name .c_str (), test_case.m_max_time );
485+ g_logger.info (" #%d %s(%d)" , test_no, (result == 0 ? " OK" : " FAILED" ),
486+ result);
487+ if (result == 0 ) {
454488 break ;
489+ } else {
490+ restart = true ;
455491 }
456- NdbSleep_SecSleep (1 );
457- } while (true );
458-
459- const time_t elapsed = time (0 ) - start;
460-
461- if (!stop_processes (g_config, p_clients)) {
462- g_logger.critical (" Failed to stop client processes" );
463- goto cleanup;
464- }
465-
466- if (!wait_for_processes_to_stop (g_config, p_clients)) {
467- g_logger.critical (" Failed to stop client processes" );
468- goto cleanup;
469492 }
470493
471- int tmp, *rp = result ? &tmp : &result;
472- if (!gather_result (g_config, rp)) {
473- g_logger.critical (" Failed to gather result after test run" );
474- goto end;
475- }
476-
477- g_logger.info (" #%d %s(%d)" , test_no, (result == 0 ? " OK" : " FAILED" ),
478- result);
479-
480494 if (g_report_file != 0 ) {
481- fprintf (g_report_file, " %s ; %d ; %d ; %ld\n " , test_case. m_name . c_str () ,
482- test_no, result, elapsed);
495+ fprintf (g_report_file, " %s ; %d ; %d ; %ld ; %d \n " ,
496+ test_case. m_name . c_str (), test_no, result, elapsed, testruns );
483497 fflush (g_report_file);
484498 }
485499
@@ -510,10 +524,6 @@ int main(int argc, char **argv) {
510524 if (reset_config (g_config)) {
511525 restart = true ;
512526 }
513-
514- if (result != 0 ) {
515- restart = true ;
516- }
517527 test_no++;
518528 }
519529 return_code = 0 ;
@@ -526,8 +536,8 @@ int main(int argc, char **argv) {
526536end:
527537 g_logger.info (" Finishing, result: %d" , return_code);
528538 if (return_code != 0 && g_report_file != 0 ) {
529- fprintf (g_report_file, " %s ; %d ; %d ; %d\n " , " critical error" , test_no ,
530- ERR_FAILED_TO_START, 0 );
539+ fprintf (g_report_file, " %s ; %d ; %d ; %d ; %d \n " , " critical error" ,
540+ test_no, ERR_FAILED_TO_START, 0 , 0 );
531541 fflush (g_report_file);
532542 }
533543 if (g_report_file != 0 ) {
@@ -1379,6 +1389,23 @@ int read_test_case(FILE *file, atrt_testcase &tc, int &line) {
13791389 used_elements++;
13801390 }
13811391
1392+ tc.m_max_retries = 0 ;
1393+ if (p.get (" max-retries" , &mt)) {
1394+ tc.m_max_retries = atoi (mt);
1395+ used_elements++;
1396+ }
1397+
1398+ if (tc.m_max_retries < 0 ) {
1399+ g_logger.error (" No of retries must not be less than zero for test '%s'" ,
1400+ tc.m_name .c_str ());
1401+ return -4 ;
1402+ }
1403+
1404+ if (tc.m_max_retries > TESTCASE_RETRIES_THRESHOLD_WARNING)
1405+ g_logger.warning (
1406+ " No of retries should be less than or equal to %d for test '%s'" ,
1407+ TESTCASE_RETRIES_THRESHOLD_WARNING, tc.m_name .c_str ());
1408+
13821409 if (used_elements != elements) {
13831410 g_logger.critical (
13841411 " Invalid test file: unknown properties in test case above line: %d" ,
0 commit comments