@@ -206,6 +206,7 @@ public long getProtocolVersion(String protocol,
206206 private final boolean haEnabled ;
207207 private final HAContext haContext ;
208208 protected boolean allowStaleStandbyReads ;
// Runtime that doImmediateShutdown calls exit(1) on; defaults to the JVM's
// own Runtime and can be swapped out through setRuntimeForTesting.
209+ private Runtime runtime = Runtime .getRuntime ();
209210
210211
211212 /** httpServer */
@@ -481,11 +482,16 @@ private void stopCommonServices() {
481482 }
482483
/**
 * Starts the trash emptier thread unless the configured trash interval is 0.
 *
 * The interval is read from CommonConfigurationKeys.FS_TRASH_INTERVAL_KEY
 * (falling back to FS_TRASH_INTERVAL_DEFAULT). An interval of 0 returns
 * without starting anything; otherwise a daemon thread named "Trash Emptier"
 * is created around new Trash(conf).getEmptier() and started.
 *
 * @param conf configuration supplying the trash interval
 * @throws IOException if the configured trash interval is negative
 */
483484 private void startTrashEmptier (Configuration conf ) throws IOException {
484- long trashInterval
485- = conf . getLong ( CommonConfigurationKeys .FS_TRASH_INTERVAL_KEY ,
486- CommonConfigurationKeys .FS_TRASH_INTERVAL_DEFAULT );
487- if (trashInterval == 0 )
485+ long trashInterval = conf . getLong (
486+ CommonConfigurationKeys .FS_TRASH_INTERVAL_KEY ,
487+ CommonConfigurationKeys .FS_TRASH_INTERVAL_DEFAULT );
488+ if (trashInterval == 0 ) {
488489 return ;
490+ } else if (trashInterval < 0 ) {
// A negative interval is rejected outright rather than silently ignored.
491+ throw new IOException ("Cannot start trash emptier with negative interval."
492+ + " Set " + CommonConfigurationKeys .FS_TRASH_INTERVAL_KEY + " to a"
493+ + " positive value." );
494+ }
489495 this .emptier = new Thread (new Trash (conf ).getEmptier (), "Trash Emptier" );
490496 this .emptier .setDaemon (true );
491497 this .emptier .start ();
@@ -1235,14 +1241,37 @@ synchronized HAServiceState getServiceState() {
12351241 }
12361242 return state .getServiceState ();
12371243 }
1244+
/**
 * Replaces the Runtime instance held in the runtime field, which
 * doImmediateShutdown calls exit(1) on. Intended for tests only.
 *
 * @param runtime the Runtime to use from now on
 */
1245+ @ VisibleForTesting
1246+ public synchronized void setRuntimeForTesting (Runtime runtime ) {
1247+ this .runtime = runtime ;
1248+ }
12381249
12391250 /**
1240- * Class used as expose {@link NameNode} as context to {@link HAState}
1251+ * Shut down the NN immediately in an ungraceful way. Used when it would be
1252+ * unsafe for the NN to continue operating, e.g. during a failed HA state
1253+ * transition.
12411254 *
1242- * TODO(HA):
1243- * When entering and exiting state, on failing to start services,
1244- * appropriate action is needed todo either shutdown the node or recover
1245- * from failure.
1255+ * @param t exception which warrants the shutdown. Printed to the NN log
1256+ * before exit.
1257+ * @throws ServiceFailedException only if runtime.exit(1) returns, i.e. in tests with a stubbed runtime.
1258+ */
1259+ private synchronized void doImmediateShutdown (Throwable t )
1260+ throws ServiceFailedException {
1261+ String message = "Error encountered requiring NN shutdown. " +
1262+ "Shutting down immediately." ;
1263+ try {
1264+ LOG .fatal (message , t );
1265+ } catch (Throwable ignored ) {
1266+ // Unlikely to happen, and nothing can be done if it does; proceed to exit regardless.
1267+ }
1268+ runtime .exit (1 );
1269+ // This code is only reached during testing, when runtime is stubbed out.
1270+ throw new ServiceFailedException (message , t );
1271+ }
1272+
1273+ /**
1274+ * Class used to expose {@link NameNode} as context to {@link HAState}
12461275 */
12471276 protected class NameNodeHAContext implements HAContext {
12481277 @ Override
@@ -1257,32 +1286,52 @@ public HAState getState() {
12571286
/**
 * Starts the namesystem's active services and then the trash emptier.
 * Any Throwable raised by either step is handed to doImmediateShutdown
 * rather than propagated directly.
 */
12581287 @ Override
12591288 public void startActiveServices () throws IOException {
1260- namesystem .startActiveServices ();
1261- startTrashEmptier (conf );
1289+ try {
1290+ namesystem .startActiveServices ();
1291+ startTrashEmptier (conf );
1292+ } catch (Throwable t ) {
1293+ doImmediateShutdown (t );
1294+ }
12621295 }
12631296
/**
 * Stops the namesystem's active services (when a namesystem exists) and then
 * the trash emptier. Any Throwable raised along the way is handed to
 * doImmediateShutdown rather than propagated directly.
 */
12641297 @ Override
12651298 public void stopActiveServices () throws IOException {
1266- if (namesystem != null ) {
1267- namesystem .stopActiveServices ();
1299+ try {
1300+ if (namesystem != null ) {
1301+ namesystem .stopActiveServices ();
1302+ }
1303+ stopTrashEmptier ();
1304+ } catch (Throwable t ) {
1305+ doImmediateShutdown (t );
12681306 }
1269- stopTrashEmptier ();
12701307 }
12711308
/**
 * Starts the namesystem's standby services using conf. Any Throwable raised
 * is handed to doImmediateShutdown rather than propagated directly.
 */
12721309 @ Override
12731310 public void startStandbyServices () throws IOException {
1274- namesystem .startStandbyServices (conf );
1311+ try {
1312+ namesystem .startStandbyServices (conf );
1313+ } catch (Throwable t ) {
1314+ doImmediateShutdown (t );
1315+ }
12751316 }
12761317
/**
 * Asks the namesystem to prepare for stopping its standby services. Any
 * Throwable raised is handed to doImmediateShutdown rather than propagated
 * directly.
 */
12771318 @ Override
12781319 public void prepareToStopStandbyServices () throws ServiceFailedException {
1279- namesystem .prepareToStopStandbyServices ();
1320+ try {
1321+ namesystem .prepareToStopStandbyServices ();
1322+ } catch (Throwable t ) {
1323+ doImmediateShutdown (t );
1324+ }
12801325 }
12811326
/**
 * Stops the namesystem's standby services when a namesystem exists. Any
 * Throwable raised is handed to doImmediateShutdown rather than propagated
 * directly.
 */
12821327 @ Override
12831328 public void stopStandbyServices () throws IOException {
1284- if (namesystem != null ) {
1285- namesystem .stopStandbyServices ();
1329+ try {
1330+ if (namesystem != null ) {
1331+ namesystem .stopStandbyServices ();
1332+ }
1333+ } catch (Throwable t ) {
1334+ doImmediateShutdown (t );
12861335 }
12871336 }
12881337
0 commit comments