更新 cluster.c 和 cluster.h 的注释

huangzworks · huangzworks · commit 68d5c5dcf5b2 · 2014-06-22T20:41:51.000+08:00
diff --git a/src/cluster.c b/src/cluster.c
@@ -44,6 +44,7 @@
 /* A global reference to myself is handy to make code more clear.
  * Myself always points to server.cluster->myself, that is, the clusterNode
  * that represents this node. */
+// 为了方便起见，维持一个 myself 全局变量，让它总是指向 cluster->myself 。
 clusterNode *myself = NULL;
 
 clusterNode *createClusterNode(char *nodename, int flags);
@@ -3074,6 +3075,7 @@ void clusterSendFailoverAuth(clusterNode *node) {
 }
 
 /* Send a MFSTART message to the specified node. */
+// 向给定的节点发送一条 MFSTART 消息
 void clusterSendMFStart(clusterNode *node) {
     unsigned char buf[sizeof(clusterMsg)];
     clusterMsg *hdr = (clusterMsg*) buf;
@@ -3411,6 +3413,7 @@ void clusterHandleSlaveFailover(void) {
         clusterBroadcastPong(CLUSTER_BROADCAST_ALL);
 
         /* 6) If there was a manual failover in progress, clear the state. */
+        // 如果有手动故障转移正在执行，那么清理和它有关的状态
         resetManualFailover();
     }
 }
@@ -3534,8 +3537,13 @@ void clusterHandleSlaveMigration(int max_slaves) {
 /* Reset the manual failover state. This works for both masters and slavesa
  * as all the state about manual failover is cleared.
  *
+ * 重置与手动故障转移有关的状态，主节点和从节点都可以使用。
+ *
  * The function can be used both to initialize the manual failover state at
- * startup or to abort a manual failover in progress. */
+ * startup or to abort a manual failover in progress. 
+ * 这个函数既可以用于在启动集群时进行初始化，
+ * 又可以用于中止正在进行中的手动故障转移。
+ */
 void resetManualFailover(void) {
     if (server.cluster->mf_end && clientsArePaused()) {
         server.clients_pause_end_time = 0;
@@ -3797,6 +3805,8 @@ void clusterCron(void) {
 
         /* If we are a master and one of the slaves requested a manual
          * failover, ping it continuously. */
+        // 如果这是一个主节点，并且有一个从服务器请求进行手动故障转移
+        // 那么向从服务器发送 PING 。
         if (server.cluster->mf_end &&
             nodeIsMaster(myself) &&
             server.cluster->mf_slave == node &&
@@ -4021,6 +4031,8 @@ int clusterDelNodeSlots(clusterNode *node) {
 
 /* Clear the migrating / importing state for all the slots.
  * This is useful at initialization and when turning a master into slave. */
+// 清理所有槽的迁移和导入状态
+// 通常在初始化或者将主节点转为从节点时使用
 void clusterCloseAllSlots(void) {
     memset(server.cluster->migrating_slots_to,0,
         sizeof(server.cluster->migrating_slots_to));
@@ -4232,6 +4244,8 @@ int verifyClusterConfigWithData(void) {
 
 /* Set the specified node 'n' as master for this node.
  * If this node is currently a master, it is turned into a slave. */
+// 将节点 n 设置为当前节点的主节点
+// 如果当前节点为主节点，那么将它转换为从节点
 void clusterSetMaster(clusterNode *n) {
     redisAssert(n != myself);
     redisAssert(myself->numslots == 0);
@@ -4262,6 +4276,7 @@ void clusterSetMaster(clusterNode *n) {
  * See clusterGenNodesDescription() top comment for more information.
  *
  * The function returns the string representation as an SDS string. */
+// 生成节点的状态描述信息
 sds clusterGenNodeDescription(clusterNode *node) {
     int j, start;
     sds ci;
@@ -4856,6 +4871,8 @@ void clusterCommand(redisClient *c) {
         addReply(c,shared.ok);
     } else if (!strcasecmp(c->argv[1]->ptr,"slaves") && c->argc == 3) {
         /* CLUSTER SLAVES <NODE ID> */
+        // 打印给定主节点的所有从节点的信息
+
         clusterNode *n = clusterLookupNode(c->argv[2]->ptr);
         int j;
 
@@ -4880,6 +4897,8 @@ void clusterCommand(redisClient *c) {
                (c->argc == 2 || c->argc == 3))
     {
         /* CLUSTER FAILOVER [FORCE] */
+        // 执行手动故障转移
+
         int force = 0;
 
         if (c->argc == 3) {
@@ -4891,26 +4910,36 @@ void clusterCommand(redisClient *c) {
             }
         }
 
+        // 命令只能发送给从节点
         if (nodeIsMaster(myself)) {
             addReplyError(c,"You should send CLUSTER FAILOVER to a slave");
             return;
         } else if (!force &&
                    (myself->slaveof == NULL || nodeFailed(myself->slaveof) ||
                    myself->slaveof->link == NULL))
         {
+            // 如果主节点已下线或者处于失效状态
+            // 并且命令没有给定 force 参数，那么命令执行失败
             addReplyError(c,"Master is down or failed, "
                             "please use CLUSTER FAILOVER FORCE");
             return;
         }
+
+        // 重置手动故障转移的有关属性
         resetManualFailover();
+        // 设定手动故障转移的最大执行时限
         server.cluster->mf_end = mstime() + REDIS_CLUSTER_MF_TIMEOUT;
 
         /* If this is a forced failover, we don't need to talk with our master
          * to agree about the offset. We just failover taking over it without
          * coordination. */
+        // 如果这是强制的手动 failover ，那么直接开始 failover ，
+        // 无须与自己的主节点协商复制偏移量。
         if (force) {
+            // 如果这是强制的手动故障转移，那么直接开始执行故障转移操作
             server.cluster->mf_can_start = 1;
         } else {
+            // 如果不是强制的话，那么需要和主节点比对相互的偏移量是否一致
             clusterSendMFStart(myself->slaveof);
         }
         redisLog(REDIS_WARNING,"Manual failover user request accepted.");
diff --git a/src/cluster.h b/src/cluster.h
@@ -32,18 +32,29 @@
 #define REDIS_CLUSTER_FAIL_UNDO_TIME_ADD 10 /* Some additional time. */
 // 在检查从节点数据是否有效时使用的乘法因子
 #define REDIS_CLUSTER_SLAVE_VALIDITY_MULT 10 /* Slave data validity. */
+// 在执行故障转移之前需要等待的秒数，似乎已经废弃
 #define REDIS_CLUSTER_FAILOVER_DELAY 5 /* Seconds */
+// 未使用，似乎已经废弃
 #define REDIS_CLUSTER_DEFAULT_MIGRATION_BARRIER 1
+// 执行手动故障转移的最大时限（以毫秒计）
 #define REDIS_CLUSTER_MF_TIMEOUT 5000 /* Milliseconds to do a manual failover. */
+// 未使用，似乎已经废弃
 #define REDIS_CLUSTER_MF_PAUSE_MULT 2 /* Master pause manual failover mult. */
 
 /* Redirection errors returned by getNodeByQuery(). */
+/* 由 getNodeByQuery() 函数返回的转向错误。 */
+// 节点可以处理这个命令
 #define REDIS_CLUSTER_REDIR_NONE 0          /* Node can serve the request. */
+// 请求中的多个键分属于不同的槽
 #define REDIS_CLUSTER_REDIR_CROSS_SLOT 1    /* Keys in different slots. */
+// 键所处的槽正在进行 reshard
 #define REDIS_CLUSTER_REDIR_UNSTABLE 2      /* Keys in slot resharding. */
+// 需要进行 ASK 转向
 #define REDIS_CLUSTER_REDIR_ASK 3           /* -ASK redirection required. */
+// 需要进行 MOVED 转向
 #define REDIS_CLUSTER_REDIR_MOVED 4         /* -MOVED redirection required. */
 
+// 前置定义，防止编译错误
 struct clusterNode;
 
 
@@ -91,6 +102,7 @@ typedef struct clusterLink {
 // 空名字（在节点为主节点时，用作消息中的 slaveof 属性的值）
 #define REDIS_NODE_NULL_NAME "\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000"
 
+// 用于判断节点身份和状态的一系列宏
 #define nodeIsMaster(n) ((n)->flags & REDIS_NODE_MASTER)
 #define nodeIsSlave(n) ((n)->flags & REDIS_NODE_SLAVE)
 #define nodeInHandshake(n) ((n)->flags & REDIS_NODE_HANDSHAKE)
@@ -186,9 +198,10 @@ struct clusterNode {
 typedef struct clusterNode clusterNode;
 
 
-// 集群状态，每个节点都保存着一个这样的状态，记录了它们眼中的集群的样子
-// 注意，结构中有一部分属性其实和节点有关的，不知道为什么被放在了这里
-// 比如 slots_to_keys 、failover_auth_count 等属性就是和本节点有关的
+// 集群状态，每个节点都保存着一个这样的状态，记录了它们眼中的集群的样子。
+// 另外，虽然这个结构主要用于记录集群的属性，但是为了节约资源，
+// 有些与节点有关的属性，比如 slots_to_keys 、 failover_auth_count 
+// 也被放到了这个结构里面。
 typedef struct clusterState {
 
     // 指向当前节点的指针
@@ -244,18 +257,31 @@ typedef struct clusterState {
     int failover_auth_sent;     /* True if we already asked for votes. */
 
     int failover_auth_rank;     /* This slave rank for current auth request. */
+
     uint64_t failover_auth_epoch; /* Epoch of the current election. */
+
     /* Manual failover state in common. */
+    /* 共用的手动故障转移状态 */
+
+    // 手动故障转移执行的时间限制
     mstime_t mf_end;            /* Manual failover time limit (ms unixtime).
                                    It is zero if there is no MF in progress. */
     /* Manual failover state of master. */
+    /* 主服务器的手动故障转移状态 */
     clusterNode *mf_slave;      /* Slave performing the manual failover. */
     /* Manual failover state of slave. */
+    /* 从服务器的手动故障转移状态 */
     long long mf_master_offset; /* Master offset the slave needs to start MF
                                    or zero if stil not received. */
+    // 指示手动故障转移是否可以开始的标志值
+    // 值为非 0 时表示手动故障转移可以开始向各个主服务器请求投票
     int mf_can_start;           /* If non-zero signal that the manual failover
                                    can start requesting masters vote. */
+
     /* The followign fields are uesd by masters to take state on elections. */
+    /* 以下这些域由主服务器使用，用于记录选举时的状态 */
+
+    // 最后一次投出（授予）选票时的纪元
     uint64_t lastVoteEpoch;     /* Epoch of the last vote granted. */
 
     // 在进入下个事件循环之前要做的事情，以各个 flag 来记录
@@ -303,6 +329,7 @@ typedef struct clusterState {
 #define CLUSTERMSG_TYPE_FAILOVER_AUTH_ACK 6     /* Yes, you have my vote */
 // 槽布局已经发生变化，消息发送者要求消息接收者进行相应的更新
 #define CLUSTERMSG_TYPE_UPDATE 7        /* Another node slots configuration */
+// 为了进行手动故障转移，暂停各个客户端
 #define CLUSTERMSG_TYPE_MFSTART 8       /* Pause clients for manual failover */
 
 /* Initially we don't know our "name", but we'll find it once we connect
@@ -331,6 +358,7 @@ typedef struct {
     // 节点的标识值
     uint16_t flags;
 
+    // 对齐字节，不使用
     uint32_t notused; /* for 64 bit alignment */
 
 } clusterMsgDataGossip;
@@ -446,6 +474,7 @@ typedef struct {
     // 消息发送者所处集群的状态
     unsigned char state; /* Cluster state from the POV of the sender */
 
+    // 消息标志
     unsigned char mflags[3]; /* Message flags: CLUSTERMSG_FLAG[012]_... */
 
     // 消息的正文（或者说，内容）