
Commit afc029d

1 parent 3c11486 commit afc029d

8 files changed: +101 −100 lines changed

docs/_config.yml

Lines changed: 1 addition & 0 deletions
@@ -38,6 +38,7 @@ kramdown:
 exclude:
   - jekyllbook
   - en/index.md
+  - vendor
 
 ################################### English ####################################
 prologues:

docs/en/week11/11-1.md

Lines changed: 10 additions & 10 deletions
@@ -30,7 +30,7 @@ There are variations in ReLU. The Random ReLU (RReLU) is defined as follows.
 
 $$
 \text{RReLU}(x) = \begin{cases}
-x, & \text{if $x \geq 0$}\\
+x, & \text{if} x \geq 0\\
 ax, & \text{otherwise}
 \end{cases}
 $$
@@ -47,7 +47,7 @@ Note that for RReLU, $a$ is a random variable that keeps samplings in a given ra
 
 $$
 \text{LeakyReLU}(x) = \begin{cases}
-x, & \text{if $x \geq 0$}\\
+x, & \text{if} x \geq 0\\
 a_\text{negative slope}x, & \text{otherwise}
 \end{cases}
 $$
@@ -66,7 +66,7 @@ LeakyReLU is necessary for skinny network, which is almost impossible to get gra
 
 $$
 \text{PReLU}(x) = \begin{cases}
-x, & \text{if $x \geq 0$}\\
+x, & \text{if} x \geq 0\\
 ax, & \text{otherwise}
 \end{cases}
 $$
@@ -209,8 +209,8 @@ It is similar to the Sigmoid function but gets to the asymptote slowly and allev
 
 $$
 \text{HardTanh}(x) = \begin{cases}
-1, & \text{if $x > 1$}\\
--1, & \text{if $x < -1$}\\
+1, & \text{if} x > 1\\
+-1, & \text{if} x < -1\\
 x, & \text{otherwise}
 \end{cases}
 $$
@@ -229,7 +229,7 @@ It works surprisingly well especially when weights are kept within the small val
 
 $$
 y = \begin{cases}
-x, & \text{if $x > \text{threshold}$}\\
+x, & \text{if} x > \text{threshold}\\
 v, & \text{otherwise}
 \end{cases}
 $$
@@ -255,8 +255,8 @@ It is rarely used except for sparse coding to compute the value of the latent va
 
 $$
 \text{SoftShrinkage}(x) = \begin{cases}
-x - \lambda, & \text{if $x > \lambda$}\\
-x + \lambda, & \text{if $x < -\lambda$}\\
+x - \lambda, & \text{if} x > \lambda\\
+x + \lambda, & \text{if} x < -\lambda\\
 0, & \text{otherwise}
 \end{cases}
 $$
@@ -273,8 +273,8 @@ This basically shrinks the variable by a constant towards 0, and forces to 0 if
 
 $$
 \text{HardShrinkage}(x) = \begin{cases}
-x, & \text{if $x > \lambda$}\\
-x, & \text{if $x < -\lambda$}\\
+x, & \text{if} x > \lambda\\
+x, & \text{if} x < -\lambda\\
 0, & \text{otherwise}
 \end{cases}
 $$
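For context (not part of this commit), the piecewise definitions edited above all have counterparts in `torch.nn`. A minimal sketch, assuming current module names and purely illustrative parameter values rather than anything specified in the course notes:

```python
# Sketch only: maps the formulas touched in docs/en/week11/11-1.md onto torch.nn
# modules. Parameter values below are illustrative, not taken from the notes.
import torch
import torch.nn as nn

x = torch.linspace(-2.0, 2.0, steps=9)

activations = {
    "RReLU":      nn.RReLU(lower=0.125, upper=0.333),        # a ~ U(lower, upper), resampled at each call
    "LeakyReLU":  nn.LeakyReLU(negative_slope=0.01),          # a is a fixed small slope
    "PReLU":      nn.PReLU(),                                 # a is a learned parameter
    "HardTanh":   nn.Hardtanh(min_val=-1.0, max_val=1.0),     # clips to [-1, 1]
    "Threshold":  nn.Threshold(threshold=0.5, value=0.0),     # v = 0 in this example
    "SoftShrink": nn.Softshrink(lambd=0.5),                   # shrinks towards 0 by lambda
    "HardShrink": nn.Hardshrink(lambd=0.5),                   # zeroes the band [-lambda, lambda]
}

for name, act in activations.items():
    print(f"{name:>10}: {act(x)}")
```

The correspondence mirrors the formulas: RReLU samples $a$ randomly, LeakyReLU fixes it, and PReLU learns it.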

docs/en/week13/13-2.md

Lines changed: 1 addition & 1 deletion
@@ -156,7 +156,7 @@ Matrix representation being,
 
 $$h^{l+1} = \eta(\boldsymbol{D}^{-1}\boldsymbol{A}h^{l}\boldsymbol{W}^{l})$$
 
-where, \boldsymbol{A} has the dimensions $n \times n$, $h^{l}$ has dimensions $n \times d$ and $W^{l}$ has $d \times d$, which results in a $n \times d$ $h^{l+1}$ matrix.
+where, $\boldsymbol{A}$ has the dimensions $n \times n$, $h^{l}$ has dimensions $n \times d$ and $W^{l}$ has $d \times d$, which results in a $n \times d$ $h^{l+1}$ matrix.
 
 And the vectorial representation being,
 
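As a quick sanity check of the dimensions quoted on the changed line (again, not part of this commit; $n$ and $d$ are arbitrary example sizes and $\eta$ is taken to be ReLU purely for illustration):

```python
# Dimension check for h^{l+1} = eta(D^{-1} A h^l W^l); a sketch only.
import torch

n, d = 5, 3                                            # number of nodes, feature dimension
A = (torch.rand(n, n) < 0.4).float()                   # toy adjacency matrix, n x n
D_inv = torch.diag(1.0 / A.sum(dim=1).clamp(min=1))    # inverse degree matrix, n x n
h = torch.randn(n, d)                                  # node features h^l, n x d
W = torch.randn(d, d)                                  # layer weights W^l, d x d

h_next = torch.relu(D_inv @ A @ h @ W)                 # result is n x d, as the text states
print(h_next.shape)                                    # torch.Size([5, 3])
```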

docs/es/week11/11-1.md

Lines changed: 20 additions & 20 deletions
@@ -62,15 +62,15 @@ Hay variaciones de la ReLU. La ReLU Aleatoria (RReLU, por sus siglas en inglés)
 
 <!--$$
 \text{RReLU}(x) = \begin{cases}
-x, & \text{if $x \geq 0$}\\
+x, & \text{if} x \geq 0\\
 ax, & \text{otherwise}
 \end{cases}
 $$
 -->
 
 $$
 \text{RReLU}(x) = \begin{cases}
-x, & \text{si $x \geq 0$}\\
+x, & \text{si} x \geq 0\\
 ax, & \text{en otro caso}
 \end{cases}
 $$
@@ -100,15 +100,15 @@ Recuerda que para la RReLU, $a$ es una variable aleatoria que se muestrea consta
 
 <!--$$
 \text{LeakyReLU}(x) = \begin{cases}
-x, & \text{if $x \geq 0$}\\
+x, & \text{if} x \geq 0\\
 a_\text{negative slope}x, & \text{otherwise}
 \end{cases}
 $$
 -->
 
 $$
 \text{LeakyReLU}(x) = \begin{cases}
-x, & \text{si $x \geq 0$}\\
+x, & \text{si} x \geq 0\\
 a_\text{pendiente negativa}x, & \text{en otro caso}
 \end{cases}
 $$
@@ -143,15 +143,15 @@ La LeakyReLU es necesaria para las redes neuronales delgadas, en las que es casi
 
 <!--$$
 \text{PReLU}(x) = \begin{cases}
-x, & \text{if $x \geq 0$}\\
+x, & \text{if} x \geq 0\\
 ax, & \text{otherwise}
 \end{cases}
 $$
 -->
 
 $$
 \text{PReLU}(x) = \begin{cases}
-x, & \text{si $x \geq 0$}\\
+x, & \text{si} x \geq 0\\
 ax, & \text{en otro caso}
 \end{cases}
 $$
@@ -469,17 +469,17 @@ Es similar a la función Sigmoide pero se acerca lentamente a la asíntota y has
 
 <!--$$
 \text{HardTanh}(x) = \begin{cases}
-1, & \text{if $x > 1$}\\
--1, & \text{if $x < -1$}\\
+1, & \text{if} x > 1\\
+-1, & \text{if} x < -1\\
 x, & \text{otherwise}
 \end{cases}
 $$
 -->
 
 $$
 \text{Tanh dura}(x) = \begin{cases}
-1, & \text{si $x > 1$}\\
--1, & \text{si $x < -1$}\\
+1, & \text{si} x > 1\\
+-1, & \text{si} x < -1\\
 x, & \text{en otro caso}
 \end{cases}
 $$
@@ -514,15 +514,15 @@ Funciona sorprendentemente bien, en especial cuando los pesos se mantienen dentr
 
 <!--$$
 y = \begin{cases}
-x, & \text{if $x > \text{threshold}$}\\
+x, & \text{if} x > \text{threshold}\\
 v, & \text{otherwise}
 \end{cases}
 $$
 -->
 
 $$
 y = \begin{cases}
-x, & \text{si $x > \text{umbral}$}\\
+x, & \text{si} x > \text{umbral}\\
 v, & \text{en otro caso}
 \end{cases}
 $$
@@ -573,17 +573,17 @@ Se usa muy raramente, con la excepción de la codificación dispersa, donde se u
 
 <!--$$
 \text{SoftShrinkage}(x) = \begin{cases}
-x - \lambda, & \text{if $x > \lambda$}\\
-x + \lambda, & \text{if $x < -\lambda$}\\
+x - \lambda, & \text{if} x > \lambda\\
+x + \lambda, & \text{if} x < -\lambda\\
 0, & \text{otherwise}
 \end{cases}
 $$
 -->
 
 $$
 \text{Encogimiento suave}(x) = \begin{cases}
-x - \lambda, & \text{si $x > \lambda$}\\
-x + \lambda, & \text{si $x < -\lambda$}\\
+x - \lambda, & \text{si} x > \lambda\\
+x + \lambda, & \text{si} x < -\lambda\\
 0, & \text{en otro caso}
 \end{cases}
 $$
@@ -613,17 +613,17 @@ Esta función básicamente encoge la variable por un valor constante hacía 0, y
 
 <!--$$
 \text{HardShrinkage}(x) = \begin{cases}
-x, & \text{if $x > \lambda$}\\
-x, & \text{if $x < -\lambda$}\\
+x, & \text{if} x > \lambda\\
+x, & \text{if} x < -\lambda\\
 0, & \text{otherwise}
 \end{cases}
 $$
 -->
 
 $$
 \text{EncogimientoDuro}(x) = \begin{cases}
-x, & \text{si $x > \lambda$}\\
-x, & \text{si $x < -\lambda$}\\
+x, & \text{si} x > \lambda\\
+x, & \text{si} x < -\lambda\\
 0, & \text{en otro caso}
 \end{cases}
 $$

docs/fr/week11/11-1.md

Lines changed: 20 additions & 20 deletions
@@ -53,7 +53,7 @@ There are variations in ReLU. The Random ReLU (RReLU) is defined as follows.
 
 $$
 \text{RReLU}(x) = \begin{cases}
-x, & \text{if $x \geq 0$}\\
+x, & \text{if} x \geq 0\\
 ax, & \text{otherwise}
 \end{cases}
 $$
@@ -73,7 +73,7 @@ Il y a des variations dans ReLU. Le ReLU aléatoire (RReLU) est défini comme su
 
 $$
 \text{RReLU}(x) = \begin{cases}
-x, & \text{if $x \geq 0$}\\
+x, & \text{if} x \geq 0\\
 ax, & \text{otherwise}
 \end{cases}
 $$
@@ -93,7 +93,7 @@ Notez que pour RReLU, $a$ est une variable aléatoire qui maintient les prélèv
 
 $$
 \text{LeakyReLU}(x) = \begin{cases}
-x, & \text{if $x \geq 0$}\\
+x, & \text{if} x \geq 0\\
 a_\text{negative slope}x, & \text{otherwise}
 \end{cases}
 $$
@@ -112,7 +112,7 @@ LeakyReLU is necessary for skinny network, which is almost impossible to get gra
 
 $$
 \text{LeakyReLU}(x) = \begin{cases}
-x, & \text{if $x \geq 0$}\\
+x, & \text{if} x \geq 0\\
 a_\text{negative slope}x, & \text{otherwise}
 \end{cases}
 $$
@@ -131,7 +131,7 @@ Avec LeakyReLU, le réseau peut toujours avoir des gradients même si nous somme
 
 $$
 \text{PReLU}(x) = \begin{cases}
-x, & \text{if $x \geq 0$}\\
+x, & \text{if} x \geq 0\\
 ax, & \text{otherwise}
 \end{cases}
 $$
@@ -150,7 +150,7 @@ The above activation functions (*i.e.* ReLU, LeakyReLU, PReLU) are scale-invaria
 
 $$
 \text{PReLU}(x) = \begin{cases}
-x, & \text{if $x \geq 0$}\\
+x, & \text{if} x \geq 0\\
 ax, & \text{otherwise}
 \end{cases}
 $$
@@ -422,8 +422,8 @@ Elle est similaire à la fonction Sigmoïde mais arrive lentement à l'asymptote
 
 $$
 \text{HardTanh}(x) = \begin{cases}
-1, & \text{if $x > 1$}\\
--1, & \text{if $x < -1$}\\
+1, & \text{if} x > 1\\
+-1, & \text{if} x < -1\\
 x, & \text{otherwise}
 \end{cases}
 $$
@@ -442,8 +442,8 @@ It works surprisingly well especially when weights are kept within the small val
 
 $$
 \text{HardTanh}(x) = \begin{cases}
-1, & \text{if $x > 1$}\\
--1, & \text{if $x < -1$}\\
+1, & \text{if} x > 1\\
+-1, & \text{if} x < -1\\
 x, & \text{otherwise}
 \end{cases}
 $$
@@ -464,7 +464,7 @@ Elle fonctionne étonnamment bien, surtout lorsque les poids sont maintenus dans
 
 $$
 y = \begin{cases}
-x, & \text{if $x > \text{threshold}$}\\
+x, & \text{if} x > \text{threshold}\\
 v, & \text{otherwise}
 \end{cases}
 $$
@@ -477,7 +477,7 @@ It is rarely used because we cannot propagate the gradient back. And it is also
 
 $$
 y = \begin{cases}
-x, & \text{if $x > \text{threshold}$}\\
+x, & \text{if} x > \text{threshold}\\
 v, & \text{otherwise}
 \end{cases}
 $$
@@ -519,8 +519,8 @@ Elle est rarement utilisée sauf pour les codages épars afin de calculer la val
 
 $$
 \text{SoftShrinkage}(x) = \begin{cases}
-x - \lambda, & \text{if $x > \lambda$}\\
-x + \lambda, & \text{if $x < -\lambda$}\\
+x - \lambda, & \text{if} x > \lambda\\
+x + \lambda, & \text{if} x < -\lambda\\
 0, & \text{otherwise}
 \end{cases}
 $$
@@ -537,8 +537,8 @@ This basically shrinks the variable by a constant towards 0, and forces to 0 if
 
 $$
 \text{SoftShrinkage}(x) = \begin{cases}
-x - \lambda, & \text{if $x > \lambda$}\\
-x + \lambda, & \text{if $x < -\lambda$}\\
+x - \lambda, & \text{if} x > \lambda\\
+x + \lambda, & \text{if} x < -\lambda\\
 0, & \text{otherwise}
 \end{cases}
 $$
@@ -556,8 +556,8 @@ Essentiellement, cela réduit la variable d'une constante vers 0 et la force à
 
 $$
 \text{HardShrinkage}(x) = \begin{cases}
-x, & \text{if $x > \lambda$}\\
-x, & \text{if $x < -\lambda$}\\
+x, & \text{if} x > \lambda\\
+x, & \text{if} x < -\lambda\\
 0, & \text{otherwise}
 \end{cases}
 $$
@@ -574,8 +574,8 @@ It is rarely used except for sparse coding.
 
 $$
 \text{HardShrinkage}(x) = \begin{cases}
-x, & \text{if $x > \lambda$}\\
-x, & \text{if $x < -\lambda$}\\
+x, & \text{if} x > \lambda\\
+x, & \text{if} x < -\lambda\\
 0, & \text{otherwise}
 \end{cases}
 $$
