buwei-h
diff --git a/‎soft_margin_svm/Qp-cvxopt.pdf‎
139 KB b/‎soft_margin_svm/Qp-cvxopt.pdf‎
139 KB
diff --git a/‎soft_margin_svm/soft_margin_svm.ipynb‎
Lines changed: 291 additions & 0 deletions b/‎soft_margin_svm/soft_margin_svm.ipynb‎
Lines changed: 291 additions & 0 deletions
@@ -0,0 +1,291 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import numpy\n",
+    "from cvxopt import matrix\n",
+    "from cvxopt import solvers"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# 定义二次规划参数\n",
+    "P = matrix([[1.0,0.0],[0.0,0.0]])\n",
+    "q = matrix([3.0,4.0])\n",
+    "G = matrix([[-1.0,0.0,-1.0,2.0,3.0],[0.0,-1.0,-3.0,5.0,4.0]])\n",
+    "h = matrix([0.0,0.0,-15.0,100.0,80.0])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "     pcost       dcost       gap    pres   dres\n",
+      " 0:  1.0780e+02 -7.6366e+02  9e+02  4e-17  4e+01\n",
+      " 1:  9.3245e+01  9.7637e+00  8e+01  8e-17  3e+00\n",
+      " 2:  6.7311e+01  3.2553e+01  3e+01  8e-17  1e+00\n",
+      " 3:  2.6071e+01  1.5068e+01  1e+01  7e-17  7e-01\n",
+      " 4:  3.7092e+01  2.3152e+01  1e+01  1e-16  4e-01\n",
+      " 5:  2.5352e+01  1.8652e+01  7e+00  9e-17  4e-16\n",
+      " 6:  2.0062e+01  1.9974e+01  9e-02  7e-17  2e-16\n",
+      " 7:  2.0001e+01  2.0000e+01  9e-04  8e-17  2e-16\n",
+      " 8:  2.0000e+01  2.0000e+01  9e-06  1e-16  2e-16\n",
+      "Optimal solution found.\n"
+     ]
+    }
+   ],
+   "source": [
+    "# 构建求解\n",
+    "sol = solvers.qp(P,q,G,h)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[ 7.13e-07]\n",
+      "[ 5.00e+00]\n",
+      " 20.00000617311241\n"
+     ]
+    }
+   ],
+   "source": [
+    "# 获取最优值\n",
+    "print(sol['x'],sol['primal objective'])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import numpy as np\n",
+    "from numpy import linalg\n",
+    "import cvxopt\n",
+    "import cvxopt.solvers\n",
+    "import pylab as pl"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# 定义一个线性核\n",
+    "def linear_kernel(x1, x2):\n",
+    "    return np.dot(x1, x2)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def gen_non_lin_separable_data():\n",
+    "    mean1 = [-1, 2]\n",
+    "    mean2 = [1, -1]\n",
+    "    mean3 = [4, -4]\n",
+    "    mean4 = [-4, 4]\n",
+    "    cov = [[1.0, 0.8], [0.8, 1.0]]\n",
+    "    X1 = np.random.multivariate_normal(mean1, cov, 50)\n",
+    "    X1 = np.vstack((X1, np.random.multivariate_normal(mean3, cov, 50)))\n",
+    "    y1 = np.ones(len(X1))\n",
+    "    X2 = np.random.multivariate_normal(mean2, cov, 50)\n",
+    "    X2 = np.vstack((X2, np.random.multivariate_normal(mean4, cov, 50)))\n",
+    "    y2 = np.ones(len(X2)) * -1\n",
+    "    return X1, y1, X2, y2\n",
+    "\n",
+    "X1, y1, X2, y2 = gen_non_lin_separable_data()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "(180, 2) (180,) (20, 2) (20,)\n"
+     ]
+    }
+   ],
+   "source": [
+    "def split_train(X1, y1, X2, y2):\n",
+    "    X1_train = X1[:90]\n",
+    "    y1_train = y1[:90]\n",
+    "    X2_train = X2[:90]\n",
+    "    y2_train = y2[:90]\n",
+    "    X_train = np.vstack((X1_train, X2_train))\n",
+    "    y_train = np.hstack((y1_train, y2_train))\n",
+    "    return X_train, y_train\n",
+    "\n",
+    "\n",
+    "def split_test(X1, y1, X2, y2):\n",
+    "    X1_test = X1[90:]\n",
+    "    y1_test = y1[90:]\n",
+    "    X2_test = X2[90:]\n",
+    "    y2_test = y2[90:]\n",
+    "    X_test = np.vstack((X1_test, X2_test))\n",
+    "    y_test = np.hstack((y1_test, y2_test))\n",
+    "    return X_test, y_test\n",
+    "\n",
+    "X_train, y_train = split_train(X1, y1, X2, y2)\n",
+    "X_test, y_test = split_test(X1, y1, X2, y2)\n",
+    "print(X_train.shape, y_train.shape, X_test.shape, y_test.shape)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 27,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def fit(X, y, C):\n",
+    "    n_samples, n_features = X.shape\n",
+    "\n",
+    "    # Gram matrix\n",
+    "    K = np.zeros((n_samples, n_samples))\n",
+    "    for i in range(n_samples):\n",
+    "        for j in range(n_samples):\n",
+    "            K[i, j] = linear_kernel(X[i], X[j])\n",
+    "\n",
+    "    P = cvxopt.matrix(np.outer(y, y) * K)\n",
+    "    q = cvxopt.matrix(np.ones(n_samples) * -1)\n",
+    "    A = cvxopt.matrix(y, (1, n_samples))\n",
+    "    b = cvxopt.matrix(0.0)\n",
+    "\n",
+    "    if C is None:\n",
+    "        G = cvxopt.matrix(np.diag(np.ones(n_samples) * -1))\n",
+    "        h = cvxopt.matrix(np.zeros(n_samples))\n",
+    "    else:\n",
+    "        tmp1 = np.diag(np.ones(n_samples) * -1)\n",
+    "        tmp2 = np.identity(n_samples)\n",
+    "        G = cvxopt.matrix(np.vstack((tmp1, tmp2)))\n",
+    "        tmp1 = np.zeros(n_samples)\n",
+    "        tmp2 = np.ones(n_samples) * C\n",
+    "        h = cvxopt.matrix(np.hstack((tmp1, tmp2)))\n",
+    "\n",
+    "    # solve QP problem\n",
+    "    solution = cvxopt.solvers.qp(P, q, G, h, A, b)\n",
+    "\n",
+    "    # Lagrange multipliers\n",
+    "    a = np.ravel(solution['x'])\n",
+    "    # Support vectors have non zero lagrange multipliers\n",
+    "    sv = a > 1e-5\n",
+    "    ind = np.arange(len(a))[sv]\n",
+    "    a = a[sv]\n",
+    "    sv_x = X[sv]\n",
+    "    sv_y = y[sv]\n",
+    "    print(\"%d support vectors out of %d points\" % (len(a), n_samples))\n",
+    "\n",
+    "    # Intercept\n",
+    "    b = 0\n",
+    "    for n in range(len(a)):\n",
+    "        b += sv_y[n]\n",
+    "        b -= np.sum(a * sv_y * K[ind[n], sv])\n",
+    "    b /= len(a)\n",
+    "\n",
+    "    # Weight vector\n",
+    "    w = np.zeros(n_features)\n",
+    "    for n in range(len(a)):\n",
+    "        w += a[n] * sv_y[n] * sv[n]\n",
+    "    else:\n",
+    "        w = None"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 28,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "     pcost       dcost       gap    pres   dres\n",
+      " 0: -5.5569e+04 -7.4952e+07  8e+07  1e-02  5e-11\n",
+      " 1: -7.3687e+04 -9.6455e+05  9e+05  1e-04  5e-11\n",
+      " 2: -8.2531e+04 -1.6849e+05  9e+04  1e-05  6e-11\n",
+      " 3: -1.2558e+05 -1.4468e+05  2e+04  1e-06  8e-11\n",
+      " 4: -1.2939e+05 -1.3440e+05  5e+03  3e-07  8e-11\n",
+      " 5: -1.3084e+05 -1.3145e+05  6e+02  2e-08  9e-11\n",
+      " 6: -1.3103e+05 -1.3115e+05  1e+02  4e-09  9e-11\n",
+      " 7: -1.3107e+05 -1.3109e+05  2e+01  6e-10  1e-10\n",
+      " 8: -1.3108e+05 -1.3108e+05  2e+00  4e-11  1e-10\n",
+      " 9: -1.3108e+05 -1.3108e+05  2e-01  3e-11  1e-10\n",
+      "10: -1.3108e+05 -1.3108e+05  2e-03  4e-11  1e-10\n",
+      "Optimal solution found.\n",
+      "158 support vectors out of 180 points\n"
+     ]
+    }
+   ],
+   "source": [
+    "w, b = fit(X_train, y_train, C=1000.1)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.6.5"
+  },
+  "toc": {
+   "base_numbering": 1,
+   "nav_menu": {},
+   "number_sections": true,
+   "sideBar": true,
+   "skip_h1_title": false,
+   "title_cell": "Table of Contents",
+   "title_sidebar": "Contents",
+   "toc_cell": false,
+   "toc_position": {},
+   "toc_section_display": true,
+   "toc_window_display": false
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}