Merge pull request scikit-learn#7064 from Erotemic/quickfix_py2_libsvm_kernel_unicode

GaelVaroquaux · web-flow · commit 9e913c04d748 · 2016-08-01T18:12:18.000+02:00
Fixed python2 libsvm call with unicode kernel
diff --git a/sklearn/svm/base.py b/sklearn/svm/base.py
@@ -232,6 +232,15 @@ def _dense_fit(self, X, y, sample_weight, solver_type, kernel,
 
         libsvm.set_verbosity_wrap(self.verbose)
 
+        if six.PY2:
+            # In python2 ensure kernel is ascii bytes to prevent a TypeError
+            if isinstance(kernel, six.types.UnicodeType):
+                kernel = str(kernel)
+        if six.PY3:
+            # In python2 ensure kernel is utf8 unicode to prevent a TypeError
+            if isinstance(kernel, bytes):
+                kernel = str(kernel, 'utf8')
+
         # we don't pass **self.get_params() to allow subclasses to
         # add other parameters to __init__
         self.support_, self.support_vectors_, self.n_support_, \
diff --git a/sklearn/svm/tests/test_svm.py b/sklearn/svm/tests/test_svm.py
@@ -3,7 +3,6 @@
 
 TODO: remove hard coded numerical results when possible
 """
-
 import numpy as np
 import itertools
 from numpy.testing import assert_array_equal, assert_array_almost_equal
@@ -25,6 +24,7 @@
 from sklearn.exceptions import ConvergenceWarning
 from sklearn.exceptions import NotFittedError
 from sklearn.multiclass import OneVsRestClassifier
+from sklearn.externals import six
 
 # toy sample
 X = [[-2, -1], [-1, -1], [-1, -2], [1, 1], [1, 2], [2, 1]]
@@ -521,6 +521,30 @@ def test_bad_input():
     assert_raises(ValueError, clf.predict, Xt)
 
 
+def test_unicode_kernel():
+    # Test that a unicode kernel name does not cause a TypeError on clf.fit
+    if six.PY2:
+        # Test unicode (same as str on python3)
+        clf = svm.SVC(kernel=unicode('linear'))
+        clf.fit(X, Y)
+
+        # Test ascii bytes (str is bytes in python2)
+        clf = svm.SVC(kernel=str('linear'))
+        clf.fit(X, Y)
+    else:
+        # Test unicode (str is unicode in python3)
+        clf = svm.SVC(kernel=str('linear'))
+        clf.fit(X, Y)
+
+        # Test ascii bytes (same as str on python2)
+        clf = svm.SVC(kernel=bytes('linear', 'ascii'))
+        clf.fit(X, Y)
+
+    # Test default behavior on both versions
+    clf = svm.SVC(kernel='linear')
+    clf.fit(X, Y)
+
+
 def test_sparse_precomputed():
     clf = svm.SVC(kernel='precomputed')
     sparse_gram = sparse.csr_matrix([[1, 0], [0, 1]])