diff --git a/setup.py b/setup.py
index 7efa31dbc4..381b617596 100644
--- a/setup.py
+++ b/setup.py
@@ -82,7 +82,7 @@ def run(self):
     license='BSD',
     install_requires=[
-        'tqdm', 'requests', 'torch', 'numpy', 'sentencepiece'
+        'tqdm', 'requests', 'torch', 'numpy'
     ],
     python_requires='>=3.5',
     classifiers=[
diff --git a/test/data/test_functional.py b/test/data/test_functional.py
index 80648d0983..66fda21154 100644
--- a/test/data/test_functional.py
+++ b/test/data/test_functional.py
@@ -5,7 +5,6 @@
 import uuid
 import unittest
 
-import sentencepiece as spm
 import torch
 import torchtext.data as data
 from torchtext.data.functional import (
@@ -46,11 +45,8 @@ def test_generate_sp_model(self):
         model_prefix = os.path.join(dir_name, f'spm_user_{uuid.uuid4()}')
         model_file = f'{model_prefix}.model'
         generate_sp_model(data_path, vocab_size=23456, model_prefix=model_prefix)
-
-        sp_user = spm.SentencePieceProcessor()
-        sp_user.Load(model_file)
-
-        self.assertEqual(len(sp_user), 23456)
+        sp_model = load_sp_model(model_file)
+        self.assertEqual(sp_model.GetPieceSize(), 23456)
 
     def test_sentencepiece_numericalizer(self):
         test_sample = 'SentencePiece is an unsupervised text tokenizer and detokenizer'