diff --git a/.gitignore b/.gitignore index 678636a7b..693ffceca 100644 --- a/.gitignore +++ b/.gitignore @@ -79,4 +79,7 @@ target/ *.ipynb.syncdoc .ipynb_checkpoints .ipython-daemon.json -.cache + +.idea/ +.pybuilder/ +_SUCCESS diff --git a/.travis.yml b/.travis.yml index 33f91240d..0c0079ca5 100644 --- a/.travis.yml +++ b/.travis.yml @@ -10,7 +10,8 @@ python: install: - pip install --upgrade pip - "if [[ $TRAVIS_PYTHON_VERSION != 'pypy'* ]]; then pip install pylzma; fi" - - "pip install .[test]" + - "pip install -r 'requirements-build.txt'" script: - pylint pysparkling - - python -m pytest -vv + - pyb -X + diff --git a/build.py b/build.py new file mode 100644 index 000000000..eed1f9d89 --- /dev/null +++ b/build.py @@ -0,0 +1,143 @@ +# -*- coding: utf-8 -*- +import logging +from pathlib import Path + +from pybuilder.core import use_plugin, init, before, after, Author +from pybuilder.errors import BuildFailedException + +use_plugin("python.core") +use_plugin("python.flake8") +use_plugin("python.distutils") +# use_plugin("python.unittest") +# use_plugin("python.coverage") +# https://github.com/AlexeySanko/pybuilder_pytest +# use_plugin('pypi:pybuilder_pytest') +# https://github.com/AlexeySanko/pybuilder_pytest_coverage +# use_plugin('pypi:pybuilder_pytest_coverage') + + +def _get_pysparkling_version() -> str: + """Returns the pysparkling version based on the one mentioned in the __init__.py file under the package.""" + from pathlib import Path + import re + + with open(Path(__file__).parent / 'src/main/python/pysparkling/__init__.py') as fp: + version_line = next(l for l in fp if l.startswith('__version__')) + return re.search(r'=\s*(["\'])(.*)(\1)\s*$', version_line).group(2) + + +name = "pysparkling" +default_task = "publish" +version = _get_pysparkling_version() +license = 'MIT' +summary = 'Pure Python implementation of the Spark RDD interface.' +description = None +authors = [ + Author(name='Sven Kreiss', email='me@svenkreiss.com'), + Author(name="Erwan Guyomarc'h", email='tools4origins@gmail.com'), + Author(name='Steven Van Ingelgem', email='steven@vaningelgem.be'), +] +maintainers = [ + Author(name="Erwan Guyomarc'h", email='tools4origins@gmail.com'), +] +requires_python = '>= 3.4' +url = 'https://github.com/tools4origins/pysparkling' + +_logging_root_level: int = logging.root.getEffectiveLevel() + + +@before('run_unit_tests') +def _set_debug_mode(): + logging.root.setLevel('DEBUG') + + +@after('run_unit_tests') +def _set_debug_mode_after(): + logging.root.setLevel(_logging_root_level) + + +@after('package') +def _add_extras_require(project, logger): + indent_size = 4 + encoding = 'utf-8' + + setup_script = Path(project.expand_path("$dir_dist", "setup.py")) + logger.info("Adding 'extras_require' to setup.py") + setup = setup_script.read_text(encoding=encoding).rstrip() + if setup[-1] != ')': + raise BuildFailedException("This setup.py seems to be wrong?") + + # Get the requirements-dev.txt file line by line, ready for insertion. + requirements_dev = '\n'.join( + ' '*4*indent_size + "'" + x.strip() + "'," + for x in (Path(__file__).parent / 'requirements-build.txt').read_text().split('\n') + if x + ) + + # TODO: find a nicer way to embed this! + new_setup = ( + setup[:-1].rstrip() + + f""" + extras_require={{ + 'hdfs': ['hdfs>=2.0.0'], + 'pandas': ['pandas>=0.23.2'], + 'performance': ['matplotlib>=1.5.3'], + 'streaming': ['tornado>=4.3'], + 'test': [ +{requirements_dev} + ] + }}, + ) +""" + ) + + setup_script.write_text(new_setup, encoding=encoding) + + +@init +def set_properties(project): + # Small tweak to project.list_scripts() as that method lists EVERYTHING in the scripts directory. + # and we're only interested in *.py files: + old_project_list_scripts = project.list_scripts + + def _my_list_scripts(): + return [ + filename + for filename in old_project_list_scripts() + if filename.lower().endswith('.py') + ] + setattr(project, 'list_scripts', _my_list_scripts) + + project.depends_on_requirements(file='requirements.txt') + + project.set_property('distutils_readme_description', True) + project.set_property('distutils_readme_file', 'README.rst') + + project.set_property_if_unset("pytest_extra_args", []) + project.get_property("pytest_extra_args").append("-x") # Fail on first failing unittest + project.set_property('pytest_coverage_break_build_threshold', 0) # Don't let coverage break the build (for now) + + project.set_property('distutils_console_scripts', []) + project.set_property( + 'distutils_classifiers', + [ + 'Development Status :: 4 - Beta', + 'Intended Audience :: Developers', + 'Natural Language :: English', + 'License :: OSI Approved :: MIT License', + 'Operating System :: OS Independent', + 'Programming Language :: Python', + 'Programming Language :: Python :: 3.4', + 'Programming Language :: Python :: 3.5', + 'Programming Language :: Python :: 3.6', + 'Programming Language :: Python :: 3.7', + 'Programming Language :: Python :: 3.8', + 'Programming Language :: Python :: 3.9', + 'Programming Language :: Python :: Implementation :: PyPy', + ] + ) + + +if __name__ == '__main__': + from pybuilder.cli import main + main('-CX', '--no-venvs') diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 000000000..950f54989 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,3 @@ +[build-system] +requires = ["pybuilder>=0.12.0"] +build-backend = "pybuilder.pep517" diff --git a/pysparkling/tests/pyspark/key_value.txt.bz2/_SUCCESS b/pysparkling/tests/pyspark/key_value.txt.bz2/_SUCCESS deleted file mode 100644 index e69de29bb..000000000 diff --git a/pysparkling/tests/pyspark/key_value.txt.gz/_SUCCESS b/pysparkling/tests/pyspark/key_value.txt.gz/_SUCCESS deleted file mode 100644 index e69de29bb..000000000 diff --git a/requirements-build.txt b/requirements-build.txt new file mode 100644 index 000000000..f5ebc02d0 --- /dev/null +++ b/requirements-build.txt @@ -0,0 +1,8 @@ +backports.tempfile==1.0rc1 +cloudpickle>=0.1.0 +futures>=3.0.1 +pylint>=2.3,<2.6 +memory_profiler>=0.47 +pytest +tornado>=4.3 +pybuilder diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 000000000..757301857 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,5 @@ +boto>=2.36.0 +future>=0.15 +requests>=2.6.0 +pytz>=2019.3 +python-dateutil>=2.8.0 diff --git a/setup.cfg b/setup.cfg index 3ded2f1dc..f23707245 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,6 +1,6 @@ [flake8] ignore = H301 -exclude = venv*,logo,docs,build +exclude = venv*,logo,docs,build,target [tool:pytest] addopts = --doctest-modules diff --git a/setup.py b/setup.py deleted file mode 100644 index 0c692dba7..000000000 --- a/setup.py +++ /dev/null @@ -1,69 +0,0 @@ -from setuptools import setup, find_packages - -# workaround: nosetests don't exit cleanly with older -# python version (<=2.6 and even <2.7.4) -try: - import multiprocessing # noqa -except ImportError: - pass - - -# extract version from __init__.py -with open('pysparkling/__init__.py', 'r') as f: - version_line = [l for l in f if l.startswith('__version__')][0] - VERSION = version_line.split('=')[1].strip()[1:-1] - - -setup( - name='pysparkling', - version=VERSION, - packages=find_packages(), - license='MIT', - description='Pure Python implementation of the Spark RDD interface.', - long_description=open('README.rst').read(), - author='Sven Kreiss', - author_email='me@svenkreiss.com', - url='https://github.com/svenkreiss/pysparkling', - - install_requires=[ - 'boto>=2.36.0', - 'future>=0.15', - 'requests>=2.6.0', - 'pytz>=2019.3', - 'python-dateutil>=2.8.0' - ], - extras_require={ - 'hdfs': ['hdfs>=2.0.0'], - 'pandas': ['pandas>=0.23.2'], - 'performance': ['matplotlib>=1.5.3'], - 'streaming': ['tornado>=4.3'], - 'test': [ - 'backports.tempfile==1.0rc1', - 'cloudpickle>=0.1.0', - 'futures>=3.0.1', - 'pylint>=2.3,<2.6', - 'memory-profiler>=0.47', - 'pytest', - 'tornado>=4.3', - ] - }, - entry_points={ - 'console_scripts': [], - }, - - classifiers=[ - 'Development Status :: 4 - Beta', - 'Intended Audience :: Developers', - 'Natural Language :: English', - 'License :: OSI Approved :: MIT License', - 'Operating System :: OS Independent', - 'Programming Language :: Python', - 'Programming Language :: Python :: 3.4', - 'Programming Language :: Python :: 3.5', - 'Programming Language :: Python :: 3.6', - 'Programming Language :: Python :: 3.7', - 'Programming Language :: Python :: 3.8', - 'Programming Language :: Python :: 3.9', - 'Programming Language :: Python :: Implementation :: PyPy', - ] -) diff --git a/pysparkling/__init__.py b/src/main/python/pysparkling/__init__.py similarity index 100% rename from pysparkling/__init__.py rename to src/main/python/pysparkling/__init__.py diff --git a/pysparkling/accumulators.py b/src/main/python/pysparkling/accumulators.py similarity index 100% rename from pysparkling/accumulators.py rename to src/main/python/pysparkling/accumulators.py diff --git a/pysparkling/broadcast.py b/src/main/python/pysparkling/broadcast.py similarity index 100% rename from pysparkling/broadcast.py rename to src/main/python/pysparkling/broadcast.py diff --git a/pysparkling/cache_manager.py b/src/main/python/pysparkling/cache_manager.py similarity index 100% rename from pysparkling/cache_manager.py rename to src/main/python/pysparkling/cache_manager.py diff --git a/pysparkling/context.py b/src/main/python/pysparkling/context.py similarity index 100% rename from pysparkling/context.py rename to src/main/python/pysparkling/context.py index 851903010..7541e572f 100644 --- a/pysparkling/context.py +++ b/src/main/python/pysparkling/context.py @@ -3,17 +3,17 @@ from __future__ import (absolute_import, division, print_function, unicode_literals) -from collections import defaultdict import itertools import logging import pickle import struct import time import traceback +from collections import defaultdict from . import __version__ as PYSPARKLING_VERSION -from .broadcast import Broadcast from . import accumulators +from .broadcast import Broadcast from .cache_manager import CacheManager from .exceptions import ContextIsLockedException from .fileio import File, TextFile diff --git a/pysparkling/exceptions.py b/src/main/python/pysparkling/exceptions.py similarity index 100% rename from pysparkling/exceptions.py rename to src/main/python/pysparkling/exceptions.py diff --git a/pysparkling/fileio/__init__.py b/src/main/python/pysparkling/fileio/__init__.py similarity index 99% rename from pysparkling/fileio/__init__.py rename to src/main/python/pysparkling/fileio/__init__.py index ecc31675f..0cbb879a3 100644 --- a/pysparkling/fileio/__init__.py +++ b/src/main/python/pysparkling/fileio/__init__.py @@ -1,7 +1,8 @@ from __future__ import absolute_import -# flake8: noqa from .file import File from .textfile import TextFile +# flake8: noqa + __all__ = ['File', 'TextFile'] diff --git a/pysparkling/fileio/codec/__init__.py b/src/main/python/pysparkling/fileio/codec/__init__.py similarity index 100% rename from pysparkling/fileio/codec/__init__.py rename to src/main/python/pysparkling/fileio/codec/__init__.py diff --git a/pysparkling/fileio/codec/bz2.py b/src/main/python/pysparkling/fileio/codec/bz2.py similarity index 100% rename from pysparkling/fileio/codec/bz2.py rename to src/main/python/pysparkling/fileio/codec/bz2.py index 2a26b2a9c..1c73f9adb 100644 --- a/pysparkling/fileio/codec/bz2.py +++ b/src/main/python/pysparkling/fileio/codec/bz2.py @@ -1,8 +1,8 @@ from __future__ import absolute_import import bz2 -from io import BytesIO import logging +from io import BytesIO from .codec import Codec diff --git a/pysparkling/fileio/codec/codec.py b/src/main/python/pysparkling/fileio/codec/codec.py similarity index 100% rename from pysparkling/fileio/codec/codec.py rename to src/main/python/pysparkling/fileio/codec/codec.py diff --git a/pysparkling/fileio/codec/gz.py b/src/main/python/pysparkling/fileio/codec/gz.py similarity index 100% rename from pysparkling/fileio/codec/gz.py rename to src/main/python/pysparkling/fileio/codec/gz.py index 28a4dcb1d..9312e13c2 100644 --- a/pysparkling/fileio/codec/gz.py +++ b/src/main/python/pysparkling/fileio/codec/gz.py @@ -1,8 +1,8 @@ from __future__ import absolute_import import gzip -from io import BytesIO import logging +from io import BytesIO from .codec import Codec diff --git a/pysparkling/fileio/codec/lzma.py b/src/main/python/pysparkling/fileio/codec/lzma.py similarity index 100% rename from pysparkling/fileio/codec/lzma.py rename to src/main/python/pysparkling/fileio/codec/lzma.py diff --git a/pysparkling/fileio/codec/sevenz.py b/src/main/python/pysparkling/fileio/codec/sevenz.py similarity index 100% rename from pysparkling/fileio/codec/sevenz.py rename to src/main/python/pysparkling/fileio/codec/sevenz.py diff --git a/pysparkling/fileio/codec/tar.py b/src/main/python/pysparkling/fileio/codec/tar.py similarity index 100% rename from pysparkling/fileio/codec/tar.py rename to src/main/python/pysparkling/fileio/codec/tar.py index c3ae44fbe..2617fd062 100644 --- a/pysparkling/fileio/codec/tar.py +++ b/src/main/python/pysparkling/fileio/codec/tar.py @@ -1,8 +1,8 @@ from __future__ import absolute_import -from io import BytesIO import logging import tarfile +from io import BytesIO from .codec import Codec diff --git a/pysparkling/fileio/codec/zip.py b/src/main/python/pysparkling/fileio/codec/zip.py similarity index 100% rename from pysparkling/fileio/codec/zip.py rename to src/main/python/pysparkling/fileio/codec/zip.py index 4aaaba3f6..2bfde66e0 100644 --- a/pysparkling/fileio/codec/zip.py +++ b/src/main/python/pysparkling/fileio/codec/zip.py @@ -1,8 +1,8 @@ from __future__ import absolute_import -from io import BytesIO import logging import zipfile +from io import BytesIO from .codec import Codec diff --git a/pysparkling/fileio/file.py b/src/main/python/pysparkling/fileio/file.py similarity index 100% rename from pysparkling/fileio/file.py rename to src/main/python/pysparkling/fileio/file.py index 32b5cf0a1..1e55f2c2e 100644 --- a/pysparkling/fileio/file.py +++ b/src/main/python/pysparkling/fileio/file.py @@ -1,7 +1,7 @@ from __future__ import absolute_import -from io import BytesIO import logging +from io import BytesIO from . import codec from . import fs diff --git a/pysparkling/fileio/fs/__init__.py b/src/main/python/pysparkling/fileio/fs/__init__.py similarity index 99% rename from pysparkling/fileio/fs/__init__.py rename to src/main/python/pysparkling/fileio/fs/__init__.py index fe0192986..95f922386 100644 --- a/pysparkling/fileio/fs/__init__.py +++ b/src/main/python/pysparkling/fileio/fs/__init__.py @@ -7,7 +7,6 @@ from .local import Local from .s3 import S3 - __all__ = ['FileSystem', 'GS', 'Hdfs', 'Http', 'Local', 'S3'] diff --git a/pysparkling/fileio/fs/file_system.py b/src/main/python/pysparkling/fileio/fs/file_system.py similarity index 100% rename from pysparkling/fileio/fs/file_system.py rename to src/main/python/pysparkling/fileio/fs/file_system.py diff --git a/pysparkling/fileio/fs/gs.py b/src/main/python/pysparkling/fileio/fs/gs.py similarity index 100% rename from pysparkling/fileio/fs/gs.py rename to src/main/python/pysparkling/fileio/fs/gs.py index 59636679f..c1e6d617a 100644 --- a/pysparkling/fileio/fs/gs.py +++ b/src/main/python/pysparkling/fileio/fs/gs.py @@ -1,12 +1,12 @@ from __future__ import absolute_import -from fnmatch import fnmatch import logging +from fnmatch import fnmatch from io import BytesIO, StringIO +from .file_system import FileSystem from ...exceptions import FileSystemNotSupported from ...utils import Tokenizer, parse_file_uri -from .file_system import FileSystem log = logging.getLogger(__name__) diff --git a/pysparkling/fileio/fs/hdfs.py b/src/main/python/pysparkling/fileio/fs/hdfs.py similarity index 100% rename from pysparkling/fileio/fs/hdfs.py rename to src/main/python/pysparkling/fileio/fs/hdfs.py index 9d9902f3e..de9fe46a6 100644 --- a/pysparkling/fileio/fs/hdfs.py +++ b/src/main/python/pysparkling/fileio/fs/hdfs.py @@ -1,12 +1,12 @@ from __future__ import absolute_import, unicode_literals -from fnmatch import fnmatch import logging +from fnmatch import fnmatch from io import BytesIO, StringIO +from .file_system import FileSystem from ...exceptions import FileSystemNotSupported from ...utils import parse_file_uri, format_file_uri -from .file_system import FileSystem log = logging.getLogger(__name__) diff --git a/pysparkling/fileio/fs/http.py b/src/main/python/pysparkling/fileio/fs/http.py similarity index 100% rename from pysparkling/fileio/fs/http.py rename to src/main/python/pysparkling/fileio/fs/http.py diff --git a/pysparkling/fileio/fs/local.py b/src/main/python/pysparkling/fileio/fs/local.py similarity index 100% rename from pysparkling/fileio/fs/local.py rename to src/main/python/pysparkling/fileio/fs/local.py index db91961af..e1ec3155b 100644 --- a/pysparkling/fileio/fs/local.py +++ b/src/main/python/pysparkling/fileio/fs/local.py @@ -1,13 +1,13 @@ from __future__ import absolute_import, unicode_literals import glob -from fnmatch import fnmatch import io import logging import os +from fnmatch import fnmatch -from ...utils import Tokenizer from .file_system import FileSystem +from ...utils import Tokenizer log = logging.getLogger(__name__) diff --git a/pysparkling/fileio/fs/s3.py b/src/main/python/pysparkling/fileio/fs/s3.py similarity index 100% rename from pysparkling/fileio/fs/s3.py rename to src/main/python/pysparkling/fileio/fs/s3.py index 9d123fe22..3f530be1b 100644 --- a/pysparkling/fileio/fs/s3.py +++ b/src/main/python/pysparkling/fileio/fs/s3.py @@ -1,12 +1,12 @@ from __future__ import absolute_import +import logging from fnmatch import fnmatch from io import BytesIO, StringIO -import logging +from .file_system import FileSystem from ...exceptions import FileSystemNotSupported from ...utils import Tokenizer, parse_file_uri -from .file_system import FileSystem log = logging.getLogger(__name__) diff --git a/pysparkling/fileio/textfile.py b/src/main/python/pysparkling/fileio/textfile.py similarity index 100% rename from pysparkling/fileio/textfile.py rename to src/main/python/pysparkling/fileio/textfile.py diff --git a/pysparkling/partition.py b/src/main/python/pysparkling/partition.py similarity index 100% rename from pysparkling/partition.py rename to src/main/python/pysparkling/partition.py diff --git a/pysparkling/rdd.py b/src/main/python/pysparkling/rdd.py similarity index 100% rename from pysparkling/rdd.py rename to src/main/python/pysparkling/rdd.py index 3a423b242..ce4af2221 100644 --- a/pysparkling/rdd.py +++ b/src/main/python/pysparkling/rdd.py @@ -3,20 +3,20 @@ from __future__ import (division, absolute_import, print_function, unicode_literals) -import math -from builtins import range, zip -from collections import defaultdict import copy import functools import io import itertools import logging -from operator import itemgetter +import math import os import pickle import random import subprocess import sys +from builtins import range, zip +from collections import defaultdict +from operator import itemgetter try: import numpy diff --git a/pysparkling/samplers.py b/src/main/python/pysparkling/samplers.py similarity index 100% rename from pysparkling/samplers.py rename to src/main/python/pysparkling/samplers.py diff --git a/pysparkling/sql/__init__.py b/src/main/python/pysparkling/sql/__init__.py similarity index 100% rename from pysparkling/sql/__init__.py rename to src/main/python/pysparkling/sql/__init__.py diff --git a/pysparkling/sql/casts.py b/src/main/python/pysparkling/sql/casts.py similarity index 100% rename from pysparkling/sql/casts.py rename to src/main/python/pysparkling/sql/casts.py diff --git a/pysparkling/sql/column.py b/src/main/python/pysparkling/sql/column.py similarity index 100% rename from pysparkling/sql/column.py rename to src/main/python/pysparkling/sql/column.py diff --git a/pysparkling/sql/conf.py b/src/main/python/pysparkling/sql/conf.py similarity index 100% rename from pysparkling/sql/conf.py rename to src/main/python/pysparkling/sql/conf.py diff --git a/pysparkling/sql/context.py b/src/main/python/pysparkling/sql/context.py similarity index 100% rename from pysparkling/sql/context.py rename to src/main/python/pysparkling/sql/context.py diff --git a/pysparkling/sql/dataframe.py b/src/main/python/pysparkling/sql/dataframe.py similarity index 100% rename from pysparkling/sql/dataframe.py rename to src/main/python/pysparkling/sql/dataframe.py diff --git a/pysparkling/sql/expressions/__init__.py b/src/main/python/pysparkling/sql/expressions/__init__.py similarity index 100% rename from pysparkling/sql/expressions/__init__.py rename to src/main/python/pysparkling/sql/expressions/__init__.py diff --git a/pysparkling/sql/expressions/aggregate/__init__.py b/src/main/python/pysparkling/sql/expressions/aggregate/__init__.py similarity index 100% rename from pysparkling/sql/expressions/aggregate/__init__.py rename to src/main/python/pysparkling/sql/expressions/aggregate/__init__.py diff --git a/pysparkling/sql/expressions/aggregate/aggregations.py b/src/main/python/pysparkling/sql/expressions/aggregate/aggregations.py similarity index 100% rename from pysparkling/sql/expressions/aggregate/aggregations.py rename to src/main/python/pysparkling/sql/expressions/aggregate/aggregations.py diff --git a/pysparkling/sql/expressions/aggregate/collectors.py b/src/main/python/pysparkling/sql/expressions/aggregate/collectors.py similarity index 100% rename from pysparkling/sql/expressions/aggregate/collectors.py rename to src/main/python/pysparkling/sql/expressions/aggregate/collectors.py diff --git a/pysparkling/sql/expressions/aggregate/covariance_aggregations.py b/src/main/python/pysparkling/sql/expressions/aggregate/covariance_aggregations.py similarity index 100% rename from pysparkling/sql/expressions/aggregate/covariance_aggregations.py rename to src/main/python/pysparkling/sql/expressions/aggregate/covariance_aggregations.py diff --git a/pysparkling/sql/expressions/aggregate/stat_aggregations.py b/src/main/python/pysparkling/sql/expressions/aggregate/stat_aggregations.py similarity index 100% rename from pysparkling/sql/expressions/aggregate/stat_aggregations.py rename to src/main/python/pysparkling/sql/expressions/aggregate/stat_aggregations.py diff --git a/pysparkling/sql/expressions/arrays.py b/src/main/python/pysparkling/sql/expressions/arrays.py similarity index 100% rename from pysparkling/sql/expressions/arrays.py rename to src/main/python/pysparkling/sql/expressions/arrays.py diff --git a/pysparkling/sql/expressions/csvs.py b/src/main/python/pysparkling/sql/expressions/csvs.py similarity index 100% rename from pysparkling/sql/expressions/csvs.py rename to src/main/python/pysparkling/sql/expressions/csvs.py diff --git a/pysparkling/sql/expressions/dates.py b/src/main/python/pysparkling/sql/expressions/dates.py similarity index 100% rename from pysparkling/sql/expressions/dates.py rename to src/main/python/pysparkling/sql/expressions/dates.py diff --git a/pysparkling/sql/expressions/explodes.py b/src/main/python/pysparkling/sql/expressions/explodes.py similarity index 99% rename from pysparkling/sql/expressions/explodes.py rename to src/main/python/pysparkling/sql/expressions/explodes.py index 448d8e54d..12f42ae73 100644 --- a/pysparkling/sql/expressions/explodes.py +++ b/src/main/python/pysparkling/sql/expressions/explodes.py @@ -1,6 +1,5 @@ -from pysparkling.sql.types import StructField, IntegerType, DataType - from pysparkling.sql.expressions.expressions import UnaryExpression +from pysparkling.sql.types import StructField, IntegerType, DataType class Explode(UnaryExpression): diff --git a/pysparkling/sql/expressions/expressions.py b/src/main/python/pysparkling/sql/expressions/expressions.py similarity index 100% rename from pysparkling/sql/expressions/expressions.py rename to src/main/python/pysparkling/sql/expressions/expressions.py diff --git a/pysparkling/sql/expressions/fields.py b/src/main/python/pysparkling/sql/expressions/fields.py similarity index 99% rename from pysparkling/sql/expressions/fields.py rename to src/main/python/pysparkling/sql/expressions/fields.py index 1ba5a48be..d14c28f7b 100644 --- a/pysparkling/sql/expressions/fields.py +++ b/src/main/python/pysparkling/sql/expressions/fields.py @@ -1,6 +1,5 @@ -from pysparkling.sql.types import StructField - from pysparkling.sql.expressions.expressions import Expression +from pysparkling.sql.types import StructField from pysparkling.sql.utils import AnalysisException diff --git a/pysparkling/sql/expressions/jsons.py b/src/main/python/pysparkling/sql/expressions/jsons.py similarity index 100% rename from pysparkling/sql/expressions/jsons.py rename to src/main/python/pysparkling/sql/expressions/jsons.py diff --git a/pysparkling/sql/expressions/literals.py b/src/main/python/pysparkling/sql/expressions/literals.py similarity index 100% rename from pysparkling/sql/expressions/literals.py rename to src/main/python/pysparkling/sql/expressions/literals.py diff --git a/pysparkling/sql/expressions/mappers.py b/src/main/python/pysparkling/sql/expressions/mappers.py similarity index 100% rename from pysparkling/sql/expressions/mappers.py rename to src/main/python/pysparkling/sql/expressions/mappers.py diff --git a/pysparkling/sql/expressions/operators.py b/src/main/python/pysparkling/sql/expressions/operators.py similarity index 100% rename from pysparkling/sql/expressions/operators.py rename to src/main/python/pysparkling/sql/expressions/operators.py diff --git a/pysparkling/sql/expressions/orders.py b/src/main/python/pysparkling/sql/expressions/orders.py similarity index 100% rename from pysparkling/sql/expressions/orders.py rename to src/main/python/pysparkling/sql/expressions/orders.py diff --git a/pysparkling/sql/expressions/strings.py b/src/main/python/pysparkling/sql/expressions/strings.py similarity index 100% rename from pysparkling/sql/expressions/strings.py rename to src/main/python/pysparkling/sql/expressions/strings.py diff --git a/pysparkling/sql/expressions/userdefined.py b/src/main/python/pysparkling/sql/expressions/userdefined.py similarity index 100% rename from pysparkling/sql/expressions/userdefined.py rename to src/main/python/pysparkling/sql/expressions/userdefined.py diff --git a/pysparkling/sql/functions.py b/src/main/python/pysparkling/sql/functions.py similarity index 100% rename from pysparkling/sql/functions.py rename to src/main/python/pysparkling/sql/functions.py diff --git a/pysparkling/sql/group.py b/src/main/python/pysparkling/sql/group.py similarity index 100% rename from pysparkling/sql/group.py rename to src/main/python/pysparkling/sql/group.py diff --git a/pysparkling/sql/internal_utils/__init__.py b/src/main/python/pysparkling/sql/internal_utils/__init__.py similarity index 100% rename from pysparkling/sql/internal_utils/__init__.py rename to src/main/python/pysparkling/sql/internal_utils/__init__.py diff --git a/pysparkling/sql/internal_utils/column.py b/src/main/python/pysparkling/sql/internal_utils/column.py similarity index 100% rename from pysparkling/sql/internal_utils/column.py rename to src/main/python/pysparkling/sql/internal_utils/column.py diff --git a/pysparkling/sql/internal_utils/joins.py b/src/main/python/pysparkling/sql/internal_utils/joins.py similarity index 100% rename from pysparkling/sql/internal_utils/joins.py rename to src/main/python/pysparkling/sql/internal_utils/joins.py diff --git a/pysparkling/sql/internal_utils/options.py b/src/main/python/pysparkling/sql/internal_utils/options.py similarity index 100% rename from pysparkling/sql/internal_utils/options.py rename to src/main/python/pysparkling/sql/internal_utils/options.py diff --git a/pysparkling/sql/internal_utils/readers/__init__.py b/src/main/python/pysparkling/sql/internal_utils/readers/__init__.py similarity index 100% rename from pysparkling/sql/internal_utils/readers/__init__.py rename to src/main/python/pysparkling/sql/internal_utils/readers/__init__.py diff --git a/pysparkling/sql/internal_utils/readers/common.py b/src/main/python/pysparkling/sql/internal_utils/readers/common.py similarity index 100% rename from pysparkling/sql/internal_utils/readers/common.py rename to src/main/python/pysparkling/sql/internal_utils/readers/common.py diff --git a/pysparkling/sql/internal_utils/readers/csvreader.py b/src/main/python/pysparkling/sql/internal_utils/readers/csvreader.py similarity index 100% rename from pysparkling/sql/internal_utils/readers/csvreader.py rename to src/main/python/pysparkling/sql/internal_utils/readers/csvreader.py diff --git a/pysparkling/sql/internal_utils/readers/jsonreader.py b/src/main/python/pysparkling/sql/internal_utils/readers/jsonreader.py similarity index 100% rename from pysparkling/sql/internal_utils/readers/jsonreader.py rename to src/main/python/pysparkling/sql/internal_utils/readers/jsonreader.py diff --git a/pysparkling/sql/internal_utils/readers/textreader.py b/src/main/python/pysparkling/sql/internal_utils/readers/textreader.py similarity index 100% rename from pysparkling/sql/internal_utils/readers/textreader.py rename to src/main/python/pysparkling/sql/internal_utils/readers/textreader.py diff --git a/pysparkling/sql/internal_utils/readers/utils.py b/src/main/python/pysparkling/sql/internal_utils/readers/utils.py similarity index 100% rename from pysparkling/sql/internal_utils/readers/utils.py rename to src/main/python/pysparkling/sql/internal_utils/readers/utils.py diff --git a/pysparkling/sql/internal_utils/readwrite.py b/src/main/python/pysparkling/sql/internal_utils/readwrite.py similarity index 100% rename from pysparkling/sql/internal_utils/readwrite.py rename to src/main/python/pysparkling/sql/internal_utils/readwrite.py diff --git a/pysparkling/sql/internal_utils/writers.py b/src/main/python/pysparkling/sql/internal_utils/writers.py similarity index 100% rename from pysparkling/sql/internal_utils/writers.py rename to src/main/python/pysparkling/sql/internal_utils/writers.py diff --git a/pysparkling/sql/internals.py b/src/main/python/pysparkling/sql/internals.py similarity index 99% rename from pysparkling/sql/internals.py rename to src/main/python/pysparkling/sql/internals.py index aaf095f25..e84c45672 100644 --- a/pysparkling/sql/internals.py +++ b/src/main/python/pysparkling/sql/internals.py @@ -17,6 +17,7 @@ from pysparkling.sql.column import parse from pysparkling.sql.utils import IllegalArgumentException from pysparkling.stat_counter import RowStatHelper, CovarianceCounter +from pysparkling.storagelevel import StorageLevel from pysparkling.utils import get_keyfunc, compute_weighted_percentiles, \ reservoir_sample_and_size, pad_cell, str_half_width, format_cell, merge_rows, \ merge_rows_joined_on_values, portable_hash diff --git a/pysparkling/sql/readwriter.py b/src/main/python/pysparkling/sql/readwriter.py similarity index 100% rename from pysparkling/sql/readwriter.py rename to src/main/python/pysparkling/sql/readwriter.py diff --git a/pysparkling/sql/schema_utils.py b/src/main/python/pysparkling/sql/schema_utils.py similarity index 99% rename from pysparkling/sql/schema_utils.py rename to src/main/python/pysparkling/sql/schema_utils.py index ffe17249c..b1583b716 100644 --- a/pysparkling/sql/schema_utils.py +++ b/src/main/python/pysparkling/sql/schema_utils.py @@ -1,6 +1,6 @@ from functools import reduce -from pysparkling.sql.internal_utils.joins import INNER_JOIN, CROSS_JOIN, LEFT_JOIN,\ +from pysparkling.sql.internal_utils.joins import INNER_JOIN, CROSS_JOIN, LEFT_JOIN, \ LEFT_ANTI_JOIN, LEFT_SEMI_JOIN, RIGHT_JOIN, FULL_JOIN from pysparkling.sql.types import _infer_schema, _has_nulltype, _merge_type, \ StructType, StructField, _get_null_fields diff --git a/pysparkling/sql/session.py b/src/main/python/pysparkling/sql/session.py similarity index 99% rename from pysparkling/sql/session.py rename to src/main/python/pysparkling/sql/session.py index 9ea3d30f8..eccd80bc8 100644 --- a/pysparkling/sql/session.py +++ b/src/main/python/pysparkling/sql/session.py @@ -1,17 +1,16 @@ import sys from threading import RLock -from pysparkling.sql.types import _make_type_verifier, DataType, StructType, \ - _create_converter, _infer_schema, _has_nulltype, _merge_type - import pysparkling from pysparkling import RDD from pysparkling.context import Context from pysparkling.sql.conf import RuntimeConfig -from pysparkling.sql.internals import DataFrameInternal from pysparkling.sql.dataframe import DataFrame +from pysparkling.sql.internals import DataFrameInternal from pysparkling.sql.readwriter import DataFrameReader from pysparkling.sql.schema_utils import infer_schema_from_list +from pysparkling.sql.types import _make_type_verifier, DataType, StructType, \ + _create_converter, _infer_schema, _has_nulltype, _merge_type from pysparkling.sql.utils import require_minimum_pandas_version if sys.version >= '3': diff --git a/pysparkling/sql/types.py b/src/main/python/pysparkling/sql/types.py similarity index 99% rename from pysparkling/sql/types.py rename to src/main/python/pysparkling/sql/types.py index 3242f625b..d39eff251 100644 --- a/pysparkling/sql/types.py +++ b/src/main/python/pysparkling/sql/types.py @@ -14,20 +14,19 @@ # See the License for the specific language governing permissions and # limitations under the License. # -import itertools -import os -import sys -import decimal +import ctypes import datetime +import decimal +import itertools import json as _json +import os +import platform import re +import sys from array import array -import ctypes -import platform from pysparkling.sql.utils import ParseException, require_minimum_pandas_version - if sys.version >= "3": long = int basestring = unicode = str diff --git a/pysparkling/sql/utils.py b/src/main/python/pysparkling/sql/utils.py similarity index 100% rename from pysparkling/sql/utils.py rename to src/main/python/pysparkling/sql/utils.py diff --git a/pysparkling/stat_counter.py b/src/main/python/pysparkling/stat_counter.py similarity index 100% rename from pysparkling/stat_counter.py rename to src/main/python/pysparkling/stat_counter.py diff --git a/pysparkling/storagelevel.py b/src/main/python/pysparkling/storagelevel.py similarity index 100% rename from pysparkling/storagelevel.py rename to src/main/python/pysparkling/storagelevel.py diff --git a/pysparkling/streaming/__init__.py b/src/main/python/pysparkling/streaming/__init__.py similarity index 100% rename from pysparkling/streaming/__init__.py rename to src/main/python/pysparkling/streaming/__init__.py diff --git a/pysparkling/streaming/context.py b/src/main/python/pysparkling/streaming/context.py similarity index 100% rename from pysparkling/streaming/context.py rename to src/main/python/pysparkling/streaming/context.py diff --git a/pysparkling/streaming/dstream.py b/src/main/python/pysparkling/streaming/dstream.py similarity index 100% rename from pysparkling/streaming/dstream.py rename to src/main/python/pysparkling/streaming/dstream.py diff --git a/pysparkling/streaming/filestream.py b/src/main/python/pysparkling/streaming/filestream.py similarity index 100% rename from pysparkling/streaming/filestream.py rename to src/main/python/pysparkling/streaming/filestream.py diff --git a/pysparkling/streaming/queuestream.py b/src/main/python/pysparkling/streaming/queuestream.py similarity index 100% rename from pysparkling/streaming/queuestream.py rename to src/main/python/pysparkling/streaming/queuestream.py diff --git a/pysparkling/streaming/tcpstream.py b/src/main/python/pysparkling/streaming/tcpstream.py similarity index 99% rename from pysparkling/streaming/tcpstream.py rename to src/main/python/pysparkling/streaming/tcpstream.py index 968028fee..13140fac7 100644 --- a/pysparkling/streaming/tcpstream.py +++ b/src/main/python/pysparkling/streaming/tcpstream.py @@ -2,6 +2,7 @@ import logging import struct + from tornado.gen import coroutine, moment from tornado.iostream import StreamClosedError from tornado.tcpserver import TCPServer diff --git a/pysparkling/task_context.py b/src/main/python/pysparkling/task_context.py similarity index 100% rename from pysparkling/task_context.py rename to src/main/python/pysparkling/task_context.py diff --git a/pysparkling/utils.py b/src/main/python/pysparkling/utils.py similarity index 100% rename from pysparkling/utils.py rename to src/main/python/pysparkling/utils.py index 2cac017db..6a887701a 100644 --- a/pysparkling/utils.py +++ b/src/main/python/pysparkling/utils.py @@ -12,9 +12,9 @@ from pytz import UnknownTimeZoneError from pysparkling.sql.casts import get_time_formatter -from pysparkling.sql.schema_utils import get_on_fields from pysparkling.sql.internal_utils.joins import FULL_JOIN, RIGHT_JOIN, LEFT_JOIN, \ CROSS_JOIN, INNER_JOIN, LEFT_SEMI_JOIN, LEFT_ANTI_JOIN +from pysparkling.sql.schema_utils import get_on_fields from pysparkling.sql.types import Row, create_row, row_from_keyed_values from pysparkling.sql.utils import IllegalArgumentException diff --git a/scripts/benchmark_csv.py b/src/main/scripts/benchmark_csv.py similarity index 99% rename from scripts/benchmark_csv.py rename to src/main/scripts/benchmark_csv.py index 5de705c6c..871256a8a 100644 --- a/scripts/benchmark_csv.py +++ b/src/main/scripts/benchmark_csv.py @@ -1,9 +1,10 @@ """Benchmark csv reading performance.""" import argparse -import pysparkling import random +import pysparkling + def create_csv(filename, lines=10000000, columns=12): with open(filename, 'w') as f: diff --git a/scripts/benchmark_generators.py b/src/main/scripts/benchmark_generators.py similarity index 100% rename from scripts/benchmark_generators.py rename to src/main/scripts/benchmark_generators.py diff --git a/scripts/ipcluster_simple.py b/src/main/scripts/ipcluster_simple.py similarity index 100% rename from scripts/ipcluster_simple.py rename to src/main/scripts/ipcluster_simple.py diff --git a/scripts/log_streaming.py b/src/main/scripts/log_streaming.py similarity index 100% rename from scripts/log_streaming.py rename to src/main/scripts/log_streaming.py diff --git a/scripts/multiprocessing_performance_plot.pdf b/src/main/scripts/multiprocessing_performance_plot.pdf similarity index 100% rename from scripts/multiprocessing_performance_plot.pdf rename to src/main/scripts/multiprocessing_performance_plot.pdf diff --git a/scripts/multiprocessing_performance_plot.png b/src/main/scripts/multiprocessing_performance_plot.png similarity index 100% rename from scripts/multiprocessing_performance_plot.png rename to src/main/scripts/multiprocessing_performance_plot.png diff --git a/scripts/multiprocessing_performance_plot.py b/src/main/scripts/multiprocessing_performance_plot.py similarity index 99% rename from scripts/multiprocessing_performance_plot.py rename to src/main/scripts/multiprocessing_performance_plot.py index 22393792b..b19968325 100644 --- a/scripts/multiprocessing_performance_plot.py +++ b/src/main/scripts/multiprocessing_performance_plot.py @@ -1,5 +1,6 @@ import matplotlib.pyplot as plt import numpy as np + import test_multiprocessing diff --git a/scripts/pyspark_comparisons.py b/src/main/scripts/pyspark_comparisons.py similarity index 100% rename from scripts/pyspark_comparisons.py rename to src/main/scripts/pyspark_comparisons.py diff --git a/scripts/pyspark_streaming.py b/src/main/scripts/pyspark_streaming.py similarity index 99% rename from scripts/pyspark_streaming.py rename to src/main/scripts/pyspark_streaming.py index 0433290b1..b375916ee 100644 --- a/scripts/pyspark_streaming.py +++ b/src/main/scripts/pyspark_streaming.py @@ -5,10 +5,10 @@ from __future__ import print_function -import pyspark -import pyspark.streaming import time +import pyspark.streaming + def simple_queue(ssc): ssc.queueStream([range(5), ['a', 'b'], ['c']], oneAtATime=False).pprint() diff --git a/scripts/readme_example.py b/src/main/scripts/readme_example.py similarity index 100% rename from scripts/readme_example.py rename to src/main/scripts/readme_example.py diff --git a/scripts/readme_example_common_crawl.py b/src/main/scripts/readme_example_common_crawl.py similarity index 100% rename from scripts/readme_example_common_crawl.py rename to src/main/scripts/readme_example_common_crawl.py diff --git a/scripts/readme_example_human_microbiome.py b/src/main/scripts/readme_example_human_microbiome.py similarity index 100% rename from scripts/readme_example_human_microbiome.py rename to src/main/scripts/readme_example_human_microbiome.py diff --git a/scripts/readme_example_word_count.py b/src/main/scripts/readme_example_word_count.py similarity index 100% rename from scripts/readme_example_word_count.py rename to src/main/scripts/readme_example_word_count.py diff --git a/scripts/starcluster_simple.py b/src/main/scripts/starcluster_simple.py similarity index 100% rename from scripts/starcluster_simple.py rename to src/main/scripts/starcluster_simple.py diff --git a/scripts/tcpperf_client.py b/src/main/scripts/tcpperf_client.py similarity index 99% rename from scripts/tcpperf_client.py rename to src/main/scripts/tcpperf_client.py index 02a023990..0911564d2 100644 --- a/scripts/tcpperf_client.py +++ b/src/main/scripts/tcpperf_client.py @@ -3,16 +3,17 @@ from __future__ import absolute_import, division import argparse -from contextlib import closing import json import random import struct import sys import time +from contextlib import closing + from tornado import gen -from tornado.tcpclient import TCPClient from tornado.ioloop import IOLoop, PeriodicCallback from tornado.iostream import StreamClosedError +from tornado.tcpclient import TCPClient class Emitter(object): diff --git a/scripts/tcpperf_connections.csv b/src/main/scripts/tcpperf_connections.csv similarity index 100% rename from scripts/tcpperf_connections.csv rename to src/main/scripts/tcpperf_connections.csv diff --git a/scripts/tcpperf_connections.csv.pdf b/src/main/scripts/tcpperf_connections.csv.pdf similarity index 100% rename from scripts/tcpperf_connections.csv.pdf rename to src/main/scripts/tcpperf_connections.csv.pdf diff --git a/scripts/tcpperf_connections.csv.png b/src/main/scripts/tcpperf_connections.csv.png similarity index 100% rename from scripts/tcpperf_connections.csv.png rename to src/main/scripts/tcpperf_connections.csv.png diff --git a/scripts/tcpperf_messages.csv b/src/main/scripts/tcpperf_messages.csv similarity index 100% rename from scripts/tcpperf_messages.csv rename to src/main/scripts/tcpperf_messages.csv diff --git a/scripts/tcpperf_messages.csv.pdf b/src/main/scripts/tcpperf_messages.csv.pdf similarity index 100% rename from scripts/tcpperf_messages.csv.pdf rename to src/main/scripts/tcpperf_messages.csv.pdf diff --git a/scripts/tcpperf_messages.csv.png b/src/main/scripts/tcpperf_messages.csv.png similarity index 100% rename from scripts/tcpperf_messages.csv.png rename to src/main/scripts/tcpperf_messages.csv.png diff --git a/scripts/tcpperf_plot.py b/src/main/scripts/tcpperf_plot.py similarity index 99% rename from scripts/tcpperf_plot.py rename to src/main/scripts/tcpperf_plot.py index 7eafcad6f..a3c6cff86 100644 --- a/scripts/tcpperf_plot.py +++ b/src/main/scripts/tcpperf_plot.py @@ -1,6 +1,8 @@ -from collections import namedtuple import csv +from collections import namedtuple + import matplotlib + matplotlib.use('Agg') import matplotlib.pyplot as plt diff --git a/scripts/tcpperf_server.py b/src/main/scripts/tcpperf_server.py similarity index 100% rename from scripts/tcpperf_server.py rename to src/main/scripts/tcpperf_server.py index 8d7384b9b..f6ec346c9 100644 --- a/scripts/tcpperf_server.py +++ b/src/main/scripts/tcpperf_server.py @@ -1,14 +1,14 @@ from __future__ import print_function, division -from collections import defaultdict import json import logging import math import os -import pysparkling import struct import time +from collections import defaultdict +import pysparkling N_CONNECTIONS = (100, 1000, 2000, 3000, 3500, 4000, 4500, 5000, 6000, 7000, 8000) diff --git a/pysparkling/sql/tests/__init__.py b/src/unittest/python/__init__.py similarity index 100% rename from pysparkling/sql/tests/__init__.py rename to src/unittest/python/__init__.py diff --git a/pysparkling/tests/data.7z b/src/unittest/python/data.7z similarity index 100% rename from pysparkling/tests/data.7z rename to src/unittest/python/data.7z diff --git a/pysparkling/tests/data.tar.gz b/src/unittest/python/data.tar.gz similarity index 100% rename from pysparkling/tests/data.tar.gz rename to src/unittest/python/data.tar.gz diff --git a/pysparkling/tests/profile_textfile.py b/src/unittest/python/profile_textfile.py similarity index 100% rename from pysparkling/tests/profile_textfile.py rename to src/unittest/python/profile_textfile.py diff --git a/pysparkling/tests/__init__.py b/src/unittest/python/sql_tests/__init__.py similarity index 100% rename from pysparkling/tests/__init__.py rename to src/unittest/python/sql_tests/__init__.py diff --git a/pysparkling/sql/tests/data/fundings/part-0.csv b/src/unittest/python/sql_tests/data/fundings/part-0.csv similarity index 100% rename from pysparkling/sql/tests/data/fundings/part-0.csv rename to src/unittest/python/sql_tests/data/fundings/part-0.csv diff --git a/pysparkling/sql/tests/test_casts.py b/src/unittest/python/sql_tests/test_casts.py similarity index 100% rename from pysparkling/sql/tests/test_casts.py rename to src/unittest/python/sql_tests/test_casts.py diff --git a/pysparkling/sql/tests/test_read.py b/src/unittest/python/sql_tests/test_read.py similarity index 100% rename from pysparkling/sql/tests/test_read.py rename to src/unittest/python/sql_tests/test_read.py diff --git a/pysparkling/sql/tests/test_session.py b/src/unittest/python/sql_tests/test_session.py similarity index 100% rename from pysparkling/sql/tests/test_session.py rename to src/unittest/python/sql_tests/test_session.py index cf819761d..faffcecbb 100644 --- a/pysparkling/sql/tests/test_session.py +++ b/src/unittest/python/sql_tests/test_session.py @@ -2,11 +2,11 @@ import pytest +from pysparkling import Context from pysparkling import StorageLevel +from pysparkling.sql.session import SparkSession from pysparkling.sql.types import Row, StructType, StructField, LongType, StringType, DoubleType, \ ArrayType, MapType, IntegerType, row_from_keyed_values -from pysparkling.sql.session import SparkSession -from pysparkling import Context from pysparkling.sql.utils import require_minimum_pandas_version try: diff --git a/pysparkling/sql/tests/test_write.py b/src/unittest/python/sql_tests/test_write.py similarity index 100% rename from pysparkling/sql/tests/test_write.py rename to src/unittest/python/sql_tests/test_write.py diff --git a/pysparkling/tests/test_cache.py b/src/unittest/python/test_cache.py similarity index 100% rename from pysparkling/tests/test_cache.py rename to src/unittest/python/test_cache.py diff --git a/pysparkling/tests/test_context.py b/src/unittest/python/test_context.py similarity index 100% rename from pysparkling/tests/test_context.py rename to src/unittest/python/test_context.py diff --git a/pysparkling/tests/test_multiprocessing.py b/src/unittest/python/test_multiprocessing.py similarity index 100% rename from pysparkling/tests/test_multiprocessing.py rename to src/unittest/python/test_multiprocessing.py index 2715f04dd..e06c0750b 100644 --- a/pysparkling/tests/test_multiprocessing.py +++ b/src/unittest/python/test_multiprocessing.py @@ -1,6 +1,5 @@ from __future__ import division, print_function -from concurrent import futures import logging import math import multiprocessing @@ -12,6 +11,7 @@ import time import timeit import unittest +from concurrent import futures import cloudpickle diff --git a/pysparkling/tests/test_rdd.py b/src/unittest/python/test_rdd.py similarity index 100% rename from pysparkling/tests/test_rdd.py rename to src/unittest/python/test_rdd.py index cc9f70d46..20967bed5 100644 --- a/pysparkling/tests/test_rdd.py +++ b/src/unittest/python/test_rdd.py @@ -1,6 +1,6 @@ import unittest - from operator import add + from pysparkling import Context diff --git a/pysparkling/tests/test_resolve_filenames.py b/src/unittest/python/test_resolve_filenames.py similarity index 100% rename from pysparkling/tests/test_resolve_filenames.py rename to src/unittest/python/test_resolve_filenames.py diff --git a/pysparkling/tests/test_sample.py b/src/unittest/python/test_sample.py similarity index 99% rename from pysparkling/tests/test_sample.py rename to src/unittest/python/test_sample.py index 09f6152c6..1845cdad2 100644 --- a/pysparkling/tests/test_sample.py +++ b/src/unittest/python/test_sample.py @@ -1,6 +1,7 @@ from __future__ import print_function import logging + import pysparkling diff --git a/pysparkling/tests/test_stat_counter.py b/src/unittest/python/test_stat_counter.py similarity index 100% rename from pysparkling/tests/test_stat_counter.py rename to src/unittest/python/test_stat_counter.py index adf5f13ec..ed79c5dc2 100644 --- a/pysparkling/tests/test_stat_counter.py +++ b/src/unittest/python/test_stat_counter.py @@ -1,9 +1,9 @@ from __future__ import division import pysparkling -from pysparkling.stat_counter import ColumnStatHelper -from pysparkling.sql.types import Row, StructType, StructField, IntegerType from pysparkling.sql.functions import col +from pysparkling.sql.types import Row, StructType, StructField, IntegerType +from pysparkling.stat_counter import ColumnStatHelper def test_mean(): diff --git a/pysparkling/tests/test_streaming_files.py b/src/unittest/python/test_streaming_files.py similarity index 100% rename from pysparkling/tests/test_streaming_files.py rename to src/unittest/python/test_streaming_files.py diff --git a/pysparkling/tests/test_streaming_queue.py b/src/unittest/python/test_streaming_queue.py similarity index 100% rename from pysparkling/tests/test_streaming_queue.py rename to src/unittest/python/test_streaming_queue.py diff --git a/pysparkling/tests/test_streaming_tcp.py b/src/unittest/python/test_streaming_tcp.py similarity index 99% rename from pysparkling/tests/test_streaming_tcp.py rename to src/unittest/python/test_streaming_tcp.py index 432e610da..292161ac8 100644 --- a/pysparkling/tests/test_streaming_tcp.py +++ b/src/unittest/python/test_streaming_tcp.py @@ -1,8 +1,9 @@ from __future__ import print_function +import struct from collections import Counter from contextlib import closing -import struct + import tornado.gen import tornado.tcpclient import tornado.testing diff --git a/pysparkling/tests/test_textFile.py b/src/unittest/python/test_textFile.py similarity index 100% rename from pysparkling/tests/test_textFile.py rename to src/unittest/python/test_textFile.py