Skip to content

Drop support for EOL Python 2.7, 3.5 and 3.6 #536

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 11 commits into from
12 changes: 3 additions & 9 deletions .appveyor.yml
Original file line number Diff line number Diff line change
@@ -1,27 +1,21 @@
# To activate, change the Appveyor settings to use `.appveyor.yml`.
environment:
global:
PATH: "C:\\Python27\\Scripts\\;%PATH%"
PATH: "C:\\Python38\\Scripts\\;%PATH%"
matrix:
- TOXENV: py27-base
- TOXENV: py27-optional
- TOXENV: py35-base
- TOXENV: py35-optional
- TOXENV: py36-base
- TOXENV: py36-optional
- TOXENV: py37-base
- TOXENV: py37-optional
- TOXENV: py38-base
- TOXENV: py38-optional

install:
- git submodule update --init --recursive
- python -m pip install tox
- C:\\Python38\\python.exe -m pip install tox

build: off

test_script:
- tox

after_test:
- python debug-info.py
- C:\\Python38\\python.exe debug-info.py
7 changes: 4 additions & 3 deletions .github/workflows/python-tox.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,14 @@ jobs:
if: github.event.push || github.event.pull_request.head.repo.full_name != github.repository
runs-on: ubuntu-latest
strategy:
fail-fast: false
matrix:
python: [2.7, 3.5, 3.6, 3.7, 3.8, pypy-2.7, pypy3]
python: [3.7, 3.8, pypy3.8]
steps:
- uses: actions/checkout@v2
- uses: actions/checkout@v3
with:
submodules: true
- uses: actions/setup-python@v2
- uses: actions/setup-python@v4
with:
python-version: ${{ matrix.python }}
- run: pip install tox
Expand Down
8 changes: 2 additions & 6 deletions .travis.yml
Original file line number Diff line number Diff line change
@@ -1,19 +1,15 @@
language: python
python:
- "pypy3"
- "pypy"
- "3.9"
- "3.8"
- "3.7"
- "3.6"
- "3.5"
- "2.7"
- "3.9-dev"

cache: pip

env:
global:
- TOXENV=base,optional,six19-optional
- TOXENV=base,optional

install:
- pip install tox
Expand Down
4 changes: 2 additions & 2 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ More documentation is available at https://html5lib.readthedocs.io/.
Installation
------------

html5lib works on CPython 2.7+, CPython 3.5+ and PyPy. To install:
html5lib works on CPython 3.7+ and PyPy3. To install:

.. code-block:: bash

Expand Down Expand Up @@ -127,7 +127,7 @@ Please report any bugs on the `issue tracker
Tests
-----

Unit tests require the ``pytest`` and ``mock`` libraries and can be
Unit tests require the ``pytest`` library and can be
run using the ``pytest`` command in the root directory.

Test data are contained in a separate `html5lib-tests
Expand Down
4 changes: 1 addition & 3 deletions debug-info.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
from __future__ import print_function, unicode_literals

import platform
import sys

Expand All @@ -12,7 +10,7 @@
"maxsize": sys.maxsize
}

search_modules = ["chardet", "genshi", "html5lib", "lxml", "six"]
search_modules = ["chardet", "genshi", "html5lib", "lxml"]
found_modules = []

for m in search_modules:
Expand Down
3 changes: 1 addition & 2 deletions doc/conf.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
#
# html5lib documentation build configuration file, created by
# sphinx-quickstart on Wed May 8 00:04:49 2013.
Expand Down Expand Up @@ -92,7 +91,7 @@
]


class CExtMock(object):
class CExtMock:
"""Required for autodoc on readthedocs.org where you cannot build C extensions."""
def __init__(self, *args, **kwargs):
pass
Expand Down
1 change: 0 additions & 1 deletion html5lib/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@
* :func:`~.serializer.serialize`
"""

from __future__ import absolute_import, division, unicode_literals

from .html5parser import HTMLParser, parse, parseFragment
from .treebuilders import getTreeBuilder
Expand Down
4 changes: 1 addition & 3 deletions html5lib/_ihatexml.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
from __future__ import absolute_import, division, unicode_literals

import re
import warnings

Expand Down Expand Up @@ -184,7 +182,7 @@ def escapeRegexp(string):
nonPubidCharRegexp = re.compile("[^\x20\x0D\x0Aa-zA-Z0-9\\-'()+,./:=?;!*#@$_%]")


class InfosetFilter(object):
class InfosetFilter:
replacementRegexp = re.compile(r"U[\dA-F]{5,5}")

def __init__(self,
Expand Down
35 changes: 16 additions & 19 deletions html5lib/_inputstream.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,7 @@
from __future__ import absolute_import, division, unicode_literals

from six import text_type
from six.moves import http_client, urllib

import codecs
import http.client
import re
import urllib
from io import BytesIO, StringIO

import webencodings
Expand All @@ -14,9 +11,9 @@
from . import _utils

# Non-unicode versions of constants for use in the pre-parser
spaceCharactersBytes = frozenset([item.encode("ascii") for item in spaceCharacters])
asciiLettersBytes = frozenset([item.encode("ascii") for item in asciiLetters])
asciiUppercaseBytes = frozenset([item.encode("ascii") for item in asciiUppercase])
spaceCharactersBytes = frozenset(item.encode("ascii") for item in spaceCharacters)
asciiLettersBytes = frozenset(item.encode("ascii") for item in asciiLetters)
asciiUppercaseBytes = frozenset(item.encode("ascii") for item in asciiUppercase)
spacesAngleBrackets = spaceCharactersBytes | frozenset([b">", b"<"])


Expand Down Expand Up @@ -48,7 +45,7 @@
charsUntilRegEx = {}


class BufferedStream(object):
class BufferedStream:
"""Buffering for streams that do not have buffering of their own

The buffer is implemented as a list of chunks on the assumption that
Expand Down Expand Up @@ -86,7 +83,7 @@ def read(self, bytes):
return self._readFromBuffer(bytes)

def _bufferedBytes(self):
return sum([len(item) for item in self.buffer])
return sum(len(item) for item in self.buffer)

def _readStream(self, bytes):
data = self.stream.read(bytes)
Expand Down Expand Up @@ -125,15 +122,15 @@ def _readFromBuffer(self, bytes):
def HTMLInputStream(source, **kwargs):
# Work around Python bug #20007: read(0) closes the connection.
# http://bugs.python.org/issue20007
if (isinstance(source, http_client.HTTPResponse) or
if (isinstance(source, http.client.HTTPResponse) or
# Also check for addinfourl wrapping HTTPResponse
(isinstance(source, urllib.response.addbase) and
isinstance(source.fp, http_client.HTTPResponse))):
isinstance(source.fp, http.client.HTTPResponse))):
isUnicode = False
elif hasattr(source, "read"):
isUnicode = isinstance(source.read(0), text_type)
isUnicode = isinstance(source.read(0), str)
else:
isUnicode = isinstance(source, text_type)
isUnicode = isinstance(source, str)

if isUnicode:
encodings = [x for x in kwargs if x.endswith("_encoding")]
Expand All @@ -145,7 +142,7 @@ def HTMLInputStream(source, **kwargs):
return HTMLBinaryInputStream(source, **kwargs)


class HTMLUnicodeInputStream(object):
class HTMLUnicodeInputStream:
"""Provides a unicode stream of characters to the HTMLTokenizer.

This class takes care of character encoding and removing or replacing
Expand Down Expand Up @@ -325,7 +322,7 @@ def charsUntil(self, characters, opposite=False):
if __debug__:
for c in characters:
assert(ord(c) < 128)
regex = "".join(["\\x%02x" % ord(c) for c in characters])
regex = "".join("\\x%02x" % ord(c) for c in characters)
if not opposite:
regex = "^%s" % regex
chars = charsUntilRegEx[(characters, opposite)] = re.compile("[%s]+" % regex)
Expand Down Expand Up @@ -524,7 +521,7 @@ def changeEncoding(self, newEncoding):
self.rawStream.seek(0)
self.charEncoding = (newEncoding, "certain")
self.reset()
raise _ReparseException("Encoding changed from %s to %s" % (self.charEncoding[0], newEncoding))
raise _ReparseException(f"Encoding changed from {self.charEncoding[0]} to {newEncoding}")

def detectBOM(self):
"""Attempts to detect at BOM at the start of the stream. If
Expand Down Expand Up @@ -673,7 +670,7 @@ def jumpTo(self, bytes):
return True


class EncodingParser(object):
class EncodingParser:
"""Mini parser for detecting character encoding from meta elements"""

def __init__(self, data):
Expand Down Expand Up @@ -861,7 +858,7 @@ def getAttribute(self):
attrValue.append(c)


class ContentAttrParser(object):
class ContentAttrParser:
def __init__(self, data):
assert isinstance(data, bytes)
self.data = data
Expand Down
18 changes: 4 additions & 14 deletions html5lib/_tokenizer.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,4 @@
from __future__ import absolute_import, division, unicode_literals

from six import unichr as chr

from collections import deque, OrderedDict
from sys import version_info
from collections import deque

from .constants import spaceCharacters
from .constants import entities
Expand All @@ -18,13 +13,8 @@

entitiesTrie = Trie(entities)

if version_info >= (3, 7):
attributeMap = dict
else:
attributeMap = OrderedDict


class HTMLTokenizer(object):
class HTMLTokenizer:
""" This class takes care of tokenizing HTML.

* self.currentToken
Expand All @@ -50,7 +40,7 @@ def __init__(self, stream, parser=None, **kwargs):

# The current token being created
self.currentToken = None
super(HTMLTokenizer, self).__init__()
super().__init__()

def __iter__(self):
""" This is where the magic happens.
Expand Down Expand Up @@ -236,7 +226,7 @@ def emitCurrentToken(self):
token["name"] = token["name"].translate(asciiUpper2Lower)
if token["type"] == tokenTypes["StartTag"]:
raw = token["data"]
data = attributeMap(raw)
data = dict(raw)
if len(raw) > len(data):
# we had some duplicated attribute, fix so first wins
data.update(raw[::-1])
Expand Down
2 changes: 0 additions & 2 deletions html5lib/_trie/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
from __future__ import absolute_import, division, unicode_literals

from .py import Trie

__all__ = ["Trie"]
9 changes: 2 additions & 7 deletions html5lib/_trie/_base.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,12 @@
from __future__ import absolute_import, division, unicode_literals

try:
from collections.abc import Mapping
except ImportError: # Python 2.7
from collections import Mapping
from collections.abc import Mapping


class Trie(Mapping):
"""Abstract base class for tries"""

def keys(self, prefix=None):
# pylint:disable=arguments-differ
keys = super(Trie, self).keys()
keys = super().keys()

if prefix is None:
return set(keys)
Expand Down
5 changes: 1 addition & 4 deletions html5lib/_trie/py.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,11 @@
from __future__ import absolute_import, division, unicode_literals
from six import text_type

from bisect import bisect_left

from ._base import Trie as ABCTrie


class Trie(ABCTrie):
def __init__(self, data):
if not all(isinstance(x, text_type) for x in data.keys()):
if not all(isinstance(x, str) for x in data.keys()):
raise TypeError("All keys must be strings")

self._data = data
Expand Down
22 changes: 5 additions & 17 deletions html5lib/_utils.py
Original file line number Diff line number Diff line change
@@ -1,21 +1,9 @@
from __future__ import absolute_import, division, unicode_literals

from types import ModuleType

try:
from collections.abc import Mapping
except ImportError:
from collections import Mapping
from collections.abc import Mapping

from six import text_type, PY3

if PY3:
import xml.etree.ElementTree as default_etree
else:
try:
import xml.etree.cElementTree as default_etree
except ImportError:
import xml.etree.ElementTree as default_etree
import xml.etree.ElementTree as default_etree


__all__ = ["default_etree", "MethodDispatcher", "isSurrogatePair",
Expand All @@ -31,10 +19,10 @@
# escapes.
try:
_x = eval('"\\uD800"') # pylint:disable=eval-used
if not isinstance(_x, text_type):
if not isinstance(_x, str):
# We need this with u"" because of http://bugs.jython.org/issue2039
_x = eval('u"\\uD800"') # pylint:disable=eval-used
assert isinstance(_x, text_type)
assert isinstance(_x, str)
except Exception:
supports_lone_surrogates = False
else:
Expand Down Expand Up @@ -122,7 +110,7 @@ def moduleFactoryFactory(factory):
moduleCache = {}

def moduleFactory(baseModule, *args, **kwargs):
if isinstance(ModuleType.__name__, type("")):
if isinstance(ModuleType.__name__, str):
name = "_%s_factory" % baseModule.__name__
else:
name = b"_%s_factory" % baseModule.__name__
Expand Down
2 changes: 0 additions & 2 deletions html5lib/constants.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
from __future__ import absolute_import, division, unicode_literals

import string

EOF = None
Expand Down
2 changes: 0 additions & 2 deletions html5lib/filters/alphabeticalattributes.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
from __future__ import absolute_import, division, unicode_literals

from . import base

from collections import OrderedDict
Expand Down
Loading