Some steps towards Python 3 support.

This commit is contained in:
Itamar Turner-Trauring 2020-08-12 13:10:10 -04:00
parent e13c0cf4d3
commit 53bdc10e19
2 changed files with 51 additions and 38 deletions

View File

@@ -1,5 +1,9 @@
from __future__ import print_function from __future__ import print_function
from future.utils import PY2
from past.builtins import unicode
lumiere_nfc = u"lumi\u00E8re" lumiere_nfc = u"lumi\u00E8re"
Artonwall_nfc = u"\u00C4rtonwall.mp3" Artonwall_nfc = u"\u00C4rtonwall.mp3"
Artonwall_nfd = u"A\u0308rtonwall.mp3" Artonwall_nfd = u"A\u0308rtonwall.mp3"
@@ -44,7 +48,7 @@ if __name__ == "__main__":
open(os.path.join(tmpdir, fname), 'w').close() open(os.path.join(tmpdir, fname), 'w').close()
# Use Unicode API under Windows or MacOS X # Use Unicode API under Windows or MacOS X
if sys.platform in ('win32', 'darwin'): if PY2 and sys.platform in ('win32', 'darwin'):
dirlist = os.listdir(unicode(tmpdir)) dirlist = os.listdir(unicode(tmpdir))
else: else:
dirlist = os.listdir(tmpdir) dirlist = os.listdir(tmpdir)
@@ -278,7 +282,11 @@ class StdlibUnicode(unittest.TestCase):
fn = lumiere_nfc + u'/' + lumiere_nfc + u'.txt' fn = lumiere_nfc + u'/' + lumiere_nfc + u'.txt'
open(fn, 'wb').close() open(fn, 'wb').close()
self.failUnless(os.path.exists(fn)) self.failUnless(os.path.exists(fn))
self.failUnless(os.path.exists(os.path.join(os.getcwdu(), fn))) if PY2:
getcwdu = os.getcwdu
else:
getcwdu = os.getcwd
self.failUnless(os.path.exists(os.path.join(getcwdu(), fn)))
filenames = listdir_unicode(lumiere_nfc) filenames = listdir_unicode(lumiere_nfc)
# We only require that the listing includes a filename that is canonically equivalent # We only require that the listing includes a filename that is canonically equivalent
@@ -314,7 +322,7 @@ class QuoteOutput(ReallyEqualMixin, unittest.TestCase):
self.failUnlessReallyEqual(quote_output(inp, encoding=enc, quotemarks=False, quote_newlines=quote_newlines), out2) self.failUnlessReallyEqual(quote_output(inp, encoding=enc, quotemarks=False, quote_newlines=quote_newlines), out2)
if out[0:2] == 'b"': if out[0:2] == 'b"':
pass pass
elif isinstance(inp, str): elif isinstance(inp, bytes):
self.failUnlessReallyEqual(quote_output(unicode(inp), encoding=enc, quote_newlines=quote_newlines), out) self.failUnlessReallyEqual(quote_output(unicode(inp), encoding=enc, quote_newlines=quote_newlines), out)
self.failUnlessReallyEqual(quote_output(unicode(inp), encoding=enc, quotemarks=False, quote_newlines=quote_newlines), out2) self.failUnlessReallyEqual(quote_output(unicode(inp), encoding=enc, quotemarks=False, quote_newlines=quote_newlines), out2)
else: else:
@@ -326,47 +334,47 @@ class QuoteOutput(ReallyEqualMixin, unittest.TestCase):
self._check(inp, out, enc, optional_quotes, quote_newlines) self._check(inp, out, enc, optional_quotes, quote_newlines)
# optional single quotes # optional single quotes
check("foo", "'foo'", True) check(b"foo", b"'foo'", True)
check("\\", "'\\'", True) check(b"\\", b"'\\'", True)
check("$\"`", "'$\"`'", True) check(b"$\"`", b"'$\"`'", True)
check("\n", "'\n'", True, quote_newlines=False) check(b"\n", b"'\n'", True, quote_newlines=False)
# mandatory single quotes # mandatory single quotes
check("\"", "'\"'") check(b"\"", b"'\"'")
# double quotes # double quotes
check("'", "\"'\"") check(b"'", b"\"'\"")
check("\n", "\"\\x0a\"", quote_newlines=True) check(b"\n", b"\"\\x0a\"", quote_newlines=True)
check("\x00", "\"\\x00\"") check(b"\x00", b"\"\\x00\"")
# invalid Unicode and astral planes # invalid Unicode and astral planes
check(u"\uFDD0\uFDEF", "\"\\ufdd0\\ufdef\"") check(u"\uFDD0\uFDEF", b"\"\\ufdd0\\ufdef\"")
check(u"\uDC00\uD800", "\"\\udc00\\ud800\"") check(u"\uDC00\uD800", b"\"\\udc00\\ud800\"")
check(u"\uDC00\uD800\uDC00", "\"\\udc00\\U00010000\"") check(u"\uDC00\uD800\uDC00", b"\"\\udc00\\U00010000\"")
check(u"\uD800\uDC00", "\"\\U00010000\"") check(u"\uD800\uDC00", b"\"\\U00010000\"")
check(u"\uD800\uDC01", "\"\\U00010001\"") check(u"\uD800\uDC01", b"\"\\U00010001\"")
check(u"\uD801\uDC00", "\"\\U00010400\"") check(u"\uD801\uDC00", b"\"\\U00010400\"")
check(u"\uDBFF\uDFFF", "\"\\U0010ffff\"") check(u"\uDBFF\uDFFF", b"\"\\U0010ffff\"")
check(u"'\uDBFF\uDFFF", "\"'\\U0010ffff\"") check(u"'\uDBFF\uDFFF", b"\"'\\U0010ffff\"")
check(u"\"\uDBFF\uDFFF", "\"\\\"\\U0010ffff\"") check(u"\"\uDBFF\uDFFF", b"\"\\\"\\U0010ffff\"")
# invalid UTF-8 # invalid UTF-8
check("\xFF", "b\"\\xff\"") check("\xFF", b"b\"\\xff\"")
check("\x00\"$\\`\x80\xFF", "b\"\\x00\\\"\\$\\\\\\`\\x80\\xff\"") check("\x00\"$\\`\x80\xFF", b"b\"\\x00\\\"\\$\\\\\\`\\x80\\xff\"")
def test_quote_output_ascii(self, enc='ascii'): def test_quote_output_ascii(self, enc='ascii'):
def check(inp, out, optional_quotes=False, quote_newlines=None): def check(inp, out, optional_quotes=False, quote_newlines=None):
self._check(inp, out, enc, optional_quotes, quote_newlines) self._check(inp, out, enc, optional_quotes, quote_newlines)
self._test_quote_output_all(enc) self._test_quote_output_all(enc)
check(u"\u00D7", "\"\\xd7\"") check(u"\u00D7", b"\"\\xd7\"")
check(u"'\u00D7", "\"'\\xd7\"") check(u"'\u00D7", b"\"'\\xd7\"")
check(u"\"\u00D7", "\"\\\"\\xd7\"") check(u"\"\u00D7", b"\"\\\"\\xd7\"")
check(u"\u2621", "\"\\u2621\"") check(u"\u2621", b"\"\\u2621\"")
check(u"'\u2621", "\"'\\u2621\"") check(u"'\u2621", b"\"'\\u2621\"")
check(u"\"\u2621", "\"\\\"\\u2621\"") check(u"\"\u2621", b"\"\\\"\\u2621\"")
check(u"\n", "'\n'", True, quote_newlines=False) check(u"\n", b"'\n'", True, quote_newlines=False)
check(u"\n", "\"\\x0a\"", quote_newlines=True) check(u"\n", b"\"\\x0a\"", quote_newlines=True)
def test_quote_output_latin1(self, enc='latin1'): def test_quote_output_latin1(self, enc='latin1'):
def check(inp, out, optional_quotes=False, quote_newlines=None): def check(inp, out, optional_quotes=False, quote_newlines=None):
@@ -548,14 +556,14 @@ class OpenBSD(EncodingUtil, unittest.TestCase):
class TestToFromStr(ReallyEqualMixin, unittest.TestCase): class TestToFromStr(ReallyEqualMixin, unittest.TestCase):
def test_to_str(self): def test_to_str(self):
self.failUnlessReallyEqual(to_str("foo"), "foo") self.failUnlessReallyEqual(to_str("foo"), b"foo")
self.failUnlessReallyEqual(to_str("lumi\xc3\xa8re"), "lumi\xc3\xa8re") self.failUnlessReallyEqual(to_str("lumi\xc3\xa8re"), b"lumi\xc3\xa8re")
self.failUnlessReallyEqual(to_str("\xFF"), "\xFF") # passes through invalid UTF-8 -- is this what we want? self.failUnlessReallyEqual(to_str("\xFF"), "\xFF") # passes through invalid UTF-8 -- is this what we want?
self.failUnlessReallyEqual(to_str(u"lumi\u00E8re"), "lumi\xc3\xa8re") self.failUnlessReallyEqual(to_str(u"lumi\u00E8re"), b"lumi\xc3\xa8re")
self.failUnlessReallyEqual(to_str(None), None) self.failUnlessReallyEqual(to_str(None), None)
def test_from_utf8_or_none(self): def test_from_utf8_or_none(self):
self.failUnlessRaises(AssertionError, from_utf8_or_none, u"foo") self.failUnlessRaises(AssertionError, from_utf8_or_none, u"foo")
self.failUnlessReallyEqual(from_utf8_or_none("lumi\xc3\xa8re"), u"lumi\u00E8re") self.failUnlessReallyEqual(from_utf8_or_none(b"lumi\xc3\xa8re"), u"lumi\u00E8re")
self.failUnlessReallyEqual(from_utf8_or_none(None), None) self.failUnlessReallyEqual(from_utf8_or_none(None), None)
self.failUnlessRaises(UnicodeDecodeError, from_utf8_or_none, "\xFF") self.failUnlessRaises(UnicodeDecodeError, from_utf8_or_none, "\xFF")

View File

@@ -3,6 +3,8 @@ Functions used to convert inputs from whatever encoding used in the system to
unicode and back. unicode and back.
""" """
from future.utils import PY2
from past.builtins import unicode from past.builtins import unicode
import sys, os, re, locale import sys, os, re, locale
@@ -65,13 +67,13 @@ def _reload():
check_encoding(io_encoding) check_encoding(io_encoding)
is_unicode_platform = sys.platform in ["win32", "darwin"] is_unicode_platform = not PY2 or sys.platform in ["win32", "darwin"]
# Despite the Unicode-mode FilePath support added to Twisted in # Despite the Unicode-mode FilePath support added to Twisted in
# <https://twistedmatrix.com/trac/ticket/7805>, we can't yet use # <https://twistedmatrix.com/trac/ticket/7805>, we can't yet use
# Unicode-mode FilePaths with INotify on non-Windows platforms # Unicode-mode FilePaths with INotify on non-Windows platforms
# due to <https://twistedmatrix.com/trac/ticket/7928>. # due to <https://twistedmatrix.com/trac/ticket/7928>.
use_unicode_filepath = sys.platform == "win32" use_unicode_filepath = not PY2 or sys.platform == "win32"
_reload() _reload()
@@ -92,6 +94,9 @@ def argv_to_unicode(s):
""" """
Decode given argv element to unicode. If this fails, raise a UsageError. Decode given argv element to unicode. If this fails, raise a UsageError.
""" """
if isinstance(s, unicode):
return s
precondition(isinstance(s, bytes), s) precondition(isinstance(s, bytes), s)
try: try:
@@ -122,7 +127,7 @@ def unicode_to_argv(s, mangle=False):
if mangle and sys.platform == "win32": if mangle and sys.platform == "win32":
# This must be the same as 'mangle' in bin/tahoe-script.template. # This must be the same as 'mangle' in bin/tahoe-script.template.
return str(re.sub(u'[^\\x20-\\x7F]', lambda m: u'\x7F%x;' % (ord(m.group(0)),), s)) return bytes(re.sub(u'[^\\x20-\\x7F]', lambda m: u'\x7F%x;' % (ord(m.group(0)),), s))
else: else:
return s.encode(io_encoding) return s.encode(io_encoding)
@@ -143,7 +148,7 @@ def to_str(s): # TODO rename to to_bytes
return s.encode('utf-8') return s.encode('utf-8')
def from_utf8_or_none(s): def from_utf8_or_none(s):
precondition(isinstance(s, str) or s is None, s) precondition(isinstance(s, bytes) or s is None, s)
if s is None: if s is None:
return s return s
return s.decode('utf-8') return s.decode('utf-8')