|
|
3b36b49 |
diff --git a/Doc/using/cmdline.rst b/Doc/using/cmdline.rst
|
|
|
8fbcd4d |
index 195f63f..0d0a127 100644
|
|
|
3b36b49 |
--- a/Doc/using/cmdline.rst
|
|
|
3b36b49 |
+++ b/Doc/using/cmdline.rst
|
|
|
8fbcd4d |
@@ -713,6 +713,40 @@ conflict.
|
|
|
3b36b49 |
|
|
|
3b36b49 |
.. versionadded:: 3.6
|
|
|
3b36b49 |
|
|
|
3b36b49 |
+
|
|
|
3b36b49 |
+.. envvar:: PYTHONCOERCECLOCALE
|
|
|
3b36b49 |
+
|
|
|
8fbcd4d |
+ If set to the value ``0``, causes the main Python command line application
|
|
|
3b36b49 |
+ to skip coercing the legacy ASCII-based C locale to a more capable UTF-8
|
|
|
3b36b49 |
+ based alternative. Note that this setting is checked even when the
|
|
|
3b36b49 |
+ :option:`-E` or :option:`-I` options are used, as it is handled prior to
|
|
|
3b36b49 |
+ the processing of command line options.
|
|
|
3b36b49 |
+
|
|
|
8fbcd4d |
+ If this variable is *not* set, or is set to a value other than ``0``, and
|
|
|
8fbcd4d |
+ the current locale reported for the ``LC_CTYPE`` category is the default
|
|
|
8fbcd4d |
+ ``C`` locale, then the Python CLI will attempt to configure one of the
|
|
|
8fbcd4d |
+ following locales for the given locale categories before loading the
|
|
|
8fbcd4d |
+ interpreter runtime:
|
|
|
3b36b49 |
+
|
|
|
8fbcd4d |
+ * ``C.UTF-8`` (``LC_ALL``)
|
|
|
8fbcd4d |
+ * ``C.utf8`` (``LC_ALL``)
|
|
|
8fbcd4d |
+ * ``UTF-8`` (``LC_CTYPE``)
|
|
|
3b36b49 |
+
|
|
|
3b36b49 |
+ If setting one of these locale categories succeeds, then the matching
|
|
|
8fbcd4d |
+ environment variables will be set (both ``LC_ALL`` and ``LANG`` for the
|
|
|
8fbcd4d |
+ ``LC_ALL`` category, and ``LC_CTYPE`` for the ``LC_CTYPE`` category) in
|
|
|
8fbcd4d |
+ the current process environment before the Python runtime is initialized.
|
|
|
8fbcd4d |
+
|
|
|
8fbcd4d |
+ Configuring one of these locales (either explicitly or via the above
|
|
|
8fbcd4d |
+ implicit locale coercion) will automatically set the error handler for
|
|
|
8fbcd4d |
+ :data:`sys.stdin` and :data:`sys.stdout` to ``surrogateescape``. This
|
|
|
8fbcd4d |
+ behavior can be overridden using :envvar:`PYTHONIOENCODING` as usual.
|
|
|
3b36b49 |
+
|
|
|
3b36b49 |
+ Availability: \*nix
|
|
|
3b36b49 |
+
|
|
|
3b36b49 |
+ .. versionadded:: 3.7
|
|
|
3b36b49 |
+ See :pep:`538` for more details.
|
|
|
3b36b49 |
+
|
|
|
3b36b49 |
Debug-mode variables
|
|
|
3b36b49 |
~~~~~~~~~~~~~~~~~~~~
|
|
|
3b36b49 |
|
|
|
3b36b49 |
diff --git a/Lib/test/support/script_helper.py b/Lib/test/support/script_helper.py
|
|
|
8fbcd4d |
index ca5f9c2..7aa460b 100644
|
|
|
3b36b49 |
--- a/Lib/test/support/script_helper.py
|
|
|
3b36b49 |
+++ b/Lib/test/support/script_helper.py
|
|
|
3b36b49 |
@@ -51,8 +51,35 @@ def interpreter_requires_environment():
|
|
|
3b36b49 |
return __cached_interp_requires_environment
|
|
|
3b36b49 |
|
|
|
3b36b49 |
|
|
|
3b36b49 |
-_PythonRunResult = collections.namedtuple("_PythonRunResult",
|
|
|
3b36b49 |
- ("rc", "out", "err"))
|
|
|
3b36b49 |
+class _PythonRunResult(collections.namedtuple("_PythonRunResult",
|
|
|
3b36b49 |
+ ("rc", "out", "err"))):
|
|
|
3b36b49 |
+ """Helper for reporting Python subprocess run results"""
|
|
|
3b36b49 |
+ def fail(self, cmd_line):
|
|
|
3b36b49 |
+ """Provide helpful details about failed subcommand runs"""
|
|
|
3b36b49 |
+ # Limit to 80 lines of ASCII characters
|
|
|
3b36b49 |
+ maxlen = 80 * 100
|
|
|
3b36b49 |
+ out, err = self.out, self.err
|
|
|
3b36b49 |
+ if len(out) > maxlen:
|
|
|
3b36b49 |
+ out = b'(... truncated stdout ...)' + out[-maxlen:]
|
|
|
3b36b49 |
+ if len(err) > maxlen:
|
|
|
3b36b49 |
+ err = b'(... truncated stderr ...)' + err[-maxlen:]
|
|
|
3b36b49 |
+ out = out.decode('ascii', 'replace').rstrip()
|
|
|
3b36b49 |
+ err = err.decode('ascii', 'replace').rstrip()
|
|
|
3b36b49 |
+ raise AssertionError("Process return code is %d\n"
|
|
|
3b36b49 |
+ "command line: %r\n"
|
|
|
3b36b49 |
+ "\n"
|
|
|
3b36b49 |
+ "stdout:\n"
|
|
|
3b36b49 |
+ "---\n"
|
|
|
3b36b49 |
+ "%s\n"
|
|
|
3b36b49 |
+ "---\n"
|
|
|
3b36b49 |
+ "\n"
|
|
|
3b36b49 |
+ "stderr:\n"
|
|
|
3b36b49 |
+ "---\n"
|
|
|
3b36b49 |
+ "%s\n"
|
|
|
3b36b49 |
+ "---"
|
|
|
3b36b49 |
+ % (self.rc, cmd_line,
|
|
|
3b36b49 |
+ out,
|
|
|
3b36b49 |
+ err))
|
|
|
3b36b49 |
|
|
|
3b36b49 |
|
|
|
3b36b49 |
# Executing the interpreter in a subprocess
|
|
|
8fbcd4d |
@@ -110,30 +137,7 @@ def run_python_until_end(*args, **env_vars):
|
|
|
3b36b49 |
def _assert_python(expected_success, *args, **env_vars):
|
|
|
3b36b49 |
res, cmd_line = run_python_until_end(*args, **env_vars)
|
|
|
3b36b49 |
if (res.rc and expected_success) or (not res.rc and not expected_success):
|
|
|
3b36b49 |
- # Limit to 80 lines to ASCII characters
|
|
|
3b36b49 |
- maxlen = 80 * 100
|
|
|
3b36b49 |
- out, err = res.out, res.err
|
|
|
3b36b49 |
- if len(out) > maxlen:
|
|
|
3b36b49 |
- out = b'(... truncated stdout ...)' + out[-maxlen:]
|
|
|
3b36b49 |
- if len(err) > maxlen:
|
|
|
3b36b49 |
- err = b'(... truncated stderr ...)' + err[-maxlen:]
|
|
|
3b36b49 |
- out = out.decode('ascii', 'replace').rstrip()
|
|
|
3b36b49 |
- err = err.decode('ascii', 'replace').rstrip()
|
|
|
3b36b49 |
- raise AssertionError("Process return code is %d\n"
|
|
|
3b36b49 |
- "command line: %r\n"
|
|
|
3b36b49 |
- "\n"
|
|
|
3b36b49 |
- "stdout:\n"
|
|
|
3b36b49 |
- "---\n"
|
|
|
3b36b49 |
- "%s\n"
|
|
|
3b36b49 |
- "---\n"
|
|
|
3b36b49 |
- "\n"
|
|
|
3b36b49 |
- "stderr:\n"
|
|
|
3b36b49 |
- "---\n"
|
|
|
3b36b49 |
- "%s\n"
|
|
|
3b36b49 |
- "---"
|
|
|
3b36b49 |
- % (res.rc, cmd_line,
|
|
|
3b36b49 |
- out,
|
|
|
3b36b49 |
- err))
|
|
|
3b36b49 |
+ res.fail(cmd_line)
|
|
|
3b36b49 |
return res
|
|
|
3b36b49 |
|
|
|
3b36b49 |
def assert_python_ok(*args, **env_vars):
|
|
|
3b36b49 |
diff --git a/Lib/test/test_capi.py b/Lib/test/test_capi.py
|
|
|
8fbcd4d |
index 2a53f3d..391ca15 100644
|
|
|
3b36b49 |
--- a/Lib/test/test_capi.py
|
|
|
3b36b49 |
+++ b/Lib/test/test_capi.py
|
|
|
8fbcd4d |
@@ -369,14 +369,15 @@ class EmbeddingTests(unittest.TestCase):
|
|
|
8fbcd4d |
def tearDown(self):
|
|
|
8fbcd4d |
os.chdir(self.oldcwd)
|
|
|
8fbcd4d |
|
|
|
8fbcd4d |
- def run_embedded_interpreter(self, *args):
|
|
|
8fbcd4d |
+ def run_embedded_interpreter(self, *args, env=None):
|
|
|
8fbcd4d |
"""Runs a test in the embedded interpreter"""
|
|
|
8fbcd4d |
cmd = [self.test_exe]
|
|
|
8fbcd4d |
cmd.extend(args)
|
|
|
8fbcd4d |
p = subprocess.Popen(cmd,
|
|
|
8fbcd4d |
stdout=subprocess.PIPE,
|
|
|
8fbcd4d |
stderr=subprocess.PIPE,
|
|
|
8fbcd4d |
- universal_newlines=True)
|
|
|
8fbcd4d |
+ universal_newlines=True,
|
|
|
8fbcd4d |
+ env=env)
|
|
|
8fbcd4d |
(out, err) = p.communicate()
|
|
|
8fbcd4d |
self.assertEqual(p.returncode, 0,
|
|
|
8fbcd4d |
"bad returncode %d, stderr is %r" %
|
|
|
8fbcd4d |
@@ -386,7 +387,7 @@ class EmbeddingTests(unittest.TestCase):
|
|
|
3b36b49 |
def test_subinterps(self):
|
|
|
3b36b49 |
# This is just a "don't crash" test
|
|
|
3b36b49 |
out, err = self.run_embedded_interpreter("repeated_init_and_subinterpreters")
|
|
|
3b36b49 |
- if support.verbose:
|
|
|
3b36b49 |
+ if support.verbose > 1:
|
|
|
3b36b49 |
print()
|
|
|
3b36b49 |
print(out)
|
|
|
3b36b49 |
print(err)
|
|
|
8fbcd4d |
@@ -403,13 +404,14 @@ class EmbeddingTests(unittest.TestCase):
|
|
|
8fbcd4d |
|
|
|
3b36b49 |
def test_forced_io_encoding(self):
|
|
|
3b36b49 |
# Checks forced configuration of embedded interpreter IO streams
|
|
|
8fbcd4d |
- out, err = self.run_embedded_interpreter("forced_io_encoding")
|
|
|
3b36b49 |
- if support.verbose:
|
|
|
8fbcd4d |
+ env = {"PYTHONIOENCODING": "UTF-8:surrogateescape"}
|
|
|
8fbcd4d |
+ out, err = self.run_embedded_interpreter("forced_io_encoding", env=env)
|
|
|
3b36b49 |
+ if support.verbose > 1:
|
|
|
3b36b49 |
print()
|
|
|
3b36b49 |
print(out)
|
|
|
3b36b49 |
print(err)
|
|
|
3b36b49 |
- expected_errors = sys.__stdout__.errors
|
|
|
3b36b49 |
- expected_stdin_encoding = sys.__stdin__.encoding
|
|
|
3b36b49 |
+ expected_errors = "surrogateescape"
|
|
|
3b36b49 |
+ expected_stdin_encoding = "UTF-8"
|
|
|
3b36b49 |
expected_pipe_encoding = self._get_default_pipe_encoding()
|
|
|
3b36b49 |
expected_output = '\n'.join([
|
|
|
3b36b49 |
"--- Use defaults ---",
|
|
|
3b36b49 |
diff --git a/Lib/test/test_cmd_line.py b/Lib/test/test_cmd_line.py
|
|
|
8fbcd4d |
index ae2bcd4..0a302ff 100644
|
|
|
3b36b49 |
--- a/Lib/test/test_cmd_line.py
|
|
|
3b36b49 |
+++ b/Lib/test/test_cmd_line.py
|
|
|
3b36b49 |
@@ -9,8 +9,9 @@ import sys
|
|
|
3b36b49 |
import subprocess
|
|
|
3b36b49 |
import tempfile
|
|
Iryna Shcherbina |
aba719b |
from test.support import script_helper, is_android
|
|
|
3b36b49 |
-from test.support.script_helper import (spawn_python, kill_python, assert_python_ok,
|
|
|
3b36b49 |
- assert_python_failure)
|
|
|
3b36b49 |
+from test.support.script_helper import (
|
|
|
3b36b49 |
+ spawn_python, kill_python, assert_python_ok, assert_python_failure
|
|
|
3b36b49 |
+)
|
|
|
3b36b49 |
|
|
|
3b36b49 |
|
|
|
3b36b49 |
# XXX (ncoghlan): Move to script_helper and make consistent with run_python
|
|
|
3b36b49 |
@@ -151,6 +152,7 @@ class CmdLineTest(unittest.TestCase):
|
|
|
3b36b49 |
env = os.environ.copy()
|
|
|
3b36b49 |
# Use C locale to get ascii for the locale encoding
|
|
|
3b36b49 |
env['LC_ALL'] = 'C'
|
|
|
3b36b49 |
+ env['PYTHONCOERCECLOCALE'] = '0'
|
|
|
3b36b49 |
code = (
|
|
|
3b36b49 |
b'import locale; '
|
|
|
3b36b49 |
b'print(ascii("' + undecodable + b'"), '
|
|
|
3b36b49 |
diff --git a/Lib/test/test_sys.py b/Lib/test/test_sys.py
|
|
|
3b36b49 |
index df9ebd4..63145e4 100644
|
|
|
3b36b49 |
--- a/Lib/test/test_sys.py
|
|
|
3b36b49 |
+++ b/Lib/test/test_sys.py
|
|
|
3b36b49 |
@@ -680,6 +680,7 @@ class SysModuleTest(unittest.TestCase):
|
|
|
3b36b49 |
# Force the POSIX locale
|
|
|
3b36b49 |
env = os.environ.copy()
|
|
|
3b36b49 |
env["LC_ALL"] = "C"
|
|
|
3b36b49 |
+ env["PYTHONCOERCECLOCALE"] = "0"
|
|
|
3b36b49 |
code = '\n'.join((
|
|
|
3b36b49 |
'import sys',
|
|
|
3b36b49 |
'def dump(name):',
|
|
|
3b36b49 |
diff --git a/Programs/_testembed.c b/Programs/_testembed.c
|
|
|
8fbcd4d |
index a68d4fa..e28de1c 100644
|
|
|
3b36b49 |
--- a/Programs/_testembed.c
|
|
|
3b36b49 |
+++ b/Programs/_testembed.c
|
|
|
3b36b49 |
@@ -1,4 +1,5 @@
|
|
|
3b36b49 |
-#include <Python.h>
|
|
|
3b36b49 |
+#include "Python.h"
|
|
|
3b36b49 |
+#include "pyconfig.h"
|
|
|
3b36b49 |
#include <stdio.h>
|
|
|
3b36b49 |
|
|
|
3b36b49 |
/*********************************************************
|
|
|
8fbcd4d |
@@ -126,6 +127,20 @@ static int test_forced_io_encoding(void)
|
|
|
3b36b49 |
return 0;
|
|
|
3b36b49 |
}
|
|
|
3b36b49 |
|
|
|
3b36b49 |
+static int test_c_locale_warning(void)
|
|
|
3b36b49 |
+{
|
|
|
3b36b49 |
+#ifdef PY_WARN_ON_C_LOCALE
|
|
|
3b36b49 |
+ /* Force use of the C locale */
|
|
|
3b36b49 |
+ setenv("LC_ALL", "C", 1);
|
|
|
3b36b49 |
+
|
|
|
3b36b49 |
+ _testembed_Py_Initialize();
|
|
|
3b36b49 |
+ Py_Finalize();
|
|
|
3b36b49 |
+#else
|
|
|
3b36b49 |
+ printf("C locale compatibility warning disabled at compile time\n");
|
|
|
3b36b49 |
+#endif
|
|
|
3b36b49 |
+ return 0;
|
|
|
3b36b49 |
+}
|
|
|
3b36b49 |
+
|
|
|
3b36b49 |
/* *********************************************************
|
|
|
3b36b49 |
* List of test cases and the function that implements it.
|
|
|
3b36b49 |
*
|
|
|
8fbcd4d |
@@ -147,6 +162,7 @@ struct TestCase
|
|
|
3b36b49 |
static struct TestCase TestCases[] = {
|
|
|
3b36b49 |
{ "forced_io_encoding", test_forced_io_encoding },
|
|
|
3b36b49 |
{ "repeated_init_and_subinterpreters", test_repeated_init_and_subinterpreters },
|
|
|
3b36b49 |
+ { "c_locale_warning", test_c_locale_warning },
|
|
|
3b36b49 |
{ NULL, NULL }
|
|
|
3b36b49 |
};
|
|
|
3b36b49 |
|
|
|
3b36b49 |
diff --git a/Programs/python.c b/Programs/python.c
|
|
|
8fbcd4d |
index a7afbc7..03f8295 100644
|
|
|
3b36b49 |
--- a/Programs/python.c
|
|
|
3b36b49 |
+++ b/Programs/python.c
|
|
|
8fbcd4d |
@@ -15,6 +15,21 @@ wmain(int argc, wchar_t **argv)
|
|
|
3b36b49 |
}
|
|
|
3b36b49 |
#else
|
|
|
3b36b49 |
|
|
|
8fbcd4d |
+/* Access private pylifecycle helper API to better handle the legacy C locale
|
|
|
3b36b49 |
+ *
|
|
|
3b36b49 |
+ * The legacy C locale assumes ASCII as the default text encoding, which
|
|
|
3b36b49 |
+ * causes problems not only for the CPython runtime, but also other
|
|
|
3b36b49 |
+ * components like GNU readline.
|
|
|
3b36b49 |
+ *
|
|
|
3b36b49 |
+ * Accordingly, when the CLI detects it, it attempts to coerce it to a
|
|
|
3b36b49 |
+ * more capable UTF-8 based alternative.
|
|
|
3b36b49 |
+ *
|
|
|
3b36b49 |
+ * See the documentation of the PYTHONCOERCECLOCALE setting for more details.
|
|
|
3b36b49 |
+ *
|
|
|
3b36b49 |
+ */
|
|
|
8fbcd4d |
+extern int _Py_LegacyLocaleDetected(void);
|
|
|
8fbcd4d |
+extern void _Py_CoerceLegacyLocale(void);
|
|
|
3b36b49 |
+
|
|
|
8fbcd4d |
int
|
|
|
8fbcd4d |
main(int argc, char **argv)
|
|
|
8fbcd4d |
{
|
|
|
8fbcd4d |
@@ -25,7 +40,11 @@ main(int argc, char **argv)
|
|
|
8fbcd4d |
char *oldloc;
|
|
|
8fbcd4d |
|
|
|
8fbcd4d |
/* Force malloc() allocator to bootstrap Python */
|
|
|
8fbcd4d |
+#ifdef Py_DEBUG
|
|
|
8fbcd4d |
+ (void)_PyMem_SetupAllocators("malloc_debug");
|
|
|
8fbcd4d |
+# else
|
|
|
8fbcd4d |
(void)_PyMem_SetupAllocators("malloc");
|
|
|
8fbcd4d |
+# endif
|
|
|
8fbcd4d |
|
|
|
8fbcd4d |
argv_copy = (wchar_t **)PyMem_RawMalloc(sizeof(wchar_t*) * (argc+1));
|
|
|
8fbcd4d |
argv_copy2 = (wchar_t **)PyMem_RawMalloc(sizeof(wchar_t*) * (argc+1));
|
|
|
8fbcd4d |
@@ -49,7 +68,21 @@ main(int argc, char **argv)
|
|
|
8fbcd4d |
return 1;
|
|
|
8fbcd4d |
}
|
|
|
8fbcd4d |
|
|
|
8fbcd4d |
+#ifdef __ANDROID__
|
|
|
8fbcd4d |
+ /* Passing "" to setlocale() on Android requests the C locale rather
|
|
|
8fbcd4d |
+ * than checking environment variables, so request C.UTF-8 explicitly
|
|
|
8fbcd4d |
+ */
|
|
|
8fbcd4d |
+ setlocale(LC_ALL, "C.UTF-8");
|
|
|
8fbcd4d |
+#else
|
|
|
8fbcd4d |
+ /* Reconfigure the locale to the default for this process */
|
|
|
8fbcd4d |
setlocale(LC_ALL, "");
|
|
|
8fbcd4d |
+#endif
|
|
|
8fbcd4d |
+
|
|
|
8fbcd4d |
+ if (_Py_LegacyLocaleDetected()) {
|
|
|
8fbcd4d |
+ _Py_CoerceLegacyLocale();
|
|
|
8fbcd4d |
+ }
|
|
|
8fbcd4d |
+
|
|
|
8fbcd4d |
+ /* Convert from char to wchar_t based on the locale settings */
|
|
|
8fbcd4d |
for (i = 0; i < argc; i++) {
|
|
|
8fbcd4d |
argv_copy[i] = Py_DecodeLocale(argv[i], NULL);
|
|
|
8fbcd4d |
if (!argv_copy[i]) {
|
|
|
8fbcd4d |
@@ -70,7 +103,11 @@ main(int argc, char **argv)
|
|
|
8fbcd4d |
|
|
|
8fbcd4d |
/* Force again malloc() allocator to release memory blocks allocated
|
|
|
8fbcd4d |
before Py_Main() */
|
|
|
8fbcd4d |
+#ifdef Py_DEBUG
|
|
|
8fbcd4d |
+ (void)_PyMem_SetupAllocators("malloc_debug");
|
|
|
8fbcd4d |
+# else
|
|
|
8fbcd4d |
(void)_PyMem_SetupAllocators("malloc");
|
|
|
8fbcd4d |
+# endif
|
|
|
8fbcd4d |
|
|
|
8fbcd4d |
for (i = 0; i < argc; i++) {
|
|
|
8fbcd4d |
PyMem_RawFree(argv_copy2[i]);
|
|
|
8fbcd4d |
diff --git a/Python/pylifecycle.c b/Python/pylifecycle.c
|
|
|
8fbcd4d |
index a4f7f82..261ed34 100644
|
|
|
8fbcd4d |
--- a/Python/pylifecycle.c
|
|
|
8fbcd4d |
+++ b/Python/pylifecycle.c
|
|
|
8fbcd4d |
@@ -167,6 +167,7 @@ Py_SetStandardStreamEncoding(const char *encoding, const char *errors)
|
|
|
8fbcd4d |
return 0;
|
|
|
8fbcd4d |
}
|
|
|
8fbcd4d |
|
|
|
8fbcd4d |
+
|
|
|
8fbcd4d |
/* Global initializations. Can be undone by Py_FinalizeEx(). Don't
|
|
|
8fbcd4d |
call this twice without an intervening Py_FinalizeEx() call. When
|
|
|
8fbcd4d |
initializations fail, a fatal error is issued and the function does
|
|
|
8fbcd4d |
@@ -301,6 +302,173 @@ import_init(PyInterpreterState *interp, PyObject *sysmod)
|
|
|
8fbcd4d |
}
|
|
|
8fbcd4d |
|
|
|
8fbcd4d |
|
|
|
8fbcd4d |
+/* Helper functions to better handle the legacy C locale
|
|
|
8fbcd4d |
+ *
|
|
|
8fbcd4d |
+ * The legacy C locale assumes ASCII as the default text encoding, which
|
|
|
8fbcd4d |
+ * causes problems not only for the CPython runtime, but also other
|
|
|
8fbcd4d |
+ * components like GNU readline.
|
|
|
8fbcd4d |
+ *
|
|
|
8fbcd4d |
+ * Accordingly, when the CLI detects it, it attempts to coerce it to a
|
|
|
8fbcd4d |
+ * more capable UTF-8 based alternative as follows:
|
|
|
8fbcd4d |
+ *
|
|
|
8fbcd4d |
+ * if (_Py_LegacyLocaleDetected()) {
|
|
|
8fbcd4d |
+ * _Py_CoerceLegacyLocale();
|
|
|
8fbcd4d |
+ * }
|
|
|
8fbcd4d |
+ *
|
|
|
8fbcd4d |
+ * See the documentation of the PYTHONCOERCECLOCALE setting for more details.
|
|
|
8fbcd4d |
+ *
|
|
|
8fbcd4d |
+ * Locale coercion also impacts the default error handler for the standard
|
|
|
8fbcd4d |
+ * streams: while the usual default is "strict", the default for the legacy
|
|
|
8fbcd4d |
+ * C locale and for any of the coercion target locales is "surrogateescape".
|
|
|
8fbcd4d |
+ */
|
|
|
8fbcd4d |
+
|
|
|
8fbcd4d |
+int
|
|
|
8fbcd4d |
+_Py_LegacyLocaleDetected(void)
|
|
|
8fbcd4d |
+{
|
|
|
8fbcd4d |
+ const char *ctype_loc = setlocale(LC_CTYPE, NULL);
|
|
|
8fbcd4d |
+ return ctype_loc != NULL && strcmp(ctype_loc, "C") == 0;
|
|
|
8fbcd4d |
+}
|
|
|
3b36b49 |
+
|
|
|
3b36b49 |
+typedef struct _CandidateLocale {
|
|
|
3b36b49 |
+ const char *locale_name;
|
|
|
3b36b49 |
+ int category;
|
|
|
3b36b49 |
+} _LocaleCoercionTarget;
|
|
|
3b36b49 |
+
|
|
|
3b36b49 |
+static _LocaleCoercionTarget _TARGET_LOCALES[] = {
|
|
|
3b36b49 |
+ { "C.UTF-8", LC_ALL },
|
|
|
3b36b49 |
+ { "C.utf8", LC_ALL },
|
|
|
3b36b49 |
+ { "UTF-8", LC_CTYPE },
|
|
|
3b36b49 |
+ { NULL, 0 }
|
|
|
3b36b49 |
+};
|
|
|
3b36b49 |
+
|
|
|
8fbcd4d |
+static char *
|
|
|
8fbcd4d |
+get_default_standard_stream_error_handler(void)
|
|
|
8fbcd4d |
+{
|
|
|
8fbcd4d |
+ const char *ctype_loc = setlocale(LC_CTYPE, NULL);
|
|
|
8fbcd4d |
+ if (ctype_loc != NULL) {
|
|
|
8fbcd4d |
+ /* "surrogateescape" is the default in the legacy C locale */
|
|
|
8fbcd4d |
+ if (strcmp(ctype_loc, "C") == 0) {
|
|
|
8fbcd4d |
+ return "surrogateescape";
|
|
|
8fbcd4d |
+ }
|
|
|
8fbcd4d |
+
|
|
|
8fbcd4d |
+ /* "surrogateescape" is the default in locale coercion target locales */
|
|
|
8fbcd4d |
+ const _LocaleCoercionTarget *target = NULL;
|
|
|
8fbcd4d |
+ for (target = _TARGET_LOCALES; target->locale_name; target++) {
|
|
|
8fbcd4d |
+ if (strcmp(ctype_loc, target->locale_name) == 0) {
|
|
|
8fbcd4d |
+ return "surrogateescape";
|
|
|
8fbcd4d |
+ }
|
|
|
8fbcd4d |
+ }
|
|
|
8fbcd4d |
+ }
|
|
|
8fbcd4d |
+
|
|
|
8fbcd4d |
+ /* Otherwise return NULL to request the typical default error handler */
|
|
|
8fbcd4d |
+ return NULL;
|
|
|
8fbcd4d |
+}
|
|
|
8fbcd4d |
+
|
|
|
8fbcd4d |
+#ifdef PY_COERCE_C_LOCALE
|
|
|
8fbcd4d |
+static const char *_C_LOCALE_COERCION_WARNING =
|
|
|
8fbcd4d |
+ "Python detected LC_CTYPE=C: %.20s coerced to %.20s (set another locale "
|
|
|
8fbcd4d |
+ "or PYTHONCOERCECLOCALE=0 to disable this locale coercion behavior).\n";
|
|
|
8fbcd4d |
+
|
|
|
8fbcd4d |
+static void
|
|
|
3b36b49 |
+_coerce_default_locale_settings(const _LocaleCoercionTarget *target)
|
|
|
3b36b49 |
+{
|
|
|
3b36b49 |
+ const char *newloc = target->locale_name;
|
|
|
3b36b49 |
+ int category = target->category;
|
|
|
3b36b49 |
+
|
|
|
3b36b49 |
+ /* Reset locale back to currently configured defaults */
|
|
|
3b36b49 |
+ setlocale(LC_ALL, "");
|
|
|
3b36b49 |
+
|
|
|
3b36b49 |
+ /* Set the relevant locale environment variables */
|
|
|
3b36b49 |
+ if (category == LC_ALL) {
|
|
|
3b36b49 |
+ const char *env_vars_updated = "LC_ALL & LANG";
|
|
|
3b36b49 |
+ if (setenv("LC_ALL", newloc, 1)) {
|
|
|
3b36b49 |
+ fprintf(stderr,
|
|
|
3b36b49 |
+ "Error setting LC_ALL, skipping C locale coercion\n");
|
|
|
3b36b49 |
+ return;
|
|
|
3b36b49 |
+ }
|
|
|
3b36b49 |
+ if (setenv("LANG", newloc, 1)) {
|
|
|
3b36b49 |
+ fprintf(stderr,
|
|
|
3b36b49 |
+ "Error setting LANG during C locale coercion\n");
|
|
|
3b36b49 |
+ env_vars_updated = "LC_ALL";
|
|
|
3b36b49 |
+ }
|
|
|
3b36b49 |
+ fprintf(stderr, _C_LOCALE_COERCION_WARNING, env_vars_updated, newloc);
|
|
|
3b36b49 |
+ } else if (category == LC_CTYPE) {
|
|
|
3b36b49 |
+ if (setenv("LC_CTYPE", newloc, 1)) {
|
|
|
3b36b49 |
+ fprintf(stderr,
|
|
|
3b36b49 |
+ "Error setting LC_CTYPE, skipping C locale coercion\n");
|
|
|
3b36b49 |
+ return;
|
|
|
3b36b49 |
+ }
|
|
|
3b36b49 |
+ fprintf(stderr, _C_LOCALE_COERCION_WARNING, "LC_CTYPE", newloc);
|
|
|
3b36b49 |
+ } else {
|
|
|
3b36b49 |
+ fprintf(stderr, "Locale coercion must target LC_ALL or LC_CTYPE\n");
|
|
|
3b36b49 |
+ return;
|
|
|
3b36b49 |
+ }
|
|
|
3b36b49 |
+
|
|
|
3b36b49 |
+ /* Reconfigure with the overridden environment variables */
|
|
|
3b36b49 |
+ setlocale(LC_ALL, "");
|
|
|
3b36b49 |
+}
|
|
|
3b36b49 |
+
|
|
|
8fbcd4d |
+static int
|
|
|
8fbcd4d |
+c_locale_coercion_is_expected(void)
|
|
|
3b36b49 |
+{
|
|
|
8fbcd4d |
+ /* This may be called prior to Py_Initialize, so we don't call any other
|
|
|
8fbcd4d |
+ * Python APIs, and we ignore the -E and -I flags
|
|
|
8fbcd4d |
+ */
|
|
|
3b36b49 |
+ const char *coerce_c_locale = getenv("PYTHONCOERCECLOCALE");
|
|
|
8fbcd4d |
+ if (coerce_c_locale == NULL || strncmp(coerce_c_locale, "0", 2) != 0) {
|
|
|
8fbcd4d |
+ return 1;
|
|
|
8fbcd4d |
+ }
|
|
|
8fbcd4d |
+ return 0;
|
|
|
8fbcd4d |
+}
|
|
|
8fbcd4d |
+#endif
|
|
|
8fbcd4d |
+
|
|
|
8fbcd4d |
+void
|
|
|
8fbcd4d |
+_Py_CoerceLegacyLocale(void)
|
|
|
8fbcd4d |
+{
|
|
|
8fbcd4d |
+#ifdef PY_COERCE_C_LOCALE
|
|
|
8fbcd4d |
+ /* We ignore the Python -E and -I flags here, as the CLI needs to sort out
|
|
|
3b36b49 |
+ * the locale settings *before* we try to do anything with the command
|
|
|
3b36b49 |
+ * line arguments. For cross-platform debugging purposes, we also need
|
|
|
3b36b49 |
+ * to give end users a way to force even scripts that are otherwise
|
|
|
3b36b49 |
+ * isolated from their environment to use the legacy ASCII-centric C
|
|
|
3b36b49 |
+ * locale.
|
|
|
3b36b49 |
+ */
|
|
|
8fbcd4d |
+ if (c_locale_coercion_is_expected()) {
|
|
|
3b36b49 |
+ /* PYTHONCOERCECLOCALE is not set, or is not set to exactly "0" */
|
|
|
3b36b49 |
+ const _LocaleCoercionTarget *target = NULL;
|
|
|
3b36b49 |
+ for (target = _TARGET_LOCALES; target->locale_name; target++) {
|
|
|
3b36b49 |
+ const char *reconfigured_locale = setlocale(target->category,
|
|
|
3b36b49 |
+ target->locale_name);
|
|
|
3b36b49 |
+ if (reconfigured_locale != NULL) {
|
|
|
3b36b49 |
+ /* Successfully configured locale, so make it the default */
|
|
|
3b36b49 |
+ _coerce_default_locale_settings(target);
|
|
|
3b36b49 |
+ return;
|
|
|
3b36b49 |
+ }
|
|
|
3b36b49 |
+ }
|
|
|
3b36b49 |
+ }
|
|
|
3b36b49 |
+ /* No C locale warning here, as Py_Initialize will emit one later */
|
|
|
3b36b49 |
+#endif
|
|
|
8fbcd4d |
+}
|
|
|
3b36b49 |
+
|
|
|
3b36b49 |
+
|
|
|
3b36b49 |
+#ifdef PY_WARN_ON_C_LOCALE
|
|
|
3b36b49 |
+static const char *_C_LOCALE_WARNING =
|
|
|
3b36b49 |
+ "Python runtime initialized with LC_CTYPE=C (a locale with default ASCII "
|
|
|
3b36b49 |
+ "encoding), which may cause Unicode compatibility problems. Using C.UTF-8, "
|
|
|
3b36b49 |
+ "C.utf8, or UTF-8 (if available) as alternative Unicode-compatible "
|
|
|
3b36b49 |
+ "locales is recommended.\n";
|
|
|
3b36b49 |
+
|
|
|
3b36b49 |
+static void
|
|
|
3b36b49 |
+_emit_stderr_warning_for_c_locale(void)
|
|
|
3b36b49 |
+{
|
|
|
8fbcd4d |
+ if (c_locale_coercion_is_expected()) {
|
|
|
8fbcd4d |
+ if (_Py_LegacyLocaleDetected()) {
|
|
|
3b36b49 |
+ fprintf(stderr, "%s", _C_LOCALE_WARNING);
|
|
|
3b36b49 |
+ }
|
|
|
3b36b49 |
+ }
|
|
|
3b36b49 |
+}
|
|
|
3b36b49 |
+#endif
|
|
|
3b36b49 |
+
|
|
|
3b36b49 |
void
|
|
|
3b36b49 |
_Py_InitializeEx_Private(int install_sigs, int install_importlib)
|
|
|
3b36b49 |
{
|
|
|
8fbcd4d |
@@ -315,11 +483,19 @@ _Py_InitializeEx_Private(int install_sigs, int install_importlib)
|
|
|
3b36b49 |
initialized = 1;
|
|
|
3b36b49 |
_Py_Finalizing = NULL;
|
|
|
3b36b49 |
|
|
|
3b36b49 |
-#ifdef HAVE_SETLOCALE
|
|
|
3b36b49 |
+#ifdef __ANDROID__
|
|
|
3b36b49 |
+ /* Passing "" to setlocale() on Android requests the C locale rather
|
|
|
3b36b49 |
+ * than checking environment variables, so request C.UTF-8 explicitly
|
|
|
3b36b49 |
+ */
|
|
|
3b36b49 |
+ setlocale(LC_CTYPE, "C.UTF-8");
|
|
|
3b36b49 |
+#else
|
|
|
3b36b49 |
/* Set up the LC_CTYPE locale, so we can obtain
|
|
|
3b36b49 |
the locale's charset without having to switch
|
|
|
3b36b49 |
locales. */
|
|
|
3b36b49 |
setlocale(LC_CTYPE, "");
|
|
|
3b36b49 |
+#ifdef PY_WARN_ON_C_LOCALE
|
|
|
3b36b49 |
+ _emit_stderr_warning_for_c_locale();
|
|
|
3b36b49 |
+#endif
|
|
|
3b36b49 |
#endif
|
|
|
3b36b49 |
|
|
|
3b36b49 |
if ((p = Py_GETENV("PYTHONDEBUG")) && *p != '\0')
|
|
|
8fbcd4d |
@@ -1242,12 +1418,8 @@ initstdio(void)
|
|
|
8fbcd4d |
}
|
|
|
8fbcd4d |
}
|
|
|
8fbcd4d |
if (!errors && !(pythonioencoding && *pythonioencoding)) {
|
|
|
8fbcd4d |
- /* When the LC_CTYPE locale is the POSIX locale ("C locale"),
|
|
|
8fbcd4d |
- stdin and stdout use the surrogateescape error handler by
|
|
|
8fbcd4d |
- default, instead of the strict error handler. */
|
|
|
8fbcd4d |
- char *loc = setlocale(LC_CTYPE, NULL);
|
|
|
8fbcd4d |
- if (loc != NULL && strcmp(loc, "C") == 0)
|
|
|
8fbcd4d |
- errors = "surrogateescape";
|
|
|
8fbcd4d |
+ /* Choose the default error handler based on the current locale */
|
|
|
8fbcd4d |
+ errors = get_default_standard_stream_error_handler();
|
|
|
8fbcd4d |
}
|
|
|
8fbcd4d |
}
|
|
|
8fbcd4d |
|
|
|
3b36b49 |
diff --git a/configure b/configure
|
|
|
8fbcd4d |
index 2915246..39e5a27 100755
|
|
|
3b36b49 |
--- a/configure
|
|
|
3b36b49 |
+++ b/configure
|
|
|
3b36b49 |
@@ -834,6 +834,8 @@ with_thread
|
|
|
3b36b49 |
enable_ipv6
|
|
|
3b36b49 |
with_doc_strings
|
|
|
3b36b49 |
with_pymalloc
|
|
|
3b36b49 |
+with_c_locale_coercion
|
|
|
3b36b49 |
+with_c_locale_warning
|
|
|
3b36b49 |
with_valgrind
|
|
|
3b36b49 |
with_dtrace
|
|
|
3b36b49 |
with_fpectl
|
|
|
3b36b49 |
@@ -1527,6 +1529,12 @@ Optional Packages:
|
|
|
3b36b49 |
deprecated; use --with(out)-threads
|
|
|
3b36b49 |
--with(out)-doc-strings disable/enable documentation strings
|
|
|
3b36b49 |
--with(out)-pymalloc disable/enable specialized mallocs
|
|
|
3b36b49 |
+ --with(out)-c-locale-coercion
|
|
|
3b36b49 |
+ disable/enable C locale coercion to a UTF-8 based
|
|
|
3b36b49 |
+ locale
|
|
|
3b36b49 |
+ --with(out)-c-locale-warning
|
|
|
3b36b49 |
+ disable/enable locale compatibility warning in the C
|
|
|
3b36b49 |
+ locale
|
|
|
3b36b49 |
--with-valgrind Enable Valgrind support
|
|
|
3b36b49 |
--with(out)-dtrace disable/enable DTrace support
|
|
|
3b36b49 |
--with-fpectl enable SIGFPE catching
|
|
|
8fbcd4d |
@@ -11010,6 +11018,52 @@ fi
|
|
|
3b36b49 |
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $with_pymalloc" >&5
|
|
|
3b36b49 |
$as_echo "$with_pymalloc" >&6; }
|
|
|
3b36b49 |
|
|
|
3b36b49 |
+# Check for --with-c-locale-coercion
|
|
|
3b36b49 |
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for --with-c-locale-coercion" >&5
|
|
|
3b36b49 |
+$as_echo_n "checking for --with-c-locale-coercion... " >&6; }
|
|
|
3b36b49 |
+
|
|
|
3b36b49 |
+# Check whether --with-c-locale-coercion was given.
|
|
|
3b36b49 |
+if test "${with_c_locale_coercion+set}" = set; then :
|
|
|
3b36b49 |
+ withval=$with_c_locale_coercion;
|
|
|
3b36b49 |
+fi
|
|
|
3b36b49 |
+
|
|
|
3b36b49 |
+
|
|
|
3b36b49 |
+if test -z "$with_c_locale_coercion"
|
|
|
3b36b49 |
+then
|
|
|
3b36b49 |
+ with_c_locale_coercion="yes"
|
|
|
3b36b49 |
+fi
|
|
|
3b36b49 |
+if test "$with_c_locale_coercion" != "no"
|
|
|
3b36b49 |
+then
|
|
|
3b36b49 |
+
|
|
|
3b36b49 |
+$as_echo "#define PY_COERCE_C_LOCALE 1" >>confdefs.h
|
|
|
3b36b49 |
+
|
|
|
3b36b49 |
+fi
|
|
|
3b36b49 |
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $with_c_locale_coercion" >&5
|
|
|
3b36b49 |
+$as_echo "$with_c_locale_coercion" >&6; }
|
|
|
3b36b49 |
+
|
|
|
3b36b49 |
+# Check for --with-c-locale-warning
|
|
|
3b36b49 |
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for --with-c-locale-warning" >&5
|
|
|
3b36b49 |
+$as_echo_n "checking for --with-c-locale-warning... " >&6; }
|
|
|
3b36b49 |
+
|
|
|
3b36b49 |
+# Check whether --with-c-locale-warning was given.
|
|
|
3b36b49 |
+if test "${with_c_locale_warning+set}" = set; then :
|
|
|
3b36b49 |
+ withval=$with_c_locale_warning;
|
|
|
3b36b49 |
+fi
|
|
|
3b36b49 |
+
|
|
|
3b36b49 |
+
|
|
|
3b36b49 |
+if test -z "$with_c_locale_warning"
|
|
|
3b36b49 |
+then
|
|
|
3b36b49 |
+ with_c_locale_warning="yes"
|
|
|
3b36b49 |
+fi
|
|
|
3b36b49 |
+if test "$with_c_locale_warning" != "no"
|
|
|
3b36b49 |
+then
|
|
|
3b36b49 |
+
|
|
|
3b36b49 |
+$as_echo "#define PY_WARN_ON_C_LOCALE 1" >>confdefs.h
|
|
|
3b36b49 |
+
|
|
|
3b36b49 |
+fi
|
|
|
3b36b49 |
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $with_c_locale_warning" >&5
|
|
|
3b36b49 |
+$as_echo "$with_c_locale_warning" >&6; }
|
|
|
3b36b49 |
+
|
|
|
3b36b49 |
# Check for Valgrind support
|
|
|
3b36b49 |
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for --with-valgrind" >&5
|
|
|
3b36b49 |
$as_echo_n "checking for --with-valgrind... " >&6; }
|
|
|
3b36b49 |
diff --git a/configure.ac b/configure.ac
|
|
|
8fbcd4d |
index 67dfba3..b9c9f04 100644
|
|
|
3b36b49 |
--- a/configure.ac
|
|
|
3b36b49 |
+++ b/configure.ac
|
|
|
8fbcd4d |
@@ -3279,6 +3279,40 @@ then
|
|
|
3b36b49 |
fi
|
|
|
3b36b49 |
AC_MSG_RESULT($with_pymalloc)
|
|
|
3b36b49 |
|
|
|
3b36b49 |
+# Check for --with-c-locale-coercion
|
|
|
3b36b49 |
+AC_MSG_CHECKING(for --with-c-locale-coercion)
|
|
|
3b36b49 |
+AC_ARG_WITH(c-locale-coercion,
|
|
|
3b36b49 |
+ AS_HELP_STRING([--with(out)-c-locale-coercion],
|
|
|
3b36b49 |
+ [disable/enable C locale coercion to a UTF-8 based locale]))
|
|
|
3b36b49 |
+
|
|
|
3b36b49 |
+if test -z "$with_c_locale_coercion"
|
|
|
3b36b49 |
+then
|
|
|
3b36b49 |
+ with_c_locale_coercion="yes"
|
|
|
3b36b49 |
+fi
|
|
|
3b36b49 |
+if test "$with_c_locale_coercion" != "no"
|
|
|
3b36b49 |
+then
|
|
|
3b36b49 |
+ AC_DEFINE(PY_COERCE_C_LOCALE, 1,
|
|
|
3b36b49 |
+ [Define if you want to coerce the C locale to a UTF-8 based locale])
|
|
|
3b36b49 |
+fi
|
|
|
3b36b49 |
+AC_MSG_RESULT($with_c_locale_coercion)
|
|
|
3b36b49 |
+
|
|
|
3b36b49 |
+# Check for --with-c-locale-warning
|
|
|
3b36b49 |
+AC_MSG_CHECKING(for --with-c-locale-warning)
|
|
|
3b36b49 |
+AC_ARG_WITH(c-locale-warning,
|
|
|
3b36b49 |
+ AS_HELP_STRING([--with(out)-c-locale-warning],
|
|
|
3b36b49 |
+ [disable/enable locale compatibility warning in the C locale]))
|
|
|
3b36b49 |
+
|
|
|
3b36b49 |
+if test -z "$with_c_locale_warning"
|
|
|
3b36b49 |
+then
|
|
|
3b36b49 |
+ with_c_locale_warning="yes"
|
|
|
3b36b49 |
+fi
|
|
|
3b36b49 |
+if test "$with_c_locale_warning" != "no"
|
|
|
3b36b49 |
+then
|
|
|
3b36b49 |
+ AC_DEFINE(PY_WARN_ON_C_LOCALE, 1,
|
|
|
3b36b49 |
+ [Define to emit a locale compatibility warning in the C locale])
|
|
|
3b36b49 |
+fi
|
|
|
3b36b49 |
+AC_MSG_RESULT($with_c_locale_warning)
|
|
|
3b36b49 |
+
|
|
|
3b36b49 |
# Check for Valgrind support
|
|
|
3b36b49 |
AC_MSG_CHECKING([for --with-valgrind])
|
|
|
3b36b49 |
AC_ARG_WITH([valgrind],
|
|
|
3b36b49 |
diff --git a/pyconfig.h.in b/pyconfig.h.in
|
|
|
8fbcd4d |
index b10c57f..0a6f3e2 100644
|
|
|
3b36b49 |
--- a/pyconfig.h.in
|
|
|
3b36b49 |
+++ b/pyconfig.h.in
|
|
|
8fbcd4d |
@@ -1244,9 +1244,15 @@
|
|
|
3b36b49 |
/* Define as the preferred size in bits of long digits */
|
|
|
3b36b49 |
#undef PYLONG_BITS_IN_DIGIT
|
|
|
3b36b49 |
|
|
|
3b36b49 |
+/* Define if you want to coerce the C locale to a UTF-8 based locale */
|
|
|
3b36b49 |
+#undef PY_COERCE_C_LOCALE
|
|
|
3b36b49 |
+
|
|
|
3b36b49 |
/* Define to printf format modifier for Py_ssize_t */
|
|
|
3b36b49 |
#undef PY_FORMAT_SIZE_T
|
|
|
3b36b49 |
|
|
|
3b36b49 |
+/* Define to emit a locale compatibility warning in the C locale */
|
|
|
3b36b49 |
+#undef PY_WARN_ON_C_LOCALE
|
|
|
3b36b49 |
+
|
|
|
3b36b49 |
/* Define if you want to build an interpreter with many run-time checks. */
|
|
|
3b36b49 |
#undef Py_DEBUG
|
|
|
3b36b49 |
|