From c7e92a6076578bd9418f872323b5d49ccf92878c Mon Sep 17 00:00:00 2001 From: Ray Donnelly Date: Thu, 17 Jun 2021 18:51:46 +0530 Subject: [PATCH 021/N] mingw: prefer unix sep if MSYSTEM environment variable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Алексей Co-authored-by: Christoph Reiter Co-authored-by: cat Co-authored-by: Naveen M K --- Include/pylifecycle.h | 6 ++ Lib/ntpath.py | 106 ++++++++++++++++++--------------- Modules/posixmodule.c | 2 + Python/initconfig.c | 2 +- Python/pathconfig.c | 135 ++++++++++++++++++++++++++++++++++++++++++ Python/traceback.c | 2 +- 6 files changed, 203 insertions(+), 50 deletions(-) diff --git a/Include/pylifecycle.h b/Include/pylifecycle.h index e4c3b09..50d8b76 100644 --- a/Include/pylifecycle.h +++ b/Include/pylifecycle.h @@ -21,6 +21,12 @@ PyAPI_FUNC(int) Py_IsInitialized(void); PyAPI_FUNC(PyThreadState *) Py_NewInterpreter(void); PyAPI_FUNC(void) Py_EndInterpreter(PyThreadState *); +PyAPI_FUNC(wchar_t) Py_GetSepW(const wchar_t *); +PyAPI_FUNC(char) Py_GetSepA(const char *); + +PyAPI_FUNC(void) Py_NormalizeSepsW(wchar_t *); +PyAPI_FUNC(void) Py_NormalizeSepsA(char *); + /* Py_PyAtExit is for the atexit module, Py_AtExit is for low-level * exit functions. diff --git a/Lib/ntpath.py b/Lib/ntpath.py index c05e965..dedbe19 100644 --- a/Lib/ntpath.py +++ b/Lib/ntpath.py @@ -11,9 +11,7 @@ module as os.path. curdir = '.' pardir = '..' extsep = '.' -sep = '\\' pathsep = ';' -altsep = '/' defpath = '.;C:\\bin' devnull = 'nul' @@ -23,6 +21,14 @@ import stat import genericpath from genericpath import * +if sys.platform == "win32" and "MSYSTEM" in os.environ: + sep = '/' + altsep = '\\' +else: + sep = '\\' + altsep = '/' +bsep = str.encode(sep) +baltsep = str.encode(altsep) __all__ = ["normcase","isabs","join","splitdrive","splitroot","split","splitext", "basename","dirname","commonprefix","getsize","getmtime", @@ -34,9 +40,33 @@ __all__ = ["normcase","isabs","join","splitdrive","splitroot","split","splitext" def _get_bothseps(path): if isinstance(path, bytes): - return b'\\/' + return bsep+baltsep + else: + return sep+altsep + +def _get_sep(path): + if isinstance(path, bytes): + return bsep else: - return '\\/' + return sep + +def _get_altsep(path): + if isinstance(path, bytes): + return baltsep + else: + return altsep + +def _get_colon(path): + if isinstance(path, bytes): + return b':' + else: + return ':' + +def _get_unc_prefix(path): + if isinstance(path, bytes): + return b'\\\\?\\UNC\\' + else: + return '\\\\?\\UNC\\' # Normalize the case of a pathname and map slashes to backslashes. # Other normalizations (such as optimizing '../' away) are not done @@ -58,14 +88,14 @@ try: return s if isinstance(s, bytes): encoding = sys.getfilesystemencoding() - s = s.decode(encoding, 'surrogateescape').replace('/', '\\') + s = s.decode(encoding, 'surrogateescape').replace(altsep, sep) s = _LCMapStringEx(_LOCALE_NAME_INVARIANT, _LCMAP_LOWERCASE, s) return s.encode(encoding, 'surrogateescape') else: return _LCMapStringEx(_LOCALE_NAME_INVARIANT, _LCMAP_LOWERCASE, - s.replace('/', '\\')) + s.replace(altsep, sep)) except ImportError: def normcase(s): """Normalize case of pathname. @@ -74,8 +104,8 @@ except ImportError: """ s = os.fspath(s) if isinstance(s, bytes): - return os.fsencode(os.fsdecode(s).replace('/', '\\').lower()) - return s.replace('/', '\\').lower() + return os.fsencode(os.fsdecode(s).replace(altsep, sep).lower()) + return s.replace(altsep, sep).lower() # Return whether a path is absolute. @@ -87,14 +117,9 @@ except ImportError: def isabs(s): """Test whether a path is absolute""" s = os.fspath(s) - if isinstance(s, bytes): - sep = b'\\' - altsep = b'/' - colon_sep = b':\\' - else: - sep = '\\' - altsep = '/' - colon_sep = ':\\' + sep = _get_sep(s) + altsep = _get_altsep(s) + colon_sep = _get_colon(s) + sep s = s[:3].replace(altsep, sep) # Absolute: UNC, device, and paths with a drive and root. # LEGACY BUG: isabs("/x") should be false since the path has no drive. @@ -106,14 +131,9 @@ def isabs(s): # Join two (or more) paths. def join(path, *paths): path = os.fspath(path) - if isinstance(path, bytes): - sep = b'\\' - seps = b'\\/' - colon = b':' - else: - sep = '\\' - seps = '\\/' - colon = ':' + sep = _get_sep(path) + seps = _get_bothseps(path) + colon = _get_colon(path) try: if not paths: path[:0] + sep #23780: Ensure compatible data type even if p is null. @@ -188,18 +208,12 @@ def splitroot(p): splitroot('Windows/notepad') == ('', '', 'Windows/notepad') """ p = os.fspath(p) - if isinstance(p, bytes): - sep = b'\\' - altsep = b'/' - colon = b':' - unc_prefix = b'\\\\?\\UNC\\' - empty = b'' - else: - sep = '\\' - altsep = '/' - colon = ':' - unc_prefix = '\\\\?\\UNC\\' - empty = '' + sep = _get_sep(p) + altsep = _get_altsep(p) + colon = _get_colon(p) + unc_prefix = _get_unc_prefix(p) + empty = b'' if isinstance(p, bytes) else '' + normp = p.replace(altsep, sep) if normp[:1] == sep: if normp[1:2] == sep: @@ -257,9 +271,9 @@ def split(p): def splitext(p): p = os.fspath(p) if isinstance(p, bytes): - return genericpath._splitext(p, b'\\', b'/', b'.') + return genericpath._splitext(p, bsep, baltsep, b'.') else: - return genericpath._splitext(p, '\\', '/', '.') + return genericpath._splitext(p, sep, altsep, '.') splitext.__doc__ = genericpath._splitext.__doc__ @@ -527,14 +541,12 @@ except ImportError: def normpath(path): """Normalize path, eliminating double slashes, etc.""" path = os.fspath(path) + sep = _get_sep(path) + altsep = _get_altsep(path) if isinstance(path, bytes): - sep = b'\\' - altsep = b'/' curdir = b'.' pardir = b'..' else: - sep = '\\' - altsep = '/' curdir = '.' pardir = '..' path = path.replace(altsep, sep) @@ -762,6 +774,7 @@ else: # strip the prefix anyway. if ex.winerror == initial_winerror: path = spath + path = normpath(path) return path @@ -771,12 +784,11 @@ supports_unicode_filenames = True def relpath(path, start=None): """Return a relative version of a path""" path = os.fspath(path) + sep = _get_sep(path) if isinstance(path, bytes): - sep = b'\\' curdir = b'.' pardir = b'..' else: - sep = '\\' curdir = '.' pardir = '..' @@ -831,13 +843,11 @@ def commonpath(paths): raise ValueError('commonpath() arg is an empty sequence') paths = tuple(map(os.fspath, paths)) + sep = _get_sep(paths[0]) + altsep = _get_altsep(paths[0]) if isinstance(paths[0], bytes): - sep = b'\\' - altsep = b'/' curdir = b'.' else: - sep = '\\' - altsep = '/' curdir = '.' try: diff --git a/Modules/posixmodule.c b/Modules/posixmodule.c index 1a62ddd..2cb0c5b 100644 --- a/Modules/posixmodule.c +++ b/Modules/posixmodule.c @@ -4006,6 +4006,7 @@ posix_getcwd(int use_bytes) return NULL; } + Py_NormalizeSepsW(wbuf2); PyObject *resobj = PyUnicode_FromWideChar(wbuf2, len); if (wbuf2 != wbuf) { PyMem_RawFree(wbuf2); @@ -4892,6 +4893,7 @@ os__getfinalpathname_impl(PyObject *module, path_t *path) target_path = tmp; } + Py_NormalizeSepsW(target_path); result = PyUnicode_FromWideChar(target_path, result_length); if (result && PyBytes_Check(path->object)) { Py_SETREF(result, PyUnicode_EncodeFSDefault(result)); diff --git a/Python/initconfig.c b/Python/initconfig.c index ed4c4e2..6ba6f5b 100644 --- a/Python/initconfig.c +++ b/Python/initconfig.c @@ -164,7 +164,7 @@ static const char usage_envvars[] = "PYTHONWARNINGS : warning control (-W)\n" ; -#if defined(MS_WINDOWS) +#if defined(_MSC_VER) # define PYTHONHOMEHELP "\\python{major}{minor}" #else # define PYTHONHOMEHELP "/lib/pythonX.X" diff --git a/Python/pathconfig.c b/Python/pathconfig.c index be0f97c..3d684d6 100644 --- a/Python/pathconfig.c +++ b/Python/pathconfig.c @@ -18,6 +18,140 @@ extern "C" { #endif +#ifdef __MINGW32__ +#define wcstok wcstok_s +#include +#endif + +static int +Py_StartsWithA(const char * str, const char * prefix) +{ + while(*prefix) + { + if(*prefix++ != *str++) + return 0; + } + + return 1; +} + +static int +Py_StartsWithW(const wchar_t * str, const wchar_t * prefix) +{ + while(*prefix) + { + if(*prefix++ != *str++) + return 0; + } + + return 1; +} + +char +Py_GetSepA(const char *name) +{ + char* msystem = (char*)2; /* So that non Windows use / as sep */ + static char sep = '\0'; +#ifdef _WIN32 + /* https://msdn.microsoft.com/en-gb/library/windows/desktop/aa365247%28v=vs.85%29.aspx + * The "\\?\" prefix .. indicate that the path should be passed to the system with minimal + * modification, which means that you cannot use forward slashes to represent path separators + */ + if (name != NULL && Py_StartsWithA(name, "\\\\?\\") != 0) + { + return '\\'; + } +#endif + if (sep != '\0') + return sep; +#if defined(__MINGW32__) + msystem = Py_GETENV("MSYSTEM"); +#endif + if (msystem != NULL) + sep = '/'; + else + sep = '\\'; + return sep; +} + +static char +Py_GetAltSepA(const char *name) +{ + char sep = Py_GetSepA(name); + if (sep == '/') + return '\\'; + return '/'; +} + +void +Py_NormalizeSepsA(char *name) +{ + assert(name != NULL); + char sep = Py_GetSepA(name); + char altsep = Py_GetAltSepA(name); + char* seps; + if (name[0] != '\0' && name[1] == ':') { + name[0] = toupper(name[0]); + } + seps = strchr(name, altsep); + while(seps) { + *seps = sep; + seps = strchr(seps, altsep); + } +} + +wchar_t +Py_GetSepW(const wchar_t *name) +{ + char* msystem = (char*)2; /* So that non Windows use / as sep */ + static wchar_t sep = L'\0'; +#ifdef _WIN32 + /* https://msdn.microsoft.com/en-gb/library/windows/desktop/aa365247%28v=vs.85%29.aspx + * The "\\?\" prefix .. indicate that the path should be passed to the system with minimal + * modification, which means that you cannot use forward slashes to represent path separators + */ + if (name != NULL && Py_StartsWithW(name, L"\\\\?\\") != 0) + { + return L'\\'; + } +#endif + if (sep != L'\0') + return sep; +#if defined(__MINGW32__) + msystem = Py_GETENV("MSYSTEM"); +#endif + if (msystem != NULL) + sep = L'/'; + else + sep = L'\\'; + return sep; +} + +static wchar_t +Py_GetAltSepW(const wchar_t *name) +{ + char sep = Py_GetSepW(name); + if (sep == L'/') + return L'\\'; + return L'/'; +} + +void +Py_NormalizeSepsW(wchar_t *name) +{ + assert(name != NULL); + wchar_t sep = Py_GetSepW(name); + wchar_t altsep = Py_GetAltSepW(name); + wchar_t* seps; + if (name[0] != L'\0' && name[1] == L':') { + name[0] = towupper(name[0]); + } + seps = wcschr(name, altsep); + while(seps) { + *seps = sep; + seps = wcschr(seps, altsep); + } +} /* External interface */ @@ -317,6 +451,7 @@ _Py_SetProgramFullPath(const wchar_t *program_full_path) if (has_value && _Py_path_config.program_full_path == NULL) { path_out_of_memory(__func__); } + Py_NormalizeSepsW(_Py_path_config.program_name); } diff --git a/Python/traceback.c b/Python/traceback.c index 1cb82c5..37df982 100644 --- a/Python/traceback.c +++ b/Python/traceback.c @@ -342,7 +342,7 @@ _Py_FindSourceFile(PyObject *filename, char* namebuf, size_t namelen, PyObject * filepath = PyBytes_AS_STRING(filebytes); /* Search tail of filename in sys.path before giving up */ - tail = strrchr(filepath, SEP); + tail = strrchr(filepath, Py_GetSepA(filepath)); if (tail == NULL) tail = filepath; else