Fixed bug 4692 - Command line parsing

Galadrim

As I have seen, SDL implements its own command line parser for Windows in SDL_windows_main.c. Unfortunately, it doesn't seem to allow command line arguments with trailing backslashes if quoting is required.

Usually, when you write an application that gets command line arguments passed as argc and argv, the parsing is done by parse_cmdline. The Windows API also provides the function CommandLineToArgvW, so an application can parse itself if only the command line string is provided. Both functions behave almost identically according to their documentation. If the argument "\\" (including the quotes) is passed, they both turn it into a single backslash.

The SDL command line parser on the other hand doesn't recognize the second quote character as the closing character in this example and therefore includes it in the parsed argument. The parser does not count the number of backslashes preceding a quote. It always treats a quote as escaped if a backslash is in front of it. Therefore, it should be impossible to quote and escape an argument correctly, if it has a trailing backslash and contains characters that require quoting.

Of course, each application is allowed to implement its own parsing rules, so SDL is free to do so. But the problem I see is that there are arguments, that are impossible to be passed to the parser correctly, as I described above. Is there a reason, why SDL does not simply use CommandLineToArgvW instead of implementing its own parser?

Here are some links that show that correct argument parsing, as it is usually done in Windows, is quite complicated:

https://docs.microsoft.com/en-us/windows/desktop/api/shellapi/nf-shellapi-commandlinetoargvw

http://www.windowsinspired.com/how-a-windows-programs-splits-its-command-line-into-individual-arguments/
This commit is contained in:
Sam Lantinga 2019-07-31 09:11:20 -07:00
parent 4953e050f5
commit f8400cbba9

View file

@ -9,6 +9,7 @@
/* Include this so we define UNICODE properly */
#include "../../core/windows/SDL_windows.h"
#include <shellapi.h> /* CommandLineToArgvW() */
/* Include the SDL main definition header */
#include "SDL.h"
@ -18,87 +19,7 @@
# undef main
#endif /* main */
static void
UnEscapeQuotes(char *arg)
{
char *last = NULL;
while (*arg) {
if (*arg == '"' && (last != NULL && *last == '\\')) {
char *c_curr = arg;
char *c_last = last;
while (*c_curr) {
*c_last = *c_curr;
c_last = c_curr;
c_curr++;
}
*c_last = '\0';
}
last = arg;
arg++;
}
}
/* Parse a command line buffer into arguments */
static int
ParseCommandLine(char *cmdline, char **argv)
{
char *bufp;
char *lastp = NULL;
int argc, last_argc;
argc = last_argc = 0;
for (bufp = cmdline; *bufp;) {
/* Skip leading whitespace */
while (*bufp == ' ' || *bufp == '\t') {
++bufp;
}
/* Skip over argument */
if (*bufp == '"') {
++bufp;
if (*bufp) {
if (argv) {
argv[argc] = bufp;
}
++argc;
}
/* Skip over word */
lastp = bufp;
while (*bufp && (*bufp != '"' || *lastp == '\\')) {
lastp = bufp;
++bufp;
}
} else {
if (*bufp) {
if (argv) {
argv[argc] = bufp;
}
++argc;
}
/* Skip over word */
while (*bufp && (*bufp != ' ' && *bufp != '\t')) {
++bufp;
}
}
if (*bufp) {
if (argv) {
*bufp = '\0';
}
++bufp;
}
/* Strip out \ from \" sequences */
if (argv && last_argc != argc) {
UnEscapeQuotes(argv[last_argc]);
}
last_argc = argc;
}
if (argv) {
argv[argc] = NULL;
}
return (argc);
}
#define WIN_WStringToUTF8(S) SDL_iconv_string("UTF-8", "UTF-16LE", (char *)(S), (SDL_wcslen(S)+1)*sizeof(WCHAR))
/* Pop up an out of memory message, returns to Windows */
static BOOL
@ -119,65 +40,44 @@ OutOfMemory(void)
/* Gets the arguments with GetCommandLine, converts them to argc and argv
and calls SDL_main */
static int
main_getcmdline()
main_getcmdline(void)
{
LPWSTR *argvw;
char **argv;
int argc;
char *cmdline = NULL;
int retval = 0;
int cmdalloc = 0;
const TCHAR *text = GetCommandLine();
const TCHAR *ptr;
int argc_guess = 2; /* space for NULL and initial argument. */
int rc;
int i, argc;
int result;
/* make a rough guess of command line arguments. Overestimates if there
are quoted things. */
for (ptr = text; *ptr; ptr++) {
if ((*ptr == ' ') || (*ptr == '\t')) {
argc_guess++;
}
}
#if UNICODE
rc = WideCharToMultiByte(CP_UTF8, 0, text, -1, NULL, 0, NULL, NULL);
if (rc > 0) {
cmdalloc = rc + (sizeof (char *) * argc_guess);
argv = (char **) VirtualAlloc(NULL, cmdalloc, MEM_RESERVE|MEM_COMMIT, PAGE_READWRITE);
if (argv) {
int rc2;
cmdline = (char *) (argv + argc_guess);
rc2 = WideCharToMultiByte(CP_UTF8, 0, text, -1, cmdline, rc, NULL, NULL);
SDL_assert(rc2 == rc);
}
}
#else
/* !!! FIXME: are these in the system codepage? We need to convert to UTF-8. */
rc = ((int) SDL_strlen(text)) + 1;
cmdalloc = rc + (sizeof (char *) * argc_guess);
argv = (char **) VirtualAlloc(NULL, cmdalloc, MEM_RESERVE|MEM_COMMIT, PAGE_READWRITE);
if (argv) {
cmdline = (char *) (argv + argc_guess);
SDL_strcpy(cmdline, text);
}
#endif
if (cmdline == NULL) {
argvw = CommandLineToArgvW(GetCommandLineW(), &argc);
if (argvw == NULL) {
return OutOfMemory();
}
/* Parse it into argv and argc */
SDL_assert(ParseCommandLine(cmdline, NULL) <= argc_guess);
argc = ParseCommandLine(cmdline, argv);
argv = (char **)SDL_calloc(argc + 1, sizeof(*argv));
if (!argv) {
return OutOfMemory();
}
for (i = 0; i < argc; ++i) {
argv[i] = WIN_WStringToUTF8(argvw[i]);
if (!argv[i]) {
return OutOfMemory();
}
}
argv[i] = NULL;
LocalFree(argvw);
SDL_SetMainReady();
/* Run the application main() code */
retval = SDL_main(argc, argv);
result = SDL_main(argc, argv);
VirtualFree(argv, cmdalloc, MEM_DECOMMIT);
VirtualFree(argv, 0, MEM_RELEASE);
/* Free argv, to avoid memory leak */
for (i = 0; i < argc; ++i) {
SDL_free(argv[i]);
}
SDL_free(argv);
return retval;
return result;
}
/* This is where execution begins [console apps, ansi] */