Inspired by Ch4ozz's post (and my own need for a halfway decent non-strtok parser), I cooked something up and figured I would share it. It should fit most general use cases, as you can specify the delimiter and escaped character strings. It relies heavily on the use of strspn() but that should be trivial to implement.
Escaped characters are replaced with '\0' if there is a match (e.g. for quotations); otherwise they are ignored.
Runs of consecutive delimiters are likewise treated as a single separator (e.g. multiple spaces, tabs, etc.). Hopefully someone can find this useful.
example usage as follows:
Code:
char** argv = tokenize("echo \"hello world\"", " ", "\"", &argc);
Code:
/* Released to public domain */
/* Simple argument parsing - returns an array of char* pointers.
 *
 * The input is copied; @str itself is never modified. The copy is split in
 * place by overwriting delimiter and matched escape characters with '\0',
 * and the returned pointers all point into that single copy.
 *
 * @str    is the string to be parsed
 * @delim  is a string containing characters to split by; a run of
 *         consecutive delimiters acts as one separator
 * @escape is a string containing characters to escape ("\"\'\n", etc) -
 *         optional, may be NULL. When a run of escape characters is found
 *         and its first character occurs again later in the string, the
 *         text between the run and that later occurrence becomes one
 *         argument (delimiters inside it are not split on). An escape
 *         character with no later match is left in place as ordinary text.
 * @_argc  is a pointer to integer receiving the number of returned arguments
 *
 * Returns a heap-allocated array of *_argc pointers followed by a NULL
 * terminator, or NULL if @str, @delim or @_argc is NULL or an allocation
 * fails (in which case *_argc is left untouched).
 *
 * Ownership: element 0 always points at the start of the internal copy, so
 * the caller releases everything with free(argv[0]); free(argv);
 *
 * Notes on edge cases: because element 0 is always the start of the copy, a
 * string that begins with a delimiter or a matched escape character yields
 * an empty first argument; a trailing delimiter run yields an empty last
 * argument. */
char** tokenize(const char* str, const char* delim, const char* escape, int* _argc) {
    if (!str || !_argc || !delim)
        return NULL;

    /* First pass: measure the string and compute an upper bound on the
       number of pointers needed. The splitting loop below stores at most one
       pointer each time it consumes a delimiter character and at most two
       each time it consumes an escape character; the extra two slots hold
       the first argument and the NULL terminator. */
    size_t len = 0;
    size_t cap = 2;
    for (; str[len] != '\0'; len++) {
        if (strchr(delim, str[len]))
            cap += 1;
        if (escape && strchr(escape, str[len]))
            cap += 2;
    }

    /* Private, writable copy of the input (malloc + memcpy rather than
       strdup, which is not part of ISO C before C23). */
    char* split = malloc(len + 1);
    if (!split)
        return NULL;
    memcpy(split, str, len + 1);

    /* calloc checks the cap * sizeof multiplication for overflow. */
    char** ret = calloc(cap, sizeof *ret);
    if (!ret) {
        free(split);
        return NULL;
    }

    int argc = 0;
    /* Non-zero while the argument starting at `split` has not been stored
       yet (set when a delimiter run is immediately followed by an escape
       character, so the escape logic can decide what to store). */
    int pending = 0;
    ret[argc++] = split;

    /* Second pass: split the copy in place. */
    while (*split) {
        size_t run = escape ? strspn(split, escape) : 0;
        if (run) {
            /* Remember the first escape character for matching. */
            char c = *split;
            char* body = split + run;
            char* match = strchr(body, c);

            if (!match) {
                /* No matching character: treat the run as ordinary text.
                   Only store a pointer if this argument's start has not
                   been stored already, and resume right after the run so
                   the next character is still examined. */
                if (pending) {
                    ret[argc++] = split;
                    pending = 0;
                }
                split = body;
                continue;
            }

            /* Terminate whatever precedes the run, then store the escaped
               body as its own argument. */
            *split = '\0';
            ret[argc++] = body;
            pending = 0;
            *match = '\0';
            split = match + 1;

            /* Text that directly follows the closing character starts a new
               argument, unless it is a delimiter (handled below) or fewer
               than two characters remain.
               NOTE(review): a single trailing character here is dropped, as
               in the original code ("don't return the last character as an
               argument") - presumably to discard a trailing newline; confirm
               this is still wanted. */
            if (!strspn(split, delim) && split[0] != '\0' && split[1] != '\0')
                ret[argc++] = split;

            /* Never step past the terminator. */
            if (*split == '\0')
                break;
        }

        run = strspn(split, delim);
        if (run) {
            *split = '\0';
            split += run;
            /* If an escape character follows, let the escape logic above
               decide where the next argument starts. */
            if (escape && strspn(split, escape)) {
                pending = 1;
                continue;
            }
            ret[argc++] = split;
        } else {
            split++;
        }
    }

    ret[argc] = NULL;
    *_argc = argc;
    return ret;
}