Universal escaping in KConfigINI: Summary and patch

Thu May 24 10:12:35 BST 2007

On Thu, May 24, 2007 at 03:55:56AM +0200, Andreas Hartmetz wrote:
> It encodes as \x<nn> (<nn> being two lowercase hex chars):
> - chars with value < 32 except for '\n' (LF), '\r' (CR), and '\t'
> (tab).
>   There already are standard encodings for them, "\n", "\r", and "\t".
>
good.

> - '['
> - ']'
> - '='
>
do not escape them in values. it brings no advantage to simple textual
processing (any reasonable regexp looks for the first occurrence and
swallows the rest without looking at it), but it makes it harder to read
for a human.

> The patch also contains some porting to the new QByteArray, which is
> automatically zero-terminated, and has a constData() function that
> avoids deep copying the raw data.
>
i would commit this separately before the rest. even though common
practice in kde, mixing unrelated changes is generally no good idea.

> --- kdelibs/kdecore/config/kconfigini.cpp	(Revision 667794)
> +++ kdelibs/kdecore/config/kconfigini.cpp	(Arbeitskopie)
> @@ -53,163 +53,186 @@
>  #include "kconfig.h"
>  
>  extern bool checkAccess(const QString& pathname, int mode);
> +
> +inline static char hexToChar(const char *str)
> +{
> +    char ret = 0;
> +    for (int i = 0; i <= 1; i++) {
> +        ret <<= 4;
> +        switch (str[i]) {
> +        case '0': break;
> +        case '1': ret |= 0x01; break;
> +        case '2': ret |= 0x02; break;
> +        case '3': ret |= 0x03; break;
> +        case '4': ret |= 0x04; break;
> +        case '5': ret |= 0x05; break;
> +        case '6': ret |= 0x06; break;
> +        case '7': ret |= 0x07; break;
> +        case '8': ret |= 0x08; break;
> +        case '9': ret |= 0x09; break;
> +        case 'a': ret |= 0x0a; break;
> +        case 'b': ret |= 0x0b; break;
> +        case 'c': ret |= 0x0c; break;
> +        case 'd': ret |= 0x0d; break;
> +        case 'e': ret |= 0x0e; break;
> +        case 'f': ret |= 0x0f; break;
> +        default:
>
print warning.

> +            return 'x';
> +        }
> +    }
> +    return ret;
> +}
> +
> +inline static void charToHex(char c, char *str)
> +{
> +    *str = 'x';
> +    for (int i = 1; i <= 2; i++) {
> +        switch (c & 0xf0) {
> +        case 0x00: str[i] = '0'; break;
> +        case 0x10: str[i] = '1'; break;
> +        case 0x20: str[i] = '2'; break;
> +        case 0x30: str[i] = '3'; break;
> +        case 0x40: str[i] = '4'; break;
> +        case 0x50: str[i] = '5'; break;
> +        case 0x60: str[i] = '6'; break;
> +        case 0x70: str[i] = '7'; break;
> +        case 0x80: str[i] = '8'; break;
> +        case 0x90: str[i] = '9'; break;
> +        case 0xa0: str[i] = 'a'; break;
> +        case 0xb0: str[i] = 'b'; break;
> +        case 0xc0: str[i] = 'c'; break;
> +        case 0xd0: str[i] = 'd'; break;
> +        case 0xe0: str[i] = 'e'; break;
> +        case 0xf0: str[i] = 'f'; break;
> +        }
> +        c <<= 4;
> +    }
> +}
> +
these two are "somewhat" unelegant. charToHex is usually done by
indexing a static const char array, hexToChar by calling hexToNibble
twice, which in turn uses range comparisons instead of a massive case
block - these implementations are both shorter and faster. also, parsing
uppercase hex might be not wrong to avoid surprises.

>  /* translate escaped escape sequences to their actual values. */
>  static QByteArray printableToString(const char *str, int l)
>  {
>      // Strip leading white-space.
> -    while((l>0) &&
> -          ((*str == ' ') || (*str == '\t') || (*str == '\r')))
> -    {
> +    while((l > 0) && ((*str == ' ') || (*str == '\t') || (*str == '\r'))) {
>          str++; l--;
>      }
>  
>      // Strip trailing white-space.
> -    while((l>0) &&
> -          ((str[l-1] == ' ') || (str[l-1] == '\t') || (str[l-1] == '\r')))
> -    {
> +    while((l>0) && ((str[l-1] == ' ') || (str[l-1] == '\t') || (str[l-1] == '\r'))) {
>          l--;
>      }
>  
i find it counterproductive that you joined the lines. it's really
barely readable now.
but you could add spaces between keywords and opening parens if you
like. in a separate commit, that is. :)

> -    QByteArray result(l + 1, 0);
> +    QByteArray result(l, 0);
>      char *r = result.data();
>  
> -    for(int i = 0; i < l;i++, str++)
> -    {
> -        if (*str == '\\')
> -        {
> -            i++, str++;
> -            if (i >= l) // End of line. (Line ends with single slash)
> -            {
> -                *r++ = '\\';
> +    for(int i = 0; i < l; i++, r++) {
> +        if (str[i] != '\\') {
> +
> +            *r = str[i];
> +
> +        } else {
> +            // Probable escape sequence
> +            i++;
> +            if (i >= l) { // Line ends after backslash - stop.
> +                *r = '\\';
>                  break;
>              }
> -            switch(*str)
> -            {
> +
> +            switch(str[i]) {
>              case 's':
> -                *r++ = ' ';
> +                *r = ' ';
>                  break;
>              case 't':
> -                *r++ = '\t';
> +                *r = '\t';
>                  break;
>              case 'n':
> -                *r++ = '\n';
> +                *r = '\n';
>                  break;
>              case 'r':
> -                *r++ = '\r';
> +                *r = '\r';
>                  break;
>              case '\\':
> -                *r++ = '\\';
> +                *r = '\\';
> +                break;
> +            case 'x':
> +                if (i + 2 < l) {
> +                    *r = hexToChar(str + i);
> +                    i += 2;
> +                } else {

> +                    *r = 'x';
>
print warning.

> +                    i = l - 1;
> +                }
>                  break;

>              default:
> -                *r++ = '\\';
> -                *r++ = *str;
> +                *r = '\\';
> +                i--; // Don't eat a char we didn't use
>
that part pure evil (yes, i know you didn't change the behavior). this
is a recipe for incompatibility. it should print a warning and eat the
char, so the mistake becomes more or less obvious.

>              }
>          }
> -        else
> -        {
> -            *r++ = *str;
> -        }
>      }

> -    result.truncate(r-result.data());
> +    result.truncate(r - result.constData());
>      return result;
>  }
>  

-- 
Hi! I'm a .signature virus! Copy me into your ~/.signature, please!
--
Chaos, panic, and disorder - my work here is done.