/* This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ #include #include #include #include "prefread.h" #include "nsString.h" #include "nsUTF8Utils.h" #ifdef TEST_PREFREAD #include #define NS_WARNING(_s) printf(">>> " _s "!\n") #define NS_NOTREACHED(_s) NS_WARNING(_s) #else #include "nsDebug.h" // for NS_WARNING #endif /* pref parser states */ enum { PREF_PARSE_INIT, PREF_PARSE_MATCH_STRING, PREF_PARSE_UNTIL_NAME, PREF_PARSE_QUOTED_STRING, PREF_PARSE_UNTIL_COMMA, PREF_PARSE_UNTIL_VALUE, PREF_PARSE_INT_VALUE, PREF_PARSE_COMMENT_MAYBE_START, PREF_PARSE_COMMENT_BLOCK, PREF_PARSE_COMMENT_BLOCK_MAYBE_END, PREF_PARSE_ESC_SEQUENCE, PREF_PARSE_HEX_ESCAPE, PREF_PARSE_UTF16_LOW_SURROGATE, PREF_PARSE_UNTIL_OPEN_PAREN, PREF_PARSE_UNTIL_CLOSE_PAREN, PREF_PARSE_UNTIL_SEMICOLON, PREF_PARSE_UNTIL_EOL }; #define UTF16_ESC_NUM_DIGITS 4 #define HEX_ESC_NUM_DIGITS 2 #define BITS_PER_HEX_DIGIT 4 static const char kUserPref[] = "user_pref"; static const char kPref[] = "pref"; static const char kTrue[] = "true"; static const char kFalse[] = "false"; /** * pref_GrowBuf * * this function will increase the size of the buffer owned * by the given pref parse state. We currently use a simple * doubling algorithm, but the only hard requirement is that * it increase the buffer by at least the size of the ps->esctmp * buffer used for escape processing (currently 6 bytes). * * this buffer is used to store partial pref lines. it is * freed when the parse state is destroyed. * * @param ps * parse state instance * * this function updates all pointers that reference an * address within lb since realloc may relocate the buffer. * * @return false if insufficient memory. */ static bool pref_GrowBuf(PrefParseState *ps) { int bufLen, curPos, valPos; bufLen = ps->lbend - ps->lb; curPos = ps->lbcur - ps->lb; valPos = ps->vb - ps->lb; if (bufLen == 0) bufLen = 128; /* default buffer size */ else bufLen <<= 1; /* double buffer size */ #ifdef TEST_PREFREAD fprintf(stderr, ">>> realloc(%d)\n", bufLen); #endif ps->lb = (char*) realloc(ps->lb, bufLen); if (!ps->lb) return false; ps->lbcur = ps->lb + curPos; ps->lbend = ps->lb + bufLen; ps->vb = ps->lb + valPos; return true; } /** * pref_DoCallback * * this function is called when a complete pref name-value pair has * been extracted from the input data. * * @param ps * parse state instance * * @return false to indicate a fatal error. */ static bool pref_DoCallback(PrefParseState *ps) { PrefValue value; switch (ps->vtype) { case PREF_STRING: value.stringVal = ps->vb; break; case PREF_INT: if ((ps->vb[0] == '-' || ps->vb[0] == '+') && ps->vb[1] == '\0') { NS_WARNING("malformed integer value"); return false; } value.intVal = atoi(ps->vb); break; case PREF_BOOL: value.boolVal = (ps->vb == kTrue); break; default: break; } (*ps->reader)(ps->closure, ps->lb, value, ps->vtype, ps->fdefault); return true; } void PREF_InitParseState(PrefParseState *ps, PrefReader reader, void *closure) { memset(ps, 0, sizeof(*ps)); ps->reader = reader; ps->closure = closure; } void PREF_FinalizeParseState(PrefParseState *ps) { if (ps->lb) free(ps->lb); } /** * Pseudo-BNF * ---------- * function = LJUNK function-name JUNK function-args * function-name = "user_pref" | "pref" * function-args = "(" JUNK pref-name JUNK "," JUNK pref-value JUNK ")" JUNK ";" * pref-name = quoted-string * pref-value = quoted-string | "true" | "false" | integer-value * JUNK = *(WS | comment-block | comment-line) * LJUNK = *(WS | comment-block | comment-line | bcomment-line) * WS = SP | HT | LF | VT | FF | CR * SP = * HT = * LF = * VT = * FF = * CR = * comment-block = * comment-line = * bcomment-line = */ bool PREF_ParseBuf(PrefParseState *ps, const char *buf, int bufLen) { const char *end; char c; char udigit; int state; state = ps->state; for (end = buf + bufLen; buf != end; ++buf) { c = *buf; switch (state) { /* initial state */ case PREF_PARSE_INIT: if (ps->lbcur != ps->lb) { /* reset state */ ps->lbcur = ps->lb; ps->vb = nullptr; ps->vtype = PREF_INVALID; ps->fdefault = false; } switch (c) { case '/': /* begin comment block or line? */ state = PREF_PARSE_COMMENT_MAYBE_START; break; case '#': /* accept shell style comments */ state = PREF_PARSE_UNTIL_EOL; break; case 'u': /* indicating user_pref */ case 'p': /* indicating pref */ ps->smatch = (c == 'u' ? kUserPref : kPref); ps->sindex = 1; ps->nextstate = PREF_PARSE_UNTIL_OPEN_PAREN; state = PREF_PARSE_MATCH_STRING; break; /* else skip char */ } break; /* string matching */ case PREF_PARSE_MATCH_STRING: if (c == ps->smatch[ps->sindex++]) { /* if we've matched all characters, then move to next state. */ if (ps->smatch[ps->sindex] == '\0') { state = ps->nextstate; ps->nextstate = PREF_PARSE_INIT; /* reset next state */ } /* else wait for next char */ } else { NS_WARNING("malformed pref file"); return false; } break; /* quoted string parsing */ case PREF_PARSE_QUOTED_STRING: /* we assume that the initial quote has already been consumed */ if (ps->lbcur == ps->lbend && !pref_GrowBuf(ps)) return false; /* out of memory */ if (c == '\\') state = PREF_PARSE_ESC_SEQUENCE; else if (c == ps->quotechar) { *ps->lbcur++ = '\0'; state = ps->nextstate; ps->nextstate = PREF_PARSE_INIT; /* reset next state */ } else *ps->lbcur++ = c; break; /* name parsing */ case PREF_PARSE_UNTIL_NAME: if (c == '\"' || c == '\'') { ps->fdefault = (ps->smatch == kPref); ps->quotechar = c; ps->nextstate = PREF_PARSE_UNTIL_COMMA; /* return here when done */ state = PREF_PARSE_QUOTED_STRING; } else if (c == '/') { /* allow embedded comment */ ps->nextstate = state; /* return here when done with comment */ state = PREF_PARSE_COMMENT_MAYBE_START; } else if (!isspace(c)) { NS_WARNING("malformed pref file"); return false; } break; /* parse until we find a comma separating name and value */ case PREF_PARSE_UNTIL_COMMA: if (c == ',') { ps->vb = ps->lbcur; state = PREF_PARSE_UNTIL_VALUE; } else if (c == '/') { /* allow embedded comment */ ps->nextstate = state; /* return here when done with comment */ state = PREF_PARSE_COMMENT_MAYBE_START; } else if (!isspace(c)) { NS_WARNING("malformed pref file"); return false; } break; /* value parsing */ case PREF_PARSE_UNTIL_VALUE: /* the pref value type is unknown. so, we scan for the first * character of the value, and determine the type from that. */ if (c == '\"' || c == '\'') { ps->vtype = PREF_STRING; ps->quotechar = c; ps->nextstate = PREF_PARSE_UNTIL_CLOSE_PAREN; state = PREF_PARSE_QUOTED_STRING; } else if (c == 't' || c == 'f') { ps->vb = (char *) (c == 't' ? kTrue : kFalse); ps->vtype = PREF_BOOL; ps->smatch = ps->vb; ps->sindex = 1; ps->nextstate = PREF_PARSE_UNTIL_CLOSE_PAREN; state = PREF_PARSE_MATCH_STRING; } else if (isdigit(c) || (c == '-') || (c == '+')) { ps->vtype = PREF_INT; /* write c to line buffer... */ if (ps->lbcur == ps->lbend && !pref_GrowBuf(ps)) return false; /* out of memory */ *ps->lbcur++ = c; state = PREF_PARSE_INT_VALUE; } else if (c == '/') { /* allow embedded comment */ ps->nextstate = state; /* return here when done with comment */ state = PREF_PARSE_COMMENT_MAYBE_START; } else if (!isspace(c)) { NS_WARNING("malformed pref file"); return false; } break; case PREF_PARSE_INT_VALUE: /* grow line buffer if necessary... */ if (ps->lbcur == ps->lbend && !pref_GrowBuf(ps)) return false; /* out of memory */ if (isdigit(c)) *ps->lbcur++ = c; else { *ps->lbcur++ = '\0'; /* stomp null terminator; we are done. */ if (c == ')') state = PREF_PARSE_UNTIL_SEMICOLON; else if (c == '/') { /* allow embedded comment */ ps->nextstate = PREF_PARSE_UNTIL_CLOSE_PAREN; state = PREF_PARSE_COMMENT_MAYBE_START; } else if (isspace(c)) state = PREF_PARSE_UNTIL_CLOSE_PAREN; else { NS_WARNING("malformed pref file"); return false; } } break; /* comment parsing */ case PREF_PARSE_COMMENT_MAYBE_START: switch (c) { case '*': /* comment block */ state = PREF_PARSE_COMMENT_BLOCK; break; case '/': /* comment line */ state = PREF_PARSE_UNTIL_EOL; break; default: /* pref file is malformed */ NS_WARNING("malformed pref file"); return false; } break; case PREF_PARSE_COMMENT_BLOCK: if (c == '*') state = PREF_PARSE_COMMENT_BLOCK_MAYBE_END; break; case PREF_PARSE_COMMENT_BLOCK_MAYBE_END: switch (c) { case '/': state = ps->nextstate; ps->nextstate = PREF_PARSE_INIT; break; case '*': /* stay in this state */ break; default: state = PREF_PARSE_COMMENT_BLOCK; } break; /* string escape sequence parsing */ case PREF_PARSE_ESC_SEQUENCE: /* not necessary to resize buffer here since we should be writing * only one character and the resize check would have been done * for us in the previous state */ switch (c) { case '\"': case '\'': case '\\': break; case 'r': c = '\r'; break; case 'n': c = '\n'; break; case 'x': /* hex escape -- always interpreted as Latin-1 */ case 'u': /* UTF16 escape */ ps->esctmp[0] = c; ps->esclen = 1; ps->utf16[0] = ps->utf16[1] = 0; ps->sindex = (c == 'x' ) ? HEX_ESC_NUM_DIGITS : UTF16_ESC_NUM_DIGITS; state = PREF_PARSE_HEX_ESCAPE; continue; default: NS_WARNING("preserving unexpected JS escape sequence"); /* Invalid escape sequence so we do have to write more than * one character. Grow line buffer if necessary... */ if ((ps->lbcur+1) == ps->lbend && !pref_GrowBuf(ps)) return false; /* out of memory */ *ps->lbcur++ = '\\'; /* preserve the escape sequence */ break; } *ps->lbcur++ = c; state = PREF_PARSE_QUOTED_STRING; break; /* parsing a hex (\xHH) or utf16 escape (\uHHHH) */ case PREF_PARSE_HEX_ESCAPE: if ( c >= '0' && c <= '9' ) udigit = (c - '0'); else if ( c >= 'A' && c <= 'F' ) udigit = (c - 'A') + 10; else if ( c >= 'a' && c <= 'f' ) udigit = (c - 'a') + 10; else { /* bad escape sequence found, write out broken escape as-is */ NS_WARNING("preserving invalid or incomplete hex escape"); *ps->lbcur++ = '\\'; /* original escape slash */ if ((ps->lbcur + ps->esclen) >= ps->lbend && !pref_GrowBuf(ps)) return false; for (int i = 0; i < ps->esclen; ++i) *ps->lbcur++ = ps->esctmp[i]; /* push the non-hex character back for re-parsing. */ /* (++buf at the top of the loop keeps this safe) */ --buf; state = PREF_PARSE_QUOTED_STRING; continue; } /* have a digit */ ps->esctmp[ps->esclen++] = c; /* preserve it */ ps->utf16[1] <<= BITS_PER_HEX_DIGIT; ps->utf16[1] |= udigit; ps->sindex--; if (ps->sindex == 0) { /* have the full escape. Convert to UTF8 */ int utf16len = 0; if (ps->utf16[0]) { /* already have a high surrogate, this is a two char seq */ utf16len = 2; } else if (0xD800 == (0xFC00 & ps->utf16[1])) { /* a high surrogate, can't convert until we have the low */ ps->utf16[0] = ps->utf16[1]; ps->utf16[1] = 0; state = PREF_PARSE_UTF16_LOW_SURROGATE; break; } else { /* a single utf16 character */ ps->utf16[0] = ps->utf16[1]; utf16len = 1; } /* actual conversion */ /* make sure there's room, 6 bytes is max utf8 len (in */ /* theory; 4 bytes covers the actual utf16 range) */ if (ps->lbcur+6 >= ps->lbend && !pref_GrowBuf(ps)) return false; ConvertUTF16toUTF8 converter(ps->lbcur); converter.write(ps->utf16, utf16len); ps->lbcur += converter.Size(); state = PREF_PARSE_QUOTED_STRING; } break; /* looking for beginning of utf16 low surrogate */ case PREF_PARSE_UTF16_LOW_SURROGATE: if (ps->sindex == 0 && c == '\\') { ++ps->sindex; } else if (ps->sindex == 1 && c == 'u') { /* escape sequence is correct, now parse hex */ ps->sindex = UTF16_ESC_NUM_DIGITS; ps->esctmp[0] = 'u'; ps->esclen = 1; state = PREF_PARSE_HEX_ESCAPE; } else { /* didn't find expected low surrogate. Ignore high surrogate * (it would just get converted to nothing anyway) and start * over with this character */ --buf; if (ps->sindex == 1) state = PREF_PARSE_ESC_SEQUENCE; else state = PREF_PARSE_QUOTED_STRING; continue; } break; /* function open and close parsing */ case PREF_PARSE_UNTIL_OPEN_PAREN: /* tolerate only whitespace and embedded comments */ if (c == '(') state = PREF_PARSE_UNTIL_NAME; else if (c == '/') { ps->nextstate = state; /* return here when done with comment */ state = PREF_PARSE_COMMENT_MAYBE_START; } else if (!isspace(c)) { NS_WARNING("malformed pref file"); return false; } break; case PREF_PARSE_UNTIL_CLOSE_PAREN: /* tolerate only whitespace and embedded comments */ if (c == ')') state = PREF_PARSE_UNTIL_SEMICOLON; else if (c == '/') { ps->nextstate = state; /* return here when done with comment */ state = PREF_PARSE_COMMENT_MAYBE_START; } else if (!isspace(c)) { NS_WARNING("malformed pref file"); return false; } break; /* function terminator ';' parsing */ case PREF_PARSE_UNTIL_SEMICOLON: /* tolerate only whitespace and embedded comments */ if (c == ';') { if (!pref_DoCallback(ps)) return false; state = PREF_PARSE_INIT; } else if (c == '/') { ps->nextstate = state; /* return here when done with comment */ state = PREF_PARSE_COMMENT_MAYBE_START; } else if (!isspace(c)) { NS_WARNING("malformed pref file"); return false; } break; /* eol parsing */ case PREF_PARSE_UNTIL_EOL: /* need to handle mac, unix, or dos line endings. * PREF_PARSE_INIT will eat the next \n in case * we have \r\n. */ if (c == '\r' || c == '\n' || c == 0x1A) { state = ps->nextstate; ps->nextstate = PREF_PARSE_INIT; /* reset next state */ } break; } } ps->state = state; return true; } #ifdef TEST_PREFREAD static void pref_reader(void *closure, const char *pref, PrefValue val, PrefType type, bool defPref) { printf("%spref(\"%s\", ", defPref ? "" : "user_", pref); switch (type) { case PREF_STRING: printf("\"%s\");\n", val.stringVal); break; case PREF_INT: printf("%i);\n", val.intVal); break; case PREF_BOOL: printf("%s);\n", val.boolVal == false ? "false" : "true"); break; } } int main(int argc, char **argv) { PrefParseState ps; char buf[4096]; /* i/o buffer */ FILE *fp; int n; if (argc == 1) { printf("usage: prefread file.js\n"); return -1; } fp = fopen(argv[1], "r"); if (!fp) { printf("failed to open file\n"); return -1; } PREF_InitParseState(&ps, pref_reader, nullptr); while ((n = fread(buf, 1, sizeof(buf), fp)) > 0) PREF_ParseBuf(&ps, buf, n); PREF_FinalizeParseState(&ps); fclose(fp); return 0; } #endif /* TEST_PREFREAD */