+added UTF-8 BOM detection even when MB_ENABLE_UNICODE is disabled; *fixed a bug in ASC with UTF-8 characters.

This commit is contained in:
paladin-t 2016-05-19 10:37:12 +08:00
parent 848485be25
commit f83c12ca57
3 changed files with 23 additions and 12 deletions

View File

@ -1,3 +1,7 @@
May. 19 2016
Added UTF8 BOM detection even with MB_ENABLE_UNICODE disabled
Fixed a bug in ASC with UTF8 characters
May. 17 2016
Fixed an evaluation bug when accessing a collection by brackets
Fixed a memory leak with expression calculation

View File

@ -222,6 +222,7 @@ static const char* _ERR_DESC[] = {
"Syntax error", "Syntax error",
"Invalid data type", "Invalid data type",
"Type does not match", "Type does not match",
"Number overflow",
"Invalid string", "Invalid string",
"Index out of bound", "Index out of bound",
"Cannot find with given index", "Cannot find with given index",
@ -1193,8 +1194,8 @@ static char* mb_strupr(char* s);
/** Unicode handling */ /** Unicode handling */
#ifdef MB_ENABLE_UNICODE
static int mb_uu_getbom(const char** ch); static int mb_uu_getbom(const char** ch);
#ifdef MB_ENABLE_UNICODE
static int mb_uu_ischar(const char* ch); static int mb_uu_ischar(const char* ch);
static int mb_uu_strlen(const char* ch); static int mb_uu_strlen(const char* ch);
static int mb_uu_substr(const char* ch, int begin, int count, char** o); static int mb_uu_substr(const char* ch, int begin, int count, char** o);
@ -2922,7 +2923,6 @@ static char* mb_strupr(char* s) {
/** Unicode handling */ /** Unicode handling */
#ifdef MB_ENABLE_UNICODE
/* Determine whether a string begins with a BOM, and ignore it */ /* Determine whether a string begins with a BOM, and ignore it */
static int mb_uu_getbom(const char** ch) { static int mb_uu_getbom(const char** ch) {
if(!ch && !(*ch)) if(!ch && !(*ch))
@ -2941,6 +2941,7 @@ static int mb_uu_getbom(const char** ch) {
return 0; return 0;
} }
#ifdef MB_ENABLE_UNICODE
/* Determine whether a buffer is a UTF8 encoded character, and return taken bytes */ /* Determine whether a buffer is a UTF8 encoded character, and return taken bytes */
static int mb_uu_ischar(const char* ch) { static int mb_uu_ischar(const char* ch) {
/* Copyright 2008, 2009 Bjoern Hoehrmann, http://bjoern.hoehrmann.de/utf-8/decoder/dfa/ */ /* Copyright 2008, 2009 Bjoern Hoehrmann, http://bjoern.hoehrmann.de/utf-8/decoder/dfa/ */
@ -4296,7 +4297,6 @@ static char* _load_file(mb_interpreter_t* s, const char* f, const char* prefix)
if(prefix) if(prefix)
memcpy(buf, prefix, i); memcpy(buf, prefix, i);
fread(buf + i, 1, l, fp); fread(buf + i, 1, l, fp);
#ifdef MB_ENABLE_UNICODE
do { do {
char* off = buf + i; char* off = buf + i;
int b = mb_uu_getbom((const char**)&off); int b = mb_uu_getbom((const char**)&off);
@ -4305,7 +4305,6 @@ static char* _load_file(mb_interpreter_t* s, const char* f, const char* prefix)
buf[l - b] = _ZERO_CHAR; buf[l - b] = _ZERO_CHAR;
} }
} while(0); } while(0);
#endif /* MB_ENABLE_UNICODE */
fclose(fp); fclose(fp);
buf[l] = _ZERO_CHAR; buf[l] = _ZERO_CHAR;
} }
@ -4876,7 +4875,7 @@ static _data_e _get_symbol_type(mb_interpreter_t* s, char* sym, _raw_t* value) {
if(s->import_handler && s->import_handler(s, sym + 1) == MB_FUNC_OK) { if(s->import_handler && s->import_handler(s, sym + 1) == MB_FUNC_OK) {
_ls_pushback(context->imported, mb_strdup(sym + 1, strlen(sym + 1) + 1)); _ls_pushback(context->imported, mb_strdup(sym + 1, strlen(sym + 1) + 1));
} else { } else {
_handle_error_now(s, SE_PS_FILE_OPEN_FAILED, s->source_file, MB_FUNC_ERR); _handle_error_now(s, SE_PS_OPEN_FILE_FAILED, s->source_file, MB_FUNC_ERR);
} }
} }
} }
@ -11923,9 +11922,7 @@ int mb_load_string(struct mb_interpreter_t* s, const char* l, bool_t reset) {
context = s->parsing_context; context = s->parsing_context;
#ifdef MB_ENABLE_UNICODE
mb_uu_getbom(&l); mb_uu_getbom(&l);
#endif /* MB_ENABLE_UNICODE */
while(*l) { while(*l) {
int n = 1; int n = 1;
#ifdef MB_ENABLE_UNICODE_ID #ifdef MB_ENABLE_UNICODE_ID
@ -11988,7 +11985,7 @@ int mb_load_file(struct mb_interpreter_t* s, const char* f) {
if(result) if(result)
goto _exit; goto _exit;
} else { } else {
_set_current_error(s, SE_PS_FILE_OPEN_FAILED, 0); _set_current_error(s, SE_PS_OPEN_FILE_FAILED, 0);
result = MB_FUNC_ERR; result = MB_FUNC_ERR;
} }
@ -14928,7 +14925,9 @@ static int _std_asc(mb_interpreter_t* s, void** l) {
int result = MB_FUNC_OK; int result = MB_FUNC_OK;
char* arg = 0; char* arg = 0;
int_t val = 0; int_t val = 0;
#ifdef MB_ENABLE_UNICODE
size_t sz = 0; size_t sz = 0;
#endif /* MB_ENABLE_UNICODE */
mb_assert(s && l); mb_assert(s && l);
@ -14943,9 +14942,16 @@ static int _std_asc(mb_interpreter_t* s, void** l) {
goto _exit; goto _exit;
} }
sz = strlen(arg); #ifdef MB_ENABLE_UNICODE
if(sizeof(int_t) < sz) sz = sizeof(int_t); sz = (size_t)mb_uu_ischar(arg);
if(sizeof(int_t) < sz) {
sz = sizeof(int_t);
_handle_error_on_obj(s, SE_RN_NUMBER_OVERFLOW, s->source_file, TON(l), MB_FUNC_WARNING, _exit, result);
}
memcpy(&val, arg, sz); memcpy(&val, arg, sz);
#else /* MB_ENABLE_UNICODE */
val = (int_t)arg[0];
#endif /* MB_ENABLE_UNICODE */
mb_check(mb_push_int(s, l, val)); mb_check(mb_push_int(s, l, val));
_exit: _exit:

View File

@ -342,11 +342,11 @@ struct mb_interpreter_t;
typedef enum mb_error_e { typedef enum mb_error_e {
SE_NO_ERR = 0, SE_NO_ERR = 0,
/** Common */ /** Common */
SE_CM_MB_OPEN_FAILED, SE_CM_OPEN_MB_FAILED,
SE_CM_FUNC_EXISTS, SE_CM_FUNC_EXISTS,
SE_CM_FUNC_NOT_EXISTS, SE_CM_FUNC_NOT_EXISTS,
/** Parsing */ /** Parsing */
SE_PS_FILE_OPEN_FAILED, SE_PS_OPEN_FILE_FAILED,
SE_PS_SYMBOL_TOO_LONG, SE_PS_SYMBOL_TOO_LONG,
SE_PS_INVALID_CHAR, SE_PS_INVALID_CHAR,
/** Running */ /** Running */
@ -355,6 +355,7 @@ typedef enum mb_error_e {
SE_RN_SYNTAX_ERROR, SE_RN_SYNTAX_ERROR,
SE_RN_INVALID_DATA_TYPE, SE_RN_INVALID_DATA_TYPE,
SE_RN_TYPE_NOT_MATCH, SE_RN_TYPE_NOT_MATCH,
SE_RN_NUMBER_OVERFLOW,
SE_RN_INVALID_STRING, SE_RN_INVALID_STRING,
SE_RN_INDEX_OUT_OF_BOUND, SE_RN_INDEX_OUT_OF_BOUND,
SE_RN_CANNOT_FIND_WITH_GIVEN_INDEX, SE_RN_CANNOT_FIND_WITH_GIVEN_INDEX,