#include "EXTERN.h" #include "perl.h" #include "XSUB.h" #include "ppport.h" /* Grammar: INTEGER: /-?(?:0|[1-9][0-9]+)/x FLOAT: /-?(?:0|[1-9][0-9]+)\.([0-9]+)/x BAREWORD: / [A-Za-z_]+ [0-9A-Za-z_]* /x Q_STRING: /'(?:\\\\|\\'|.)*'/s QQ_STRING: /"(?:\\x\{[A-Fa-f0-9]+\}|\\[0-6]{1,2}(?![0-6])|\\[0-6]{3}|\\.|.)*"/i SIMPLE_COMMA: ',' FAT_COMMA: '=>' OPEN_HASH: '{' CLOSE_HASH: '}' OPEN_ARRAY: '[' CLOSE_ARRAY: ']' UNDEF: 'undef' VALUE: UNDEF | INTEGER | FLOAT | QUOTED_STRING | HASH | ARRAY KEY: INTEGER | FLOAT | BAREWORD | QUOTED_STRING KEY_VALUE_LIST: (KEY FAT_COMMA VALUE (SIMPLE_COMMA KEY FAT_COMMA VALUE)*)? VALUE_LIST: (VALUE (SIMPLE_COMMA VALUE)*)? HASH: OPEN_HASH KEY_VALUE_LIST CLOSE_HASH ARRAY: OPEN_ARRAY VALUE_LIST CLOSE_ARRAY */ /* 1 shows errors, 2 trace */ #define MYDEBUG 0 #define TOKEN_ERROR 0 #define TOKEN_COMMA 1 #define TOKEN_OPEN 2 #define TOKEN_CLOSE 3 #define TOKEN_UNDEF 4 #define TOKEN_IV 5 #define TOKEN_NV 6 #define TOKEN_BAREWORD 7 #define TOKEN_Q_STRING 8 #define TOKEN_QQ_STRING 9 #define TOKEN_WS 10 #define TOKEN_BLESS 11 #define TOKEN_REF 12 #define TOKEN_UNKNOWN 13 #define COND_ISWHITE(ch) ( (ch) == ' ' || (ch) == '\n' || (ch) == '\t' || (ch) == '\r' ) #define CASE_ISWHITE ' ': case '\n': case '\t': case '\r' #define EAT_WHITES_AND_COMMENTS(p) STMT_START { while ( COND_ISWHITE(*p) || *p == '#' ) { if (*p == '#') { while ( *p && *p != '\n' ) p++; } else p++; } } STMT_END const char * const token_name[]= { "TOKEN_ERROR", "TOKEN_COMMA", "TOKEN_OPEN", "TOKEN_CLOSE", "TOKEN_UNDEF", "TOKEN_IV", "TOKEN_NV", "TOKEN_BAREWORD", "TOKEN_Q_STRING", "TOKEN_QQ_STRING", "TOKEN_WS", "TOKEN_BLESS", "TOKEN_UNKNOWN" }; const char bareword_start[]= { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 0, 0, 0, 0, 95, 0, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const char bareword_rest[]= { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 0, 0, 0, 0, 0, 0, 0, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 0, 0, 0, 0, 95, 0, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; typedef struct parse_state { STRLEN string_len; const char *string_start; const char *string_end; SV *parse_sv; const char *parse_ptr; U32 line_num; } parse_state; typedef struct frame_state { const char *token_start; const char *first_escape; const char *key; STRLEN key_len; SV *thing; SV *got_key; SV *got; U8 token; U8 depth; char stop_char; U32 flags; U32 refs; } frame_state; SV* _undump(pTHX_ parse_state *ps, char obj_char, U8 call_depth); /* undump a DD style data structure. Takes a plain SV (magic not currently respected) * containing a DD Terse/Deepcopy style data dumper output (no $VAR1 = at the front * allowed currently) and returns either undef for a failed parse, or a scalar value * of the value parsed. * * Possible future enhancements: * qr// * ref to object. Eg \['foo'] * Make it possible to parse a list instead of a scalar. * Blessed objects? * Cyclic structures? * Less/more tolerant parsing rules? * Filters? (Block things by their position in the structure?) * Conversion? (IE, we have '[1,1,1]' in the input, and we know we wont need it * so parse it as '1,1,1' instead. */ SV* undump(pTHX_ SV* sv) { parse_state ps; SV *undumped= 0; if ( !SvOK(sv) ) { sv_setpv(ERRSV,"Bad argument\n"); return newSV(0); } ps.parse_sv= sv; ps.parse_ptr= ps.string_start= SvPV(sv, ps.string_len); ps.string_end= ps.string_start + ps.string_len; ps.line_num= 0; if ( SvLEN(sv) <= ps.string_len || ps.parse_ptr[ps.string_len] != 0 ) { sv_setpv(ERRSV,"Malformed input string in undump (missing tail null)\n"); return newSV(0); } EAT_WHITES_AND_COMMENTS(ps.parse_ptr); if (ps.parse_ptr < ps.string_end) { undumped= _undump(aTHX_ &ps, 0, 0); EAT_WHITES_AND_COMMENTS(ps.parse_ptr); } if (undumped) { if (ps.parse_ptr < ps.string_end) { sv_setpv(ERRSV,"Unhandled tail garbage\n"); SvREFCNT_dec(undumped); return newSV(0); } else { sv_setsv(ERRSV,&PL_sv_undef); return undumped; } } else { return newSV(0); } } #define fs_token (fs->token) #define fs_token_start (fs->token_start) #define fs_first_escape (fs->first_escape) #define fs_stop_char (fs->stop_char) #define fs_key (fs->key) #define fs_key_len (fs->key_len) #define fs_thing (fs->thing) #define fs_got_key (fs->got_key) #define fs_got (fs->got) #define fs_depth (fs->depth) #define fs_flags (fs->flags) #define fs_refs (fs->refs) #define ps_parse_sv (ps->parse_sv) #define ps_string_start (ps->string_start) #define ps_string_end (ps->string_end) #define ps_string_len (ps->string_len) #define ps_parse_ptr (ps->parse_ptr) #define ps_line_num (ps->line_num) #define fsf_WANT_KEY 0x00000001 #define fsf_ALLOW_COMMA 0x00000002 #define fsf_REQUIRE_FAT_COMMA 0x00000004 #define WANT_KEY(fs) (fs_flags & fsf_WANT_KEY) #define WANT_KEY_on(fs) (fs_flags= fs_flags | fsf_WANT_KEY) #define WANT_KEY_off(fs) (fs_flags= fs_flags & (~fsf_WANT_KEY)) #define ALLOW_COMMA(fs) (fs_flags & fsf_ALLOW_COMMA) #define ALLOW_COMMA_on(fs) (fs_flags= fs_flags | fsf_ALLOW_COMMA) #define ALLOW_COMMA_off(fs) (fs_flags= fs_flags & (~fsf_ALLOW_COMMA)) #define REQUIRE_FAT_COMMA(fs) (fs_flags & fsf_REQUIRE_FAT_COMMA) #define REQUIRE_FAT_COMMA_on(fs) (fs_flags= fs_flags | fsf_REQUIRE_FAT_COMMA) #define REQUIRE_FAT_COMMA_off(fs) (fs_flags= fs_flags & (~fsf_REQUIRE_FAT_COMMA)) #define DEPTH(D,T) ( ( (D) * 4 ) + ( ( (T) == TOKEN_OPEN || (T) == TOKEN_BLESS ) - ( (T) == TOKEN_CLOSE ) ) * 2 ) #define BAIL(ps,fs) STMT_START { \ if(fs_got) SvREFCNT_dec(fs_got); \ if(fs_got_key) SvREFCNT_dec(fs_got_key); \ if(fs_thing) SvREFCNT_dec(fs_thing); \ return 0; \ } STMT_END #define SHOW_POSITION( ps, fs, show_len ) STMT_START {\ int remaining= (ps_string_end) - (ps_parse_ptr); \ int token_len= (ps_parse_ptr) - (fs_token_start); \ int backup_len=0;\ const char *backup_pos;\ if (ps_string_start < fs_token_start) { \ backup_len= fs_token_start - ps_string_start; \ if (backup_len > show_len) { \ backup_len= show_len; \ } \ backup_pos= fs_token_start - backup_len; \ } else { \ backup_pos= 0; \ } \ warn("%*sprior:'%.*s'\n" \ "%*stoken:'%.*s'%s\n" \ "%*sto-go:'%.*s'%s\n\n", \ DEPTH((fs_depth),(fs_token)), "", \ backup_len, backup_pos, \ DEPTH((fs_depth),(fs_token)), "", \ ( ( token_len > (show_len) ) ? (show_len) : token_len) , (fs_token_start), \ ( ( token_len > (show_len) ) ? "..." : ""), \ DEPTH((fs_depth),(fs_token)), "", \ ( ( remaining > (show_len) ) ? (show_len) : remaining ) , (ps_parse_ptr), \ ( ( remaining > (show_len) ) ? "..." : "") \ ); \ } STMT_END #define PANIC(ps,fs,X) STMT_START { \ if (MYDEBUG) { \ warn("%*s%s\n",DEPTH((fs_depth),(fs_token)) , "", (X)); \ SHOW_POSITION(ps,fs, 32); \ } \ sv_setpvf( ERRSV, "%s\n", (X) ); \ BAIL(ps,fs);\ } STMT_END #define PANICf1(ps,fs,F,X) STMT_START { \ if (MYDEBUG) { \ warn("%*s" F "\n",DEPTH((fs_depth),(fs_token)),"", (X)); \ SHOW_POSITION(ps,fs, 32); \ }\ sv_setpvf( ERRSV, F "\n", (X) ); \ BAIL(ps,fs);\ } STMT_END #define PANICf2(ps,fs,F,X,Y) STMT_START { \ if (MYDEBUG) { \ warn("%*s" F "\n",DEPTH((fs_depth),(fs_token)),"", (X),(Y)); \ SHOW_POSITION(ps,fs, 32); \ } \ sv_setpvf( ERRSV, F "\n", (X), (Y)); \ BAIL(ps,fs);\ } STMT_END #define ERROR(ps,fs,X) STMT_START { \ if (MYDEBUG>1) { \ warn("%*s%s\n",DEPTH((fs_depth),(fs_token)) , "", (X)); \ SHOW_POSITION(ps,fs, 32); \ } \ sv_setpvf( ERRSV, "%s\n", (X) ); \ BAIL(ps,fs);\ } STMT_END #define ERRORf1(ps,fs,F,X) STMT_START { \ if (MYDEBUG>1) { \ warn("%*s" F "\n",DEPTH((fs_depth),(fs_token)),"", (X)); \ SHOW_POSITION(ps,fs, 32); \ } \ sv_setpvf( ERRSV, F "\n", (X) ); \ BAIL(ps,fs);\ } STMT_END #define ERRORf2(ps, fs, F,X,Y) STMT_START { \ if (MYDEBUG>1) { \ warn("%*s" F "\n",DEPTH((fs_depth),(fs_token)),"", (X),(Y)); \ SHOW_POSITION(ps, fs, 32); \ } \ sv_setpvf( ERRSV, F "\n", (X), (Y)); \ BAIL(ps,fs);\ } STMT_END #define SHOW_TOKEN(ps,fs) \ if (MYDEBUG>1) warn("%*s%-2d %*s %.*s\n", DEPTH((fs_depth), (fs_token)), "", \ fs_token, -20, token_name[((fs_token) 127) { ERROR(ps,fs,"Illegal character in input"); } } else if (*ps_parse_ptr == '$' || *ps_parse_ptr == '@') { ERROR(ps,fs,"Unescaped '$' and '@' are illegal in double quoted strings"); } else if (*ps_parse_ptr > 127) { ERROR(ps,fs,"Illegal character in input"); } ps_parse_ptr++; } if (ps_parse_ptr >= ps_string_end) { ERROR(ps,fs,"unterminated double quoted string"); } assert(*ps_parse_ptr == '"'); ps_parse_ptr++; /* skip over the trailing quote */ return TOKEN_QQ_STRING; } static inline U8 scan_single_quote(parse_state* const ps, frame_state* const fs) { fs_first_escape= 0; while (ps_parse_ptr < ps_string_end && *ps_parse_ptr != '\'') { /* check if its an escape */ if (*ps_parse_ptr == '\\') { if (!fs_first_escape) fs_first_escape= ps_parse_ptr; if (*++ps_parse_ptr > 127) { ERROR(ps,fs,"Illegal character in input"); } } else if (*ps_parse_ptr > 127) { ERROR(ps,fs,"Illegal character in input"); } ps_parse_ptr++; } if (ps_parse_ptr >= ps_string_end) { ERROR(ps,fs,"unterminated single quoted string"); } assert(*ps_parse_ptr == '\''); ps_parse_ptr++; /* skip over the trailing quote */ return TOKEN_Q_STRING; } /* recursively undump a DD style dump * * If called with an obj_char then we are building an object of that type, otherwise we are searching for * a new value. When we encounter a '[' we gobble it up and then call the child with '[' as the obj_char * where it then parses until it encounters a ']' which it eats, and then return whatever it built. * * We restrict to objects nested 100 items deep. * * If anything bogus in the input stream is encountered we free everything created so far, and return 0 * to unwind. * * Returns either an SV or NULL indicating an error. * */ SV* _undump(pTHX_ parse_state *ps, char obj_char, U8 call_depth) { char ch= 0; frame_state fss; frame_state *fs= &fss; fs_token= TOKEN_ERROR; fs_flags= 0; fs_refs= 0; fs_key= 0; fs_key_len= 0; fs_thing= 0; fs_got_key= 0; fs_got= 0; fs_depth= call_depth; if (call_depth > 100) { PANIC(ps,fs,"Structure is nested too deep"); } if (!obj_char) { fs_stop_char= 0; } else if (obj_char == '[') { fs_thing= (SV*)newAV(); fs_stop_char= ']'; } else if (obj_char == '{') { fs_key= 0; fs_key_len= 0; fs_thing= (SV*)newHV(); WANT_KEY_on(fs); fs_stop_char= '}'; } else { PANICf1(ps,fs, "Unknown obj char '%c'", obj_char); } REPARSE: while ( ps_parse_ptr < ps_string_end) { fs_token_start= ps_parse_ptr; fs_token= TOKEN_ERROR; ch= *(ps_parse_ptr++); switch (ch) { case '#': while ( *ps_parse_ptr && *ps_parse_ptr != '\n' ) ps_parse_ptr++; case CASE_ISWHITE: EAT_WHITES_AND_COMMENTS(ps_parse_ptr); goto REPARSE; case '=': if ( *ps_parse_ptr != '>' ) { ERROR(ps,fs,"Encountered assignment '=' or unterminated fat comma '=>'"); } REQUIRE_FAT_COMMA_off(fs); ps_parse_ptr++; /* fallthrough */ case ',': /* comma */ if ( REQUIRE_FAT_COMMA(fs) ) { ERROR(ps,fs,"expected fat comma after bareword"); } else if ( ! ALLOW_COMMA(fs) ) { ERRORf2(ps,fs,"unexpected %s when expecting a %s", (ch=='=' ? "fat comma" : "comma"),(WANT_KEY(fs) ? "key" : "value")); } ALLOW_COMMA_off(fs); goto REPARSE; case '$': case '%': case '@': ps_parse_ptr--; ERROR(ps,fs,"Encountered variable in input. This is not eval - can not undump code"); case '{': case '[': fs_token= TOKEN_OPEN; break; case ']': case '}': fs_token= TOKEN_CLOSE; break; case '\\': fs_token= TOKEN_REF; break; case '\'': if (!(fs_token= scan_single_quote(ps,fs))) BAIL(ps,fs); break; case '"': if (!(fs_token= scan_double_quote(ps,fs))) BAIL(ps,fs); break; case '-': ch= *ps_parse_ptr; if ( '0' == ch ) { ps_parse_ptr++; if (*ps_parse_ptr != '.') { ERROR(ps,fs,"Negative number start with a zero that is not fractional is illegal"); } goto DO_DECIMAL; } else if ( '1' <= ch && ch <= '9' ) { ps_parse_ptr++; } else { ERROR(ps,fs,"bare '-' only allowed to signify negative number"); } goto DO_NUMBER; case '0': if ( ps_parse_ptr < ps_string_end && ('0' <= *ps_parse_ptr && *ps_parse_ptr <= '9')) { ERROR(ps,fs,"Zero may not be followed by another digit at the start of a number"); } case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': DO_NUMBER: /* number */ ch= *ps_parse_ptr; while ( '0' <= ch && ch <= '9' ) { ch= *(++ps_parse_ptr); } if ( ch == '.' ) { DO_DECIMAL: ch= *(++ps_parse_ptr); if ( '0' <= ch && ch <= '9' ) { ch= *(++ps_parse_ptr); } else { ERROR(ps,fs,"Unexpected end of floating point number after decimal point"); } while ('0' <= ch && ch <= '9') { ch= *(++ps_parse_ptr); } fs_token= TOKEN_NV; } else { fs_token= TOKEN_IV; } break; default: if (bareword_start[(U8)ch]) { ch= *ps_parse_ptr; while ( bareword_rest[(U8)ch] ) { ch= *(++ps_parse_ptr); } } else { ps_parse_ptr--; PANICf2(ps,fs,"Unexpected character '%c' codepoint 0x%02x while parsing bareword",ch,ch); } /* for some reason all the interesting keywords are 5 characters long */ if ( 5 == (ps_parse_ptr - fs_token_start) ) { if ( 'b' == fs_token_start[0] && 'l' == fs_token_start[1] && 'e' == fs_token_start[2] && 's' == fs_token_start[3] && 's' == fs_token_start[4] ){ fs_token= TOKEN_BLESS; } else if ( 'u' == fs_token_start[0] && 'n' == fs_token_start[1] && 'd' == fs_token_start[2] && 'e' == fs_token_start[3] && 'f' == fs_token_start[4] ){ fs_token= TOKEN_UNDEF; } else { fs_token= TOKEN_BAREWORD; } } else { fs_token= TOKEN_BAREWORD; } } /* switch */ if (REQUIRE_FAT_COMMA(fs)) { ERROR(ps,fs,"expected fat comma after bareword"); } else if (ALLOW_COMMA(fs) && fs_token != TOKEN_CLOSE) { ERRORf1(ps,fs,"Expecting comma got %s",token_name[fs_token]); } SHOW_TOKEN(ps,fs); switch (fs_token) { case TOKEN_REF: if (WANT_KEY(fs)) { ERRORf1(ps,fs,"unexpected open bracket '%c' when expecting a key", ch); } if (fs_got) { ERROR(ps,fs,"Multiple objects in stream?"); } fs_refs++; break; case TOKEN_BLESS: if ( *ps_parse_ptr != '(') { ERROR(ps,fs,"expected a '(' after 'bless'"); } else { ps_parse_ptr++; /* ERROR(ps,fs,"after bless("); */ } if (WANT_KEY(fs)) { ERROR(ps,fs,"unexpected bless() call when expecting a key"); } ch= 0; /* flag for blessing */ case TOKEN_OPEN: if (WANT_KEY(fs)) { ERRORf1(ps,fs,"unexpected open bracket '%c' when expecting a key", ch); } if (fs_got) { ERROR(ps,fs,"Multiple objects in stream?"); } fs_got= _undump( aTHX_ ps, ch, call_depth+1); if (!fs_got) { BAIL(ps,fs); } else { if ( ch == 0 ) { char quote; HV *stash; EAT_WHITES_AND_COMMENTS(ps_parse_ptr); if (*ps_parse_ptr == ',') { ps_parse_ptr++; EAT_WHITES_AND_COMMENTS(ps_parse_ptr); } else { ERROR(ps,fs,"expected a comma after object in bless()"); } ch= *ps_parse_ptr; if ( ch != '\'' && ch != '\"') { ERROR(ps,fs,"Expected quoted class name after object in bless()"); } quote= ch; fs_token_start= ++ps_parse_ptr; if (!bareword_start[(U8)*ps_parse_ptr]) { ERROR(ps,fs,"Expected classname to start with [A-Za-z_]"); } do { ch= *(++ps_parse_ptr); if (ch == ':') { ch= *(++ps_parse_ptr); if (ch != ':') { ERROR(ps,fs,"Single colon in class name?"); } else { ch= *(++ps_parse_ptr); } } } while (bareword_rest[(U8)ch]); if (ch != quote) { ERROR(ps,fs,"Unterminated or corrupt classname"); } /* XXX: mortalize 'got; here? can this die? */ stash= gv_stashpvn(fs_token_start, ps_parse_ptr - fs_token_start, 1); if (!stash) { PANIC(ps,fs,"Failed to load stash"); } ++ps_parse_ptr; /* skip quote */ EAT_WHITES_AND_COMMENTS(ps_parse_ptr); /* eat optional whitespace after quote */ ch= *ps_parse_ptr; /* check we have a close paren */ if (ch != ')') { ERRORf1(ps,fs,"expecting a close paren for bless but got a '%c'",ch); } else { ps_parse_ptr++; } /* and finally do the blessing */ if(0) do_sv_dump(0, Perl_debug_log, fs_got, 0, 4, 0, 0); sv_bless(fs_got,stash); if(0) do_sv_dump(0, Perl_debug_log, fs_got, 0, 4, 0, 0); } goto GOT_SV; } /* unreached */ break; case TOKEN_CLOSE: { if (fs_stop_char == ch) { if (fs_got_key || fs_key) { ERROR(ps,fs,"Odd number of items in hash constructor"); } return newRV_noinc((SV *)fs_thing); } else if (!fs_stop_char) { ERRORf1(ps,fs,"Unexpected close bracket '%c'",ch); } else { ERRORf2(ps,fs,"Unexpected close '%c' while parsing %s", ch, (fs_stop_char == '}') ? "HASH" : "ARRAY"); } break; } case TOKEN_UNDEF: if (WANT_KEY(fs)) { ERROR(ps,fs,"got an undef when we wanted a key"); } if (fs_got) { ERROR(ps,fs,"Multiple objects in stream?"); } fs_got= newSV(0); goto GOT_SV; /* unreached */ break; case TOKEN_Q_STRING: case TOKEN_QQ_STRING:{ /* nothing to unescape - we are done now */ fs_token_start++; /* skip the first quote */ if ( !fs_first_escape ) { /* it didnt contain any escapes */ if ( WANT_KEY(fs) ) { fs_key= fs_token_start; fs_key_len= ps_parse_ptr - fs_token_start - 1; /* remove trailing quote */ WANT_KEY_off(fs); ALLOW_COMMA_on(fs); break; } if (fs_got) { ERROR(ps,fs,"Multiple objects in stream?"); } fs_got= newSVpvn(fs_token_start, ps_parse_ptr - fs_token_start - 1); /* remove trailing quote */ goto GOT_SV; } else { /* contains escapes - so we have to unescape it */ STRLEN len= 0; I32 grok_flags= 0; const char *grok_start; char is_uni= 0; char must_upgrade= 0; char *new_str_begin; const char *esc_read; const char *esc_read_end; char *esc_write; char *esc_write_end; if (fs_got) { ERROR(ps,fs,"Multiple objects in stream?"); } /* create a new SV with a copy of the raw escaped string in it * note that we always have "room" to do this, all escaped structures * convert to something shorter than their escaped form, so we can do * things in place */ fs_got= newSVpvn(fs_token_start, ps_parse_ptr - fs_token_start - 1); /* remove trailing quote */ /* the sv now contains a copy of the unescaped string */ new_str_begin= SvPV(fs_got, len); esc_write= new_str_begin + ( fs_first_escape - fs_token_start ); esc_write_end= new_str_begin + len; esc_read_end= fs_token_start + len; esc_read= fs_token_start + ( fs_first_escape - fs_token_start ); if (*esc_read != '\\' || *esc_write != '\\') { PANIC(ps,fs,"when parsing quoted string failed start quote sanity check"); } if (fs_token == TOKEN_Q_STRING) { do { if (*esc_read == '\\' && (esc_read[1] == '\\' || esc_read[1] == '\'')) { esc_read++; } *esc_write++= *esc_read++; } while (esc_read < esc_read_end); } else { /* TOKEN_QQ_STRING */ while (esc_read < esc_read_end) { U32 cp= *esc_read++; if ( cp == '\\' ) { if (esc_read >= esc_read_end) { PANIC(ps,fs,"ran off end of string"); } ch= *esc_read++; switch (ch) { case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': /* first octal digit */ grok_start= esc_read - 1; /* it was advanced earlier */ ch= *esc_read; if ('0' <= ch && ch <= '7') { /* second octal digit */ esc_read++; ch= *esc_read; if ('0' <= ch && ch <= '7') { /* third octal digit */ esc_read++; } } len= esc_read - grok_start; cp= grok_oct((char *)grok_start, &len, &grok_flags, 0); if (cp>127) { must_upgrade=1; } break; case 'x': if (*esc_read != '{') { ERROR(ps,fs,"truncated \\x{} sequence?"); } else { esc_read++; } grok_start= esc_read; while (*esc_read && *esc_read != '}') esc_read++; if (*esc_read != '}') { ERROR(ps,fs,"unterminated \\x{} in double quoted string"); } else { len= esc_read - grok_start; esc_read++; /* skip '}' */ } if (0) warn("hex: %.*s\n", (int)len, grok_start); if (len) { cp= grok_hex((char *)grok_start, &len, &grok_flags, 0); } else { ERROR(ps,fs,"empty \\x{} escape?"); } if (0) warn("cp: %d\n len: %d flags: %d", cp, (int)len, grok_flags); if ( !is_uni ) { /* otherwise it would be in octal */ STRLEN len; is_uni= 1; if (must_upgrade) { SvCUR_set(fs_got, esc_write - new_str_begin); len= sv_utf8_upgrade_nomg(fs_got); new_str_begin= SvPV_nolen(fs_got); esc_write= new_str_begin+len; esc_write_end= new_str_begin + SvLEN(fs_got); } else { SvUTF8_on(fs_got); } } if (cp>0x10FFFF) { ERRORf1(ps,fs,"Illegal codepoint in \\x{%x}",cp); } break; /* printf-style backslashes, formfeeds, newlines, etc */ case 'a': cp= '\007'; break; /* "\a" => "\\a", */ case 'b': cp= '\b'; break; /* "\b" => "\\b", */ case 'e': cp= '\033'; break; /* "\e" => "\\e", */ case 'f': cp= '\f'; break; /* "\f" => "\\f", */ case 'n': cp= '\n'; break; /* "\n" => "\\n", */ case 'r': cp= '\r'; break; /* "\r" => "\\r", */ case 't': cp= '\t'; break; /* "\t" => "\\t", */ default: cp= ch; break; /* literal */ } /* switch on escape type */ if (is_uni) { /* if ( esc_write_end - esc_write < UTF8_MAXBYTES + 1 ) { SvCUR_set(fs_got, esc_write - new_str_begin); new_str_begin= SvGROW(fs_got, UTF8_MAXBYTES + 1); esc_write= SvEND(fs_got); esc_write_end= new_str_begin + SvLEN(fs_got); } */ esc_write= uvchr_to_utf8(esc_write, cp); } else { *esc_write++= (char)cp; } } /* end - is an escape */ else { const char *no_esc_start= esc_read - 1; while ( *esc_read != '\\' && esc_read < esc_read_end ) { esc_read++; } len= esc_read - no_esc_start + 1; /* if ( is_uni && esc_write_end - esc_write < len ) { SvCUR_set(fs_got, esc_write - new_str_begin); new_str_begin= SvGROW(fs_got, len); esc_write= SvEND(fs_got); esc_write_end= new_str_begin + SvLEN(fs_got); } */ Copy(no_esc_start, esc_write, len - 1, char); esc_write += esc_read - no_esc_start; } } /* while */ } /* TOKEN_Q_STRING or TOKEN_QQ_STRING */ SvCUR_set(fs_got, esc_write - new_str_begin); *esc_write++= 0; if (WANT_KEY(fs)) { /* we contain stuff that will be used as a hash key lookup */ fs_got_key= fs_got; /* swap got over to the got_key var for later */ fs_key= 0; /* make sure we dont get confused about two keys */ fs_got= 0; /* clear got */ DONE_KEY_break; } else { /* and now do something with the SV */ goto GOT_SV; } } /* not reached */ } case TOKEN_BAREWORD: /* fallthrough */ REQUIRE_FAT_COMMA_on(fs); if (WANT_KEY(fs)) { DONE_KEY_SIMPLE_break; } if (fs_got) { ERROR(ps,fs,"Multiple objects in stream?"); } fs_got= newSVpvn(fs_token_start, ps_parse_ptr - fs_token_start); goto GOT_SV; case TOKEN_IV:{ /* fallthrough */ if (WANT_KEY(fs)) { DONE_KEY_SIMPLE_break; } ch= ps_parse_ptr - fs_token_start; if (fs_token_start[0] == '-') { IV iv= 0; if ( ch < 12) { fs_token_start++; switch (ch) { case 11: iv -= (*fs_token_start++ - '0') * 1000000000L; case 10: iv -= (*fs_token_start++ - '0') * 100000000L; case 9: iv -= (*fs_token_start++ - '0') * 10000000L; case 8: iv -= (*fs_token_start++ - '0') * 1000000L; case 7: iv -= (*fs_token_start++ - '0') * 100000L; case 6: iv -= (*fs_token_start++ - '0') * 10000L; case 5: iv -= (*fs_token_start++ - '0') * 1000L; case 4: iv -= (*fs_token_start++ - '0') * 100L; case 3: iv -= (*fs_token_start++ - '0') * 10L; case 2: iv -= (*fs_token_start++ - '0') * 1L; break; default: PANICf1(ps,fs,"Strange length for negative integer in switch: %d", ch); } fs_got= newSViv(iv); } else { goto MAKE_SV; } } else { if (ch < 11 ) { UV uv= 0; switch (ch) { case 10: uv += (*fs_token_start++ - '0') * 1000000000L; case 9: uv += (*fs_token_start++ - '0') * 100000000L; case 8: uv += (*fs_token_start++ - '0') * 10000000L; case 7: uv += (*fs_token_start++ - '0') * 1000000L; case 6: uv += (*fs_token_start++ - '0') * 100000L; case 5: uv += (*fs_token_start++ - '0') * 10000L; case 4: uv += (*fs_token_start++ - '0') * 1000L; case 3: uv += (*fs_token_start++ - '0') * 100L; case 2: uv += (*fs_token_start++ - '0') * 10L; case 1: uv += (*fs_token_start++ - '0') * 1L; break; default: PANICf1(ps,fs,"Strange length for integer in switch: %d", ch); } fs_got= newSVuv(uv); } else { goto MAKE_SV; } } goto GOT_SV; } case TOKEN_NV: { if (WANT_KEY(fs)) { DONE_KEY_SIMPLE_break; } MAKE_SV: if (fs_got) { ERROR(ps,fs,"Multiple objects in stream?"); } fs_got= newSVpvn(fs_token_start, ps_parse_ptr - fs_token_start); GOT_SV: while ( fs_refs > 0 ) { fs_got= newRV_noinc((SV*)fs_got); fs_refs--; } if (obj_char == '{') { HE *ent; if (fs_key) { ent= hv_common((HV *)fs_thing, NULL, fs_key, fs_key_len, 0, HV_FETCH_LVALUE, NULL, 0); } else if (fs_got_key) { ent= hv_common((HV *)fs_thing, fs_got_key, NULL, 0, 0, HV_FETCH_LVALUE, NULL, 0); SvREFCNT_dec(fs_got_key); fs_got_key= 0; } else { PANIC(ps,fs,"got something to store, but no key?"); } if (!ent) { PANIC(ps,fs,"failed to store in hash"); } if (SvOK(HeVAL(ent))) { ERRORf2(ps,fs,"duplicate key '%.*s' is illegal", (int)fs_key_len, fs_key); } else { SvREFCNT_dec(HeVAL(ent)); HeVAL(ent)= fs_got; } fs_key= 0; fs_got= 0; WANT_KEY_on(fs); ALLOW_COMMA_on(fs); } else if (obj_char == '[') { /* av_push does not return anything - a little worrying? maybe better to av_store()*/ av_push((AV*)fs_thing, fs_got); fs_got= 0; ALLOW_COMMA_on(fs); } else { return fs_got; } break; } default: PANICf2(ps,fs,"unhandled fs_token %d '%s'", fs_token, token_name[fs_token