#include "EXTERN.h" #include "perl.h" #include "XSUB.h" #include #include /* * the expectation that perl strings have an appended zero is spread all over this file, yet * it breaks it itself almost everywhere. */ /*****************************************************************************/ typedef struct pconv { iconv_t *iconv; SV *to, *from, *fallback; } *PApp__Recode__Pconv; static struct pconv * pconv_new (SV *tocode, SV *fromcode, SV *fallback) { struct pconv *self; iconv_t iconv; if ((iconv = iconv_open (SvPV_nolen (tocode), SvPV_nolen (fromcode))) == (iconv_t) - 1) return 0; New (0, self, 1, struct pconv); /* selten so eine hässliche API gesehen... */ self->iconv = iconv; self->to = newSVsv (tocode); self->from = newSVsv (fromcode); self->fallback = fallback ? newSVsv (fallback) : 0; return self; } static void pconv_destroy (struct pconv *self) { (void) iconv_close (self->iconv); SvREFCNT_dec (self->to); SvREFCNT_dec (self->from); if (self->fallback) SvREFCNT_dec (self->fallback); Safefree (self); } /* * if !fallback, do a plain conversion. if !!fallback, do a conversion * from utf8 with fallback and warn if the iconv implementation does an * implementation defined conversion. */ static SV * plain_iconv (iconv_t self, SV *string, SV *fallback) { char *icursor, *save_icursor; char *ocursor; size_t ibl, save_ibl, obl, obufsize, obufnext; SV *res; /* Perl return string */ SV *replace = 0; /* fallback replacement */ static int warned = 0; res = newSVpvn ("", 0); icursor = SvPV (string, ibl); /* start with a slightly larger buffer than necessary, * then increase it exponentially. maybe we should start with * a fixed but relatively large size and increase in constant steps? */ obufsize = ibl + ((ibl + 15) >> 4) + 1; obufnext = (ibl << 1) + 1; ocursor = SvGROW (res, obl = obufsize); while (ibl != 0) { size_t ret; ret = iconv (self, &icursor, &ibl, &ocursor, &obl); if (ret == (size_t)-1) { switch (errno) { case E2BIG: /* enlarge buffer and position pointer where we left */ ret = ocursor - SvPVX (res); ocursor = SvGROW (res, obufsize) + ret; obl = obufsize - ret; obufsize = obufnext; obufnext <<= 1; break; case EILSEQ: if (fallback) { if (replace) { ocursor = "PApp::Recode::PConv: conversion of fallback sequence '%s' failed"; goto raiserr; } else { dSP; STRLEN retlen; int count; UV chr = #ifdef utf8_to_uv utf8_to_uv (icursor, ibl, &retlen, UTF8_CHECK_ONLY); /* <=DEVEL9916 */ #endif if (retlen == (STRLEN) -1) { ocursor = "PApp::Recode::PConv: non-utf8-character in input detected"; goto raiserr; } /* call fallback */ ENTER; SAVETMPS; PUSHMARK(SP); XPUSHs (sv_2mortal (newSVuv (chr))); PUTBACK; count = call_sv (fallback, G_SCALAR); if (count != 1) { ocursor = "PApp::Recode::PConv: fallback function did not return a single value"; goto raiserr; } save_icursor = icursor + retlen; save_ibl = ibl - retlen; replace = SvREFCNT_inc (POPs); icursor = SvPV (replace, retlen); ibl = retlen; SPAGAIN; FREETMPS; LEAVE; if (!ibl) { ocursor = "pconv: fallback function did not provide non-empty replacement string"; goto raiserr; } break; } } else { /*ocursor = "pconv: illegal multibyte character sequence encountered during character conversion";*/ SvREFCNT_dec (res); return 0; } abort (); /* NOTREACHED */ case EINVAL: ocursor = "incomplete multibyte character sequence encountered during character conversion"; goto raiserr; default: /* some unknown error */ ocursor = "PApp::Recode::PConv: illegal multibyte character sequence encountered during character conversion"; icursor = strerror (errno); goto raiserr; } } else if (ret > 0) { if (fallback && !warned) { warn ("pconv warning: your iconv implementation is not supported because it does NOT act funny"); warned = 1; } } else if (replace) { icursor = save_icursor; ibl = save_ibl; SvREFCNT_dec (replace); replace = 0; } } SvCUR_set (res, ocursor - SvPVX (res)); return res; raiserr: SvREFCNT_dec (res); if (replace) SvREFCNT_dec (replace); croak (ocursor, icursor); } /* this is called when plain_iconv failed. In this case we do it the slow way. */ static SV * safe_iconv (const char *tocode, const char *fromcode, SV *string, SV *fallback) { SV *res; iconv_t iconv; if (strcmp (fromcode, "utf-8")) { iconv = iconv_open ("utf-8", fromcode); if (!iconv) croak ("PApp::Recode::Pconv: conversion from %s to utf-8 not available (%s)", fromcode, strerror (errno)); string = plain_iconv (iconv, string, 0); iconv_close (iconv); if (!string) return Nullsv; } else /* from utf-8 is a veeery common case, since PAPP_CHARSET == UTF-8 */ SvREFCNT_inc (string); iconv = iconv_open (tocode, "utf-8"); if (!iconv) { SvREFCNT_dec (string); croak ("PApp::Recode::Pconv: conversion from utf-8 to %s not available (%s)", tocode, strerror (errno)); } /* now we relying on glibc's non-unix behaviour of returning EILSEQ on unconvertible characters. warn for other implementations. */ res = plain_iconv (iconv, string, fallback); iconv_close (iconv); SvREFCNT_dec (string); return res; } MODULE = PApp::Recode PACKAGE = PApp::Recode::Pconv PROTOTYPES: ENABLE PApp::Recode::Pconv new(class, tocode, fromcode, fallback = Nullsv) SV * class SV * tocode SV * fromcode SV * fallback PROTOTYPE: $$$;$ CODE: RETVAL = pconv_new (tocode, fromcode, fallback); OUTPUT: RETVAL PApp::Recode::Pconv open(tocode, fromcode, fallback = Nullsv) SV * tocode SV * fromcode SV * fallback PROTOTYPE: $$;$ CODE: RETVAL = pconv_new (tocode, fromcode, fallback); OUTPUT: RETVAL SV * convert(self, string, reset = FALSE) PApp::Recode::Pconv self SV * string int reset PROTOTYPE: $$;$ ALIAS: convert_fresh = 1 reset = 2 CODE: /* reset conversion state when restart != 0 */ if (reset || ix) { size_t ibl, obl; (void) iconv (self->iconv, NULL, &ibl, NULL, &obl); if (ix == 2) XSRETURN_EMPTY; } RETVAL = plain_iconv (self->iconv, string, 0); if (!RETVAL && self->fallback) RETVAL = safe_iconv (SvPV_nolen (self->to), SvPV_nolen (self->from), string, self->fallback); if (!RETVAL) croak ("PApp::Recode::PConv: character conversion from %s to %s failed (%s)", SvPV_nolen (self->from), SvPV_nolen (self->to), strerror (errno)); OUTPUT: RETVAL void DESTROY(self) PApp::Recode::Pconv self CODE: pconv_destroy (self);