/*-------
 * Module:			win_unicode.c
 *
 * Description:		This module contains utf8 <-> ucs2 conversion routines
 *					under Windows
 *
 *-------
 */

#ifdef UNICODE_SUPPORT
#include "unicode_support.h"

/*
 * NOTE(review): the header names below were missing from the #include
 * directives.  They are restored from what this file calls (malloc/free,
 * strlen/memcpy, char16_t/mbrtoc16) - confirm against the upstream file.
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#ifdef WIN32
#define FORMAT_SIZE_T	"%Iu"
#else
#define FORMAT_SIZE_T	"%zu"
#endif

#if (defined(__STDC_ISO_10646__) && defined(HAVE_MBSTOWCS) && defined(HAVE_WCSTOMBS)) || defined(WIN32)
#define __WCS_ISO10646__
static BOOL	use_wcs = FALSE;
#endif

#if (defined(__STDC_UTF_16__) && defined(HAVE_UCHAR_H) && defined(HAVE_MBRTOC16) && defined(HAVE_C16RTOMB))
#define __CHAR16_UTF_16__
#include <uchar.h>
static BOOL	use_c16 = FALSE;
#endif

/* Cached result of get_convtype(); negative means "not probed yet". */
static int	convtype = -1;

/*
 * Decide which wide-character representation the conversion helpers in
 * this file should use and cache the answer in convtype.
 *
 * The probe looks at the in-memory bytes of the literal L"a" (and, when
 * char16_t support is compiled in, u"a") including the terminator:
 *	- 2-byte wchar_t laid out as 'a',0,0,0          -> WCSTYPE_UTF16_LE
 *	- 4-byte wchar_t laid out as 'a',0,0,0,0,0,0,0  -> WCSTYPE_UTF32_LE
 *	- char16_t laid out as 'a',0,0,0                -> C16TYPE_UTF16_LE
 * A successful probe also sets use_wcs or use_c16.
 *
 * Returns the detected type, or CONVTYPE_UNKNOWN when nothing matched.
 */
int get_convtype(void)
{
	const UCHAR	*cdt;

#if defined(__WCS_ISO10646__)
	if (convtype < 0)
	{
		wchar_t	*wdt = L"a";
		int	sizeof_w = sizeof(wchar_t);

		cdt = (UCHAR *) wdt;
		switch (sizeof_w)
		{
			case 2:
				/* bytes 2 and 3 belong to the terminating L'\0' */
				if ('a' == cdt[0] && '\0' == cdt[1] && '\0' == cdt[2] && '\0' == cdt[3])
				{
					MYLOG(0, " UTF-16LE detected\n");
					convtype = WCSTYPE_UTF16_LE;
					use_wcs = TRUE;
				}
				break;
			case 4:
				if ('a' == cdt[0] && '\0' == cdt[1] && '\0' == cdt[2] && '\0' == cdt[3] &&
				    '\0' == cdt[4] && '\0' == cdt[5] && '\0' == cdt[6] && '\0' == cdt[7])
				{
					MYLOG(0, " UTF32-LE detected\n");
					convtype = WCSTYPE_UTF32_LE;
					use_wcs = TRUE;
				}
				break;
		}
	}
#endif /* __WCS_ISO10646__ */
#ifdef __CHAR16_UTF_16__
	if (convtype < 0)
	{
		char16_t	*c16dt = u"a";

		cdt = (UCHAR *) c16dt;
		if ('a' == cdt[0] && '\0' == cdt[1] && '\0' == cdt[2] && '\0' == cdt[3])
		{
			MYLOG(0, " C16_UTF-16LE detected\n");
			convtype = C16TYPE_UTF16_LE;
			use_c16 = TRUE;
		}
	}
#endif /* __CHAR16_UTF_16__ */
	if (convtype < 0)
		convtype = CONVTYPE_UNKNOWN;	/* unknown */
	return convtype;
}

/*
 * Masks used by the UTF-16/UTF-32 => UTF-8 encoders.  The encoders build
 * the UTF-8 bytes inside one integer whose least significant byte is the
 * first output byte, which is why the *_base values look byte-reversed.
 */
#define byte3check	0xfffff800
#define byte2_base	0x80c0
#define byte2_mask1	0x07c0
#define byte2_mask2	0x003f
#define byte3_base	0x8080e0
#define byte3_mask1	0xf000
#define byte3_mask2	0x0fc0
#define byte3_mask3	0x003f

#define surrog_check	0xfc00
#define surrog1_bits	0xd800
#define surrog2_bits	0xdc00
#define byte4_base	0x808080f0
#define byte4_sr1_mask1 0x0700 #define byte4_sr1_mask2 0x00fc #define byte4_sr1_mask3 0x0003 #define byte4_sr2_mask1 0x03c0 #define byte4_sr2_mask2 0x003f #define surrogate_adjust (0x10000 >> 10) static int little_endian = -1; SQLULEN ucs2strlen(const SQLWCHAR *ucs2str) { SQLULEN len; for (len = 0; ucs2str[len]; len++) ; return len; } char *ucs2_to_utf8(const SQLWCHAR *ucs2str, SQLLEN ilen, SQLLEN *olen, BOOL lower_identifier) { char * utf8str; int len = 0; MYLOG(0, "%p ilen=" FORMAT_LEN " ", ucs2str, ilen); if (!ucs2str) { if (olen) *olen = SQL_NULL_DATA; return NULL; } if (little_endian < 0) { int crt = 1; little_endian = (0 != ((char *) &crt)[0]); } if (ilen < 0) ilen = ucs2strlen(ucs2str); MYPRINTF(0, " newlen=" FORMAT_LEN, ilen); utf8str = (char *) malloc(ilen * 4 + 1); if (utf8str) { int i = 0; UInt2 byte2code; Int4 byte4code, surrd1, surrd2; const SQLWCHAR *wstr; for (i = 0, wstr = ucs2str; i < ilen; i++, wstr++) { if (!*wstr) break; else if (0 == (*wstr & 0xffffff80)) /* ASCII */ { if (lower_identifier) utf8str[len++] = (char) tolower(*wstr); else utf8str[len++] = (char) *wstr; } else if ((*wstr & byte3check) == 0) { byte2code = byte2_base | ((byte2_mask1 & *wstr) >> 6) | ((byte2_mask2 & *wstr) << 8); if (little_endian) memcpy(utf8str + len, (char *) &byte2code, sizeof(byte2code)); else { utf8str[len] = ((char *) &byte2code)[1]; utf8str[len + 1] = ((char *) &byte2code)[0]; } len += sizeof(byte2code); } /* surrogate pair check for non ucs-2 code */ else if (surrog1_bits == (*wstr & surrog_check)) { surrd1 = (*wstr & ~surrog_check) + surrogate_adjust; wstr++; i++; surrd2 = (*wstr & ~surrog_check); byte4code = byte4_base | ((byte4_sr1_mask1 & surrd1) >> 8) | ((byte4_sr1_mask2 & surrd1) << 6) | ((byte4_sr1_mask3 & surrd1) << 20) | ((byte4_sr2_mask1 & surrd2) << 10) | ((byte4_sr2_mask2 & surrd2) << 24); if (little_endian) memcpy(utf8str + len, (char *) &byte4code, sizeof(byte4code)); else { utf8str[len] = ((char *) &byte4code)[3]; utf8str[len + 1] = ((char *) 
&byte4code)[2]; utf8str[len + 2] = ((char *) &byte4code)[1]; utf8str[len + 3] = ((char *) &byte4code)[0]; } len += sizeof(byte4code); } else { byte4code = byte3_base | ((byte3_mask1 & *wstr) >> 12) | ((byte3_mask2 & *wstr) << 2) | ((byte3_mask3 & *wstr) << 16); if (little_endian) memcpy(utf8str + len, (char *) &byte4code, 3); else { utf8str[len] = ((char *) &byte4code)[3]; utf8str[len + 1] = ((char *) &byte4code)[2]; utf8str[len + 2] = ((char *) &byte4code)[1]; } len += 3; } } utf8str[len] = '\0'; if (olen) *olen = len; } MYPRINTF(0, " olen=%d utf8str=%s\n", len, utf8str ? utf8str : ""); return utf8str; } #define byte3_m1 0x0f #define byte3_m2 0x3f #define byte3_m3 0x3f #define byte2_m1 0x1f #define byte2_m2 0x3f #define byte4_m1 0x07 #define byte4_m2 0x3f #define byte4_m31 0x30 #define byte4_m32 0x0f #define byte4_m4 0x3f /* * Convert a string from UTF-8 encoding to UCS-2. * * utf8str - input string in UTF-8 * ilen - length of input string in bytes (or minus) * lfconv - TRUE if line feeds (LF) should be converted to CR + LF * ucs2str - output buffer * bufcount - size of output buffer * errcheck - if TRUE, check for invalidly encoded input characters * * Returns the number of SQLWCHARs copied to output buffer. If the output * buffer is too small, the output is truncated. The output string is * NULL-terminated, except when the output is truncated. 
*/ SQLULEN utf8_to_ucs2_lf(const char *utf8str, SQLLEN ilen, BOOL lfconv, SQLWCHAR *ucs2str, SQLULEN bufcount, BOOL errcheck) { int i; SQLULEN rtn, ocount, wcode; const UCHAR *str; MYLOG(DETAIL_LOG_LEVEL, "ilen=" FORMAT_LEN " bufcount=" FORMAT_ULEN, ilen, bufcount); if (!utf8str) return 0; MYPRINTF(DETAIL_LOG_LEVEL, " string=%s", utf8str); if (!bufcount) ucs2str = NULL; else if (!ucs2str) bufcount = 0; if (ilen < 0) ilen = strlen(utf8str); for (i = 0, ocount = 0, str = (SQLCHAR *) utf8str; i < ilen && *str;) { if ((*str & 0x80) == 0) { if (lfconv && PG_LINEFEED == *str && (i == 0 || PG_CARRIAGE_RETURN != str[-1])) { if (ocount < bufcount) ucs2str[ocount] = PG_CARRIAGE_RETURN; ocount++; } if (ocount < bufcount) ucs2str[ocount] = *str; ocount++; i++; str++; } else if (0xf8 == (*str & 0xf8)) /* more than 5 byte code */ { ocount = (SQLULEN) -1; goto cleanup; } else if (0xf0 == (*str & 0xf8)) /* 4 byte code */ { if (errcheck) { if (i + 4 > ilen || 0 == (str[1] & 0x80) || 0 == (str[2] & 0x80) || 0 == (str[3] & 0x80)) { ocount = (SQLULEN) -1; goto cleanup; } } if (ocount < bufcount) { wcode = (surrog1_bits | ((((UInt4) *str) & byte4_m1) << 8) | ((((UInt4) str[1]) & byte4_m2) << 2) | ((((UInt4) str[2]) & byte4_m31) >> 4)) - surrogate_adjust; ucs2str[ocount] = (SQLWCHAR) wcode; } ocount++; if (ocount < bufcount) { wcode = surrog2_bits | ((((UInt4) str[2]) & byte4_m32) << 6) | (((UInt4) str[3]) & byte4_m4); ucs2str[ocount] = (SQLWCHAR) wcode; } ocount++; i += 4; str += 4; } else if (0xe0 == (*str & 0xf0)) /* 3 byte code */ { if (errcheck) { if (i + 3 > ilen || 0 == (str[1] & 0x80) || 0 == (str[2] & 0x80)) { ocount = (SQLULEN) -1; goto cleanup; } } if (ocount < bufcount) { wcode = ((((UInt4) *str) & byte3_m1) << 12) | ((((UInt4) str[1]) & byte3_m2) << 6) | (((UInt4) str[2]) & byte3_m3); ucs2str[ocount] = (SQLWCHAR) wcode; } ocount++; i += 3; str += 3; } else if (0xc0 == (*str & 0xe0)) /* 2 byte code */ { if (errcheck) { if (i + 2 > ilen || 0 == (str[1] & 0x80)) { ocount = 
(SQLULEN) -1; goto cleanup; } } if (ocount < bufcount) { wcode = ((((UInt4) *str) & byte2_m1) << 6) | (((UInt4) str[1]) & byte2_m2); ucs2str[ocount] = (SQLWCHAR) wcode; } ocount++; i += 2; str += 2; } else { ocount = (SQLULEN) -1; goto cleanup; } } cleanup: rtn = ocount; if (ocount == (SQLULEN) -1) { if (!errcheck) rtn = 0; ocount = 0; } if (ocount < bufcount && ucs2str) ucs2str[ocount] = 0; MYPRINTF(DETAIL_LOG_LEVEL, " ocount=" FORMAT_ULEN "\n", ocount); return rtn; } #ifdef __WCS_ISO10646__ /* UCS4 => utf8 */ #define byte4check 0xffff0000 #define byte4_check 0x10000 #define byte4_mask1 0x1c0000 #define byte4_mask2 0x3f000 #define byte4_mask3 0x0fc0 #define byte4_mask4 0x003f #define byte4_m3 0x3f static SQLULEN ucs4strlen(const UInt4 *ucs4str) { SQLULEN len; for (len = 0; ucs4str[len]; len++) ; return len; } static char *ucs4_to_utf8(const UInt4 *ucs4str, SQLLEN ilen, SQLLEN *olen, BOOL lower_identifier) { char * utf8str; int len = 0; MYLOG(0, " %p ilen=" FORMAT_LEN "\n", ucs4str, ilen); if (!ucs4str) { if (olen) *olen = SQL_NULL_DATA; return NULL; } if (little_endian < 0) { int crt = 1; little_endian = (0 != ((char *) &crt)[0]); } if (ilen < 0) ilen = ucs4strlen(ucs4str); MYLOG(0, " newlen=" FORMAT_LEN "\n", ilen); utf8str = (char *) malloc(ilen * 4 + 1); if (utf8str) { int i; UInt2 byte2code; Int4 byte4code; const UInt4 *wstr; for (i = 0, wstr = ucs4str; i < ilen; i++, wstr++) { if (!*wstr) break; else if (0 == (*wstr & 0xffffff80)) /* ASCII */ { if (lower_identifier) utf8str[len++] = (char) tolower(*wstr); else utf8str[len++] = (char) *wstr; } else if ((*wstr & byte3check) == 0) { byte2code = byte2_base | ((byte2_mask1 & *wstr) >> 6) | ((byte2_mask2 & *wstr) << 8); if (little_endian) memcpy(utf8str + len, (char *) &byte2code, sizeof(byte2code)); else { utf8str[len] = ((char *) &byte2code)[1]; utf8str[len + 1] = ((char *) &byte2code)[0]; } len += sizeof(byte2code); } else if ((*wstr & byte4check) == 0) { byte4code = byte3_base | ((byte3_mask1 & *wstr) >> 12) | 
((byte3_mask2 & *wstr) << 2) | ((byte3_mask3 & *wstr) << 16); if (little_endian) memcpy(utf8str + len, (char *) &byte4code, 3); else { utf8str[len] = ((char *) &byte4code)[3]; utf8str[len + 1] = ((char *) &byte4code)[2]; utf8str[len + 2] = ((char *) &byte4code)[1]; } len += 3; } else { byte4code = byte4_base | ((byte4_mask1 & *wstr) >> 18) | ((byte4_mask2 & *wstr) >> 4) | ((byte4_mask3 & *wstr) << 10) | ((byte4_mask4 & *wstr) << 24); if (little_endian) memcpy(utf8str + len, (char *) &byte4code, sizeof(byte4code)); else { utf8str[len] = ((char *) &byte4code)[3]; utf8str[len + 1] = ((char *) &byte4code)[2]; utf8str[len + 2] = ((char *) &byte4code)[1]; utf8str[len + 3] = ((char *) &byte4code)[0]; } len += sizeof(byte4code); } } utf8str[len] = '\0'; if (olen) *olen = len; } MYLOG(0, " olen=%d %s\n", len, utf8str ? utf8str : ""); return utf8str; } /* * Convert a string from UTF-8 encoding to UTF-32. * * utf8str - input string in UTF-8 * ilen - length of input string in bytes (or minus) * lfconv - TRUE if line feeds (LF) should be converted to CR + LF * ucs4str - output buffer * bufcount - size of output buffer * errcheck - if TRUE, check for invalidly encoded input characters * * Returns the number of UInt4s copied to output buffer. If the output * buffer is too small, the output is truncated. The output string is * NULL-terminated, except when the output is truncated. 
*/ static SQLULEN utf8_to_ucs4_lf(const char *utf8str, SQLLEN ilen, BOOL lfconv, UInt4 *ucs4str, SQLULEN bufcount, BOOL errcheck) { int i; SQLULEN rtn, ocount, wcode; const UCHAR *str; MYLOG(0, " ilen=" FORMAT_LEN " bufcount=" FORMAT_ULEN "\n", ilen, bufcount); if (!utf8str) return 0; MYLOG(99, " string=%s\n", utf8str); if (!bufcount) ucs4str = NULL; else if (!ucs4str) bufcount = 0; if (ilen < 0) ilen = strlen(utf8str); for (i = 0, ocount = 0, str = (SQLCHAR *) utf8str; i < ilen && *str;) { if ((*str & 0x80) == 0) { if (lfconv && PG_LINEFEED == *str && (i == 0 || PG_CARRIAGE_RETURN != str[-1])) { if (ocount < bufcount) ucs4str[ocount] = PG_CARRIAGE_RETURN; ocount++; } if (ocount < bufcount) ucs4str[ocount] = *str; ocount++; i++; str++; } else if (0xf8 == (*str & 0xf8)) /* more than 5 byte code */ { ocount = (SQLULEN) -1; goto cleanup; } else if (0xf0 == (*str & 0xf8)) /* 4 byte code */ { if (errcheck) { if (i + 4 > ilen || 0 == (str[1] & 0x80) || 0 == (str[2] & 0x80) || 0 == (str[3] & 0x80)) { ocount = (SQLULEN) -1; goto cleanup; } } if (ocount < bufcount) { wcode = (((((UInt4) *str) & byte4_m1) << 18) | ((((UInt4) str[1]) & byte4_m2) << 12) | ((((UInt4) str[2]) & byte4_m3) << 6)) | (((UInt4) str[3]) & byte4_m4); ucs4str[ocount] = wcode; } ocount++; i += 4; str += 4; } else if (0xe0 == (*str & 0xf0)) /* 3 byte code */ { if (errcheck) { if (i + 3 > ilen || 0 == (str[1] & 0x80) || 0 == (str[2] & 0x80)) { ocount = (SQLULEN) -1; goto cleanup; } } if (ocount < bufcount) { wcode = ((((UInt4) *str) & byte3_m1) << 12) | ((((UInt4) str[1]) & byte3_m2) << 6) | (((UInt4) str[2]) & byte3_m3); ucs4str[ocount] = wcode; } ocount++; i += 3; str += 3; } else if (0xc0 == (*str & 0xe0)) /* 2 byte code */ { if (errcheck) { if (i + 2 > ilen || 0 == (str[1] & 0x80)) { ocount = (SQLULEN) -1; goto cleanup; } } if (ocount < bufcount) { wcode = ((((UInt4) *str) & byte2_m1) << 6) | (((UInt4) str[1]) & byte2_m2); ucs4str[ocount] = (SQLWCHAR) wcode; } ocount++; i += 2; str += 2; } else { 
ocount = (SQLULEN) -1; goto cleanup; } } cleanup: rtn = ocount; if (ocount == (SQLULEN) -1) { if (!errcheck) rtn = 0; ocount = 0; } if (ocount < bufcount && ucs4str) ucs4str[ocount] = 0; MYLOG(0, " ocount=" FORMAT_ULEN "\n", ocount); return rtn; } #define SURROGATE_CHECK 0xfc #define SURROG1_BYTE 0xd8 #define SURROG2_BYTE 0xdc static int ucs4_to_ucs2_lf(const unsigned int *ucs4str, SQLLEN ilen, SQLWCHAR *ucs2str, int bufcount, BOOL lfconv) { int outlen = 0, i; UCHAR *ucdt; SQLWCHAR *sqlwdt, dmy_wchar; UCHAR * const udt = (UCHAR *) &dmy_wchar; unsigned int uintdt; MYLOG(0, " ilen=" FORMAT_LEN " bufcount=%d\n", ilen, bufcount); if (ilen < 0) ilen = ucs4strlen(ucs4str); for (i = 0; i < ilen && (uintdt = ucs4str[i]); i++) { sqlwdt = (SQLWCHAR *)&uintdt; ucdt = (UCHAR *)&uintdt; if (0 == sqlwdt[1]) { if (lfconv && PG_LINEFEED == ucdt[0] && (i == 0 || PG_CARRIAGE_RETURN != *((UCHAR *)&ucs4str[i - 1])) ) { if (outlen < bufcount) { udt[0] = PG_CARRIAGE_RETURN; udt[1] = 0; ucs2str[outlen] = *((SQLWCHAR *) udt); } outlen++; } if (outlen < bufcount) ucs2str[outlen] = sqlwdt[0]; outlen++; continue; } sqlwdt[1]--; udt[0] = ((0xfc & ucdt[1]) >> 2) | ((0x3 & ucdt[2]) << 6); udt[1] = SURROG1_BYTE | ((0xc & ucdt[2]) >> 2); if (outlen < bufcount) ucs2str[outlen] = *((SQLWCHAR *)udt); outlen++; udt[0] = ucdt[0]; udt[1] = SURROG2_BYTE | (0x3 & ucdt[1]); if (outlen < bufcount) ucs2str[outlen] = *((SQLWCHAR *)udt); outlen++; } if (outlen < bufcount) ucs2str[outlen] = 0; return outlen; } static int ucs2_to_ucs4(const SQLWCHAR *ucs2str, SQLLEN ilen, unsigned int *ucs4str, int bufcount) { int outlen = 0, i; UCHAR *ucdt; SQLWCHAR sqlwdt; unsigned int dmy_uint; UCHAR * const udt = (UCHAR *) &dmy_uint; MYLOG(0, " ilen=" FORMAT_LEN " bufcount=%d\n", ilen, bufcount); if (ilen < 0) ilen = ucs2strlen(ucs2str); udt[3] = 0; /* always */ for (i = 0; i < ilen && (sqlwdt = ucs2str[i]); i++) { ucdt = (UCHAR *)(ucs2str + i); if ((ucdt[1] & SURROGATE_CHECK) != SURROG1_BYTE) { if (outlen < bufcount) { 
udt[0] = ucdt[0]; udt[1] = ucdt[1]; udt[2] = 0; ucs4str[outlen] = *((unsigned int *)udt); } outlen++; continue; } /* surrogate pair */ udt[0] = ucdt[2]; udt[1] = (ucdt[3] & 0x3) | ((ucdt[0] & 0x3f) << 2); udt[2] = (((ucdt[0] & 0xc0) >> 6) | ((ucdt[1] & 0x3) << 2)) + 1; // udt[3] = 0; needless if (outlen < bufcount) ucs4str[outlen] = *((unsigned int *)udt); outlen++; i++; } if (outlen < bufcount) ucs4str[outlen] = 0; return outlen; } #endif /* __WCS_ISO10646__ */ #if defined(__WCS_ISO10646__) static SQLULEN utf8_to_wcs_lf(const char *utf8str, SQLLEN ilen, BOOL lfconv, wchar_t *wcsstr, SQLULEN bufcount, BOOL errcheck) { switch (get_convtype()) { case WCSTYPE_UTF16_LE: return utf8_to_ucs2_lf(utf8str, ilen, lfconv, (SQLWCHAR *) wcsstr, bufcount, errcheck); case WCSTYPE_UTF32_LE: return utf8_to_ucs4_lf(utf8str, ilen, lfconv, (UInt4 *) wcsstr, bufcount, errcheck); } return -1; } static char *wcs_to_utf8(const wchar_t *wcsstr, SQLLEN ilen, SQLLEN *olen, BOOL lower_identifier) { switch (get_convtype()) { case WCSTYPE_UTF16_LE: return ucs2_to_utf8((const SQLWCHAR *) wcsstr, ilen, olen, lower_identifier); case WCSTYPE_UTF32_LE: return ucs4_to_utf8((const UInt4 *) wcsstr, ilen, olen, lower_identifier); } return NULL; } /* * Input strings must be NULL terminated. * Output wide character strings would be NULL terminated. The result * outmsg would be truncated when the buflen is small. * * The output NULL terminator is counted as buflen. * if outmsg is NULL or buflen is 0, only output length is returned. * As for return values, NULL terminators aren't counted. 
*/
/*
 * Implementation notes for msgtowstr (current locale => wchar_t):
 *	- WIN32 converts with MultiByteToWideChar(CP_ACP, ...); when the buffer
 *	  is too small the required length is obtained by a second, sizing call.
 *	- Elsewhere mbstowcs() is used.
 *	- Returns -1 when the conversion itself fails.
 *	- The truncation fix-up only runs for buflen > 0.  Previously a
 *	  non-NULL outmsg with buflen == 0 on WIN32 made it store the
 *	  terminator at outmsg[-1].
 */
static int msgtowstr(const char *inmsg, wchar_t *outmsg, int buflen)
{
	int	outlen = -1;

	MYLOG(0, " inmsg=%p buflen=%d\n", inmsg, buflen);
#ifdef WIN32
	if (NULL == outmsg)
		buflen = 0;
	/* on success the API's count includes the terminator: drop it */
	if ((outlen = MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED | MB_ERR_INVALID_CHARS,
					  inmsg, -1, outmsg, buflen)) > 0)
		outlen--;
	else if (ERROR_INSUFFICIENT_BUFFER == GetLastError())
		outlen = MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED | MB_ERR_INVALID_CHARS,
					     inmsg, -1, NULL, 0) - 1;
	else
		outlen = -1;
#else
	if (0 == buflen)
		outmsg = NULL;
	outlen = mbstowcs((wchar_t *) outmsg, inmsg, buflen);
#endif /* WIN32 */
	if (outmsg && buflen > 0 && outlen >= buflen)
	{
		outmsg[buflen - 1] = 0;
		MYLOG(0, " out=%dchars truncated to %d\n", outlen, buflen - 1);
	}
	MYLOG(0, " buf=%dchars out=%dchars\n", buflen, outlen);

	return outlen;
}

/*
 * Input wide character strings must be NULL terminated.
 * Output strings would be NULL terminated. The result outmsg would be
 * truncated when the buflen is small.
 *
 * The output NULL terminator is counted as buflen.
 * if outmsg is NULL or buflen is 0, only output length is returned.
 * As for return values, NULL terminators aren't counted.
*/ static int wstrtomsg(const wchar_t *wstr, char *outmsg, int buflen) { int outlen = -1; MYLOG(0, " wstr=%p buflen=%d\n", wstr, buflen); #ifdef WIN32 if (NULL == outmsg) buflen = 0; if ((outlen = WideCharToMultiByte(CP_ACP, 0, wstr, -1, outmsg, buflen, NULL, NULL)) > 0) outlen--; else if (ERROR_INSUFFICIENT_BUFFER == GetLastError()) outlen = WideCharToMultiByte(CP_ACP, 0, wstr, -1, NULL, 0, NULL, NULL) - 1; else outlen = -1; #else if (0 == buflen) outmsg = NULL; outlen = wcstombs(outmsg, wstr, buflen); #endif /* WIN32 */ if (outmsg && outlen >= buflen) { outmsg[buflen - 1] = 0; MYLOG(0, " out=%dbytes truncated to %d\n", outlen, buflen - 1); } MYLOG(0, " buf=%dbytes outlen=%dbytes\n", buflen, outlen); return outlen; } #endif /* __WCS_ISO10646__ */ #if defined(__CHAR16_UTF_16__) static mbstate_t initial_state; static SQLLEN mbstoc16_lf(char16_t *c16dt, const char *c8dt, size_t n, BOOL lf_conv) { int i; size_t brtn; const char *cdt; mbstate_t mbst = initial_state; MYLOG(0, " c16dt=%p size=%lu\n", c16dt, n); for (i = 0, cdt = c8dt; i < n || (!c16dt); i++) { if (lf_conv && PG_LINEFEED == *cdt && i > 0 && PG_CARRIAGE_RETURN != cdt[-1]) { if (c16dt) c16dt[i] = PG_CARRIAGE_RETURN; i++; } brtn = mbrtoc16(c16dt ? 
c16dt + i : NULL, cdt, 4, &mbst); if (0 == brtn) break; if (brtn == (size_t) -1 || brtn == (size_t) -2) return -1; if (brtn == (size_t) -3) continue; cdt += brtn; } if (c16dt && i >= n) c16dt[n - 1] = 0; return i; } static SQLLEN c16tombs(char *c8dt, const char16_t *c16dt, size_t n) { int i; SQLLEN result = 0; size_t brtn; char *cdt, c4byte[4]; mbstate_t mbst = initial_state; MYLOG(0, " c8dt=%p size=%lu\n", c8dt, n); if (!c8dt) n = 0; for (i = 0, cdt = c8dt; c16dt[i] && (result < n || (!cdt)); i++) { if (NULL != cdt && result + 4 < n) brtn = c16rtomb(cdt, c16dt[i], &mbst); else { brtn = c16rtomb(c4byte, c16dt[i], &mbst); if (brtn < 5) { SQLLEN result_n = result + brtn; if (result_n < n) memcpy(cdt, c4byte, brtn); else { if (cdt && n > 0) { c8dt[result] = '\0'; /* truncate */ return result_n; } } } } if (brtn == (size_t) -1) { if (n > 0) c8dt[n - 1] = '\0'; return -1; } if (cdt) cdt += brtn; result += brtn; } if (cdt) *cdt = '\0'; return result; } #endif /* __CHAR16_UTF_16__ */ // // SQLBindParameter SQL_C_CHAR to UTF-8 case // the current locale => UTF-8 // SQLLEN bindpara_msg_to_utf8(const char *ldt, char **wcsbuf, SQLLEN used) { SQLLEN l = (-2); char *utf8 = NULL, *ldt_nts, *alloc_nts = NULL, ntsbuf[128]; int count; if (SQL_NTS == used) { count = strlen(ldt); ldt_nts = (char *) ldt; } else if (used < 0) { return -1; } else { count = used; if (used < sizeof(ntsbuf)) ldt_nts = ntsbuf; else { if (NULL == (alloc_nts = malloc(used + 1))) return l; ldt_nts = alloc_nts; } memcpy(ldt_nts, ldt, used); ldt_nts[used] = '\0'; } get_convtype(); MYLOG(0, " \n"); #if defined(__WCS_ISO10646__) if (use_wcs) { wchar_t *wcsdt = (wchar_t *) malloc((count + 1) * sizeof(wchar_t)); if ((l = msgtowstr(ldt_nts, (wchar_t *) wcsdt, count + 1)) >= 0) utf8 = wcs_to_utf8(wcsdt, -1, &l, FALSE); free(wcsdt); } #endif /* __WCS_ISO10646__ */ #ifdef __CHAR16_UTF_16__ if (use_c16) { SQLWCHAR *utf16 = (SQLWCHAR *) malloc((count + 1) * sizeof(SQLWCHAR)); if ((l = mbstoc16_lf((char16_t *) utf16, 
ldt_nts, count + 1, FALSE)) >= 0) utf8 = ucs2_to_utf8(utf16, -1, &l, FALSE); free(utf16); } #endif /* __CHAR16_UTF_16__ */ if (l < 0 && NULL != utf8) free(utf8); else *wcsbuf = (char *) utf8; if (NULL != alloc_nts) free(alloc_nts); return l; } // // SQLBindParameter hybrid case // SQLWCHAR(UTF-16) => the current locale // SQLLEN bindpara_wchar_to_msg(const SQLWCHAR *utf16, char **wcsbuf, SQLLEN used) { SQLLEN l = (-2); char *ldt = NULL; SQLWCHAR *utf16_nts, *alloc_nts = NULL, ntsbuf[128]; int count; if (SQL_NTS == used) { count = ucs2strlen(utf16); utf16_nts = (SQLWCHAR *) utf16; } else if (used < 0) return -1; else { count = used / WCLEN; if (used + WCLEN <= sizeof(ntsbuf)) utf16_nts = ntsbuf; else { if (NULL == (alloc_nts = (SQLWCHAR *) malloc(used + WCLEN))) return l; utf16_nts = alloc_nts; } memcpy(utf16_nts, utf16, used); utf16_nts[count] = 0; } get_convtype(); MYLOG(0, "\n"); #if defined(__WCS_ISO10646__) if (use_wcs) { if (sizeof(SQLWCHAR) == sizeof(wchar_t)) { ldt = (char *) malloc(2 * count + 1); l = wstrtomsg((wchar_t *) utf16_nts, ldt, 2 * count + 1); } else { unsigned int *utf32 = (unsigned int *) malloc((count + 1) * sizeof(unsigned int)); l = ucs2_to_ucs4(utf16_nts, -1, utf32, count + 1); if ((l = wstrtomsg((wchar_t *)utf32, NULL, 0)) >= 0) { ldt = (char *) malloc(l + 1); l = wstrtomsg((wchar_t *)utf32, ldt, l + 1); } free(utf32); } } #endif /* __WCS_ISO10646__ */ #ifdef __CHAR16_UTF_16__ if (use_c16) { ldt = (char *) malloc(4 * count + 1); l = c16tombs(ldt, (const char16_t *) utf16_nts, 4 * count + 1); } #endif /* __CHAR16_UTF_16__ */ if (l < 0 && NULL != ldt) free(ldt); else *wcsbuf = ldt; if (NULL != alloc_nts) free(alloc_nts); return l; } size_t convert_linefeeds(const char *s, char *dst, size_t max, BOOL convlf, BOOL *changed); // // SQLBindCol hybrid case // the current locale => SQLWCHAR(UTF-16) // SQLLEN bindcol_hybrid_estimate(const char *ldt, BOOL lf_conv, char **wcsbuf) { SQLLEN l = (-2); get_convtype(); MYLOG(0, " lf_conv=%d\n", lf_conv); 
#if defined(__WCS_ISO10646__) if (use_wcs) { unsigned int *utf32 = NULL; if (sizeof(SQLWCHAR) == sizeof(wchar_t)) { l = msgtowstr(ldt, (wchar_t *) NULL, 0); if (l >= 0 && lf_conv) { BOOL changed; size_t len; len = convert_linefeeds(ldt, NULL, 0, TRUE, &changed); if (changed) { l += (len - strlen(ldt)); *wcsbuf = (char *) malloc(len + 1); convert_linefeeds(ldt, *wcsbuf, len + 1, TRUE, NULL); } } } else { int count = strlen(ldt); utf32 = (unsigned int *) malloc((count + 1) * sizeof(unsigned int)); if ((l = msgtowstr(ldt, (wchar_t *) utf32, count + 1)) >= 0) { l = ucs4_to_ucs2_lf(utf32, -1, NULL, 0, lf_conv); *wcsbuf = (char *) utf32; } } if (l < 0 && NULL != utf32) free(utf32); } #endif /* __WCS_ISO10646__ */ #ifdef __CHAR16_UTF_16__ if (use_c16) l = mbstoc16_lf((char16_t *) NULL, ldt, 0, lf_conv); #endif /* __CHAR16_UTF_16__ */ return l; } SQLLEN bindcol_hybrid_exec(SQLWCHAR *utf16, const char *ldt, size_t n, BOOL lf_conv, char **wcsbuf) { SQLLEN l = (-2); get_convtype(); MYLOG(0, " size=" FORMAT_SIZE_T " lf_conv=%d\n", n, lf_conv); #if defined(__WCS_ISO10646__) if (use_wcs) { unsigned int *utf32 = NULL; BOOL midbuf = (wcsbuf && *wcsbuf); if (sizeof(SQLWCHAR) == sizeof(wchar_t)) { if (midbuf) l = msgtowstr(*wcsbuf, (wchar_t *) utf16, n); else l = msgtowstr(ldt, (wchar_t *) utf16, n); } else if (midbuf) { utf32 = (unsigned int *) *wcsbuf; l = ucs4_to_ucs2_lf(utf32, -1, utf16, n, lf_conv); } else { int count = strlen(ldt); utf32 = (unsigned int *) malloc((count + 1) * sizeof(unsigned int)); if ((l = msgtowstr(ldt, (wchar_t *) utf32, count + 1)) >= 0) { l = ucs4_to_ucs2_lf(utf32, -1, utf16, n, lf_conv); } free(utf32); } if (midbuf) { free(*wcsbuf); *wcsbuf = NULL; } } #endif /* __WCS_ISO10646__ */ #ifdef __CHAR16_UTF_16__ if (use_c16) { l = mbstoc16_lf((char16_t *) utf16, ldt, n, lf_conv); } #endif /* __CHAR16_UTF_16__ */ return l; } SQLLEN locale_to_sqlwchar(SQLWCHAR *utf16, const char *ldt, size_t n, BOOL lf_conv) { return bindcol_hybrid_exec(utf16, ldt, n, lf_conv, 
NULL); } // // SQLBindCol localize case // UTF-8 => the current locale // SQLLEN bindcol_localize_estimate(const char *utf8dt, BOOL lf_conv, char **wcsbuf) { SQLLEN l = (-2); char *convalc = NULL; get_convtype(); MYLOG(0, " lf_conv=%d\n", lf_conv); #if defined(__WCS_ISO10646__) if (use_wcs) { wchar_t *wcsalc = NULL; l = utf8_to_wcs_lf(utf8dt, -1, lf_conv, NULL, 0, FALSE); wcsalc = (wchar_t *) malloc(sizeof(wchar_t) * (l + 1)); convalc = (char *) wcsalc; l = utf8_to_wcs_lf(utf8dt, -1, lf_conv, wcsalc, l + 1, FALSE); l = wstrtomsg(wcsalc, NULL, 0); } #endif /* __WCS_ISO10646__ */ #ifdef __CHAR16_UTF_16__ if (use_c16) { SQLWCHAR *wcsalc = NULL; l = utf8_to_ucs2_lf(utf8dt, -1, lf_conv, (SQLWCHAR *) NULL, 0, FALSE); wcsalc = (SQLWCHAR *) malloc(sizeof(SQLWCHAR) * (l + 1)); convalc = (char *) wcsalc; l = utf8_to_ucs2_lf(utf8dt, -1, lf_conv, wcsalc, l + 1, FALSE); l = c16tombs(NULL, (char16_t *) wcsalc, 0); } #endif /* __CHAR16_UTF_16__ */ if (l < 0 && NULL != convalc) free(convalc); else if (NULL != convalc) *wcsbuf = (char *) convalc; MYLOG(0, " return=" FORMAT_LEN "\n", l); return l; } SQLLEN bindcol_localize_exec(char *ldt, size_t n, BOOL lf_conv, char **wcsbuf) { SQLLEN l = (-2); get_convtype(); MYLOG(0, " size=" FORMAT_SIZE_T "\n", n); #if defined(__WCS_ISO10646__) if (use_wcs) { wchar_t *wcsalc = (wchar_t *) *wcsbuf; l = wstrtomsg(wcsalc, ldt, n); } #endif /* __WCS_ISO10646__ */ #ifdef __CHAR16_UTF_16__ if (use_c16) { char16_t *wcsalc = (char16_t *) *wcsbuf; l = c16tombs(ldt, (char16_t *) wcsalc, n); } #endif /* __CHAR16_UTF_16__ */ free(*wcsbuf); *wcsbuf = NULL; MYLOG(0, " return=" FORMAT_LEN "\n", l); return l; } SQLLEN utf8_to_locale(char *ldt, const char *utf8dt, size_t n, BOOL lf_conv) { SQLLEN l; char * tmpbuf; if ((l = bindcol_localize_estimate(utf8dt, lf_conv, &tmpbuf)) >= 0) l = bindcol_localize_exec(ldt, n, lf_conv, &tmpbuf); return l; } #endif /* UNICODE_SUPPORT */