resolution for bug 1010313

resolution for bug 1010313

am 29.07.2008 09:36:26 von Marcin Ligorowski

I've resolved bug number 1010313 (it was necessary for me because I use the
psqlodbc driver on Sun Solaris SPARC with UTF-8 characters).

To resolve this bug I've added is_big_endian(), which determines whether the
platform uses big-endian or little-endian byte order.
This function is used because there is no standard preprocessor define for
endianness that works across different compilers and operating systems.

Additionally, because I don't want to determine the platform endianness on
every call to the ucs2_to_utf8 function, I've used two static variables: the
first, big_endian, stores the endianness status, and the second,
is_endian_calc, records whether the endianness has already been determined.

And finally, in ucs2_to_utf8 I've added byte swapping for big-endian
platforms.

Below is the source code that I've implemented
(I've sent the complete source of the ucs2_to_utf8 function)

/*
 * Report the byte order of the machine this code is running on.
 *
 * Stores the value 1 in a long and inspects the byte at the highest
 * address of its object representation.  If that byte holds the 1, the
 * least significant byte is stored last, i.e. the platform is big-endian.
 *
 * Returns 1 on a big-endian platform, 0 otherwise.
 */
int is_big_endian(void)
{
    union { long l; char c[sizeof (long)]; } u;

    u.l = 1;
    return (u.c[sizeof (long) - 1] == 1);
}

/*
 * Convert a UCS-2 / UTF-16 string into a newly malloc'ed, NUL-terminated
 * UTF-8 string.
 *
 * ucs2str           input string.  If NULL, SQL_NULL_DATA is stored in
 *                   *olen (when olen is non-NULL) and NULL is returned.
 * ilen              number of SQLWCHAR units to convert, or SQL_NTS to have
 *                   the length determined with ucs2strlen().  Conversion
 *                   also stops at the first zero unit.
 * olen              if non-NULL, receives the number of bytes written, not
 *                   counting the terminating NUL.  Left untouched when the
 *                   allocation fails.
 * lower_identifier  if true, ASCII characters are passed through tolower().
 *
 * Returns the converted string (allocated with malloc, so the caller must
 * free it), or NULL when ucs2str is NULL or the allocation fails.
 *
 * Each multi-byte sequence is first packed into an integer whose least
 * significant byte is the first output byte.  On a little-endian machine
 * that integer can be copied out as-is; on a big-endian machine its bytes
 * are emitted in reverse order.
 */
char *ucs2_to_utf8(const SQLWCHAR *ucs2str, SQLLEN ilen, SQLLEN *olen, BOOL lower_identifier)
{
    char   *utf8str;
    /*mylog("ucs2_to_utf8 %p ilen=%d ", ucs2str, ilen);*/

    /*
     * Is this a big-endian machine?  Determined on the first call and
     * cached in the statics below.
     */
    static int is_endian_calc = 0;
    static int big_endian = 0;

    if (!is_endian_calc)
    {
        big_endian = is_big_endian();
        is_endian_calc = 1;
    }

    if (!ucs2str)
    {
        /* olen is optional (see the end of the function), so guard it here too */
        if (olen)
            *olen = SQL_NULL_DATA;
        return NULL;
    }
    if (SQL_NTS == ilen)
        ilen = ucs2strlen(ucs2str);
    /*mylog(" newlen=%d", ilen);*/

    /* at most 4 output bytes per input unit, plus the terminating NUL */
    utf8str = (char *) malloc(ilen * 4 + 1);
    if (utf8str)
    {
        int     i, len = 0;
        union { UInt2 i; char c[sizeof (UInt2)]; } byte2code;
        union { Int4 i; char c[sizeof (Int4)]; } byte4code, surrd1, surrd2;
        const SQLWCHAR *wstr;

        for (i = 0, wstr = ucs2str; i < ilen; i++, wstr++)
        {
            if (!*wstr)
                break;
            else if (0 == (*wstr & 0xffffff80)) /* ASCII */
            {
                if (lower_identifier)
                    utf8str[len++] = (char) tolower(*wstr);
                else
                    utf8str[len++] = (char) *wstr;
            }
            else if ((*wstr & byte3check) == 0)
            {
                /* 2-byte sequence */
                byte2code.i = byte2_base |
                    ((byte2_mask1 & *wstr) >> 6) |
                    ((byte2_mask2 & *wstr) << 8);
                if (big_endian)
                {
                    utf8str[len] = byte2code.c[1];
                    utf8str[len + 1] = byte2code.c[0];
                }
                else
                {
                    memcpy(utf8str + len, (char *) &byte2code.i, sizeof(byte2code.i));
                }
                len += sizeof(byte2code.i);
            }
            /* surrogate pair check for non ucs-2 code */
            else if (surrog1_bits == (*wstr & surrog_check))
            {
                /* 4-byte sequence built from this unit and the one after it */
                surrd1.i = (*wstr & ~surrog_check) + surrogate_adjust;
                wstr++;
                i++;
                surrd2.i = (*wstr & ~surrog_check);
                byte4code.i = byte4_base |
                    ((byte4_sr1_mask1 & surrd1.i) >> 8) |
                    ((byte4_sr1_mask2 & surrd1.i) << 6) |
                    ((byte4_sr1_mask3 & surrd1.i) << 20) |
                    ((byte4_sr2_mask1 & surrd2.i) << 10) |
                    ((byte4_sr2_mask2 & surrd2.i) << 24);
                if (big_endian)
                {
                    /* must read the 4-byte union, not the 2-byte byte2code */
                    utf8str[len] = byte4code.c[3];
                    utf8str[len + 1] = byte4code.c[2];
                    utf8str[len + 2] = byte4code.c[1];
                    utf8str[len + 3] = byte4code.c[0];
                }
                else
                {
                    memcpy(utf8str + len, (char *) &byte4code.i, sizeof(byte4code.i));
                }
                len += sizeof(byte4code.i);
            }
            else
            {
                /* 3-byte sequence, held in the low three bytes of byte4code */
                byte4code.i = byte3_base |
                    ((byte3_mask1 & *wstr) >> 12) |
                    ((byte3_mask2 & *wstr) << 2) |
                    ((byte3_mask3 & *wstr) << 16);
                if (big_endian)
                {
                    /* must read the 4-byte union, not the 2-byte byte2code */
                    utf8str[len] = byte4code.c[3];
                    utf8str[len + 1] = byte4code.c[2];
                    utf8str[len + 2] = byte4code.c[1];
                }
                else
                {
                    memcpy(utf8str + len, (char *) &byte4code.i, 3);
                }
                len += 3;
            }
        }
        utf8str[len] = '\0';
        if (olen)
            *olen = len;
    }
    /*mylog(" olen=%d %s\n", *olen, utf8str ? utf8str : "");*/
    return utf8str;
}

------------------------------------------------------------ ----------
W kosciele tez zdarzaja sie wpadki!
Smieszny filmik >>> http://link.interia.pl/f1e61


--
Sent via pgsql-odbc mailing list (pgsql-odbc@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-odbc

Re: resolution for bug 1010313

am 08.08.2008 23:03:14 von Adam M

On Tue, Jul 29, 2008 at 2:36 AM, Marcin Ligorowski wrote:

> Below is source code that I've implemented
> (I've send all ucs2_to_utf8 function source code)
>

Is it possible to attach your changes as a patch? In-line posting of
some code changes (and in non-patch format) really is not an ideal way
to post changes.

Thanks,
Adam

--
Sent via pgsql-odbc mailing list (pgsql-odbc@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-odbc

Re: resolution for bug 1010313

am 19.08.2008 20:35:36 von Marcin Ligorowski

Below are the changes in patch format, obtained using the diff utility.

Marcin

--- win_unicode.c.org Sat Sep 1 01:40:10 2007
+++ win_unicode.c Fri Jul 25 12:52:06 2008
@@ -54,6 +54,13 @@

+int is_big_endian()
+{
+ union { long l; char c[sizeof (long)]; } u;
+ u.l = 1;
+ return (u.c[sizeof (long) - 1] == 1);
+}
+
SQLULEN ucs2strlen(const SQLWCHAR *ucs2str)
{
SQLULEN len;
@@ -66,6 +73,17 @@
char * utf8str;

+ static int is_endian_calc = 0;
+ static int big_endian = 0;
+ if(!is_endian_calc)
+ {
+ big_endian = is_big_endian();
+ is_endian_calc = 1;
+ }
+
if (!ucs2str)
{
*olen = SQL_NULL_DATA;
@@ -78,8 +96,8 @@
if (utf8str)
{
int i, len = 0;
- UInt2 byte2code;
- Int4 byte4code, surrd1, surrd2;
+ union { UInt2 i; char c[sizeof (UInt2)]; } byte2code;
+ union { Int4 i; char c[sizeof (Int4)]; } byte4code, surrd1, surrd2;
const SQLWCHAR *wstr;

for (i = 0, wstr = ucs2str; i < ilen; i++, wstr++)
@@ -95,35 +113,62 @@
}
else if ((*wstr & byte3check) == 0)
{
- byte2code = byte2_base |
+ byte2code.i = byte2_base |
((byte2_mask1 & *wstr) >> 6) |
((byte2_mask2 & *wstr) << 8);
- memcpy(utf8str + len, (char *) &byte2code, sizeof(byte2code));
- len += sizeof(byte2code);
+ if(big_endian)
+ {
+ memcpy(utf8str + len, (char *) &byte2code.c[1], 1);
+ memcpy(utf8str + len + 1, (char *) &byte2code.c[0], 1);
}
+ else
+ {
+ memcpy(utf8str + len, (char *) &byte2code.i,
sizeof(byte2code.i));
+ }
+ len += sizeof(byte2code.i);
+ }
else if (surrog1_bits == (*wstr & surrog_check))
{
- surrd1 = (*wstr & ~surrog_check) + surrogate_adjust;
+ surrd1.i = (*wstr & ~surrog_check) + surrogate_adjust;
wstr++;
i++;
- surrd2 = (*wstr & ~surrog_check);
- byte4code = byte4_base |
- ((byte4_sr1_mask1 & surrd1) >> 8) |
- ((byte4_sr1_mask2 & surrd1) << 6) |
- ((byte4_sr1_mask3 & surrd1) << 20) |
- ((byte4_sr2_mask1 & surrd2) << 10) |
- ((byte4_sr2_mask2 & surrd2) << 24);
- memcpy(utf8str + len, (char *) &byte4code, sizeof(byte4code));
- len += sizeof(byte4code);
+ surrd2.i = (*wstr & ~surrog_check);
+ byte4code.i = byte4_base |
+ ((byte4_sr1_mask1 & surrd1.i) >> 8) |
+ ((byte4_sr1_mask2 & surrd1.i) << 6) |
+ ((byte4_sr1_mask3 & surrd1.i) << 20) |
+ ((byte4_sr2_mask1 & surrd2.i) << 10) |
+ ((byte4_sr2_mask2 & surrd2.i) << 24);
+ if(big_endian)
+ {
+ memcpy(utf8str + len, (char *) &byte2code.c[3], 1);
+ memcpy(utf8str + len + 1, (char *) &byte2code.c[2], 1);
+ memcpy(utf8str + len + 2, (char *) &byte2code.c[1], 1);
+ memcpy(utf8str + len + 3, (char *) &byte2code.c[0], 1);
}
else
{
- byte4code = byte3_base |
+ memcpy(utf8str + len, (char *) &byte4code.i,
sizeof(byte4code.i));
+ }
+ len += sizeof(byte4code.i);
+ }
+ else
+ {
+ byte4code.i = byte3_base |
((byte3_mask1 & *wstr) >> 12) |
((byte3_mask2 & *wstr) << 2) |
((byte3_mask3 & *wstr) << 16);
- memcpy(utf8str + len, (char *) &byte4code, 3);
+ if(big_endian)
+ {
+ memcpy(utf8str + len, (char *) &byte2code.c[3], 1);
+ memcpy(utf8str + len + 1, (char *) &byte2code.c[2], 1);
+ memcpy(utf8str + len + 2, (char *) &byte2code.c[1], 1);
+ }
+ else
+ {
+ memcpy(utf8str + len, (char *) &byte4code.i, 3);
+ }
len += 3;
}
}

Adam M pisze:
> On Tue, Jul 29, 2008 at 2:36 AM, Marcin Ligorowski wrote:
>
>> Below is source code that I've implemented
>> (I've send all ucs2_to_utf8 function source code)
>>
>
> Is it possible to attach your changes as a patch? In-line posting of
> some code changes (and in non-patch format) really is not an ideal way
> to post changes.
>
> Thanks,
> Adam
>

------------------------------------------------------------ ----------
Igrzyska z nagrodami! Kliknij>>>> http://link.interia.pl/f1edb




--
Sent via pgsql-odbc mailing list (pgsql-odbc@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-odbc