星期二, 5月 25, 2021

Unicode escape (\uXXXX) to UTF-8

/*
 * Map one ASCII hexadecimal digit to its value.
 *
 * Returns 0..15 for '0'-'9', 'A'-'F' and 'a'-'f', and -1 for anything else.
 * Both ends of every range are tested: a one-sided check such as
 * (c - '0' < 10) on a signed int is also true for every character
 * below '0'.
 */
static int hex_nibble(int c)
{
    if (c >= '0' && c <= '9')
        return c - '0';
    if (c >= 'A' && c <= 'F')
        return c - 'A' + 0x0A;
    if (c >= 'a' && c <= 'f')
        return c - 'a' + 0x0A;
    return -1;
}

/*
 * Combine two ASCII hexadecimal digits into one byte value.
 *
 * h: digit for the high nibble.
 * l: digit for the low nibble.
 *
 * Returns the value 0x00..0xFF, or -1 when either argument is not a
 * hexadecimal digit.
 */
int get_uint8(int h, int l)
{
    int hi = hex_nibble(h);
    int lo = hex_nibble(l);

    /* Reject before shifting so a negative value is never left-shifted. */
    if (hi < 0 || lo < 0)
        return -1;

    return (hi << 4) | lo;
}


AnsiString UniToAnsi(char *s)
{
    wchar_t *ws=new wchar_t(strlen(s));
    int c=0;
    while (*s !='\0' )
    {
        if (*s == '\\')
        {
            if (strlen(s) > 5)
            {
                if (*(s + 1) == 'u')
                {
                    unsigned int v = get_uint8(*(s + 2), *(s + 3)) << 8;
                    v |= get_uint8(*(s + 4), *(s + 5));

                    s += 6;
                    ws[c]=wchar_t(v);
                    c++;
                    continue;
                }
            }
        }
        s++;
    }
    ws[c]='\0';
    AnsiString ret=UTF8Decode(UTF8Encode(ws));
    delete ws;
    return ret;
}


Input: char *str="\u82b3TEST\u8349\u827e\u723e\u7cbe\u91c0\u5564\u9152";
// Pass the text held in Edit1 through UniToAnsi and assign the result to Edit2's Text.
Edit2->Text=UniToAnsi(Edit1->Text.c_str());