Not logged in.  Login/Logout/Register | List snippets | | Create snippet | Upload image | Upload data

91
LINES

< > TinyBrain | #1009203 - fastu_fromUtf8 - faster version of fromUtf8

JavaX fragment (include)

/**
 * Decodes a UTF-8 byte array into a string without going through a charset decoder.
 *
 * Lead bytes of the 1- to 6-byte forms are recognised; the payload bits of the
 * lead byte and of each following byte (low 6 bits) are concatenated into a
 * code point, which is then written as one or two UTF-16 chars.
 *
 * Malformed input handling:
 * - A byte that cannot start a sequence (a stray continuation byte 0x80-0xBF,
 *   or 0xFE / 0xFF) is replaced by U+FFFD and skipped.
 * - A lead byte whose sequence would run past the end of the array is replaced
 *   by U+FFFD and skipped; decoding resumes at the next byte.
 * - Continuation bytes are NOT checked for the 10xxxxxx pattern and overlong
 *   encodings are not rejected; only the low 6 bits of each are used.
 *
 * @param data the UTF-8 encoded bytes (must not be null)
 * @return the decoded string
 * @throws IllegalArgumentException if a sequence decodes to a value above
 *         Character.MAX_CODE_POINT (raised by fastu_fromUtf8_toChars)
 */
static S fastu_fromUtf8(byte[] data) {
  // Every output char consumes at least one input byte (a surrogate pair
  // consumes at least four), so data.length chars is always enough room.
  char[] chars = new char[data.length];
  int len = 0, offset = 0;
  while (offset < data.length) {
    int lead = data[offset];
    if (lead >= 0) {
      // 0xxxxxxx - it is an ASCII char, so copy it exactly as it is
      chars[len++] = (char) lead;
      offset++;
      continue;
    }

    // Classify the lead byte: how many bytes follow, and its payload bits.
    int extra, uc;
    if ((lead & 0xE0) == 0xC0) {        // 110xxxxx
      extra = 1; uc = lead & 0x1F;
    } else if ((lead & 0xF0) == 0xE0) { // 1110xxxx
      extra = 2; uc = lead & 0x0F;
    } else if ((lead & 0xF8) == 0xF0) { // 11110xxx
      extra = 3; uc = lead & 0x07;
    } else if ((lead & 0xFC) == 0xF8) { // 111110xx
      extra = 4; uc = lead & 0x03;
    } else if ((lead & 0xFE) == 0xFC) { // 1111110x
      extra = 5; uc = lead & 0x01;
    } else {                            // 10xxxxxx, 0xFE, 0xFF: not a lead byte
      extra = 0; uc = 0;
    }

    // Invalid lead byte, or not enough bytes left for the whole sequence:
    // emit U+FFFD and advance by one byte so the loop always makes progress
    // and never reads past the end of data.
    if (extra == 0 || data.length - offset <= extra) {
      chars[len++] = (char) 0xFFFD;
      offset++;
      continue;
    }

    offset++; // step over the lead byte
    for (int i = 0; i < extra; i++)
      uc = (uc << 6) | (data[offset++] & 0x3F);

    len = fastu_fromUtf8_toChars(uc, chars, len);
  }
  ret new S(chars, 0, len);
}

/**
 * Writes the UTF-16 encoding of a code point into dst starting at index.
 *
 * @param codePoint the code point to encode (0 .. Character.MAX_CODE_POINT)
 * @param dst       destination array
 * @param index     position in dst of the first char to write
 * @return the index just past the last char written (index + 1 for a single
 *         char, index + 2 for a surrogate pair)
 * @throws IllegalArgumentException if codePoint is negative or greater than
 *         Character.MAX_CODE_POINT
 */
static int fastu_fromUtf8_toChars(int codePoint, char[] dst, int index) {
  // Below the supplementary range: fits in a single char.
  if (codePoint >= 0 && codePoint < Character.MIN_SUPPLEMENTARY_CODE_POINT) {
    dst[index] = (char) codePoint;
    ret index + 1;
  }

  if (codePoint < 0 || codePoint > Character.MAX_CODE_POINT)
    throw new IllegalArgumentException();

  // Supplementary code point: split the 20-bit value into a surrogate pair.
  int rel = codePoint - Character.MIN_SUPPLEMENTARY_CODE_POINT;
  char low = (char) (Character.MIN_LOW_SURROGATE + (rel & 0x3ff));
  char high = (char) (Character.MIN_HIGH_SURROGATE + (rel >>> 10));

  // The low surrogate is stored first: if dst has no room at index + 1 the
  // store fails before dst[index] has been touched.
  dst[index + 1] = low;
  dst[index] = high;
  ret index + 2;
}

download  show line numbers  debug dex   

Travelled to 2 computer(s): cfunsshuasjs, tvejysmllsmz

No comments. add comment

Snippet ID: #1009203
Snippet name: fastu_fromUtf8 - faster version of fromUtf8
Eternal ID of this version: #1009203/3
Text MD5: b6d70558eee40237c1b8d47ed1182d8f
Author: stefan
Category: javax
Type: JavaX fragment (include)
Public (visible to everyone): Yes
Archived (hidden from active list): No
Created/modified: 2017-07-16 16:20:56
Source code size: 2781 bytes / 91 lines
Pitched / IR pitched: No / No
Views / Downloads: 16 / 17
Version history: 2 change(s)
Referenced in: [show]