- 618 名前: << prefix_len - 1) - 1 << 1
{prefix: prefix, body: byte - (prefix << 8 - prefix_len)} end def utf8_codepoints(bytes) ary = bytes.map{|e| divide_prefix(e)} leading_bytes = ary.select{|dc| dc[:prefix] != 0x02} leading_bytes.map do |lb| utf8_char = ary.shift(lb[:prefix] == 0 ? 1 : lb[:prefix].bit_length - 1) raise 'invalid sequence' if utf8_char[1..-1]&.any?{|dc| dc[:prefix] != 0x02} utf8_char.reverse_each.with_index.map{|dc, i| dc[:body] << 6 * i}.sum end end [ '6F 64 61 69', 'E3 81 8A E9 A1 8C', 'C2 A9 F0 9F 8D 94 E9 A6 99 41' ].each{|str| puts utf8_codepoints(str.split.map(&:hex)).map{|e| 'U+%04X' % e}.join(' ')} # => U+006F U+0064 U+0061 U+0069 U+304A U+984C U+00A9 U+1F354 U+9999 U+0041 [] - [ここ壊れてます]
|

|