Blog
网站首页
Unicode Scalar value转utf-8编码
Unicode Scalar value转utf-8编码
2023-03-30 21:08
2023-03-30 21:08
作者:
xmh0511
提交
/// Encodes a code point as a UTF-8 style byte sequence.
///
/// The value is split into 6-bit groups. The lowest groups become
/// continuation bytes (`0b10xx_xxxx`) and the remaining high bits are stored
/// in a lead byte whose prefix announces the total length:
///
/// | range                        | bytes | lead byte prefix |
/// |------------------------------|-------|------------------|
/// | `0x0..=0x7F`                 | 1     | `0xxx_xxxx`      |
/// | `0x80..=0x7FF`               | 2     | `110x_xxxx`      |
/// | `0x800..=0xFFFF`             | 3     | `1110_xxxx`      |
/// | `0x1_0000..=0x1F_FFFF`       | 4     | `1111_0xxx`      |
/// | `0x20_0000..=0x3FF_FFFF`     | 5     | `1111_10xx`      |
/// | `0x400_0000..=0x7FFF_FFFF`   | 6     | `1111_110x`      |
///
/// Note that this function applies the bit pattern mechanically: surrogate
/// values (`0xD800..=0xDFFF`) and values above `0x10_FFFF` are still encoded,
/// but they are not Unicode scalar values, so the resulting bytes are not
/// valid UTF-8 and `std::str::from_utf8` will reject them.
///
/// # Panics
///
/// Panics if `code` is greater than `0x7FFF_FFFF`, which does not fit even
/// the 6-byte form.
fn unicode_to_utf8(code: u32) -> Vec<u8> {
    // For each range: how many continuation bytes follow the lead byte, and
    // the marker bits that the lead byte must carry.
    let (continuation_count, lead_marker): (u32, u8) = match code {
        // ASCII is stored verbatim; the range guarantees the cast is lossless.
        0..=0x7F => return vec![code as u8],
        0x80..=0x7FF => (1, 0b1100_0000),
        0x800..=0xFFFF => (2, 0b1110_0000),
        0x1_0000..=0x1F_FFFF => (3, 0b1111_0000),
        0x20_0000..=0x3FF_FFFF => (4, 0b1111_1000),
        0x400_0000..=0x7FFF_FFFF => (5, 0b1111_1100),
        _ => panic!("cannot be represented in unicode scalar values"),
    };

    let mut bytes = Vec::with_capacity(continuation_count as usize + 1);

    // Lead byte: whatever is left above the continuation payload. The range
    // matched above bounds these bits so they never collide with the marker
    // and the cast cannot truncate.
    bytes.push(lead_marker | (code >> (6 * continuation_count)) as u8);

    // Continuation bytes, most significant 6-bit group first.
    for group in (0..continuation_count).rev() {
        let payload = ((code >> (6 * group)) & 0b0011_1111) as u8;
        bytes.push(0b1000_0000 | payload);
    }

    bytes
}

fn main() {
    // U+1300 needs three bytes; prints `[E1, 8C, 80]`.
    let r = unicode_to_utf8(0x1300);
    println!("{r:X?}");
}