gecko-dev/servo/components/util/str.rs

360 строки
9.8 KiB
Rust

/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
use cssparser::{self, Color, RGBA};
use geometry::Au;
use libc::c_char;
use num_lib::ToPrimitive;
use std::ascii::AsciiExt;
use std::borrow::ToOwned;
use std::ffi::CStr;
use std::iter::Filter;
use std::ops::Deref;
use std::str::{FromStr, Split, from_utf8};
pub type DOMString = String;
pub type StaticCharVec = &'static [char];
pub type StaticStringVec = &'static [&'static str];
/// Whitespace as defined by HTML5 § 2.4.1.
// TODO(SimonSapin) Maybe a custom Pattern can be more efficient?
const WHITESPACE: &'static [char] = &[' ', '\t', '\x0a', '\x0c', '\x0d'];
pub fn is_whitespace(s: &str) -> bool {
s.chars().all(char_is_whitespace)
}
#[inline]
pub fn char_is_whitespace(c: char) -> bool {
WHITESPACE.contains(&c)
}
/// A "space character" according to:
///
/// https://html.spec.whatwg.org/multipage/#space-character
pub static HTML_SPACE_CHARACTERS: StaticCharVec = &[
'\u{0020}',
'\u{0009}',
'\u{000a}',
'\u{000c}',
'\u{000d}',
];
pub fn split_html_space_chars<'a>(s: &'a str) ->
Filter<Split<'a, StaticCharVec>, fn(&&str) -> bool> {
fn not_empty(&split: &&str) -> bool { !split.is_empty() }
s.split(HTML_SPACE_CHARACTERS).filter(not_empty as fn(&&str) -> bool)
}
/// Shared implementation to parse an integer according to
/// <https://html.spec.whatwg.org/#rules-for-parsing-integers> or
/// <https://html.spec.whatwg.org/#rules-for-parsing-non-negative-integers>
fn do_parse_integer<T: Iterator<Item=char>>(input: T) -> Option<i64> {
fn is_ascii_digit(c: &char) -> bool {
match *c {
'0'...'9' => true,
_ => false,
}
}
let mut input = input.skip_while(|c| {
HTML_SPACE_CHARACTERS.iter().any(|s| s == c)
}).peekable();
let sign = match input.peek() {
None => return None,
Some(&'-') => {
input.next();
-1
},
Some(&'+') => {
input.next();
1
},
Some(_) => 1,
};
match input.peek() {
Some(c) if is_ascii_digit(c) => (),
_ => return None,
}
let value = input.take_while(is_ascii_digit).map(|d| {
d as i64 - '0' as i64
}).fold(Some(0i64), |accumulator, d| {
accumulator.and_then(|accumulator| {
accumulator.checked_mul(10)
}).and_then(|accumulator| {
accumulator.checked_add(d)
})
});
return value.and_then(|value| value.checked_mul(sign));
}
/// Parse an integer according to
/// <https://html.spec.whatwg.org/#rules-for-parsing-integers>.
pub fn parse_integer<T: Iterator<Item=char>>(input: T) -> Option<i32> {
do_parse_integer(input).and_then(|result| {
result.to_i32()
})
}
/// Parse an integer according to
/// <https://html.spec.whatwg.org/#rules-for-parsing-non-negative-integers>
pub fn parse_unsigned_integer<T: Iterator<Item=char>>(input: T) -> Option<u32> {
do_parse_integer(input).and_then(|result| {
result.to_u32()
})
}
#[derive(Copy, Clone, Debug)]
pub enum LengthOrPercentageOrAuto {
Auto,
Percentage(f32),
Length(Au),
}
/// Parses a length per HTML5 § 2.4.4.4. If unparseable, `Auto` is returned.
pub fn parse_length(mut value: &str) -> LengthOrPercentageOrAuto {
value = value.trim_left_matches(WHITESPACE);
if value.is_empty() {
return LengthOrPercentageOrAuto::Auto
}
if value.starts_with("+") {
value = &value[1..]
}
value = value.trim_left_matches('0');
if value.is_empty() {
return LengthOrPercentageOrAuto::Auto
}
let mut end_index = value.len();
let (mut found_full_stop, mut found_percent) = (false, false);
for (i, ch) in value.chars().enumerate() {
match ch {
'0'...'9' => continue,
'%' => {
found_percent = true;
end_index = i;
break
}
'.' if !found_full_stop => {
found_full_stop = true;
continue
}
_ => {
end_index = i;
break
}
}
}
value = &value[..end_index];
if found_percent {
let result: Result<f32, _> = FromStr::from_str(value);
match result {
Ok(number) => return LengthOrPercentageOrAuto::Percentage((number as f32) / 100.0),
Err(_) => return LengthOrPercentageOrAuto::Auto,
}
}
match FromStr::from_str(value) {
Ok(number) => LengthOrPercentageOrAuto::Length(Au::from_px(number)),
Err(_) => LengthOrPercentageOrAuto::Auto,
}
}
/// Parses a legacy color per HTML5 § 2.4.6. If unparseable, `Err` is returned.
pub fn parse_legacy_color(mut input: &str) -> Result<RGBA, ()> {
// Steps 1 and 2.
if input.is_empty() {
return Err(())
}
// Step 3.
input = input.trim_matches(WHITESPACE);
// Step 4.
if input.eq_ignore_ascii_case("transparent") {
return Err(())
}
// Step 5.
match cssparser::parse_color_keyword(input) {
Ok(Color::RGBA(rgba)) => return Ok(rgba),
_ => {}
}
// Step 6.
if input.len() == 4 {
match (input.as_bytes()[0],
hex(input.as_bytes()[1] as char),
hex(input.as_bytes()[2] as char),
hex(input.as_bytes()[3] as char)) {
(b'#', Ok(r), Ok(g), Ok(b)) => {
return Ok(RGBA {
red: (r as f32) * 17.0 / 255.0,
green: (g as f32) * 17.0 / 255.0,
blue: (b as f32) * 17.0 / 255.0,
alpha: 1.0,
})
}
_ => {}
}
}
// Step 7.
let mut new_input = String::new();
for ch in input.chars() {
if ch as u32 > 0xffff {
new_input.push_str("00")
} else {
new_input.push(ch)
}
}
let mut input = &*new_input;
// Step 8.
for (char_count, (index, _)) in input.char_indices().enumerate() {
if char_count == 128 {
input = &input[..index];
break
}
}
// Step 9.
if input.as_bytes()[0] == b'#' {
input = &input[1..]
}
// Step 10.
let mut new_input = Vec::new();
for ch in input.chars() {
if hex(ch).is_ok() {
new_input.push(ch as u8)
} else {
new_input.push(b'0')
}
}
let mut input = new_input;
// Step 11.
while input.is_empty() || (input.len() % 3) != 0 {
input.push(b'0')
}
// Step 12.
let mut length = input.len() / 3;
let (mut red, mut green, mut blue) = (&input[..length],
&input[length..length * 2],
&input[length * 2..]);
// Step 13.
if length > 8 {
red = &red[length - 8..];
green = &green[length - 8..];
blue = &blue[length - 8..];
length = 8
}
// Step 14.
while length > 2 && red[0] == b'0' && green[0] == b'0' && blue[0] == b'0' {
red = &red[1..];
green = &green[1..];
blue = &blue[1..];
length -= 1
}
// Steps 15-20.
return Ok(RGBA {
red: hex_string(red).unwrap() as f32 / 255.0,
green: hex_string(green).unwrap() as f32 / 255.0,
blue: hex_string(blue).unwrap() as f32 / 255.0,
alpha: 1.0,
});
fn hex(ch: char) -> Result<u8, ()> {
match ch {
'0'...'9' => Ok((ch as u8) - b'0'),
'a'...'f' => Ok((ch as u8) - b'a' + 10),
'A'...'F' => Ok((ch as u8) - b'A' + 10),
_ => Err(()),
}
}
fn hex_string(string: &[u8]) -> Result<u8, ()> {
match string.len() {
0 => Err(()),
1 => hex(string[0] as char),
_ => {
let upper = try!(hex(string[0] as char));
let lower = try!(hex(string[1] as char));
Ok((upper << 4) | lower)
}
}
}
}
#[derive(Clone, Eq, PartialEq, Hash, Debug)]
pub struct LowercaseString {
inner: String,
}
impl LowercaseString {
pub fn new(s: &str) -> LowercaseString {
LowercaseString {
inner: s.to_lowercase(),
}
}
}
impl Deref for LowercaseString {
type Target = str;
#[inline]
fn deref(&self) -> &str {
&*self.inner
}
}
/// Creates a String from the given null-terminated buffer.
/// Panics if the buffer does not contain UTF-8.
pub unsafe fn c_str_to_string(s: *const c_char) -> String {
from_utf8(CStr::from_ptr(s).to_bytes()).unwrap().to_owned()
}
pub fn str_join<T: AsRef<str>>(strs: &[T], join: &str) -> String {
strs.iter().fold(String::new(), |mut acc, s| {
if !acc.is_empty() { acc.push_str(join); }
acc.push_str(s.as_ref());
acc
})
}
// Lifted from Rust's StrExt implementation, which is being removed.
pub fn slice_chars(s: &str, begin: usize, end: usize) -> &str {
assert!(begin <= end);
let mut count = 0;
let mut begin_byte = None;
let mut end_byte = None;
// This could be even more efficient by not decoding,
// only finding the char boundaries
for (idx, _) in s.char_indices() {
if count == begin { begin_byte = Some(idx); }
if count == end { end_byte = Some(idx); break; }
count += 1;
}
if begin_byte.is_none() && count == begin { begin_byte = Some(s.len()) }
if end_byte.is_none() && count == end { end_byte = Some(s.len()) }
match (begin_byte, end_byte) {
(None, _) => panic!("slice_chars: `begin` is beyond end of string"),
(_, None) => panic!("slice_chars: `end` is beyond end of string"),
(Some(a), Some(b)) => unsafe { s.slice_unchecked(a, b) }
}
}