Skip to content

Reduce the size of Token #159

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 9 commits into from
Jun 16, 2017
Merged
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
[package]

name = "cssparser"
version = "0.14.0"
version = "0.15.0"
authors = [ "Simon Sapin <[email protected]>" ]

description = "Rust implementation of CSS Syntax Level 3"
Expand Down
23 changes: 11 additions & 12 deletions src/color.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@ use std::fmt;
use std::f32::consts::PI;

use super::{Token, Parser, ToCss, ParseError, BasicParseError};
use tokenizer::NumericValue;

#[cfg(feature = "serde")]
use serde::{Deserialize, Deserializer, Serialize, Serializer};
Expand Down Expand Up @@ -430,11 +429,11 @@ fn parse_color_function<'i, 't>(name: &str, arguments: &mut Parser<'i, 't>) -> R
};
let token = try!(arguments.next());
match token {
Token::Number(NumericValue { value: v, .. }) => {
Token::Number { value: v, .. } => {
clamp_unit_f32(v)
}
Token::Percentage(ref v) => {
clamp_unit_f32(v.unit_value)
Token::Percentage { unit_value: v, .. } => {
clamp_unit_f32(v)
}
t => {
return Err(BasicParseError::UnexpectedToken(t))
Expand All @@ -459,10 +458,10 @@ fn parse_rgb_components_rgb<'i, 't>(arguments: &mut Parser<'i, 't>) -> Result<(u
// Either integers or percentages, but all the same type.
// https://drafts.csswg.org/css-color/#rgb-functions
match try!(arguments.next()) {
Token::Number(NumericValue { value: v, .. }) => {
Token::Number { value: v, .. } => {
red = clamp_floor_256_f32(v);
green = clamp_floor_256_f32(match try!(arguments.next()) {
Token::Number(NumericValue { value: v, .. }) => v,
Token::Number { value: v, .. } => v,
Token::Comma => {
uses_commas = true;
try!(arguments.expect_number())
Expand All @@ -474,10 +473,10 @@ fn parse_rgb_components_rgb<'i, 't>(arguments: &mut Parser<'i, 't>) -> Result<(u
}
blue = clamp_floor_256_f32(try!(arguments.expect_number()));
}
Token::Percentage(ref v) => {
red = clamp_unit_f32(v.unit_value);
Token::Percentage { unit_value, .. } => {
red = clamp_unit_f32(unit_value);
green = clamp_unit_f32(match try!(arguments.next()) {
Token::Percentage(ref v) => v.unit_value,
Token::Percentage { unit_value, .. } => unit_value,
Token::Comma => {
uses_commas = true;
try!(arguments.expect_percentage())
Expand All @@ -501,8 +500,8 @@ fn parse_rgb_components_hsl<'i, 't>(arguments: &mut Parser<'i, 't>) -> Result<(u
// https://drafts.csswg.org/css-values/#angles
let token = try!(arguments.next());
let hue_degrees = match token {
Token::Number(NumericValue { value: v, .. }) => Ok(v),
Token::Dimension(NumericValue { value: v, .. }, ref unit) => {
Token::Number { value: v, .. } => Ok(v),
Token::Dimension { value: v, ref unit, .. } => {
match_ignore_ascii_case! { &*unit,
"deg" => Ok(v),
"grad" => Ok(v * 360. / 400.),
Expand All @@ -521,7 +520,7 @@ fn parse_rgb_components_hsl<'i, 't>(arguments: &mut Parser<'i, 't>) -> Result<(u
// Saturation and lightness are clamped to 0% ... 100%
// https://drafts.csswg.org/css-color/#the-hsl-notation
let saturation = match try!(arguments.next()) {
Token::Percentage(ref v) => v.unit_value,
Token::Percentage { unit_value, .. } => unit_value,
Token::Comma => {
uses_commas = true;
try!(arguments.expect_percentage())
Expand Down
246 changes: 246 additions & 0 deletions src/compact_cow_str.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,246 @@
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */

use std::borrow::{Borrow, Cow};
use std::cmp;
use std::fmt;
use std::hash;
use std::marker::PhantomData;
use std::mem;
use std::ops::Deref;
use std::slice;
use std::str;

// Mask with every bit of a `usize` set except the most significant one.
// It doubles as the largest string length that can be stored next to the tag.
const MAX_LEN: usize = !0usize >> 1;

// Mask with only the most significant bit set: the "owned" tag.
const OWNED_TAG: usize = !MAX_LEN;

/// Like `Cow<'a, str>`, but with smaller `std::mem::size_of`. (Two words instead of four.)
///
/// The string is stored as a raw data pointer plus a length whose highest bit
/// records whether the pointed-to memory is owned or borrowed.
pub struct CompactCowStr<'a> {
// `tagged_len` is a tag in its highest bit (`OWNED_TAG`), and the string length
// in the rest of the bits (so the length is at most `MAX_LEN`).
//
// * If the tag is 1, the memory pointed to by `ptr` is owned
// and the lifetime parameter is irrelevant.
// `ptr` and the untagged length are the components of a `Box<str>`.
//
// * If the tag is 0, the memory is borrowed.
// `ptr` and the untagged length are the components of a `&'a str`.

// FIXME: https://github.com/rust-lang/rust/issues/27730 use NonZero or Shared
ptr: *const u8,
tagged_len: usize,
// Zero-sized marker that ties the struct to `'a` (the lifetime of the borrowed
// case), since the raw pointer alone carries no lifetime.
phantom: PhantomData<&'a str>,
}

impl<'a> From<&'a str> for CompactCowStr<'a> {
#[inline]
fn from(s: &'a str) -> Self {
let len = s.len();
assert!(len <= MAX_LEN);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Quick question, why add the tag to the length instead of the pointer?

Both seem fine, but with the second you get the benefit of not having the `MAX_LEN` restriction (though with the first you need to mask the tag off when reading the length).

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

At least in the current implementation of libstd, String and Vec in Rust are already restricted to a maximum capacity of isize::MAX, so this doesn't add any further restrictions on the length.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don’t know if any bit on the pointer is guaranteed to be available. Maybe not high bits since it seems like a userland process can get up to 3 GB on 32-bit. Not low bits since a subslice of &str can start at any byte.

CompactCowStr {
ptr: s.as_ptr(),
tagged_len: len,
phantom: PhantomData,
}
}
}

impl<'a> From<Box<str>> for CompactCowStr<'a> {
#[inline]
fn from(s: Box<str>) -> Self {
let ptr = s.as_ptr();
let len = s.len();
assert!(len <= MAX_LEN);
mem::forget(s);
CompactCowStr {
ptr: ptr,
tagged_len: len | OWNED_TAG,
phantom: PhantomData,
}
}
}

impl<'a> CompactCowStr<'a> {
/// Whether this string refers to borrowed memory
/// (as opposed to owned, which would be freed when `CompactCowStr` goes out of scope).
#[inline]
pub fn is_borrowed(&self) -> bool {
(self.tagged_len & OWNED_TAG) == 0
}

/// The length of this string
#[inline]
pub fn len(&self) -> usize {
self.tagged_len & !OWNED_TAG
}

// Intentionally private since it is easy to use incorrectly.
#[inline]
fn as_raw_str(&self) -> *const str {
unsafe {
str::from_utf8_unchecked(slice::from_raw_parts(self.ptr, self.len()))
}
}

/// If this string is borrowed, return a slice with the original lifetime,
/// not borrowing `self`.
///
/// (`Deref` is implemented unconditionally, but returns a slice with a shorter lifetime.)
#[inline]
pub fn as_str(&self) -> Option<&'a str> {
if self.is_borrowed() {
Some(unsafe { &*self.as_raw_str() })
} else {
None
}
}

/// Convert into `String`, re-using the memory allocation if it was already owned.
#[inline]
pub fn into_owned(self) -> String {
unsafe {
let raw = self.as_raw_str();
let is_borrowed = self.is_borrowed();
mem::forget(self);
if is_borrowed {
String::from(&*raw)
} else {
Box::from_raw(raw as *mut str).into_string()
}
}
}
}

impl<'a> Clone for CompactCowStr<'a> {
#[inline]
fn clone(&self) -> Self {
if self.is_borrowed() {
CompactCowStr { ..*self }
} else {
Self::from(String::from(&**self).into_boxed_str())
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why can't this create a borrowed version when cloning, given it has the same lifetime?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Err, never mind, I think I see why.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If self is borrowed, the new value will be borrowed too. If not, the lifetime parameter is irrelevant: it could be 'static, and the owned memory does not necessarily live that long.

}
}
}

impl<'a> Drop for CompactCowStr<'a> {
    /// Frees the string data when it is owned; does nothing when it is borrowed.
    #[inline]
    fn drop(&mut self) {
        if self.is_borrowed() {
            // Borrowed memory belongs to someone else.
            return;
        }
        // SAFETY: the owned tag is only set by the `From<Box<str>>` conversion,
        // so `ptr` and the untagged length are the components of a `Box<str>`
        // that this value still owns.
        unsafe {
            // `Box::from_raw` is `#[must_use]`; dropping the rebuilt box
            // explicitly states the intent and avoids the unused-result warning.
            drop(Box::from_raw(self.as_raw_str() as *mut str));
        }
    }
}

impl<'a> Deref for CompactCowStr<'a> {
    type Target = str;

    /// Borrows the string contents for as long as `self` is borrowed,
    /// whether the data is owned or borrowed.
    #[inline]
    fn deref(&self) -> &str {
        let raw = self.as_raw_str();
        unsafe { &*raw }
    }
}

impl<'a> From<CompactCowStr<'a>> for Cow<'a, str> {
#[inline]
fn from(cow: CompactCowStr<'a>) -> Self {
unsafe {
let raw = cow.as_raw_str();
let is_borrowed = cow.is_borrowed();
mem::forget(cow);
if is_borrowed {
Cow::Borrowed(&*raw)
} else {
Cow::Owned(Box::from_raw(raw as *mut str).into_string())
}
}
}
}

impl<'a> From<String> for CompactCowStr<'a> {
    /// Takes ownership of a `String` by first converting it to a `Box<str>`.
    #[inline]
    fn from(s: String) -> Self {
        let boxed: Box<str> = s.into_boxed_str();
        Self::from(boxed)
    }
}

impl<'a> From<Cow<'a, str>> for CompactCowStr<'a> {
    /// Converts from a standard `Cow`, keeping borrowed data borrowed and
    /// owned data owned.
    #[inline]
    fn from(s: Cow<'a, str>) -> Self {
        match s {
            Cow::Owned(owned) => Self::from(owned),
            Cow::Borrowed(borrowed) => Self::from(borrowed),
        }
    }
}

impl<'a> AsRef<str> for CompactCowStr<'a> {
    /// Views the contents as a `&str` (through `Deref`).
    #[inline]
    fn as_ref(&self) -> &str {
        &**self
    }
}

impl<'a> Borrow<str> for CompactCowStr<'a> {
    /// Borrows the contents as a `&str` (through `Deref`).
    #[inline]
    fn borrow(&self) -> &str {
        &**self
    }
}

impl<'a> Default for CompactCowStr<'a> {
    /// The default value is the empty string, borrowed from a literal.
    #[inline]
    fn default() -> Self {
        let empty: &'static str = "";
        Self::from(empty)
    }
}

impl<'a> hash::Hash for CompactCowStr<'a> {
    /// Hashes exactly like the underlying `str`, regardless of whether the
    /// data is owned or borrowed.
    #[inline]
    fn hash<H: hash::Hasher>(&self, hasher: &mut H) {
        <str as hash::Hash>::hash(&**self, hasher)
    }
}

impl<'a, T: AsRef<str>> PartialEq<T> for CompactCowStr<'a> {
    /// Compares the string contents with anything that can be viewed as a `str`.
    #[inline]
    fn eq(&self, other: &T) -> bool {
        let rhs: &str = other.as_ref();
        <str as PartialEq<str>>::eq(&**self, rhs)
    }
}

impl<'a, T: AsRef<str>> PartialOrd<T> for CompactCowStr<'a> {
    /// Orders the string contents against anything that can be viewed as a
    /// `str`, using `str`'s own ordering.
    #[inline]
    fn partial_cmp(&self, other: &T) -> Option<cmp::Ordering> {
        let rhs: &str = other.as_ref();
        <str as PartialOrd<str>>::partial_cmp(&**self, rhs)
    }
}

// Marker impl: equality is delegated to `str`'s (see `PartialEq` above),
// which is a full equivalence relation.
impl<'a> Eq for CompactCowStr<'a> {}

impl<'a> Ord for CompactCowStr<'a> {
    /// Total order on the string contents, using `str`'s own ordering.
    #[inline]
    fn cmp(&self, other: &Self) -> cmp::Ordering {
        <str as Ord>::cmp(&**self, &**other)
    }
}

impl<'a> fmt::Display for CompactCowStr<'a> {
    /// Formats the contents using `str`'s `Display` implementation.
    #[inline]
    fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
        <str as fmt::Display>::fmt(&**self, formatter)
    }
}

impl<'a> fmt::Debug for CompactCowStr<'a> {
    /// Formats the contents using `str`'s `Debug` implementation.
    #[inline]
    fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
        <str as fmt::Debug>::fmt(&**self, formatter)
    }
}
8 changes: 5 additions & 3 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ fn parse_border_spacing(_context: &ParserContext, input: &mut Parser)

pub use cssparser_macros::*;

pub use tokenizer::{Token, NumericValue, PercentageValue, SourceLocation};
pub use tokenizer::{Token, SourceLocation};
pub use rules_and_declarations::{parse_important};
pub use rules_and_declarations::{DeclarationParser, DeclarationListParser, parse_one_declaration};
pub use rules_and_declarations::{RuleListParser, parse_one_rule};
Expand All @@ -91,6 +91,7 @@ pub use nth::parse_nth;
pub use serializer::{ToCss, CssStringWriter, serialize_identifier, serialize_string, TokenSerializationType};
pub use parser::{Parser, Delimiter, Delimiters, SourcePosition, ParseError, BasicParseError, ParserInput};
pub use unicode_range::UnicodeRange;
pub use compact_cow_str::CompactCowStr;

// For macros
#[doc(hidden)] pub use macros::_internal__to_lowercase;
Expand All @@ -116,6 +117,7 @@ mod color;
mod nth;
mod serializer;
mod unicode_range;
mod compact_cow_str;

#[cfg(test)]
mod tests;
#[cfg(test)] mod tests;
#[cfg(test)] mod size_of_tests;
Loading