Parsing CSS with ANTLR 11

Posted by Ben Poweski Fri, 23 May 2008 14:28:00 GMT

For a project I’m working on, we’ve used CSS syntax to describe styling on application objects. To accomplish this we created a parser using ANTLR. Here is our implementation of the CSS core syntax. Unicode support is left out because it was not needed for our use case, but it should be pretty easy to add.

grammar CssCore;
options { language=Java; }

/*

Grammar taken from
http://www.w3.org/TR/REC-CSS2/syndata.html#tokenization

*/

// stylesheet : [ CDO | CDC | S | statement ]*;
//
// Entry rule. S does not appear here because the lexer sends whitespace to
// the hidden channel. The trailing EOF forces the parser to consume the
// whole input: without it the loop simply exits at the first token that
// cannot start a statement and the remainder of the file is ignored
// without any error being reported.
stylesheet
    :   ( CDO | CDC | statement )* EOF
    ;


// statement : ruleset | at-rule;
//
// A top-level statement is either a rule set or an at-rule.
statement : ruleset | atRule ;

// at-rule : ATKEYWORD S* any* [ block | ';' S* ];
//
// An at-keyword, zero or more `any` components, then either a block or a
// terminating semicolon.
atRule
    :   ATKEYWORD
        any*
        ( block | SEMICOLON )
    ;

// block : '{' S* [ any | block | ATKEYWORD S* | ';' ]* '}' S*;
//
// A brace-delimited group. Blocks nest, and may contain any mix of `any`
// components, at-keywords and semicolons.
block
    :   LBRACE
        (   any
        |   block
        |   ATKEYWORD
        |   SEMICOLON
        )*
        RBRACE
    ;

//ruleset     : selector? '{' S* declaration? [ ';' S* declaration? ]* '}' S*;

// Simplified selector: either the universal selector '*', optionally
// followed by further parts, or one or more parts on their own. A part is
// an element name, a '>' or '+' combinator, a class or an id.
selector
    :   '*' ( IDENT | '>' | '+' | CLASS | HASH )*
    |   ( IDENT | '>' | '+' | CLASS | HASH )+
    ;
    
    
// An optional selector followed by a brace-delimited, semicolon-separated
// list of declarations. Every declaration slot is optional, so empty
// bodies and stray semicolons are accepted.
ruleset
    :   selector?
        LBRACE
        declaration? ( SEMICOLON declaration? )*
        RBRACE
    ;

//selector    : any+;

// declaration : property ':' S* value;
//
// A single "property: value" pair.
declaration : property COLON value ;

// property : IDENT S*;
//
// A property name is a plain identifier.
property : IDENT ;

// value : [ any | block | ATKEYWORD S* ]+;
//
// The value part of a declaration. The core grammar requires at least one
// component, so the loop is `+` rather than `*`; a declaration with nothing
// after the colon (e.g. "color:;") is a syntax error instead of being
// silently accepted.
value
    :   ( any | block | ATKEYWORD )+
    ;

// any : [ IDENT | NUMBER | PERCENTAGE | DIMENSION | STRING
//       | DELIM | URI | HASH | UNICODE-RANGE | INCLUDES
//       | FUNCTION | DASHMATCH | '(' any* ')' | '[' any* ']' ] S*;
//
// A single value component. The grouped forms are implemented so that the
// closing RPAREN / RBRACKET tokens produced by the lexer can actually be
// consumed:
//   - FUNCTION already contains the opening '(' (it is lexed as ident
//     followed by '('), so a function call is FUNCTION any* RPAREN, as in
//     the CSS 2.1 revision of this grammar.
//   - '(' any* ')' and '[' any* ']' follow the production quoted above.
// Arguments are limited to what `any` itself accepts; DELIM characters such
// as ',' are still not tokenized.
any
    :   IDENT
    |   NUMBER
    |   PERCENTAGE
    |   DIMENSION
    |   STRING
    |   HASH
    |   INCLUDES
    |   DASHMATCH
    |   FUNCTION any* RPAREN
    |   LPAREN any* RPAREN
    |   LBRACKET any* RBRACKET
    // TODO UNICODE_RANGE | DELIM | URI
    ;


/* Tokens */

// IDENT : {ident}
// A bare identifier.
IDENT : F_IDENT ;

// ATKEYWORD : @{ident}
// An identifier prefixed with '@'.
ATKEYWORD : '@' F_IDENT ;

// STRING : {string}
// A double- or single-quoted string.
STRING : F_STRING ;

// HASH : #{name}
// A '#' followed by one or more name characters.
HASH : '#' F_NAME ;

// NUMBER : {num}
// A plain integer or decimal number.
NUMBER : F_NUM ;

// PERCENTAGE : {num}%
// A number immediately followed by '%'.
PERCENTAGE : F_NUM '%' ;

// DIMENSION : {num}{ident}
// A number immediately followed by an identifier (the unit).
DIMENSION : F_NUM F_IDENT ;

//URI   url\({w}{string}{w}\)
//|url\({w}([!#$%&*-~]|{nonascii}|{escape})*{w}\)
//UNICODE-RANGE     U\+[0-9A-F?]{1,6}(-[0-9A-F]{1,6})?


// CDO : <!--
// SGML comment-open delimiter.
CDO : '<!--' ;

// CDC : -->
// SGML comment-close delimiter.
CDC : '-->' ;


// Single-character punctuation tokens.

// ; : ;
SEMICOLON : ';' ;

// : : :
COLON : ':' ;

// { : \{
LBRACE : '{' ;

// } : \}
RBRACE : '}' ;

// ( : \(
LPAREN : '(' ;

// ) : \)
RPAREN : ')' ;

// [ : \[
LBRACKET : '[' ;

// ] : \]
RBRACKET : ']' ;

// S : [ \t\r\n\f]+
//
// A run of whitespace. The token is placed on the hidden channel, which is
// why the parser rules never mention S.
S
    :   (   ' '
        |   '\t'
        |   '\r'
        |   '\n'
        |   '\f'
        )+
        { $channel=HIDDEN; }
    ;

//COMMENT   \/\*[^*]*\*+([^/][^*]*\*+)*\/
//
// A /* ... */ comment. The wildcard loop is declared non-greedy so that it
// stops at the first '*/' it meets; comments therefore do not nest. Like
// whitespace, the token is sent to the hidden channel and never reaches the
// parser rules.
COMMENT :   '/*' (options {greedy=false;} : .)*   '*/'
        { $channel=HIDDEN; }
    ;

// FUNCTION : {ident}\(
// An identifier immediately followed by '('; the parenthesis is part of
// the token.
FUNCTION : F_IDENT '(' ;

// INCLUDES : ~=
INCLUDES : '~=' ;

// DASHMATCH : |=
DASHMATCH : '|=' ;

//DELIM     any other character not matched by the above rules

// A class selector part: '.' immediately followed by an identifier.
// Not one of the core CSS2 tokens; it is used by the `selector` rule.
CLASS : '.' F_IDENT ;


// ident : {nmstart}{nmchar}*
// One name-start character followed by any number of name characters.
fragment F_IDENT : F_NMSTART F_NMCHAR* ;

// name : {nmchar}+
// One or more name characters (no restriction on the first one).
fragment F_NAME : F_NMCHAR+ ;

// nmstart : [a-zA-Z]|{nonascii}|{escape}
// Only ASCII letters are accepted at present.
// TODO add nonascii, escaped
fragment F_NMSTART : F_LETTER ;

//nonascii  [^\0-\177]
//unicode   \\[0-9a-f]{1,6}[ \n\r\t\f]?
//escape    {unicode}|\\[ -~\200-\4177777]

// nmchar : [a-z0-9-]|{nonascii}|{escape}
// Only ASCII letters, digits and '-' are accepted at present.
// TODO add nonascii, escaped
fragment
F_NMCHAR
    :   F_LETTER
    |   F_DIGIT
    |   '-'
    ;

// num : [0-9]+|[0-9]*\.[0-9]+
// Either a run of digits, or an optional integer part followed by '.' and
// at least one fractional digit.
fragment
F_NUM
    :   F_DIGIT+
    |   F_DIGIT* '.' F_DIGIT+
    ;


// string : {string1}|{string2}
// A double-quoted or a single-quoted string.
fragment F_STRING : F_STRING1 | F_STRING2 ;

// string1 : \"([\t !#$%&(-~]|\\{nl}|\'|{nonascii}|{escape})*\"
//
// Double-quoted string. Only a subset of the character class above is
// accepted: tab, space, a few punctuation characters, the single quote,
// '.', letters and digits.
fragment
F_STRING1
    :   '"'
        (   '\t' | ' '
        |   '!' | '#' | '$' | '%' | '&'
        |   '\''
        |   '.'
        |   F_LETTER
        |   F_DIGIT
        )*
        '"'
    ;
// string2 : \'([\t !#$%&(-~]|\\{nl}|\"|{nonascii}|{escape})*\'
//
// Single-quoted string. A double quote is allowed inside, as the
// definition above requires and mirroring the way the double-quoted form
// allows a single quote; previously 'say "hi"' could not be lexed.
// Otherwise still limited to a subset of the character class above: tab,
// space, a few punctuation characters, '.', letters and digits.
fragment
F_STRING2
    :   '\'' ('\t'|' '|'!'|'#'|'$'|'%'|'&'|'"'|'.'|F_LETTER|F_DIGIT)* '\''
    ;

// nl : \n|\r\n|\r|\f
// A single line terminator: LF, CR optionally followed by LF, or form feed.
fragment
F_NL
    :   '\n'
    |   '\r' '\n'?
    |   '\f'
    ;
    
// An ASCII letter, lower or upper case.
fragment F_LETTER : ( 'a'..'z' | 'A'..'Z' ) ;

// An ASCII decimal digit.
fragment F_DIGIT : '0'..'9' ;

// w : [ \t\r\n\f]*
// Optional whitespace (possibly empty).
fragment
F_W
    :   (   ' '
        |   '\t'
        |   '\r'
        |   '\n'
        |   '\f'
        )*
    ;
    
  1. mmo 4 months later:

    I like this approach. Learning all I can on Rails, thank you.

  2. ShAaNiG almost 2 years later:

    It’s really good written and I fully agree with You on main issue, btw. I must say that I really enjoyed reading all of Your posts.

    Wholesale Brand Name Clothing

  3. ShAaNiG almost 2 years later:

    It’s really good written and I fully agree with You on main issue, btw. I must say that I really enjoyed reading all of Your posts.

    Wholesalers

  4. carlos almost 2 years later:

    It’s so tedious to manage the S everywhere. I think the parsing process builds the tree representation of the CSS syntax, and the S should not be there. In my opinion the CSS syntax isn’t good enough.

  5. Harry almost 2 years later:

    The only stuff I know about CSS is the most basic. But as I want to develop a more sophisticated website I keep on poking around for more advanced stuff. Thanks for sharing.

    Best online poker room

  6. Ann over 2 years later:

    Thanks for information!

  7. Split Face Brick over 2 years later:

    It’s useful to write a program for parsing CSS. Thank you.

  8. krzyzowki almost 3 years later:

    Thanks for this great post. Did you do it all on your own? This must’ve taken a lot of time

  9. linux almost 3 years later:

    Great blog post, I have been waiting for that

  10. lussypointing@yahoo.com about 3 years later:

    It’s really nice work and i totally agree the main issue.That is really nice information.I lot of enjoyed reading this blog.Thanks for shearing I have been waiting another blog.http://www.PolicyInteractive.com.

  11. mariyawood@yahoo.co.uk about 3 years later:

    Hey thanks for the great post it really nice work and great information shearing I have read your blog and i really enjoyed for this reading every line.i m waiting your next post. http://www.classicbedsteads.co.uk/