((section 2 "Outdated egg!" (p "This is an egg for CHICKEN 4, the unsupported old release.  You're almost certainly looking for " (int-link "/eggref/5/abnf" "the CHICKEN 5 version of this egg") ", if it exists.") (p "If it does not exist, there may be equivalent functionality provided by another egg; have a look at the " (link "https://wiki.call-cc.org/chicken-projects/egg-index-5.html" "egg index") ". Otherwise, please consider porting this egg to the current version of CHICKEN.") (tags "eggs") (toc)) (section 2 "abnf" (section 3 "Description" (p (tt "abnf") " is a collection of combinators to help construct parsers for Augmented Backus-Naur Form (ABNF) grammars (" (link "http://www.ietf.org/rfc/rfc4234.txt" "RFC 4234") ").")) (section 3 "Library Procedures" (p "The combinator procedures in this library are based on the interface provided by the " (int-link "lexgen") " library.") (section 4 ((tt "<CoreABNF>") " typeclass") (p "The procedures of this library are provided as fields of the " (tt "<CoreABNF>") " typeclass. Please see the " (int-link "typeclass") " library for information on type classes.") (p "The " (tt "<CoreABNF>") " class is intended to provide abstraction over different kinds of input sequences, e.g. character lists, strings, streams, etc. The following example illustrates the creation of an instance of " (tt "<CoreABNF>") " specialized for character lists. This code is also provided as the " (tt "abnf-charlist") " egg, which is fully compatible with " (tt "abnf") " prior to version 3.0.") (highlight scheme "(require-extension typeclass input-classes abnf)\n\n(define char-list-<Input>\n  (make-<Input> null? 
car cdr))\n\n(define char-list-<Token>\n  (Input->Token char-list-<Input>))\n\n(define char-list-<CharLex>\n  (Token->CharLex char-list-<Token>))\n\n(define char-list-<CoreABNF>\n  (CharLex->CoreABNF char-list-<CharLex>))\n\n(import-instance (<CoreABNF> char-list-<CoreABNF>))\n\t\t ")) (section 4 "Terminal values and core rules " (p "The following procedures are provided as fields in the " (tt "<CoreABNF>") " typeclass:") (def (sig (procedure "(char CHAR) => MATCHER" (id char))) (p "Procedure " (tt "char") " builds a pattern matcher function that matches a single character.")) (def (sig (procedure "(lit STRING) => MATCHER" (id lit))) (p (tt "lit") " matches a literal string (case-insensitive).") (p "The following primitive parsers match the rules described in RFC 4234, Appendix B.1.")) (def (sig (procedure "(alpha STREAM-LIST) => STREAM-LIST" (id alpha))) (p "Matches any character of the alphabet.")) (def (sig (procedure "(binary STREAM-LIST) => STREAM-LIST" (id binary))) (p "Matches [0..1].")) (def (sig (procedure "(decimal STREAM-LIST) => STREAM-LIST" (id decimal))) (p "Matches [0..9].")) (def (sig (procedure "(hexadecimal STREAM-LIST) => STREAM-LIST" (id hexadecimal))) (p "Matches [0..9] and [A..F,a..f].")) (def (sig (procedure "(ascii-char STREAM-LIST) => STREAM-LIST" (id ascii-char))) (p "Matches any 7-bit US-ASCII character except for NUL (ASCII value 0).")) (def (sig (procedure "(cr STREAM-LIST) => STREAM-LIST" (id cr))) (p "Matches the carriage return character.")) (def (sig (procedure "(lf STREAM-LIST) => STREAM-LIST" (id lf))) (p "Matches the line feed character.")) (def (sig (procedure "(crlf STREAM-LIST) => STREAM-LIST" (id crlf))) (p "Matches the Internet newline.")) (def (sig (procedure "(ctl STREAM-LIST) => STREAM-LIST" (id ctl))) (p "Matches any US-ASCII control character. 
That is, any character with a decimal value in the range of [0..31,127].")) (def (sig (procedure "(dquote STREAM-LIST) => STREAM-LIST" (id dquote))) (p "Matches the double quote character.")) (def (sig (procedure "(htab STREAM-LIST) => STREAM-LIST" (id htab))) (p "Matches the tab character.")) (def (sig (procedure "(lwsp STREAM-LIST) => STREAM-LIST" (id lwsp))) (p "Matches linear white-space. That is, any number of consecutive " (tt "wsp") ", optionally followed by a " (tt "crlf") " and (at least) one more " (tt "wsp") ".")) (def (sig (procedure "(sp STREAM-LIST) => STREAM-LIST" (id sp))) (p "Matches the space character.")) (def (sig (procedure "(vspace STREAM-LIST) => STREAM-LIST" (id vspace))) (p "Matches any printable ASCII character.  That is, any character in the decimal range of [33..126].")) (def (sig (procedure "(wsp STREAM-LIST) => STREAM-LIST" (id wsp))) (p "Matches space or tab.")) (def (sig (procedure "(quoted-pair STREAM-LIST) => STREAM-LIST" (id quoted-pair))) (p "Matches a quoted pair. Any characters (excluding CR and LF) may be quoted.")) (def (sig (procedure "(quoted-string STREAM-LIST) => STREAM-LIST" (id quoted-string))) (p "Matches a quoted string. The backslash and double quote characters must be escaped inside a quoted string; CR and LF are not allowed at all.") (p "The following additional procedures are provided for convenience:")) (def (sig (procedure "(set CHAR-SET) => MATCHER" (id set))) (p "Matches any character from an SRFI-14 character set.")) (def (sig (procedure "(set-from-string STRING) => MATCHER" (id set-from-string))) (p "Matches any character from a set defined as a string."))) (section 4 "Operators" (def (sig (procedure "(concatenation MATCHER-LIST) => MATCHER" (id concatenation))) (p (tt "concatenation") " matches an ordered list of rules. (RFC 4234, Section 3.1)")) (def (sig (procedure "(alternatives MATCHER-LIST) => MATCHER" (id alternatives))) (p (tt "alternatives") " matches any one of the given list of rules. 
(RFC 4234, Section 3.2)")) (def (sig (procedure "(range C1 C2) => MATCHER" (id range))) (p (tt "range") " matches a range of characters. (RFC 4234, Section 3.4)")) (def (sig (procedure "(variable-repetition MIN MAX MATCHER) => MATCHER" (id variable-repetition))) (p (tt "variable-repetition") " matches between " (tt "MIN") " and " (tt "MAX") " consecutive elements that match the given rule. (RFC 4234, Section 3.6)")) (def (sig (procedure "(repetition MATCHER) => MATCHER" (id repetition))) (p (tt "repetition") " matches zero or more consecutive elements that match the given rule.")) (def (sig (procedure "(repetition1 MATCHER) => MATCHER" (id repetition1))) (p (tt "repetition1") " matches one or more consecutive elements that match the given rule.")) (def (sig (procedure "(repetition-n N MATCHER) => MATCHER" (id repetition-n))) (p (tt "repetition-n") " matches exactly " (tt "N") " consecutive occurrences of the given rule. (RFC 4234, Section 3.7)")) (def (sig (procedure "(optional-sequence MATCHER) => MATCHER" (id optional-sequence))) (p (tt "optional-sequence") " matches the given optional rule. 
(RFC 4234, Section 3.8)")) (def (sig (procedure "(pass) => MATCHER" (id pass))) (p "This matcher returns without consuming any input.")) (def (sig (procedure "(bind F P) => MATCHER" (id bind))) (p "Given a rule " (tt "P") " and function " (tt "F") ", returns a matcher that first applies " (tt "P") " to the input stream, then applies " (tt "F") " to the returned list of consumed tokens, and returns the result and the remainder of the input stream.") (p "Note: this combinator will signal failure if the input stream is empty.")) (def (sig (procedure "(bind* F P) => MATCHER" (id bind*))) (p "The same as " (tt "bind") ", but will signal success if the input stream is empty.")) (def (sig (procedure "(drop-consumed P) => MATCHER" (id drop-consumed))) (p "Given a rule " (tt "P") ", returns a matcher that always returns an empty list of consumed tokens when " (tt "P") " succeeds."))) (section 4 "Abbreviated syntax" (p (tt "abnf") " supports the following abbreviations for commonly used combinators:") (dl (dt (tt "::")) (dd (tt "concatenation")) (dt (tt ":?")) (dd (tt "optional-sequence")) (dt (tt ":!")) (dd (tt "drop-consumed")) (dt (tt ":s")) (dd (tt "lit")) (dt (tt ":c")) (dd (tt "char")) (dt (tt ":*")) (dd (tt "repetition")) (dt (tt ":+")) (dd (tt "repetition1"))))) (section 3 "Examples" (p "The following parser libraries have been implemented with " (tt "abnf") ", in order of complexity:") (ul (li (int-link "csv") " ") (li (int-link "internet-timestamp") " ") (li (int-link "json-abnf") " ") (li (int-link "mbox")) (li (int-link "smtp") " ") (li (int-link "internet-message") " ") (li (int-link "mime") " ")) (section 4 "Parsing date and time" (highlight scheme "\n(require-extension typeclass input-classes abnf)\n\n(define char-list-<Input>\n  (make-<Input> null? 
car cdr))\n\n(define char-list-<Token>\n  (Input->Token char-list-<Input>))\n\n(define char-list-<CharLex>\n  (Token->CharLex char-list-<Token>))\n\n(define char-list-<CoreABNF>\n  (CharLex->CoreABNF char-list-<CharLex>))\n\n(import-instance (<Token> char-list-<Token> char-list/)\n\t\t (<CharLex> char-list-<CharLex> char-list/)\n                 (<CoreABNF> char-list-<CoreABNF> char-list/)\n                 )\n\n(define fws\n  (concatenation\n   (optional-sequence \n    (concatenation\n     (repetition char-list/wsp)\n     (drop-consumed \n      (alternatives char-list/crlf char-list/lf char-list/cr))))\n   (repetition1 char-list/wsp)))\n\n\n(define (between-fws p)\n  (concatenation\n   (drop-consumed (optional-sequence fws)) p \n   (drop-consumed (optional-sequence fws))))\n\n;; Date and Time Specification from RFC 5322 (Internet Message Format)\n\n;; The following abnf parser combinators parse a date and time\n;; specification of the form\n;;\n;;   Thu, 19 Dec 2002 20:35:46 +0200\n;;\n; where the weekday specification is optional. 
\n\t\t\t     \n;; Match the abbreviated weekday names\n\n(define day-name \n  (alternatives\n   (char-list/lit \"Mon\")\n   (char-list/lit \"Tue\")\n   (char-list/lit \"Wed\")\n   (char-list/lit \"Thu\")\n   (char-list/lit \"Fri\")\n   (char-list/lit \"Sat\")\n   (char-list/lit \"Sun\")))\n\n;; Match a day-name, optionally wrapped in folding whitespace\n\n(define day-of-week (between-fws day-name))\n\n\n;; Match a four digit decimal number\n\n(define year (between-fws (repetition-n 4 char-list/decimal)))\n\n;; Match the abbreviated month names\n\n(define month-name (alternatives\n\t\t    (char-list/lit \"Jan\")\n\t\t    (char-list/lit \"Feb\")\n\t\t    (char-list/lit \"Mar\")\n\t\t    (char-list/lit \"Apr\")\n\t\t    (char-list/lit \"May\")\n\t\t    (char-list/lit \"Jun\")\n\t\t    (char-list/lit \"Jul\")\n\t\t    (char-list/lit \"Aug\")\n\t\t    (char-list/lit \"Sep\")\n\t\t    (char-list/lit \"Oct\")\n\t\t    (char-list/lit \"Nov\")\n\t\t    (char-list/lit \"Dec\")))\n\n;; Match a month-name, optionally wrapped in folding whitespace\n\n(define month (between-fws month-name))\n\n\n;; Match a one or two digit number\n\n(define day (concatenation\n\t     (drop-consumed (optional-sequence fws))\n\t     (alternatives \n\t      (variable-repetition 1 2 char-list/decimal)\n\t      (drop-consumed fws))))\n\n;; Match a date of the form dd Mon yyyy (e.g. 19 Dec 2002)\n(define date (concatenation day month year))\n\n;; Match a two-digit number \n\n(define hour      (repetition-n 2 char-list/decimal))\n(define minute    (repetition-n 2 char-list/decimal))\n(define isecond   (repetition-n 2 char-list/decimal))\n\n;; Match a time-of-day specification of hh:mm or hh:mm:ss.\n\n(define time-of-day (concatenation\n\t\t     hour (drop-consumed (char-list/char #\\:))\n\t\t     minute (optional-sequence \n\t\t\t     (concatenation (drop-consumed (char-list/char #\\:))\n \t\t\t\t\t isecond))))\n\n;; Match a timezone specification of the form\n;; +hhmm or -hhmm \n\n(define zone (concatenation \n\t      
(drop-consumed fws)\n\t      (alternatives (char-list/char #\\-) (char-list/char #\\+))\n\t      hour minute))\n\n;; Match a time-of-day specification followed by a zone.\n\n(define itime (concatenation time-of-day zone))\n\n(define date-time (concatenation\n\t\t   (optional-sequence\n\t\t    (concatenation\n\t\t     day-of-week\n\t\t     (drop-consumed (char-list/char #\\,))))\n\t\t   date\n\t\t   itime\n\t\t   (drop-consumed (optional-sequence fws))))\n\n(define (err s)\n  (print \"lexical error on stream: \" s)\n  `(error))\n\n(require-extension lexgen)\n(print (lex date-time err \"Thu, 19 Dec 2002 20:35:46 +0200\"))\n"))) (section 3 "Requires" (ul (li (int-link "lexgen")) (li (int-link "typeclass")) (li (int-link "input-classes")))) (section 3 "Version History" (ul (li "7.0 Added bind* variant of bind [thanks to Peter Bex]") (li "6.0 Using utf8 for char operations") (li "5.1 Improvements to the CharLex->CoreABNF constructor") (li "5.0 Synchronized with lexgen 5") (li "3.2 Removed invalid identifier :|") (li "3.0 Implemented typeclass interface") (li "2.9 Bug fix in consumed-objects (reported by Peter Bex)") (li "2.7 Added abbreviated syntax (suggested by Moritz Heidkamp)") (li "2.6 Bug fixes in consumer procedures") (li "2.5 Removed procedure memo") (li "2.4 Moved the definition of bind and drop to lexgen") (li "2.2 Added pass combinator") (li "2.1 Added procedure variable-repetition") (li "2.0 Updated to match the interface of lexgen 2.0") (li "1.3 Fix in drop") (li "1.2 Added procedures bind drop consume collect") (li "1.1 Added procedures set and set-from-string") (li "1.0 Initial release"))) (section 3 "License" (pre " Copyright 2009-2015 Ivan Raikov") (pre " This program is free software: you can redistribute it and/or\n modify it under the terms of the GNU General Public License as\n published by the Free Software Foundation, either version 3 of the\n License, or (at your option) any later version.") (pre " This program is distributed in the hope that it 
will be useful, but\n WITHOUT ANY WARRANTY; without even the implied warranty of\n MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\n General Public License for more details.") (pre " A full copy of the GPL license can be found at\n <http://www.gnu.org/licenses/>."))))