(define c-keyword-table (make-string-table)) (table-set! c-keyword-table "auto" (enum c-grammar-symbol t-auto)) (table-set! c-keyword-table "break" (enum c-grammar-symbol t-break)) (table-set! c-keyword-table "case" (enum c-grammar-symbol t-case)) (table-set! c-keyword-table "char" (enum c-grammar-symbol t-char)) (table-set! c-keyword-table "const" (enum c-grammar-symbol t-const)) (table-set! c-keyword-table "continue" (enum c-grammar-symbol t-continue)) (table-set! c-keyword-table "default" (enum c-grammar-symbol t-default)) (table-set! c-keyword-table "do" (enum c-grammar-symbol t-do)) (table-set! c-keyword-table "double" (enum c-grammar-symbol t-double)) (table-set! c-keyword-table "else" (enum c-grammar-symbol t-else)) (table-set! c-keyword-table "enum" (enum c-grammar-symbol t-enum)) (table-set! c-keyword-table "extern" (enum c-grammar-symbol t-extern)) (table-set! c-keyword-table "float" (enum c-grammar-symbol t-float)) (table-set! c-keyword-table "for" (enum c-grammar-symbol t-for)) (table-set! c-keyword-table "goto" (enum c-grammar-symbol t-goto)) (table-set! c-keyword-table "if" (enum c-grammar-symbol t-if)) (table-set! c-keyword-table "int" (enum c-grammar-symbol t-int)) (table-set! c-keyword-table "long" (enum c-grammar-symbol t-long)) (table-set! c-keyword-table "register" (enum c-grammar-symbol t-register)) (table-set! c-keyword-table "return" (enum c-grammar-symbol t-return)) (table-set! c-keyword-table "short" (enum c-grammar-symbol t-short)) (table-set! c-keyword-table "signed" (enum c-grammar-symbol t-signed)) (table-set! c-keyword-table "sizeof" (enum c-grammar-symbol t-sizeof)) (table-set! c-keyword-table "static" (enum c-grammar-symbol t-static)) (table-set! c-keyword-table "struct" (enum c-grammar-symbol t-struct)) (table-set! c-keyword-table "switch" (enum c-grammar-symbol t-switch)) (table-set! c-keyword-table "typedef" (enum c-grammar-symbol t-typedef)) (table-set! c-keyword-table "union" (enum c-grammar-symbol t-union)) (table-set! c-keyword-table "unsigned" (enum c-grammar-symbol t-unsigned)) (table-set! c-keyword-table "void" (enum c-grammar-symbol t-void)) (table-set! c-keyword-table "volatile" (enum c-grammar-symbol t-volatile)) (table-set! c-keyword-table "while" (enum c-grammar-symbol t-while)) (define (make-lexeme-action token) (lambda (lexeme position read-chars port) (values token (make-attribute #f position) read-chars))) (define integral-suffix (one-of epsilon (set #\u #\U) (set #\l #\L) (sequence (set #\u #\U) (set #\l #\L)) (sequence (set #\l #\L) (set #\u #\U)))) (define (split-integral-literal lexeme) (let ((last (- (string-length lexeme) 1)) (suffixes '())) (define (snarf-one) (if (memv (string-ref lexeme last) '(#\u #\U #\l #\L)) (begin (set! suffixes (cons (char-upcase (string-ref lexeme last)) suffixes)) (set! last (- last 1))))) (snarf-one) (snarf-one) (values (substring lexeme 0 (+ last 1)) (and (memv #\U suffixes) #t) (and (memv #\L suffixes) #t )))) (define directive-pattern (sequence (set #\newline) (set #\#) (set #\space) (submatch 'linenumber (repeat 1 #f numeric)) (set #\space) (set #\") (submatch 'filename (repeat (negate (set #\")))) (set #\") (repeat 0 4 (sequence (set #\space) numeric)))) (define (directive-action lexeme position read-chars port) (let* ((lexeme-string (list->string lexeme)) (sub-matches (match-submatches (match directive-pattern lexeme-string))) (submatch-as-string (lambda (key) (let ((submatch (cdr (assq key sub-matches)))) (substring lexeme-string (match-start submatch) (match-end submatch))))) (linenumber (string->number (submatch-as-string 'linenumber))) (filename (submatch-as-string 'filename))) ;; read-chars have the wrong positions now, so fix them ;; the new position of the port is after read-chars (call-with-values (lambda () (let lp ((read-chars read-chars) (rev-result '()) (row linenumber) (column 0)) (if (null? read-chars) (values (reverse rev-result) row column) (let ((c (caar read-chars))) (lp (cdr read-chars) (cons (cons c (make-position row column filename)) rev-result) (if (char=? c #\newline) (+ row 1) row) (if (char=? c #\newline) 0 (+ column 1))))))) (lambda (new-read-chars new-row new-column) (set-current-row! port new-row) (set-current-column! port new-column) (set-current-filename! port filename) (c-scan-one new-read-chars port))))) (define (count-newlines l) (if (null? l) 0 (if (char=? (car l) #\newline) (+ 1 (count-newlines (cdr l))) (count-newlines (cdr l))))) (define c-spec (list (cons whitespace (lambda (lexeme position read-chars port) (c-scan-one read-chars port))) (cons (text "/*") (lambda (lexeme position read-chars port) (c-scan-one (scan-comment read-chars port) port))) ;; ### preprocessor (cons (sequence (set #\newline) (set #\#) (repeat (negate (set #\newline)))) directive-action) (cons (sequence (one-of (ranges #\a #\z #\A #\Z) (set #\_)) (repeat (one-of (ranges #\a #\z #\A #\Z #\0 #\9) (set #\_)))) (lambda (lexeme position read-chars port) (let ((text (list->string lexeme))) (receive (token value read-chars) (cond ((table-ref c-keyword-table text) => (lambda (token) (values token #f read-chars))) ((typename? text) (values (enum c-grammar-symbol t-typename) text read-chars)) (else (values (enum c-grammar-symbol t-identifier) text read-chars))) (values token (make-attribute value position) read-chars))))) (cons (text "...") (make-lexeme-action (enum c-grammar-symbol t-ellipsis))) (cons (text "(") (make-lexeme-action (enum c-grammar-symbol t-openpar))) (cons (text ")") (make-lexeme-action (enum c-grammar-symbol t-closedpar))) (cons (text "[") (make-lexeme-action (enum c-grammar-symbol t-openbracket))) (cons (text "]") (make-lexeme-action (enum c-grammar-symbol t-closedbracket))) (cons (text ",") (make-lexeme-action (enum c-grammar-symbol t-comma))) (cons (text "=") (make-lexeme-action (enum c-grammar-symbol t-assign))) (cons (text "*=") (make-lexeme-action (enum c-grammar-symbol t-assign-mult))) (cons (text "/=") (make-lexeme-action (enum c-grammar-symbol t-assign-div))) (cons (text "%=") (make-lexeme-action (enum c-grammar-symbol t-assign-mod))) (cons (text "+=") (make-lexeme-action (enum c-grammar-symbol t-assign-plus))) (cons (text "-=") (make-lexeme-action (enum c-grammar-symbol t-assign-minus))) (cons (text "<<=") (make-lexeme-action (enum c-grammar-symbol t-assign-shift-left))) (cons (text ">>=") (make-lexeme-action (enum c-grammar-symbol t-assign-shift-right))) (cons (text "&=") (make-lexeme-action (enum c-grammar-symbol t-assign-bit-and))) (cons (text "^=") (make-lexeme-action (enum c-grammar-symbol t-assign-bit-xor))) (cons (text "|=") (make-lexeme-action (enum c-grammar-symbol t-assign-bit-or))) (cons (text "++") (make-lexeme-action (enum c-grammar-symbol t-increment))) (cons (text "--") (make-lexeme-action (enum c-grammar-symbol t-decrement))) (cons (text "+") (make-lexeme-action (enum c-grammar-symbol t-plus))) (cons (text "-") (make-lexeme-action (enum c-grammar-symbol t-minus))) (cons (text "/") (make-lexeme-action (enum c-grammar-symbol t-div))) (cons (text "*") (make-lexeme-action (enum c-grammar-symbol t-asterisk))) (cons (text "%") (make-lexeme-action (enum c-grammar-symbol t-mod))) (cons (text "^") (make-lexeme-action (enum c-grammar-symbol t-bit-xor))) (cons (text "|") (make-lexeme-action (enum c-grammar-symbol t-bit-xor))) (cons (text "&") (make-lexeme-action (enum c-grammar-symbol t-bit-and))) (cons (text "~") (make-lexeme-action (enum c-grammar-symbol t-bit-not))) (cons (text ">>") (make-lexeme-action (enum c-grammar-symbol t-bit-shift-right))) (cons (text "<<") (make-lexeme-action (enum c-grammar-symbol t-bit-shift-left))) (cons (text "?") (make-lexeme-action (enum c-grammar-symbol t-question-mark))) (cons (text ":") (make-lexeme-action (enum c-grammar-symbol t-colon))) (cons (text "&&") (make-lexeme-action (enum c-grammar-symbol t-bool-and))) (cons (text "||") (make-lexeme-action (enum c-grammar-symbol t-bool-or))) (cons (text "!") (make-lexeme-action (enum c-grammar-symbol t-bool-not))) (cons (text "==") (make-lexeme-action (enum c-grammar-symbol t-equals))) (cons (text "!=") (make-lexeme-action (enum c-grammar-symbol t-not-equals))) (cons (text "<") (make-lexeme-action (enum c-grammar-symbol t-less))) (cons (text ">") (make-lexeme-action (enum c-grammar-symbol t-greater))) (cons (text "<=") (make-lexeme-action (enum c-grammar-symbol t-less-equal))) (cons (text ">=") (make-lexeme-action (enum c-grammar-symbol t-greater-equal))) (cons (text ";") (make-lexeme-action (enum c-grammar-symbol t-semicolon))) (cons (text "{") (make-lexeme-action (enum c-grammar-symbol t-openbrace))) (cons (text "}") (make-lexeme-action (enum c-grammar-symbol t-closedbrace))) (cons (text "->") (make-lexeme-action (enum c-grammar-symbol t-pointer-struct-access))) (cons (text ".") (make-lexeme-action (enum c-grammar-symbol t-point))) (cons (sequence (repeat 1 #f numeric) integral-suffix) (lambda (lexeme position read-chars port) (let ((text (list->string lexeme))) (call-with-values (lambda () (split-integral-literal text)) (lambda (meat unsigned? long?) (values (enum c-grammar-symbol t-integral-constant) (make-attribute (make-integral-literal text (string->number meat) unsigned? long?) position) read-chars)))))) (cons (sequence (set #\0) (set #\x #\X) (repeat 1 #f hexdigit) integral-suffix) (lambda (lexeme position read-chars port) (let ((text (list->string lexeme))) (call-with-values (lambda () (split-integral-literal text)) (lambda (meat unsigned? long?) (values (enum c-grammar-symbol t-integral-constant) (make-attribute (make-integral-literal text (string->number (substring meat 2 (string-length meat)) 16) unsigned? long?) position) read-chars)))))) (cons (sequence (set #\0) (repeat 1 #f (ranges #\0 #\7)) integral-suffix) (lambda (lexeme position read-chars port) (let ((text (list->string lexeme))) (call-with-values (lambda () (split-integral-literal text)) (lambda (meat unsigned? long?) (values (enum c-grammar-symbol t-integral-constant) (make-attribute (make-integral-literal text (string->number meat 8) unsigned? long?) position) read-chars)))))) (cons (sequence (set #\') (set #\\) (negate (set)) (repeat (negate (set #\'))) (set #\')) (lambda (lexeme position read-chars port) (values (enum c-grammar-symbol t-character-constant) (make-attribute (list->string (reverse (cdr (reverse (cdr lexeme))))) position) read-chars))) (cons (sequence (set #\') (repeat 1 #f (negate (set #\'))) (set #\')) (lambda (lexeme position read-chars port) (values (enum c-grammar-symbol t-character-constant) (make-attribute (list->string (reverse (cdr (reverse (cdr lexeme))))) position) read-chars))) (cons (set #\") (lambda (lexeme position read-chars port) (call-with-values (lambda () (scan-to-list string-scan-one read-chars port)) (lambda (string-data read-chars) (values (enum c-grammar-symbol t-literal-string) (make-attribute (list->string (map (lambda (token.attribute) (attribute-value (cdr token.attribute))) string-data)) position) read-chars))))) ;; ### need more elaborate representation for FLOATING-CONSTANT (cons (sequence (repeat 1 #f numeric) (set #\E #\e) (repeat 0 1 (set #\+ #\-)) (repeat 1 #f numeric)) (lambda (lexeme position read-chars port) (let ((text (list->string lexeme))) (values (enum c-grammar-symbol t-integral-constant) (make-attribute (make-floating-literal text) position) read-chars)))) (cons (sequence (repeat numeric) (set #\.) (repeat 1 #f numeric) (repeat 0 1 (sequence (set #\E #\e) (repeat 0 1 (set #\+ #\-)) (repeat 1 #f numeric))) (repeat 0 1 (set #\f #\F #\l #\L))) (lambda (lexeme position read-chars port) (let ((text (list->string lexeme))) (values (enum c-grammar-symbol t-integral-constant) (make-attribute (make-floating-literal text) position) read-chars)))) (cons (sequence (repeat 1 #f numeric) (set #\.) (repeat numeric) (repeat 0 1 (sequence (set #\E #\e) (repeat 0 1 (set #\+ #\-)) (repeat 1 #f numeric))) (repeat 0 1 (set #\f #\F #\l #\L))) (lambda (lexeme position read-chars port) (let ((text (list->string lexeme))) (values (enum c-grammar-symbol t-integral-constant) (make-attribute (make-floating-literal text) position) read-chars)))))) ;; ### probably best done by hand (define string-literal-spec (list (cons (set #\") (lambda (lexeme position read-chars port) (values #f (make-attribute #f position) read-chars))) (cons (sequence (set #\\) (range #\0 #\7) (range #\0 #\7) (range #\0 #\7)) (lambda (lexeme position read-chars port) (values 'char (make-attribute (ascii->char (string->number (list->string (cdr lexeme)) 8)) position) read-chars))) (cons (sequence (set #\\) (negate (set))) (lambda (lexeme position read-chars port) (values 'char (make-attribute (cadr lexeme) position) read-chars))) (cons (negate (set)) (lambda (lexeme position read-chars port) (values 'char (make-attribute (car lexeme) position) read-chars))))) (define string-scan-one (make-scan-one string-literal-spec)) (define comment-spec (list (cons (text "*/") (lambda (lexeme position read-chars port) (values #f (make-attribute #f position) read-chars))) (cons (negate (set)) (lambda (lexeme position read-chars port) (values #t (make-attribute #f position) read-chars))))) (define comment-scan-one (make-scan-one comment-spec)) (define (scan-comment prefix port) (call-with-values (lambda () (scan-to-list comment-scan-one prefix port)) (lambda (list read-chars) read-chars))) (define c-scan-one (make-scan-one c-spec)) (define (scan-c port) (scan-to-stream c-scan-one (list (cons #\newline (make-position 0 0 ""))) (make-lexer-port port ""))) ;; data for the position field of generated absyn (define generate-position (let ((counter 0)) (lambda () (set! counter (+ 1 counter)) (make-position 0 0 (string-append "gen-" (number->string counter))))))