(define (accepts-empty? regexp) (cond ((empty-set? regexp) #f) ((epsilon? regexp) #t) ((set? regexp) #f) ((sequence? regexp) (every? accepts-empty? (sequence-exps regexp))) ((one-of? regexp) (any? accepts-empty? (one-of-exps regexp))) ((repeat? regexp) (or (zero? (repeat-low regexp)) (accepts-empty? (repeat-exp regexp)))))) (define (after-character regexp char) (cond ((empty-set? regexp) regexp) ((epsilon? regexp) the-empty-set) ((set? regexp) (if (char-in-set? char regexp) epsilon the-empty-set)) ((sequence? regexp) (let recur ((exps (sequence-exps regexp))) (if (null? exps) the-empty-set (let ((after-1 (apply sequence (after-character (car exps) char) (cdr exps))) (after-2 (if (accepts-empty? (car exps)) (recur (cdr exps)) the-empty-set))) (one-of after-1 after-2))))) ((one-of? regexp) (apply one-of (map (lambda (exp) (after-character exp char)) (one-of-exps regexp)))) ((repeat? regexp) (if (and (zero? (repeat-low regexp)) (not (repeat-high regexp))) (sequence (after-character (repeat-exp regexp) char) (repeat (repeat-exp regexp))) (after-character (normalize-repeat regexp) char))))) (define (normalize-repeat repeat-regexp) (let ((low (repeat-low repeat-regexp)) (high (repeat-high repeat-regexp)) (exp (repeat-exp repeat-regexp))) (let ((prefix (apply sequence (make-list low exp))) (middle (if high (let loop ((i (- high low)) (exps '())) (if (zero? i) (apply one-of epsilon exps) (loop (- i 1) (cons (apply sequence (make-list i exp)) exps)))) epsilon)) (postfix (if high epsilon (repeat exp)))) (sequence prefix middle postfix)))) (define (next-state state char) (filter (lambda (rule) (not (empty-set? (car rule)))) (map (lambda (rule) (let ((regexp (car rule)) (action (cdr rule))) (cons (after-character regexp char) action))) state))) (define (matched-rules state) (filter (lambda (rule) (accepts-empty? (car rule))) state)) (define (stuck? state) (null? state)) (define (initial-state rules) rules) (define (make-scan-one rules) (lambda (prefix port) (call-with-values (lambda () (let loop ((state (initial-state rules)) (prefix prefix) (rev-lexeme '()) (maybe-last-action #f) (maybe-last-position #f) (maybe-last-read-chars #f) (maybe-last-rev-lexeme #f) (read-chars (cons #f '()))) (if (or (stuck? state) (and (null? prefix) (eof-object? (peek-char port)))) (if maybe-last-action (values maybe-last-action maybe-last-position (cdr maybe-last-read-chars) maybe-last-rev-lexeme) (values #f #f prefix #f)) (call-with-values (lambda () (if (not (null? prefix)) (values (caar prefix) (cdar prefix) (cdr prefix) read-chars) (let ((pos (current-position port))) ;preserve order (let ((char (read-char port))) (set-cdr! read-chars (cons (cons char pos) '())) (values char pos prefix (cdr read-chars)))))) (lambda (char position prefix read-chars) (let* ((new-state (next-state state char)) (new-matched (matched-rules new-state)) (rev-lexeme (cons char rev-lexeme))) (call-with-values (lambda () (if (null? new-matched) (values maybe-last-action maybe-last-position maybe-last-read-chars maybe-last-rev-lexeme) (let* ((first (car new-matched)) (action (cdr first))) (values action (if maybe-last-position maybe-last-position position) read-chars rev-lexeme)))) (lambda (maybe-last-action maybe-last-position maybe-last-read-chars maybe-last-rev-lexeme) (loop new-state prefix rev-lexeme maybe-last-action maybe-last-position maybe-last-read-chars maybe-last-rev-lexeme read-chars))))))))) (lambda (action position read-chars rev-lexeme) (if action (action (reverse rev-lexeme) position read-chars port) (values #f #f read-chars)))))) (define (scan-to-list scan-one prefix port) (let loop ((rev-result '()) (prefix prefix)) (if (and (null? prefix) (eof-object? (peek-char port))) (values (reverse rev-result) prefix) (call-with-values (lambda () (scan-one prefix port)) (lambda (token attrib prefix) (if (not token) ; lex action says we're done (values (reverse rev-result) prefix) (loop (cons (cons token attrib) rev-result) prefix))))))) (define (scan-to-stream scan-one prefix port) (make-stream (lambda (prefix) (if (and (null? prefix) (eof-object? (peek-char port))) #f (call-with-values (lambda () (scan-one prefix port)) (lambda (token attrib prefix) (if (not token) ; lex action says we're done #f (cons (cons token attrib) prefix)))))) prefix)) (define (make-list n x) (let loop ((n n) (l '())) (if (zero? n) l (loop (- n 1) (cons x l)))))