;;;
;;; gencomp - convert compiled code into static C data
;;;
;;;   Copyright (c) 2004-2005 Shiro Kawai, All rights reserved.
;;;   
;;;   Redistribution and use in source and binary forms, with or without
;;;   modification, are permitted provided that the following conditions
;;;   are met:
;;;   
;;;   1. Redistributions of source code must retain the above copyright
;;;      notice, this list of conditions and the following disclaimer.
;;;  
;;;   2. Redistributions in binary form must reproduce the above copyright
;;;      notice, this list of conditions and the following disclaimer in the
;;;      documentation and/or other materials provided with the distribution.
;;;  
;;;   3. Neither the name of the authors nor the names of its contributors
;;;      may be used to endorse or promote products derived from this
;;;      software without specific prior written permission.
;;;  
;;;   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
;;;   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
;;;   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
;;;   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
;;;   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
;;;   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
;;;   TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
;;;   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
;;;   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
;;;   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
;;;   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
;;;  
;;;  $Id: gencomp,v 1.33 2007/09/14 11:17:42 shirok Exp $
;;;

;;;
;;; This is a hack to "compile" the Scheme-written compiler into static
;;; C data structure, so that it can be linked into libgauche.
;;;
;;; Eventually this code will grow to a generic tool to create compiled
;;; executable from Scheme; for the time being, however, we concentrate
;;; on making just a compiler work.  So there are quite a few assumptions
;;; in a way the source is written.
;;;

(use srfi-1)
(use srfi-13)
(use gauche.cgen)
(use gauche.vm.insn)
(use gauche.parameter)
(use gauche.sequence)
(use gauche.parseopt)
(use file.util)
(use util.match)
(use util.list)
(use text.tr)

;; we compile things within an anonymous module to avoid interference.
(define compile-module (make-parameter #f))

;; keep the (supposed) name of the current module.  (current-module) syntax
;; is compiled into (find-module ...) expression to lookup this name at
;; runtime.
(define compile-module-name (make-parameter #f))

;; keep the basename sans extension of the compiling file.
(define compile-file-basename (make-parameter #f))

;; keep the list of exported bindings (or #t if export-all)
(define compile-module-exports (make-parameter '()))

;; when we're compiling extension module (--ext-module=file), this parameter
;; keeps a port to the specified file.  the file becomes a module definition
;; file, containing define-module and dynamic-load forms, as well as the
;; exported macros.
;; NB: we insert (dynamic-load ...) just after select-module in the ext-module
;; file, assuming the source file has standard layout.
(define ext-module-file (make-parameter #f))

;; list of private macros that should be included in the output.
;; (--keep-private-macro=name,name,...)
;; usually private macros (macros bound to a variable which isn't exported)
;; are discarded, but sometimes hygienic public macros expands to a call
;; of private macros.  gencomp cannot detect such dependency yet, and
;; so they need to be explicitly listed for the time being.
(define private-macros-to-keep (make-parameter '()))

;; compatibility kludge
(define compile       (with-module gauche.internal compile))
(define compile-toplevel-lambda
  (with-module gauche.internal compile-toplevel-lambda))
(define %procedure-inliner
  (with-module gauche.internal %procedure-inliner))
(define vm-code->list (with-module gauche.internal vm-code->list))
(define vm-eval-situation
  (with-module gauche.internal vm-eval-situation))

(define-constant SCM_VM_COMPILING 2) ;; must match with vm.h

;;================================================================
;; Entry
;;
(define (main args)
  (let1 predef-syms '()
    (let-args (cdr args)
        ((keep-private-macro "keep-private-macro=s" #f)
         (ext-module "ext-module=s" #f)
         (output-base "o|output=s" #f)
         (#f "D=s" => (lambda (sym) (push! predef-syms sym)))
         . args)
      (when keep-private-macro
        (private-macros-to-keep
         (map string->symbol (string-split keep-private-macro #\,))))
      (match args
        ((scmfile)
         (when ext-module
           (ext-module-file (ensure-ext-module-file ext-module)))
         (do-it scmfile
                (or output-base (sys-basename (path-sans-extension scmfile)))
                predef-syms)
         (when ext-module (close-output-port (ext-module-file))))
        (else (print "Usage: gosh gencomp [--keep-macro] <file.scm>")
              (exit 0)))
      0)))

(define (do-it src base predef-syms)
  (parameterize ((cgen-current-unit (get-unit src base predef-syms))
                 (compile-module    (make-module #f))
                 (compile-file-basename base)
                 (vm-eval-situation SCM_VM_COMPILING))
    ;; Set up initial environment
    (eval '(define-macro (current-module)
             `(find-module ',(with-module user (compile-module-name))))
          (compile-module))
    ;; Static stuff
    (cgen-decl "#include <gauche/code.h>")
    (cgen-decl "#include <gauche/macro.h>") ; for MakeMacroTransformerOld. temporary.
    (cgen-decl "#define INIT_ENTRY") ; may be overrided by insert-ext-initializer.
    (and-let* ((extm (ext-module-file)))
      (insert-ext-initializer (port-name extm)))
    ;; Main processing
    (with-input-from-file src
      (lambda ()
        (emit-toplevel-executor
         (reverse (port-fold compile-toplevel-form '() read)))))
    ;; Emitting
    (cgen-emit-c (cgen-current-unit))))

(define (ensure-ext-module-file filename)
  (let1 dir (sys-dirname filename)
    (unless (file-exists? dir)
      (make-directory* dir))
    (open-output-file filename)))

(define (write-ext-module form)
  (cond ((ext-module-file)
         => (lambda (p) (write form p) (newline p)))))

;; If we're compiling stand-alone Scheme file (i.e. --ext-module is
;; given), we need to include SCM_INIT_EXTENSION in the initialization
;; code.
(define (insert-ext-initializer ext-module-file-name)
  (cgen-decl "#include <gauche/extend.h>")
  (cgen-decl "#undef INIT_ENTRY")
  (cgen-decl "#define INIT_ENTRY SCM_EXTENSION_ENTRY")
  (let* ((extname (path-sans-extension ext-module-file-name))
         (safe-extname (regexp-replace-all #/\W/ extname "_")))
    (cgen-init #`"SCM_INIT_EXTENSION(,safe-extname);")))

;;================================================================
;; Compiler stuff
;;

;; NOTE:
;;   The code is compiled in the version of the compiler currently
;;   running gencomp (host compiler).  It may differ from the version
;;   of the compiler we're compiling (target compiler), and it becomes
;;   a problem if the two versions of compilers are using different
;;   mappings between mnemonics and codes.
;;
;;   When gencomp generates the C literals for the compiled code, it
;;   uses the following mapping scheme.
;;
;;    1. use vm-code->list to extract mnemonics from the code
;;       compiled by the host compiler.
;;    2. use vm-find-insn-info (in gauche.vm.insn module) to map
;;       mnemonics to the target compiler's code.
;;   
;;   For this scheme to work, the following conditions should be satisfied.
;;
;;    a. gauche.vm.insn should be the one generated from the same
;;       vminsn.scm of the target compiler.
;;    b. all the mnemonics that consists of the code generated by
;;       the host compiler must exists in the target compiler's ISA.
;;
;;   The condition b. implies that if you want to rename an instruction,
;;   you have to take three steps:
;;    (1) add a new instruction of the desired name, compile the
;;        target compiler #1.  (This version of the compiled target
;;        compiler still uses old instruction).
;;    (2) compile the target compiler again, using the target compiler #1,
;;        to generate the target compiler #2.  (This version of
;;        the target compiler uses the new instruction).
;;    (3) remove the old instruction.
;;

;; compile FORM, and conses the toplevel code (something to be
;; executed at toplevel).
(define (compile-toplevel-form form seed)
  (guard (e
          ((<error> e)
           (format (current-error-port) "Error in compiling ~s\n" form)
           (raise e)))
    (match form
      ;; Module related stuff
      (('define-module mod . body)
       (write-ext-module form)
       (parameterize ((compile-module-name mod))
         (fold compile-toplevel-form seed body)))
      (('select-module mod)
       (write-ext-module form)
       (write-ext-module
        `(dynamic-load ,(compile-file-basename)))
       (let ((sym (cgen-literal mod)))
         (cgen-init
          (format "  mod = Scm_FindModule(SCM_SYMBOL(~a), SCM_FIND_MODULE_CREATE);"
                  (cgen-cexpr sym))
          ;; force the current module to be mod
          "  Scm_SelectModule(mod);"))
       (compile-module-name mod)
       seed)
      (('use mod)
       (eval `(use ,mod) (compile-module)) seed)
      (('export . syms)
       (when (list? (compile-module-exports))
         (compile-module-exports
          (lset-union eq? syms (compile-module-exports))))
       (eval `(export ,@syms) (compile-module)) seed)
      (('export-all)
       (compile-module-exports #t))
      (('provide arg)
       (write-ext-module form)
       seed)
      ;; For the time being, we only compile the legacy macros into C file.
      ;; R5RS macros are put in ext-module file as is.
      (('define-macro (name . formals) . body)
       (eval form (compile-module))
       (when (or (symbol-exported? name)
                 (memq name (private-macros-to-keep)))
         (let* ((body-closure (compile-toplevel-lambda form name formals
                                                       body (compile-module)))
                (code (cgen-literal (closure-code body-closure)))
                (var  (cgen-literal name)))
           (cgen-init
            (format "  Scm_Define(mod, SCM_SYMBOL(~a), Scm_MakeMacroTransformerOld(SCM_SYMBOL(~a), SCM_PROCEDURE(Scm_MakeClosure(~a, NULL))));"
                    (cgen-cexpr var) (cgen-cexpr var)
                    (cgen-cexpr code)))))
       seed)
      (('define-macro name . _)
       (when (symbol-exported? name)
         (write-ext-module form))
       (eval form (compile-module)) seed)
      (('define-syntax name . _)
       (when (or (symbol-exported? name)
                 (memq name (private-macros-to-keep)))
         (write-ext-module form))
       (eval form (compile-module)) seed)
      ;; Finally, ordinary expressions.
      (('define (name . args) . body)
       (compile-toplevel-form `(define ,name (lambda ,args ,@body)) seed))
      (('define (? symbol? name) ('lambda args . body))
       (let* ((closure
               (compile-toplevel-lambda form name args body (compile-module)))
              (code (cgen-literal (closure-code closure)))
              (var  (cgen-literal name)))
         (cgen-init
          (format "  Scm_Define(mod, SCM_SYMBOL(~a), Scm_MakeClosure(~a, NULL));"
                  (cgen-cexpr var) (cgen-cexpr code))))
       seed)
      (else
       (let1 compiled-code (compile form (compile-module))
         ;; We exclude a compiled code with only CONSTU-RET, which appears
         ;; as the result of macro expansion sometimes.
         (if (toplevel-constu-ret-code? compiled-code)
           seed
           (cons (cgen-literal (compile form (compile-module))) seed))))
      )))

;; check to see the compiled code only contains CONSTU-RET insn.
;; the 'size slot test is a temporary one to make this work with 0.8.4
;; preview version.  once 0.8.4 is out, it should be removed.
(define (toplevel-constu-ret-code? compiled-code)
  (and (eq? (ref compiled-code 'name) '%toplevel)
       (if (assq 'size (class-slots <compiled-code>))
         (= (ref compiled-code 'size) 1)
         #t)
       (let1 code (vm-code->list compiled-code)
         (null? (cdr code))
         (eq? (caar code) 'CONSTU-RET))))

;; given list of toplevel compiled codes, generate code in init
;; that calls them.  This is assumed to be the last procedure before
;; calling cgen-emit.
(define (emit-toplevel-executor topcodes)
  (cgen-body "static ScmCompiledCode *toplevels[] = {")
  (dolist (t topcodes)
    (cgen-body (format "  SCM_COMPILED_CODE(~a)," (cgen-cexpr t))))
  (cgen-body " NULL /*termination*/" "};")

  (cgen-init (format "  Scm_VMExecuteToplevels(toplevels);"))
  )

;; check to see if the symbol is exported
(define (symbol-exported? sym)
  (or (eq? (compile-module-exports) #t)
      (memq sym (compile-module-exports))))

;;================================================================
;; Compiler-specific literal handling definitions
;;       
(define-cgen-literal <cgen-scheme-code> <compiled-code>
  ((code-name   :init-keyword :code-name)
   (code-vector-c-name :init-keyword :code-vector-c-name)
   (literals    :init-keyword :literals)
   )
  (make (value)
    (let* ((cv  (vm-code->list value))
           (lv  (extract-literals cv))
           (cvn (allocate-code-vector cv lv (ref value 'full-name)))
           (il  (ref value 'intermediate-form))
           (code-name (cgen-literal (ref value 'name)))
           (arg-info (cgen-literal (ref value 'arg-info)))
           (inliner (and (vector? il) (cgen-literal il)))
           )
      (define (init-thunk)
        (print "    SCM_COMPILED_CODE_CONST_INITIALIZER(")
        (format #t "            (ScmWord*)(~a), ~a,\n"
                cvn (length cv))
        (format #t "            ~a, ~a, ~a, ~a, SCM_NIL, ~a,\n"
                (ref value 'max-stack)
                (ref value 'required-args)
                (ref value 'optional-args)
                (if (cgen-literal-static? code-name)
                  (cgen-cexpr code-name)
                  "SCM_FALSE")
                (cgen-cexpr arg-info))
        (format #t "            ~a, ~a)"
                (cgen-cexpr (cgen-literal (ref value 'parent)))
                (if inliner
                  (cgen-cexpr inliner)
                  "SCM_FALSE")))
      (make <cgen-scheme-code> :value value
            :c-name (cgen-allocate-static-datum 'runtime 'ScmCompiledCode
                                                init-thunk)
            :code-vector-c-name cvn
            :code-name code-name
            :literals lv)))
  (init (self)
    (unless (cgen-literal-static? (ref self 'code-name))
      (print "  SCM_COMPILED_CODE("(ref self 'c-name)")->name = "
             (cgen-cexpr (ref self 'code-name))";"))
    (fill-code self))
  (static (self) #t)
  )

;; Returns a list of the same length of CODE, which includes the
;; <cgen-literal>s corresponding to the literal values in CODE.
;; #f is filled in the places that don't have corresponding litaral value.
(define (extract-literals code)
  (let loop ((code code)
             (lits '()))
    (if (null? code)
      (reverse lits)
      (let* ((insn (car code))
             (info (vm-find-insn-info (car insn))))
        (case (ref info 'operand-type)
          ((none) (loop (cdr code)  (cons #f lits)))
          ((addr) (loop (cddr code) (list* #f #f lits)))
          ((code codes) (loop (cddr code)
                              (list* (cgen-literal (cadr code)) #f lits)))
          ((obj) (loop (cddr code)
                        (list* (cgen-literal (cadr code)) #f lits)))
          ((obj+addr)
           (loop (cdddr code)
                 (list* #f (cgen-literal (cadr code)) #f lits)))
          )))))

(define (allocate-code-vector cv lv full-name)

  (define (alloc-word initval)
    (cgen-allocate-static-datum 'runtime 'ScmWord initval))

  (define (safe-comment str)
    (regexp-replace-all* (x->string str) #/\/\*/ "/ *" #/\*\// "* /"))

  (define (loop cv lv count first-cexpr)
    (if (null? cv)
      first-cexpr
      (let* ((insn (car cv))
             (info (vm-find-insn-info (car insn)))
             (insnval (vm-build-insn insn))
             (name-info (if first-cexpr
                          ""
                          (format "/* ~a */\n    " (safe-comment full-name))))
             (insn-cexpr
              (alloc-word
               ;; We emit it as signed integer so that 64bit machine
               ;; correctly handles negative parameter value.
               (if (> insnval #x80000000)
                 (format "~a-0x~8,'0x   /* ~3d ~a */"
                         name-info (- #x100000000 insnval) count
                         (safe-comment insn))
                 (format "~a0x~8,'0x    /* ~3d ~a */"
                         name-info insnval count
                         (safe-comment insn)))))
             (first-cexpr (or first-cexpr insn-cexpr)))
        (case (ref info 'operand-type)
          ((none)
           (loop (cdr cv) (cdr lv) (+ count 1) first-cexpr))
          ((addr)
           (alloc-word
            (format "SCM_WORD((ScmWord*)~a + ~d)"
                    first-cexpr (cadr cv)))
           (loop (cddr cv) (cddr lv) (+ count 2) first-cexpr))
          ((obj code codes)
           (alloc-word
            (if (cgen-literal-static? (cadr lv))
              (format "SCM_WORD(~a) /* ~a */"
                      (cgen-cexpr (cadr lv))
                      (safe-comment (write-to-string (cadr cv))))
              (format "SCM_WORD(SCM_UNDEFINED) /* ~a */"
                      (safe-comment (write-to-string (cadr cv))))))
           (loop (cddr cv) (cddr lv) (+ count 2) first-cexpr))
          ((obj+addr)
           (alloc-word
            (if (cgen-literal-static? (cadr lv))
              (format "SCM_WORD(~a) /* ~a */"
                      (cgen-cexpr (cadr lv))
                      (safe-comment (write-to-string (cadr cv))))
              (format "SCM_WORD(SCM_UNDEFINED) /* ~a */"
                      (safe-comment (write-to-string (cadr cv))))))
           (alloc-word
            (format "SCM_WORD((ScmWord*)~a + ~d)  /*    ~3d */"
                    first-cexpr (caddr cv) (caddr cv)))
           (loop (cdddr cv) (cdddr lv) (+ count 3) first-cexpr))
          ))))

  (loop cv lv 0 #f))

(define (fill-code code)
  (let ((cvn  (ref code 'code-vector-c-name))
        (lv   (ref code 'literals)))
    (for-each-with-index
     (lambda (index lit)
       (when (and lit (not (cgen-literal-static? lit)))
         (format #t "  ((ScmWord*)~a)[~a] = SCM_WORD(~a);\n"
                 cvn index (cgen-cexpr lit))))
     lv)
    ))

;; NB: this doesn't yet handle identifiers that are inserted by hygienic
;; macro (so that they have different module than the current one).
(define-cgen-literal <cgen-scheme-identifier> <identifier>
  ((id-name   :init-keyword :id-name)
   (mod-name  :init-keyword :mod-name))
  (make (value)
    (let ((name (ref value 'name))
          (mod  (ref value 'module))
          (env  (ref value 'env)))
      (unless (null? env)
        (error "identifier with compiler environment can't be compiled" value))
      (make <cgen-scheme-identifier> :value value
            :c-name (cgen-allocate-static-datum)
            :id-name (cgen-literal name)
            :mod-name (and-let* ((modnam (module-name-fix mod)))
                        (cgen-literal modnam)))))
  (init (self)
    (let ((name (cgen-cexpr (ref self 'id-name)))
          (cname (ref self 'c-name)))
      (or (and-let* ((modnam (ref self 'mod-name)))
            (print "  "cname" = Scm_MakeIdentifier(SCM_SYMBOL("name"), "
                   "Scm_FindModule(SCM_SYMBOL("(cgen-cexpr modnam)"), SCM_FIND_MODULE_CREATE),"
                   "SCM_NIL);"))
          (print "  "cname" = Scm_MakeIdentifier(SCM_SYMBOL("name"), mod, SCM_NIL);"))))
  (static (self) #f)
  )

;; NB: for compatibility, we check modnam vs '# to find out anonymous
;; modules.  As of 0.8.4, module-name should return #f for
;; anonymous modules.
;; NB: we also filter out user module---an identifiers inserted by local
;; macros are attributed to user module incorrectly in some version of
;; pre-0.8.4 compiler, and we need to work around that to compile the
;; corrent compiler.
(define (module-name-fix module)
  (and-let* ((nam (module-name module))
             ( (not (eq? nam '|#|)) ) ;|# <- to fool emacs
             ( (not (eq? nam 'user)) ))
    nam))

;; NB: for now, we ignore macros (we assume they are only used within
;; the source file).
(define-cgen-literal <cgen-scheme-macro> <macro>
  ()
  (make (value)
    (make <cgen-scheme-macro> :value value :c-name #f))
  )

;; For generic functions, we initialize it at runtime.
(define-cgen-literal <cgen-scheme-generic> <generic>
  ((gf-name :init-keyword :gf-name))
  (make (value)
    (make <cgen-scheme-generic>
      :value value
      :c-name  (cgen-allocate-static-datum)
      :gf-name (cgen-literal (ref value 'name))))
  (init (self)
    (format #t "  ~a = Scm_SymbolValue(mod, SCM_SYMBOL(~a));\n"
            (ref self 'c-name)
            (ref (ref self 'gf-name) 'c-name)))
  (static (self) #f)
  )

;;================================================================
;; Utilities
;;

(define (get-unit src base predef-syms)
  (make <cgen-unit>
    :name base
    :preamble `(,(format "/* Generated automatically from ~a.  DO NOT EDIT */"
                         src))
    :pre-decl (map (lambda (s) #`"#define ,s") predef-syms)
    :init-prologue (format "INIT_ENTRY void Scm_Init_~a() { ScmModule *mod;"
                           (string-tr (sys-basename base) "-+" "__"))
    ))

;; Local variables:
;; mode: scheme
;; end:
