#!/usr/bin/env python2.4
# -*- mode: python -*-       
import getopt, os, shlex, sys, tempfile, shutil
import re, string, time
import md5
       
# $Id: dap_nc_swamp,v 1.66 2007/03/08 19:37:32 wangd Exp $
# This is:  -- a wrapper-replacement for dap_nc_handler used by opendap.
#           -- It provides a script handling facility, but redirects most
#              normal calls to the original dap_nc_handler

### Temporary (1/31/07) TODO
# - pick commonly available paths as defaults in configuration.

# we potentially have to handle: das, dds, dods, ascii, info
# das -- ??
# dds -- ??
# dods -- return dods data. This is the one we care about.
# ascii -- does dods mode, with -e and no url
# info -- does das/dds sequence
# html -- calls das/dds to obtain var info, then gives html form
# version -- doesn't seem to call the handler: opendap core version

#options: -o <response>: DAS, DDS, DataDDS, DDX, BLOB or Version (Required)
#          -u <url>: The complete URL minus the CE (required for DDX)
#          -c: Compress the response using the deflate algorithm.
#          -e <expr>: When returning a DataDDS, use <expr> as the constraint.
#          -v <version>: Use <version> as the version number
#          -d <dir>: Look for ancillary file in <dir> (deprecated).
#          -f <file>: Look for ancillary data in <file> (deprecated).
#          -r <dir>: Use <dir> as a cache directory
#          -l <time>: Conditional request; if data source is unchanged since
#                     <time>, return an HTTP 304 response.
#
#          -L : skip printing of http headers. **** for easier testing
#
# "Simple" test: 
# ./dap_nc_handler_hack --local full_resamp.ssdap --with-db 
######################################################################
# Administrator-tunable parameters: (overridden by config file)
######################################################################
class local:
    """Administrator-tunable settings plus a few runtime variables.

    All attributes are class-level; readConfigFile() overwrites them in
    place via setattr(local, ...), so the class acts as a global
    configuration namespace."""
    disableLogging = False
    logLocation = "/var/log/ssdap.log"

    #ncBinPath = os.environ["MY_BIN_DIR"]
    # NCO binaries: one set built against netCDF4, one against the
    # OPeNDAP-enabled client library (selection happens per script line).
    nc4BinPath = "/usr/local/bin"
    ncDapBinPath = "/usr/local/bin"

    # pass original DAP commands to the original dap handler.
    passthroughPath = "/usr/local/bin/dap_nc_handler"

    dbFilename = None
    disableHTTP = True # Recommend: True (until client code is fixed)
    # this is for command-line debugging

    disableParallel = True

    dashPath = None
    useDashHelper = False

    scratchPath = None
    resultPath = None

    # number of concurrent execution slots (see readConfigFile: "nslots")
    nslots = 1
    #runtime variables
    logfile = None  # StringIO buffer during prestart; reopened per-write later
    mypid = 0
    # NOTE(review): this check executes at class-definition time, but the
    # file already uses 2.4-only decorator syntax (@staticmethod below), so
    # a pre-2.4 interpreter raises SyntaxError before this code ever runs.
    if sys.version_info[:2] < (2,4): # 2.4 adds @staticmethod syntax
        print """You have python version %s, and >=2.4 is required.
""" %(sys.version)
        sys.exit(1)
        
######################################################################
# User/Administrator notes:
######################################################################
# this script switches between using DAP-aware or NetCDF4-capable NCO
# binaries depending on if an OPeNDAP url is detected in a script
# line. An opendap url is currently detected by looking for an
# http:// prefix.
#
# This script looks for a config file "swamp_handler.conf",
# located in the same directory as itself (via checking argv[0]).   


######################################################################
# this class should stay identical in client/server.  If it gets big,
# we should split it into some python module to be imported.
class SsdapCommon:
    """stuff that should be identical between client and server code.

    Holds the getopt short/long option tables for the NCO command set,
    plus per-command variants (ncap, ncpdq/ncpack/ncunpack, ncks,
    ncflint, ncwa) derived from the base tables, and specialGetOpt(),
    which picks the right table for a command and runs getopt on it."""
    # NOTE: "s:" appears twice in this string; getopt tolerates the
    # duplicate, and it is left as-is so the .replace()-derived tables
    # below are not perturbed.
    parserShortOpt = "4Aa:Bb:CcD:d:FfHhl:Mmn:Oo:Pp:QqRrs:S:s:t:uv:w:xY:y:"
    parserLongOpt = ["4", "netcdf4", "apn", "append",
                     "attribute=", #ncatted, ncrename
                     # BUGFIX: a missing comma after "average=" used to fuse
                     # it with "bnr" via implicit string concatenation into
                     # the bogus option "average=bnr", breaking both
                     # --average and --bnr.
                     "avg=", "average=", #ncwa
                     "bnr", "binary",
                     "fl_bnr=", "binary-file=",
                     "crd", "coords",
                     "nocoords", "dbg_lvl=", "debug-level=",
                     "dmn=", "dimension=", "ftn", "fortran",
                     "huh", "hmm",
                     "fnc_tbl", "prn_fnc_tbl", "hst", "history",
                     "Mtd", "Metadata", "mtd", "metadata",
                     "mmr_cln", # only on recent NCO release
                     "lcl=", "local=",
                     "nintap",
                     "output=", "fl_out=",
                     "ovr", "overwrite", "prn", "print", "quiet",
                     "pth=", "path=",
                     "rtn", "retain", "revision", "vrs", "version",
                     "spt=", "script=", "fl_spt=", "script-file=",
                     "sng_fmt=", "string=",
                     "thr_nbr=", "threads=", "omp_num_threads=",
                     "xcl", "exclude",
                     "variable=", "wgt_var=", "weight=",
                     "op_typ=", "operation=" ]

    # special handling for ncap's parameters: -v/--variable take no argument
    ncapShortOpt = parserShortOpt.replace("v:","v")
    ncapLongOpt = parserLongOpt[:]
    ncapLongOpt.remove('variable=')
    ncapLongOpt.append('variable')

    # ncpdq/ncpack/ncunpack: -M and -P take arguments, -U is added
    ncpackShortOpt = parserShortOpt.replace("M","M:")
    ncpackShortOpt = ncpackShortOpt.replace("P","P:")
    ncpackShortOpt = ncpackShortOpt.replace("u","Uu")

    ncpackLongOpt = parserLongOpt[:]
    ncpackLongOpt.extend(['arrange','permute','reorder', 'rdr',
                         'pck_map', 'map', 'pck_plc','pack_policy',
                         'upk', 'unpack'])
    # ncks: -a takes no argument
    ncksShortOpt = parserShortOpt.replace("a:","a")
    ncksLongOpt = parserLongOpt[:]
    ncksLongOpt.extend(['abc', 'alphabetize'])

    # ncflint: adds -i with argument
    ncflintShortOpt = parserShortOpt.replace("hl","hi:l")
    ncflintLongOpt = parserLongOpt[:]
    ncflintLongOpt.extend(['ntp', 'interpolate'])

    # special handling for ncwa's params (mask-related)
    # B: msk_cnd= mask_condition= (rplc)
    # b rdd degenerate-dimensions (rplc)
    # I: wgt_msk_crd_var=
    # M: msk_val= mask_value=  mask-value= (rplc)
    # m: msk_nm= msk_var= mask_variable= mask-variable= (rplc)
    # N nmr numerator
    # T: mask_comparitor= msk_cmp_typ= op_rlt=
    ncwaShortOpt = parserShortOpt.replace("Bb:","B:b")
    ncwaShortOpt = ncwaShortOpt.replace("h","hI:")
    ncwaShortOpt = ncwaShortOpt.replace("Mmn:","M:m:Nn:")
    ncwaShortOpt = ncwaShortOpt.replace("t:","T:t:")
    ncwaLongOpt = parserLongOpt[:]
    ncwaLongOpt.extend(["msk_cnd=", "mask_condition=",
                        "rdd", "degenerate-dimensions",
                        "wgt_msk_crd_var=",
                        "msk_val=", "mask_value=",  "mask-value=",
                        "msk_nm=", "msk_var=", "mask_variable=",
                        "mask-variable=",
                        "nmr", "numerator",
                        "mask_comparitor=", "msk_cmp_typ=", "op_rlt="])

    @staticmethod
    def specialGetOpt(cmd, argvlist):
        """Parse argvlist with the option tables appropriate for cmd.

        Returns getopt's (option-tuples, leftover-args) pair; raises
        getopt.GetoptError on an unrecognized or malformed option."""
        #consider special-case for ncwa ncflint -w: option
        # wgt_var, weight also for ncflint/ncwa
        special = {
            "ncap":     (SsdapCommon.ncapShortOpt,    SsdapCommon.ncapLongOpt),
            # ncpdq/ncpack/ncunpack have a different format too
            "ncpdq":    (SsdapCommon.ncpackShortOpt,  SsdapCommon.ncpackLongOpt),
            "ncpack":   (SsdapCommon.ncpackShortOpt,  SsdapCommon.ncpackLongOpt),
            "ncunpack": (SsdapCommon.ncpackShortOpt,  SsdapCommon.ncpackLongOpt),
            "ncks":     (SsdapCommon.ncksShortOpt,    SsdapCommon.ncksLongOpt),
            "ncflint":  (SsdapCommon.ncflintShortOpt, SsdapCommon.ncflintLongOpt),
            "ncwa":     (SsdapCommon.ncwaShortOpt,    SsdapCommon.ncwaLongOpt),
            }
        (shortOpt, longOpt) = special.get(
            cmd, (SsdapCommon.parserShortOpt, SsdapCommon.parserLongOpt))
        return getopt.getopt(argvlist, shortOpt, longOpt)

############################################################
# config file reader

def readConfigFile(fname = "swamp_handler.conf"):
    """Read the handler config file and poke its values into `local`.

    Each cfgmap entry maps (local attrname, cfg section, cfg item,
    default).  Only options actually present in the file are applied;
    the fourth tuple element is not consulted (the defaults live on the
    `local` class itself).  String booleans are remapped to real ones."""
    cfgmap = [("disableLogging", "log", "disable", False),
              ("logLocation",    "log", "location",
               "/home/wangd/opendap/iroot/ssdap.log"),

              ("nc4BinPath", "targets", "netcdf4nco",
               "/home/wangd/opendap/iroot/nco_ncdf4_bin"),
              ("ncDapBinPath", "targets", "opendapnco",
               "/home/wangd/opendap/iroot/nco_dap_bin"),
              ("scratchPath", "targets", "scratchPath", None),
              ("resultPath", "targets", "resultPath", None),
              ("passthroughPath", "targets", "passthroughPath",
               "/home/wangd/opendap/iroot/bin/dap_nc_handler"),

              ("dbFilename", "database", "dbFilename", None),

              ("disableHTTP", "global", "disableHTTP", True),
              ("disableParallel", "global", "disableParallel", True),
              ("nslots", "global", "nslots", 2),

              ("sshSpawn", "experimental", "sshSpawn", None)
              ]
    remap = {"True" : True, "False" : False, "Yes" : True, "No" : False }

    try:
        import ConfigParser
        parser = ConfigParser.ConfigParser()
        # A bare filename is looked up next to the script itself
        # (argv[0]); a path with a directory component is used verbatim.
        if os.path.split(fname)[0]:
            filepath = fname
        else:
            filepath = os.path.join(os.path.split(sys.argv[0])[0],
                                    fname)
        logit("reading configfile %s" % (filepath))
        parser.read(filepath)

        for (attrname, section, item, _unused) in cfgmap:
            if not parser.has_option(section, item):
                continue
            value = parser.get(section, item)
            value = remap.get(value, value)
            setattr(local, attrname, value)
            # uncomment if you are curious about cfgfile reading
            #logit("cfgfile: setting local." + attrname + " " + str(value))

        # nslots arrives as a string from the config file; coerce it.
        if type(local.nslots) is not int:
            local.nslots = int(local.nslots)
        ScriptingEnv.fixConfig()
    except AttributeError:
        # no config file, probably safe to silently ignore
        pass

######################################################################
# setup some logging
######################################################################
def prestartLogging():
    """Begin logging into an in-memory buffer.

    The buffer stands in for the real log file until startLogging()
    decides where the log actually lives and dumps the contents there."""
    local.mypid = os.getpid()
    if local.disableLogging:
        return
    import cStringIO
    buf = cStringIO.StringIO()
    local.logfile = buf
    pid = str(local.mypid)
    buf.write("SSDAP/SWAMP dap_nc_swamp %s\n" % "$Revision: 1.66 $")
    buf.write("handler " + pid + " log open at " + time.ctime() + "\n")

def startLogging():
    """Officially start the logging system.

    Dumps whatever prestartLogging() buffered into the real log file,
    then closes it immediately (keeping the file open would cause
    trouble across parent/child forks)."""
    if local.disableLogging:
        return
    buffered = local.logfile.getvalue()
    # must overwrite local.logfile so logit() knows prestart is over
    log = open(local.logLocation, "a")
    local.logfile = log
    log.write(buffered)
    log.flush()
    log.close()

def logit(s):
    """Append the string s as one line of the current log.

    During prestart the line goes into the in-memory buffer; afterwards
    the log file is opened, written, flushed, and closed per call so
    concurrent writer processes do not hold it open."""
    if local.disableLogging:
        return
    import cStringIO
    entry = str(local.mypid) + "==" + s + "\n"
    # still in prestart? then local.logfile is the cStringIO buffer.
    if type(local.logfile) == cStringIO.OutputType:
        local.logfile.write(entry)
    elif local.logfile is not None:
        handle = open(local.logLocation, "a")
        handle.write(entry)
        handle.flush()
        handle.close()
def logflush():
    """Force a flush of the log.  (deprecated)

    Kept only for API compatibility: logit() now opens, writes, flushes,
    and closes the file on every call, so there is nothing to flush."""
    return

######################################################################
# Globals to aid in profiling: a scratch dict reachable from global
# scope, so profiling hooks can stash values across call boundaries.
profileVar = {}  

######################################################################
class Command:
    """The Command class is an abstraction for a single script command
    to be executed.  Each line in the script line maps to approximately
    one command.  The Command class depends on the scripting environment
    to provide constants in generating a command line."""
    def __init__(self, line, lineNum, scriptenv):
        # line: raw script text; lineNum: its position in the script;
        # scriptenv: the owning ScriptingEnv (supplies constants + state).
        self.original = line
        self.lineNum = lineNum
        self.isMeta = False       # becomes True when parsed as an ssd_* cmd
        self.env = scriptenv
        self.cmdLine = None # empty commandline for now
        self.myName = ""    # unique name: cmd + index in env.commands
        self.cmd = None     # bare command name (e.g. "ncwa"), None if unparsed
        self.dependents = []  # Commands that consume this one's outputs
        self.ancestors = []   # Commands whose outputs this one consumes
        self.absOutputs = []  # output filenames (or %stdout%-style tokens)
        self.absInputs = []   # input filenames
        self.inPrefix = ""    # -p prefix dir, prepended to input filenames
        self.saveOutput = False
        self.popen = os.popen2  # popen2 = capture stdout; popen4 adds stderr
        if False: # defer these to the command factory.
            self.parseOriginalLine()
            self.buildCommandLine()
        pass

    def commandLine(self):
        # Built shell command line, or None until buildCommandLine() runs.
        return self.cmdLine
    def addChild(self, comm):
        # Record comm as a dependent of self, and self as its ancestor.
        assert isinstance(comm, Command)
        # update self
        if comm not in self.dependents:
            self.dependents.append(comm)
            #update child
            comm.ancestors.append(self)
        # following check is only for sanity.
        #if self not in comm.parents(): 

    def addParent(self, comm):
        # Record comm as an ancestor of self, and self as its dependent.
        assert isinstance(comm, Command)
        # update self
        if comm not in self.ancestors:
            self.ancestors.append(comm)
        # update parent
        if self not in comm.children():
            comm.dependents.append(self)


    def children(self):
        return self.dependents
    def parents(self):
        return self.ancestors
    def outputs(self):
        """ return a list of absoutputfiles """
        return self.absOutputs
    def inputs(self):
        """ return a list of absinputfiles """
        return self.absInputs


    # "private" functions

    def parseOriginalLine(self):
        """Tokenize self.original and dispatch to the meta/normal parser.
        Blank lines, comments, and unknown commands are silently skipped
        (unknown commands also print a WARN to stdout)."""
        argv = shlex.split(self.original, True) # True--> shlex eats the comments

        if len(argv) < 1:
            return # nothing to parse

        mycmd = argv[0].strip() # drop leading/trailing whitespace
        if mycmd[0] == "#": #comment: nothing to parse.
            return
        valids = ScriptingEnv.META_COMMANDS + ScriptingEnv.NCO_COMMANDS
        if mycmd not in valids:
            print "WARN: bad script line: " + self.original
            return
        # name is unique within a script: command + running command count
        self.myName = mycmd + str(len(self.env.commands))
        self.cmd = mycmd
        if self.cmd in ScriptingEnv.META_COMMANDS:
            self.parseMetaCmd(argv[1:])
        else:
            self.parseNormalCmd(argv[1:])

    def parseMetaCmd(self, argv):
        """ deprecate in commandfactory version"""
        # Dispatch table for the ssd_* pseudo-commands.
        # NOTE(review): "ssd_retrieve" maps to the blocking handler and
        # "ssd_retrievenoblock" to the non-blocking one -- looks intended,
        # but worth confirming against the meta-command spec.
        metamap = {"ssd_initsave" : self.cmdInitSave,
                   "ssd_save" : self.cmdSave,
                   "ssd_poll" : self.cmdPoll,
                   "ssd_retrieve" : self.cmdRetrieveOrBlock,
                   "ssd_retrievenoblock" : self.cmdRetrieve }
        logit("trying meta cmd " + self.cmd)
        metamap[self.cmd](argv)
        self.isMeta = True
        pass

    
    def __str__(self):
        # NOTE(review): raises TypeError if cmdLine is still None.
        return "Command (" + self.myName + ") with cmdline: " + self.cmdLine

    def parseNormalCmd(self, argv):
        """Parse an NCO command's arguments with getopt, then pull out
        modifiers, input/output files, and dependency links."""
        self.isMeta = False
        try:
            (arglist, leftover) = SsdapCommon.specialGetOpt(self.cmd, argv)
        except getopt.GetoptError:
            import traceback
            logit("Error parsing(getopt) command: " + self.original)
            logit(traceback.format_exc())
            raise ValueError, 'Command.parse error'
        argdict = dict(arglist)
        self.acceptModifiers(argdict, arglist, leftover)
        self.acceptInOut(argdict, arglist, leftover)
        self.fixupDependencies() # we use the deptree to reduce db load
        self.arguments = arglist
        pass

    def setOutputModifiers(self, adict, alist, leftover):
        # Backwards-compatible alias for acceptModifiers.
        return self.acceptModifiers(adict, alist, leftover)
    def acceptModifiers(self, adict, alist, leftover):
        """accept special things like %stdouterr% and %stdout% so that
        specify how to handle a particular line."""
        def selectPopen2():
            # capture stdout only
            self.popen = os.popen2
            self.absOutputs.append(ScriptingEnv.SCRIPT_STDOUT)
            self.saveOutput = True
        def selectPopen4():
            # capture stdout and stderr combined
            self.popen = os.popen4
            self.absOutputs.append(ScriptingEnv.SCRIPT_STDOUTERR)
            self.saveOutput = True
        # yes, I could replace the above with a single def that
        # generated the right function, but why bother?

        # this is a table of what can be matched, and what to do if
        # there's a match.
        redirTable = {ScriptingEnv.SCRIPT_STDOUTERR : selectPopen4,
                      ScriptingEnv.SCRIPT_STDOUT: selectPopen2}
        # check hangers-on on the tail of the line for modifiers.
        # for now, process last to first, until we get a lot of modifiers
        # and have a chance for actual conflict/precedence
        if len(leftover) > 0:
            while leftover[-1] in redirTable:
                redirTable[leftover.pop()]()
                # pop off the back of the list, and then do the right thing.
                if not leftover: break

        pass


    def acceptInOut(self, adict, alist, leftover):
        """Identify the output file (explicit -o/--output flag, or the
        last positional argument) and treat the remaining positionals
        as inputs; also consumes the -p input-prefix option."""
        # look for output file first
        ofname = ""
        for x in  ["-o", "--fl_out", "--output"]:
            if x in adict:
                assert ofname == ""
                keys = [y[0] for y in alist]
                o = alist.pop(keys.index(x)) # o is a tuple.
                ofname = adict.pop(x)
                assert o[1] == ofname
                
        if ofname == "":
            # don't steal output if it's actually the input.
            if len(leftover) <= 1:
                # only one leftover...leave it to be captured by the inputter
                if len(self.absOutputs) == 0:
                    logit("warning, unbound output for " + self.original)
            else:
                ofname = leftover[-1] # take last arg
                leftover = leftover[:-1] # and drop it off

        if ofname != "": # should have a real filename (or placeholder)
            self.absOutputs.append(ofname)
            self.env.toutlist.append(ofname)
        self.absInputs += leftover

        # register inputs in the env-wide input set (dict used as a set)
        self.env.tinlist.update(map(lambda x:(x,1),leftover))
        if "-p" in adict:
            self.inPrefix = adict["-p"]
            keys = [x[0] for x in alist]
            p = alist.pop(keys.index("-p"))
            adict.pop("-p")
        self.patchNcapInscript(adict,alist,leftover)
        pass
    
    def patchNcapInscript(self, adict, alist, leftover):
        """ncap is a special command that has an input script.
        If we are passed a filename, we should look for it in a particular
        place"""
        # this exists to solve:
        # what about server side script?  The 1st ncap regression calls
        # for the reading of a script from a file instead of supplying it
        # in the commandline.
        # ncap -h -O $fl_fmt $nco_D_flg -v -S ncap.in $in_pth_arg in.nc %tempf_00%";
        #                                  ^^^^^^^^^^
        # Can you capture/parse  the '-S scriptfile' and search for the
        # script file in the specified '-p dir'? 


        if "ncap" != self.cmd:
            return # exit if it's not ncap
        if "-S" in adict:
            # rewrite the -S script path with the input prefix/defpath
            newname = self.addInfilePrefix(adict["-S"])
            keys = [x[0] for x in alist]
            alist[keys.index("-S")] = ('-S', newname)
            adict["-S"] = newname
        pass
        
    def fixupDependencies(self):
        """Register this command's outputs in the env-wide producer table
        and link self as a child of whichever command produced each of
        its inputs (read-after-write dependencies only)."""
        # update myself in the table
        for o in self.absOutputs:
            # virtual outputs (e.g. stdout/stderr)
            # will be overwritten here, but that's okay.
            self.env.commandsByOutput[o] = self
        for i in self.absInputs:
            if i in self.env.commandsByOutput:
                self.env.commandsByOutput[i].addChild(self)

        # don't do the other checking until we decide to track those deps
        # for each output, check previously parsed for WAW hazards
        # write-after-write is an output dependency
        # also check for WAR: anti-dependency
        #for x in self.absOutputs: # probably only one output...
        #    for p in previous:

    def safeRemove(self, fname):
        """Remove fname if it exists and is writable; otherwise no-op."""
        if os.access(fname, os.F_OK | os.W_OK):
            os.remove(fname)

    def buildCommandLine(self):
        """Assemble the shell command line: binary path (DAP-aware or
        netCDF4 NCO, chosen by whether any filename requires DAP),
        followed by the parsed options, inputs, and outputs."""
        line = ""
        needsDap = False
        if self.cmd == None: # don't build cmdline for non-cmds
            return
        # prefix cmd at the end.
        #line += ScriptingEnv.VALID_COMMANDS[self.cmd]
        
        #print >>sys.stderr, "building cmd: ", self.cmd
        for (k,v) in self.arguments:
             #special value handling for --op_typ='-'
             # quote any value containing chars outside [A-Za-z0-9%]
             needsProt = False
             safe = string.letters + string.digits + "%"
             for x in v: needsProt |= (x not in safe)

             # add = for long options? is this necessary?
             k1 = k
             #if len(k) > 2: k1 = k + "="


             if needsProt and not ("'" == v[0] == v[-1]):
                 line += " " + k1 + " '" + v + "'"
             elif len(v) > 0: line += " " + k1 + " " + v
             else:            line += " " + k1
         # then pass commands
        filelist = []
        for name in self.absInputs:
            filelist.append(name)
        for name in self.absOutputs:
            # stdout/stderr placeholders never appear on the cmdline
            if name in [ ScriptingEnv.SCRIPT_STDOUTERR,
                          ScriptingEnv.SCRIPT_STDOUT ]:
                continue
            filelist.append(name)
        for name in filelist:
            remaptype = self.env.shouldRemap(name)
            if remaptype:
                remapped = self.env.addAbsName(name, remaptype)
                self.safeRemove(remapped) # kill the file to let nco write.
                assert remapped != None
                line += " " + remapped
            elif self.env.possibleFilename(name):
                # add defpath for now.
                newname = self.addInfilePrefix(name)
                line += " " + newname
            else:
                # presumably a URL or other non-file token; it may
                # force the DAP-enabled binary
                needsDap |= self.env.requiresDap(name)
                line += " " + name
        if needsDap:
             self.cmdLine = ScriptingEnv.VALID_DAPCOMMANDS[self.cmd] + " " + line
        elif self.isMeta:
            self.cmdLine = "##echo Placeholder: meta: " + self.cmd
        else:
             self.cmdLine = ScriptingEnv.VALID_COMMANDS[self.cmd] + " " + line
        #logit( "real cmdline: " + self.cmdLine)
        pass
    def addInfilePrefix(self, name):
         # Join the -p prefix (if any) and the env default path onto name.
         n = None
         if self.inPrefix: n = os.path.join(self.inPrefix,name)
         else: n = name
         return os.path.join(self.env.env["defpath"], n)
    #deprecated...
    def cmdInitSave(self, argv):
         # want to close CGI connection, and then continue
         # Return a task id that can be queried later.
         tid = self.taskId()
         self.env.env["async"] = True;
         # open a db connection
         #o = self.env.instanceJobPersistence()
         # lazy-opening of db.
         # assert o != None
         self.arguments = [];
         return

    def cmdSave(self, argv):
        raise StandardError("unsupported call path")
    def cmdPoll(self, argv):
        raise StandardError("unsupported call path")
    def cmdRetrieve(self,argv):
        raise StandardError("unsupported call path")
    def cmdRetrieveOrBlock(self,argv):
        raise StandardError("unsupported call path")

    def execute(self, scrEnv):
         """execute myself in a certain scripting environment"""
         # open question: is it better to pass functions than the whole object?
         # FIXME
         # should we pull in the execution functionality in here?
         pass 

    def taskId(self):
        """returns: taskid for the script.

        The taskId (statistically) uniquely identifies the script so
        that it is not confused with another script with different
        content or submission time.  Submission time is uniquified by
        using the PID of the parsing process.  Might consider not
        caring about PID when we want to
        cache results from identical scripts."""
        # NOTE(review): bare except below catches everything, not just the
        # AttributeError from a missing self.cachedTaskId -- should be
        # narrowed to "except AttributeError:".
        try: return self.cachedTaskId
        except:
             import struct
             # get fingerprint from env
             digest = self.env.fingerprint()
             # munge the fingerprint to reduce clashes...
             # PIDs will be recycled, so this needs more work
             newdig = md5.md5(digest + str(os.getpid())).digest()
             # take first 4 bytes, convert to hex, strip off 0x and L
             ## assume int is 4 bytes. works on dirt (32bit) and tephra(64bit)
             assert struct.calcsize("I") == 4 
             taskid = hex(struct.unpack("I",newdig[:4])[0])[2:-1]
             #print >>sys.stderr, "taskid = ",taskid
             self.cachedTaskId = taskid
             return self.cachedTaskId
        pass

######################################################################
class DepTracker:
    """DepTracker is a class that encapsulates logic that understands
    the concept of job readiness and dependencies.

    Commands are consumed in list order; a command becomes runnable
    once every one of its inputs() appears in availableData (i.e. has
    been produced by a completed command)."""

    # since python has "managed variables" we can add get/set
    # accessor/mutators later without disrupting client code

    #for now, runnableCmds is public
    def __init__(self, cmdlist):
        """cmdlist: ordered list of Command-like objects exposing
        inputs() and outputs() methods returning filename lists."""
        self.execList = cmdlist[:]
        self.nextCmdIndex = 0   # where are we in issuing jobs?
        self.completedCmds = [] # which jobs have completed?
        self.availableData = [] # which values are available?
        self.pendingCmds = []   # issued but not yet completed
        self.runnableCmds = []  # ready to issue
        self.updateRunnable()

    def markCompleted(self, cmd):
        """Move cmd from pending to completed and publish its outputs."""
        # BUGFIX: was append(cmd.outputs), which stored the bound method
        # object itself instead of the produced filenames; extend with
        # the actual list so isRunnableCmd's membership test can match.
        self.availableData.extend(cmd.outputs())
        self.completedCmds.append(cmd)
        self.pendingCmds.remove(cmd)
        self.updateRunnable()

    def markRunning(self, cmd):
        """Move cmd from the runnable set to the pending set."""
        assert cmd in self.runnableCmds
        self.pendingCmds.append(cmd)
        self.runnableCmds.remove(cmd)

    def done(self):
        """done means that all cmds have completed.  There are no jobs
        runnable or pending.  All results are available. completedCmds
        should have the same length as the original execList."""
        idle = (not self.pendingCmds) and (not self.runnableCmds)
        reallydone = len(self.execList) == len(self.completedCmds)
        return idle and reallydone

    # private
    def updateRunnable(self):
        """Advance nextCmdIndex past every newly-runnable command,
        appending each to runnableCmds."""
        elist = self.execList
        idx = self.nextCmdIndex
        # BUGFIX: bounds-check idx -- the original indexed elist[idx]
        # unconditionally, raising IndexError on an empty list or once
        # every remaining command became runnable.
        while idx < len(elist) and self.isRunnableCmd(elist[idx]):
            self.runnableCmds.append(elist[idx])
            idx += 1
        self.nextCmdIndex = idx

    def isRunnableCmd(self, cmd):
        """A command is runnable when all of its inputs are available."""
        # BUGFIX: call inputs() -- the original iterated the bound
        # method object, which raises TypeError.
        for i in cmd.inputs():
            if i not in self.availableData:
                return False
        return True

##########################################################################
# JobPersistence : modularize the metadata info managment code
# we will use SQLite
##########################################################################
try:
    from swamp_dbutil import JobPersistence
except ImportError:
    # if the import fails, we will get a symbol error when
    # we try to use ssdap_dbutil symbols.
    pass


        

######################################################################
# This class should hold the state used by a single script
# Might be worthwhile to split out constants/data shared
#  between scripts instead of letting them be class shared data.
######################################################################
class ScriptingEnv:
    """Holds the state used by a single script run: abstract->concrete
    file mappings, parsed commands, and the execution entry points.
    The constants below are class-shared across scripts."""
    # constants (but not const-enforced by python)
    SCRIPT_OUTFILE = "%outfile%"
    SCRIPT_STDOUTERR = "%stdouterr%"
    SCRIPT_STDOUT = "%stdout%"
    # note that it's a bad idea to have outputfile set, and one of the
    # console output catchers as well.  you're on your own for
    # deciphering the file out of that mess.

    CLASS_TEMP = "tempf"  # %tempf_...% names a temp file
    CLASS_SEP = "_"
    CLASS_OUT = "outfile"  # %outfile_...% names a result file
    MAGIC_DELIM = "%"
    CLASS_TEMP_PREFIX = MAGIC_DELIM + CLASS_TEMP
    NC_SUFFIX = ".nc"
    NCO_COMMANDS = ["ncap", "ncatted", "ncbo", "ncdiff",
                    "ncea", "ncecat", "ncflint", "ncks",
                    "ncpack", "ncpdq", "ncra", "ncrcat",
                    "ncrename", "ncunpack", "ncwa"]
    #print >>open("/tmp/foo1","a"), dir(local)
    # for now, all commands are valid in netcdf4 or over dap
    # (rebuilt by fixConfig() after config load; 'local' supplies paths)
    VALID_COMMANDS=dict((x, local.nc4BinPath+os.sep+x) for x in NCO_COMMANDS)
    VALID_DAPCOMMANDS=dict( (x, local.ncDapBinPath+os.sep+x) for x in NCO_COMMANDS)

    # room to add other commands here.
    META_COMMANDS = ["ssd_initsave", "ssd_save", "ssd_poll", "ssd_retrieve"]


    COMMENT_CHAR = "#"  # not used right now.

    def __init__(self, scriptlines, settingsDict):
        """Parse a script into commands, persisting them to the job db
        when needed.

        scriptlines  -- list of raw script lines
        settingsDict -- environment settings (tempdir, resultdir,
                        async, ...)

        (Fixed: removed the legacy per-line parsing loop that followed a
        bare `return` below -- it was unreachable dead code; all parsing
        is done by CommandFactory.)"""
        assert type(scriptlines) == type([])
        assert type(settingsDict) == type({})
        self.fileMap = {}   # abstract name -> (fd, concrete filename)
        self.outTokens = []
        self.toutlist = []
        self.tinlist = {}
        self.env = settingsDict
        self.lines = scriptlines

        # make commandlines from each scriptline
        self.commands = []  # list of commands (inc. comments)
        #WARN: commands is read by the command builder
        self.commandsByOutput = {}
        #WARN: so we can find commands by their absOutput filename
        self.commandLine = [] # list of commandlines (no comments)
        # keep own table of linenum -> cmd, to allow non-db
        # dependency processing.
        self.lineToCommand = {}
        # None --> script not in DB; integer-> row in tasks table
        self.persistedTask = None

        class Persister: # use this to wrap access to persistence since
            # python doesn't have real function closures;
            # lazily opens one population transaction on first use.
            def __init__(self, instFunc, persFunc):
                self.instFunc = instFunc
                self.persFunc = persFunc
                self.trans = None
            def do(self, command):
                if self.trans is None:
                    jp = self.instFunc()
                    self.trans = jp.newPopulationTransaction()
                self.persFunc(command, self.trans)
            def close(self):
                if self.trans is not None:
                    self.trans.finish()
                    self.trans = None
        p = Persister(self.instanceJobPersistence, self.persistCommand)
        cf = CommandFactory(p, self)
        cf.addScriptLines(scriptlines)
        cf.finalize()
        p.close()
        self.commandLine = cf.commandLines
        self.commands = cf.commandList
        self.isAsync = cf.isAsync
        local.comfact = cf
        #logit(self.depTreeToString()) # for debug...
    @staticmethod
    def fixConfig():
         """Rebuild the command->binary-path tables after the 'local'
         configuration has been (re)loaded."""
         nc4 = {}
         dap = {}
         for name in ScriptingEnv.NCO_COMMANDS:
             nc4[name] = os.path.join(local.nc4BinPath, name)
             dap[name] = os.path.join(local.ncDapBinPath, name)
         ScriptingEnv.VALID_COMMANDS = nc4
         ScriptingEnv.VALID_DAPCOMMANDS = dap


    def shouldRemap(self, absName):
         """Classify an abstract name: return "temp" or "output" when the
         %-delimited token should be translated to a concrete filename,
         else False."""
         delim = ScriptingEnv.MAGIC_DELIM
         # only %...% delimited tokens are candidates for remapping
         if absName[0] != delim or absName[-1] != delim:
             return False
         inner = absName[1:-1]  # clip the delimiters
         # split off the class prefix only once, so the right-hand side
         # keeps its underscores
         parts = inner.split(ScriptingEnv.CLASS_SEP, 1)
         if len(parts) == 2:
             prefix = parts[0]
             if prefix == ScriptingEnv.CLASS_TEMP:
                 return "temp"
             if prefix == ScriptingEnv.CLASS_OUT:
                 return "output"
         if absName == ScriptingEnv.SCRIPT_OUTFILE:
             return "output"
         return False
    def possibleUrl(self, cand):
         """Match cand against a primitive scheme:// regex; return the
         match object when it looks like a URL, otherwise None."""
         urlish = re.compile(r'^\w+://')
         return urlish.match(cand)

    def requiresDap(self, word):
         """True when word is an http:// reference, i.e. the command must
         run via the DAP-enabled binaries."""
         return word.startswith('http://')


    def possibleFilename(self, cand):
         """Heuristically decide whether cand looks like a relative .nc
         filename.  URLs and absolute paths are excluded."""
         # leave urls alone.
         if self.possibleUrl(cand): return False

         elts = string.split(cand, ".")
         # originally, we looked for file.ext, but this is restricting since
         # file conventions have name.info.info.ext commonly.
         if elts[-1] != "nc": # only going to detect .nc
             return False
         # fixed: a name like ".nc" yields an empty first element, which
         # used to raise IndexError on elts[0][0] below.
         if not elts[0]:
             return False
         if elts[0][0] == os.sep: # do not detect with leading /
             return False
         return True


    def addAbsName(self, absName, fType):
         """Map an abstract name to a concrete filename, creating the
         backing temp file on first use.  fType is "temp" or "output"."""
         # reuse the existing mapping if we've seen this name before
         entry = self.fileMap.get(absName)
         if entry is not None:
             return entry[1]
         # sanitize: keep only the alphanumeric chars as the file prefix
         keep = string.letters + string.digits
         prefix = "".join([ch for ch in absName if ch in keep])
         if fType == "temp":
             dirpath = self.env["tempdir"]
         elif fType == "output":
             dirpath = self.env["resultdir"]
         else:
             raise KeyError(fType)  # same failure as the old dict lookup
         (handle, concrete) = tempfile.mkstemp(ScriptingEnv.NC_SUFFIX,
                                               prefix, dirpath)
         self.fileMap[absName] = (handle, concrete)
         # close the descriptor so the nco utilities can touch the file
         os.close(handle)
         return concrete

    def execute(self):
         #self.clearTemps() # don't need to preclear, since we do that earlier.
         self.isParent = True

         logflush()
         for l in self.commandLine:
             #print l
             pass
         # check to make sure we don't have both an output file and outstderr
         # make sure we're doing exactly one thing with our output
         havefile = self.fileMap.has_key(ScriptingEnv.SCRIPT_OUTFILE)
         # outbypass = (self.outFd != os.devnull)
         # assert havefile ^ outbypass
         # here, fork off:
         # Parent: write persistent state file (or to DB), report metatag to stdout
         # Child: continue execution, but do not return output to stdout
         logit("dep resolution")
         temps = filter(lambda x: x in self.tinlist, self.toutlist)
         print >>open("/tmp/foo1","a"), temps
#          for f in temps:
#              if isnotremapped(f):
#                  remap(f)
#                  rebuildaffectedcommandlines()
         
         logit("dep finish")
         if self.isAsync:
             self.asyncExecute()
         else:
             self.serialExec()
         pass

    def serialExec(self):
        """Run every parsed command in script order, one at a time,
        marking start/finish state around each."""
        for command in self.commands:
            logit("Starting " + str(command.cmdLine))
            self.markCmdStart(command)
            self.runCmdRaw(command)
            self.markCmdFinish(command)
            logit("Finished cmd exec")
    def runCmdRaw(self, cmd):
         """Execute one command via its mapped popen and drain the child's
         stdout -- to our own stdout when cmd.saveOutput is set, otherwise
         to /dev/null so the child can finish."""
         # the command line is (re)built at the last moment: tempfile
         # detection needs the whole parse, but remapping and cmdline
         # generation can happen just-in-time before exec.
         (childIn, childOut) = cmd.popen(cmd.commandLine())
         childIn.close() # close the input, so it flushes
         # the child's output handle is now ready to be read
         if cmd.saveOutput == True:
             # forward child output straight to our stdout
             shutil.copyfileobj(childOut, sys.stdout)
         else:
             # drain the child into /dev/null (output not requested)
             sink = open(os.devnull, "w")
             shutil.copyfileobj(childOut, sink)
             sink.close()

    def depExec(self):
        """an in-order command scheduler driven by a DepTracker; issues
        everything currently runnable, then idles and polls again
        (multi-issue spawning is not implemented yet)."""
        tracker = DepTracker(self.commandLine)
        while not tracker.done():
            # drain the runnable set (assume infinite issue width)
            while tracker.runnableCmds:
                nextCmd = tracker.runnableCmds[0]
                tracker.markRunning(nextCmd)
                self.runCmdRaw(nextCmd)
                tracker.markCompleted(nextCmd)
            # nothing runnable right now: sleep before checking again
            time.sleep(1)
        #we should be done
    def dbDepExec(self, nslots=1):
         """Run the script in parallel with dependencies derived from db
         nslots indicates the maximum process issue width (# max in flight)"""
         # seed the db's ready list before any worker starts
         prep = self.instanceJobPersistence().newPreparationTransaction()
         logit("filling the readyList")
         prep.execute()
         prep = None
         logit("readyList ready")
#         j.showReadyList(self.persistedTask)
#         return
         self.instanceJobPersistence().close() # force closing before fork.
         # polling back-off grows with the worker count
         self.sleepTime = 1+((nslots-1)/4)
         tospawn = nslots-1
         logit("ready to spawn %d workers" % (tospawn))
         # fork nslots-1 children; the original process also works below
         for i in range(tospawn):
             pid = os.fork()
             if pid > 0: continue # parent continues to fork again
             else:
                 local.mypid = os.getpid()
                 break # child exits the loop to start work
         self.dbExecWorker() # get some work done

    def dbFetchJob(self):
        """Try to fetch a job from the db.  If none exist, keep trying,
        but if task is done, return None.
        Return: tuple: (commandline, outputfilename, linenum)

        (Fixed: removed an unreachable 'return (None,None,None)' that sat
        after the try block inside the loop; every path above it breaks
        or continues.)"""
        (cline, outname, linenum) = (None,None,None)
        while True:
            try:
                logit("worker checking for job " + time.asctime())
                j = self.instanceJobPersistence()
                fetch = j.newFetchAndLockTransaction()
                tup = fetch.executeBlocking()
                fetch = None
                # a non-tuple result raises TypeError below -> no job yet
                (cline, outname, linenum) = tup
                break
            except TypeError:
                # nothing fetched: poll whether any work remains at all
                poll = self.instanceJobPersistence().newPollingTransaction()
                jobsLeft = poll.cmdsLeft()
                poll = None
                if jobsLeft > 0:
                    time.sleep(2*self.sleepTime)
                    continue
                logit("worker exit: no more work")
                break
            except:
                import traceback
                logit("unknown exception "+str(traceback.format_exc()))
                break
        return (cline,outname,linenum)
    def workerCommandSpawn(self, command):
        # TODO: unimplemented stub -- command-object flavor of
        # workerJobSpawn.

        pass
    def workerJobSpawn(self, cline):
        import subprocess
        
        # At this point, consider marking a running state, so that
        # workers can see each others' jobs and avoid duplication.
        retcode = None
        try: # derived from: http://pydoc.org/2.4.1/subprocess.html
            logit("worker spawning %s" % (cline))
            retcode = subprocess.call(cline, shell=True)
            if retcode < 0:
                print >>sys.stderr, "Error code %d in %s" %(retcode,cline)
                logit("Failed %s --> %s, code %d " %(cline,outname, retcode))
                return None
            pass
        except OSError, e:
            print >>sys.stderr, "Failed %s --> %s, " %(cline,outname)
            print >>sys.stderr, e
            logit("Failed %s --> %s, " %(cline,outname))
            return None
        return retcode
    def workerRemoteJobSpawn(self, cmd):
        """Spawn a command remotely.  Currently a stub: builds the ssh
        command list, logs it, and returns 0 without running anything."""
        assert local.sshSpawn is not None
        #local.sshSpawn should be something like "ssh -x localhost"
        # it won't really work if ssh keys aren't setup.
        clist = local.sshSpawn.split()
        clist.extend(cmd.cmdLine.split())
        logit("will run "+ " ".join(clist))
        #some work here to merge command line with the clist.
        # should probably just make a command line, quote it, and send
        # it to ssh.
        # need: some work to make sure we can build a commandline at this point
        # since we will need to, when filename maps are deferred.
        return 0 # FAKE for now.
        # NOTE: everything below is unreachable until the fake return
        # above is removed.
        import subprocess
        subprocess.call(clist, shell=False) # for safety, don't treat as shell
        pass
    
    def depFindInputs(self, linenum):
        """Collect (concretename, childcount) pairs for every input that
        the command at linenum consumes, discovered via its parents."""
        cmd = self.lineToCommand[linenum]
        pairs = []
        for parent in cmd.parents():
            outs = parent.outputs()
            # cmdline commands are assumed to produce exactly one output
            assert len(outs) == 1
            # fileMap holds (filedescriptor, concretename) tuples
            concrete = self.fileMap[outs[0]][1]
            # record how many consumers the parent's output has
            pairs.append((concrete, len(parent.children())))
        return pairs
    def dbExecWorker(self):
        """Main worker function for running jobs/cmds: fetch a job from
        the db, run it, commit the result (which may hand back the next
        job), and repeat until no work remains.
        10/18/06-- need to refactor into nicer pieces.

        (Fixed: 'j' was unbound at the final j.close() whenever the very
        first fetch found no work.)"""
        logit("beginning worker loop")
        (cline,outname,linenum) = (None, None, None)
        jobsrun = 0
        j = None
        while True:
            if cline is None:  #otherwise, we have a job ready!
                (cline,outname,linenum) = self.dbFetchJob()
                #either we have a job, or there is none left.
                if not cline:
                    break

            if False:  # local shell-spawn path, currently disabled
                if self.workerJobSpawn(cline) is None:
                    break
            else:
                cmd = self.lineToCommand[linenum]
                logit("found cmd " + cmd.cmdLine)
                if self.workerRemoteJobSpawn(cmd) is None:
                    break

            inputs = self.depFindInputs(linenum)
            # pass the inputs down to the db layer and implement
            # deletion-tracking logic there.  it violates original
            # design intent, but makes for simpler code.  when we
            # know differently, then rethink (maybe pass a function
            # object down to the persistence layer to encapsulate
            # the deletion handling logic.
            jobsrun = jobsrun + 1
            j = self.instanceJobPersistence()
            try:
                # commit this result and, when possible, fetch the next
                # job in a single transaction
                cmt = j.newCommitAndFetchTransaction()
                tup = cmt.executeBlocking(outname, inputs)
                logit("%s commit OK" % cline)
                (cline, outname) = (None,None)
                if type(tup) is tuple and len(tup) == 3:
                    (cline, outname, linenum) = tup
            except:
                import traceback
                logit("unknown exception"+str(traceback.format_exc()))
                break
            if jobsrun > 9910: # for profiling...
                break
        if j is not None:
            j.close()
               
    def asyncExecute(self):
        """Fork: the parent prints token descriptions and returns; the
        child detaches from the console, runs the script via dbDepExec,
        and never returns (sys.exit)."""
        sys.stdout.flush() # flush output before forking
        self.instanceJobPersistence().close() # force closing before fork.
        try:
            pid = os.fork()
            if pid > 0:
                # I am the parent: want to return tokens?
                # print "hello, I'm the parent, the child pid is", pid
                self.isParent = True
                print self.tokensDescription()
                # remove script outfile from env, to skip output
                if self.fileMap.has_key(ScriptingEnv.SCRIPT_OUTFILE):
                    self.fileMap.pop(ScriptingEnv.SCRIPT_OUTFILE)
                    #os.wait()
            else:
                if True: # while debugging, don't detach output...
                    # point stdin/stdout/stderr at /dev/null
                    devnull = os.open("/dev/null", os.O_RDONLY)
                    os.dup2(devnull, sys.stdin.fileno())
                    os.dup2(devnull, sys.stdout.fileno()) # silence stdout
                    os.dup2(devnull, sys.stderr.fileno()) # silence stderr
                    os.close(devnull)

                # need to dup file descriptors so httpd lets parent return.
                # See:
                # http://mail.python.org/pipermail/python-list/2001-March/032663.html
                # should provide a configfile option to control ser/dep exec
                if False:
                    self.serialExec()
                    logit("done with async-init'd serialExec()")
                    sys.exit(0)
                elif False:
                    # profiling variant of the normal execution below
                    profileVar['depexecer'] = self
                    import profile
                    profilename = "/tmp/%s.pyprofile" % (str(os.getpid()))
                    logit("depexec profile at: " + profilename)
                    profile.run('profileVar["depexecer"].dbDepExec(4)', profilename)

                else: #normal execution
                    assert (type(local.nslots) == int) and (local.nslots > 0)
                    logit("dbexec with %d slots" % local.nslots)
                    self.dbDepExec(local.nslots)
                logit("done with dbDepExec()")
                sys.exit(0)
        except OSError, e:
             print >>sys.stderr, "failed to fork for async: %d (%s)" %(e.errno,
                                                                       e.strerror)
             sys.exit(1)
        pass
        
    def depTreeToString(self):
        """Render the command dependency graph as dot-style digraph text,
        one labeled edge per file shared between a command and a child."""
        pieces = ["Script dependency digraph: \n"]
        for parent in self.commands:
            outs = parent.outputs()
            for child in parent.children():
                childIns = child.inputs()
                for shared in outs:
                    if shared in childIns:
                        pieces.append(parent.myName + " -> " + child.myName
                                      + " [ label = \"" + shared + "\" ];\n")
        return "".join(pieces)
    def tokensDescription(self):
        """make a description of the output tokens for this job.
        should be parseable by a client.  think about xml for this later"""
        descs = ["SSDTOKEN<< logical=\"%s\" token=\"%d\" >>" % (name, tok)
                 for (name, tok) in self.outTokens if tok]
        return "\n".join(descs)
    def instanceJobPersistence(self):
        """Return this script's JobPersistence instance, creating and
        caching one (in self.env) on first use."""
        cached = self.env.get("JobPersistence")
        if cached != None:
            return cached
        fresh = JobPersistence(local.dbFilename)
        self.env["JobPersistence"] = fresh
        return fresh
        
    def returnOutput(self, targetfd):
        """Send the script's output file to targetfd, raw or wrapped in
        HTTP headers per configuration.  No-op when console output was
        already returned during execution."""
        if ScriptingEnv.SCRIPT_OUTFILE not in self.fileMap:
            return
        outputName = self.fileMap[ScriptingEnv.SCRIPT_OUTFILE][1]
        writer = self.httpWrite
        if local.disableHTTP:
            writer = self.rawWrite
        writer(outputName, targetfd)
    

    def rawWrite(self, srcfilename, targetfd):
        """Copy the file's bytes straight onto targetfd."""
        src = open(srcfilename)
        try:
            shutil.copyfileobj(src, targetfd)
        finally:
            src.close()
    def httpWrite(self, srcfilename, targetfd):
        """Write a minimal HTTP/1.0 response header, then the file body."""
        header = ("HTTP/1.0 200 OK\n"
                  "Xssdap-server: 0.1\n"
                  "Date: " + time.ctime() + "\n"
                  "Content-type: application/x-netcdf\n"
                  "\n")
        targetfd.write(header)
        self.rawWrite(srcfilename, targetfd)
    def clearTemps(self):
        """Delete every concrete file we created, where possible."""
        for (handle, fname) in self.fileMap.values():
            # only unlink files that still exist and that we may write
            if os.access(fname, os.F_OK | os.W_OK):
                os.remove(fname)
            else:
                logit("I couldn't delete " + str(fname))
    def persistCommand(self, command, trans):
        """ Persist one parsed command and its in/out files to the db.
            command: the actual command (which binary are we exec'ing)
            trans: the populationtransaction we're using"""
        # date in yyyy-mm-dd hh:mm format
        #date = "%04d-%02d-%02d %02d:%02d" % time.localtime()[:5]
        #logit("persisting cmd %s" %(command))
        linenum = command.lineNum
        assert linenum is not None
        if command.cmd == None:
            print "missing cmd for ",command.lineNum
            return # don't persist non-commmands.
        if not self.persistedTask:
            # first command persisted: create the task row and remember it
            tid = command.taskId()
            self.persistedTask = trans.insertTask(tid)
            assert self.persistedTask != None
        trans.insertCmd( linenum, command.cmd, command.commandLine())

        def insert(f, map, out):
            # use the concrete filename when the logical name is mapped
            if f in map:
                # first part of tuple is file handle
                concrete = map[f][1] 
            else:
                concrete = f
            return trans.insertInOut( linenum, f, concrete,
                                      out, 1, # if (not out): ignore state spec
                                      f.startswith(ScriptingEnv.CLASS_TEMP_PREFIX))
        for f in command.inputs():
            insert(f, self.fileMap, False)
        for f in command.outputs():
            # remember each output's db token for the client description
            self.outTokens.append((f,  insert(f,self.fileMap,True)))
        # defer commit/close until all commands ready.
        pass
    def markCmdStart(self, command):
        """Flag command's output files as in-progress (state 2) in the db.
        (These start/finish markers overlap with the dependency tracker's;
        merge them into something coherent once things are working.)"""
        if not self.isAsync:
            return
        trans = self.instanceJobPersistence().newSetFileStateTransaction()
        for outName in command.outputs():
            trans.setByName(self.fileMap[outName][1], 2)
    def markCmdFinish(self, command):
        """Flag command's output files as finished (state 3) in the db."""
        if not self.isAsync:
            return
        trans = self.instanceJobPersistence().newSetFileStateTransaction()
        for outName in command.outputs():
            trans.setByName(self.fileMap[outName][1], 3)
    def fingerprint(self):
        """Return (and cache) the md5 digest of the raw script text."""
        try:
            return self.cachedFingerprint
        except AttributeError:
            # narrowed from a bare except: only the cache-miss case
            # should fall through here.  md5 is imported at file top.
            script = "".join(self.lines)
            self.cachedFingerprint = md5.md5(script).digest()
            return self.cachedFingerprint



    def __del__(self):
        """some cleanup for the scripting environment"""
        ## we should get rid of temp files, like a good citizen
        ## this might be modified if we introduce caching
        # getattr guard: if __init__ raised before setting isAsync, skip
        # cleanup instead of raising inside the destructor.
        if not getattr(self, "isAsync", True):
            self.clearTemps()
    pass



class FrontEnd:
    """Parses the handler's invocation arguments and dispatches to the
    right execution mode (passthrough, script handling, or local/debug
    modes)."""
    # expecting something like:
    # http://hostname/cgi-bin/nph-dods/dodsdata/foo.nc.dods?superduperscript11"
    # 
    MAGIC_CONSTRAINT = "superduperscript11"
    # use in script to indicate what file should be sent back.

    def __init__(self, sysargs):
        "setup a frontend parser to interact with the outside world's input"
        # -d and -f options are supposed to be deprecated
        self.shortOpts = "Lcd:r:o:e:f:l:u:v:"
        self.longOpts = ["dryparse=", "local=", "with-db"]
        self.args = {}
        self.originalargs = sysargs
        self.env = {}

        try:
            parsed =  getopt.getopt(sysargs[1:], 
                                    self.shortOpts,
                                    self.longOpts)
        except Exception, e:
            logit("Error. getopt couldn't parse options in FrontEnd::__init__()")
            logit("args:" +str(sysargs[1:]))
            sys.exit(1)

        # NOTE(review): these two attributes are set but never updated;
        # the locals 'nonpars'/'newp' below are what actually get used.
        self.nonpars = []
        self.newp = []
        if parsed[1] != []:
            #logit( "nonparseable avail."+str(parsed[1]))
            # re-parse: the first leftover is taken as the data file and
            # getopt is re-run on the remaining arguments.
            nonpars = parsed[1][0]
            newp = getopt.getopt(parsed[1][1:],self.shortOpts)
            newp = newp[0]
        else:
            nonpars = ""
            newp = []

        for i in parsed[0]:
            #logfile.write(str(i)+"\n")
            self.args[i[0]] = i[1]
        #print " and fixed is ", nonpars
        #logfile.write(str(nonpars)+"\n")
        self.args["-realfile"] = nonpars

        # fold the second-pass options into the same args dict
        for i in newp:
            #logfile.write(str(i)+"\n")
            self.args[i[0]] = i[1]
        #logit( str(self.args))
        #       for x in  os.environ: #.get("LD_LIBRARY_PATH")
        #       print x

        pass
    def accessmode(self):
        """Return the appropriate execution mode, given the object's
        initialization parameters."""
        args = self.args
        # debug/local modes take precedence over the opendap -o response
        for (flag, mode) in (("--local", "local"), ("--dryparse", "dryparse")):
            if flag in args:
                return mode
        if "-o" in args:
            return args["-o"]
        return "helpmode"
    def passthrough(self):
        """An execution possibility.  Use this when we don't have a
        custom handler for these options.  This should bounce the call to
        the default opendap netcdf handler."""
        newargs = [local.passthroughPath] + self.originalargs[1:]
        newline = ""
        ## need to substitute null strings to provide null -e option
        for x in newargs:
            if x == "": newline += "\"\" "
            else: newline += x + " "
        logit( "Bouncing from: " + str(self.originalargs[0]))
        logit( "Executing: " + newline)
        # NOTE(review): newline is built by naive concatenation and run
        # through a shell; arguments containing spaces or shell
        # metacharacters are not quoted.
        (sin,sout) = os.popen2(newline)
        # copy our stdin to the process
#       if sys.stdin.isatty():
#           shutil.copyfileobj(sys.stdin, sin)
        shutil.copyfileobj(sout, sys.stdout)

    def argReport(self):
        """Return a string containing an argument report for the
        initialization parameters.  This should be useful for debugging.

        (Fixed: the method was defined as argReport(args) without 'self',
        so any method call bound the instance to 'args' and left 'self'
        undefined, raising NameError on the first line.)"""
        report = ""
        report += "scratch space loc(-r): " + self.args["-r"] + "\n"
        report += "originalurl(-u): " + self.args["-u"] + "\n"

        report += "realfile? " + self.args["-realfile"] + "\n"
        report += "compression? " + ["nope","requested"][bool(self.args.has_key("-c"))] + "\n"
        report += "requestor: " + self.args["-v"] + "\n"
        if "-e" in self.args:
            report += "constraint: " + self.args["-e"] + "\n"
        else: report += "constraint: no\n"
        return report

    def runHacked(self):
        """Handle a 'dods' request: run our script protocol when the magic
        constraint is present, otherwise hand off to the stock handler."""
        hasExpr = "-e" in self.args
        if hasExpr and "-u" not in self.args:
            self.passthrough()
            return
        # logit(argReport(self.args))
        if hasExpr and self.args["-e"] == FrontEnd.MAGIC_CONSTRAINT:
            #logit("stdin = tty?" + str(sys.stdin.isatty()))
            self.readScript()
            self.runScript()
            return
        self.passthrough()  ## passthrough everything else.
    def runParseonly(self):
        """A debug execution.  This is just like runHacked, but without
        the actual script execution part."""
        scriptName = self.args["--dryparse"]
        useDb = "--with-db" in self.args
        self.parseOnlyHelper(scriptName, useDb)

    def parseOnlyHelper(self, fname, isAsync):
        """a helper for runparseonly.  Helpful for interactive debugging"""
        print "Only parsing", fname, "for db"
        self.readScriptFromFile(fname)
        if(isAsync):
            self.env["async"] = True # set async mode.

        scriptingenv = None
        # fill env with proper keys:
        self.env["defpath"] = "."
        self.env["tempdir"] = "/tmp"
        self.env["resultdir"] = "/tmp"

        scriptenv = ScriptingEnv(self.scriptlines, self.env)
        print "Parsed OK!"
        ###scriptenv.execute() #####
        pass
    def runLocal(self):
        """Run a script read from a local file (debug/test entry point)."""
        scriptName = self.args["--local"]
        useDb = "--with-db" in self.args
        self.localHelper(scriptName, useDb)
    def localHelper(self, fname, isAsync): 
        self.readScriptFromFile(fname)
        if(isAsync):
            self.env["async"] = True # set async mode.

        scriptingenv = None
        # fill env with proper keys:
        if local.resultPath is not None:
            self.env["resultdir"] = local.resultPath
        else:
            self.env["resultdir"] = "/tmp"
        if local.scratchPath is not None:
            self.env["tempdir"] = local.scratchPath
        else:  
            self.env["tempdir"] = "/tmp"

        # now, patch up all the dap expectations
        (filepath, filename) = os.path.split(self.args["-realfile"])

        self.env["defpath"] = "/var/www/html/"
        self.env["requestedfile"] = "dummy.nc"
        print "tempdir at", self.env["tempdir"]
        print "resultdir at", self.env["resultdir"]
        (urlpath, ufilename) = ("http://localhost/cgi-bin/nph-dods", "dummy.nc")
        print "local exec starting..."
        scriptenv = ScriptingEnv(self.scriptlines, self.env)
        scriptenv.execute() #####
        #scriptenv.returnOutput(sys.stdout)
        
        
    def printusage(self):
        print """
        This script is to be called by an opendap
        installation.  Ask a developer for more info."""



    def execute(self):
        """Perform the task that (hopefully) got parsed in this object's
        initialization, by dispatching on the access mode."""
        handlers = {
            'dods': self.runHacked,
            'DODS': self.runHacked,
            'helpmode': self.printusage,
            'dryparse': self.runParseonly,
            'local': self.runLocal,
        }
        # every other opendap response type bounces to the stock handler
        for mode in ('das', 'DAS', 'dds', 'DDS', 'DDX', 'BLOB', 'Version'):
            handlers[mode] = self.passthrough
        handlers[self.accessmode()]()  ## make the call
    def readScript(self):
        """ reads in stdin as script input"""
        #logit( "we got POST data: \n"+str(lines))
        self.scriptlines = sys.stdin.readlines()
    def readScriptFromFile(self, filename):
        """Load the script lines from filename into self.scriptlines."""
        src = open(filename)
        self.scriptlines = src.readlines()
        src.close()
    def externalScriptParse(self):
        """If a dash helper is configured, feed it the script and return
        its stderr lines; otherwise return the script lines unchanged.

        (Fixed: the old code called self.scriptlines.join("\\n") -- lists
        have no join method, so the dash path always raised.)"""
        if(local.useDash and local.dashPath):
            # if a dash helper is available, use it.
            # NOTE(review): confirm this Popen3 wrapper's signature; the
            # stdlib popen2.Popen3 takes (cmd, capturestderr, bufsize).
            pobj = Popen3(local.dashPath, "\n".join(self.scriptlines), True)
            return pobj.err.split("\n")
        else:
            return self.scriptlines
        

    def runScript(self):
        """Set up environment paths from config/OPeNDAP arguments, then
        parse and execute the script, returning its output on stdout.

        (Fixed: removed a leftover no-op loop over range(1,len(filepath)).)"""
        if local.resultPath is not None:
            self.env["resultdir"] = local.resultPath
        else:  # otherwise, use OPeNDAP default
            self.env["resultdir"] = self.args["-r"]
        if local.scratchPath is not None:
            self.env["tempdir"] = local.scratchPath
        else:  # otherwise, use OPeNDAP default
            self.env["tempdir"] = self.args["-r"]

        (filepath, filename) = os.path.split(self.args["-realfile"])

        self.env["defpath"] = filepath # use for default path.
        self.env["requestedfile"] = filename  #not sure we need this

        # the URL's filename must agree with the real file's name
        (urlpath, ufilename) = os.path.split(self.args["-u"])
        assert filename == ufilename

        #self.env["local"] = "true"
        scriptenv = ScriptingEnv(self.scriptlines, self.env)
        scriptenv.execute() #####
        scriptenv.returnOutput(sys.stdout)
        pass


class JobManager:  # speculative placeholder -- may never be needed
    """Stub for a future job scheduler."""
    def __init__(self):
        # Cap on the number of jobs allowed to run simultaneously.
        self.maxParallel = 2
# To be a command factory, we need to be able to:
# parse commands, understand dependencies, and build command lines
# This means we need to know:
# -- what commands are valid
# -- what are metacommands (and how to ensure their execution)
# -- how to persist (or possess a function that does it)
# -- pathnames to deferred commands
# -- (syntaxes of commands)
# --
class CommandFactory:
    """build commands.  Separates logic for building commands from
    ScriptingEnv and Command as much as possible.  Indirect dependence
    on persistence layer via a function closure
    public interface:
    CommandFactory(), addScriptLines(), finalize(), commandList,
    commandLineList    
    """
    def __init__(self, pers, env):
        """pers is a function that accepts a command as a single argument"""
        self.env = env
        self.commandLines = [] # public
        self.commandList = [] # public
        self.nextLineNum = 1
        self.nextLineNumGen = 3000
        self.finalized = False 
        self.isAsync = False # public
        self.persister = pers
        self.buildComplexStructs()
        self.lineList = []
        self.dapMap = {} # map remote urls to temp filenames
        #  resulting from synthesized fetches.
        pass
    def addScriptLines(self, linelist):
        assert not self.finalized
        # for now, just concatenate and defer parsing.
        self.lineList.extend(linelist)
    
    def finalize(self):
        assert not self.finalized 
        # do all the parsing that we couldn't do earlier
        nextLineNumGen = self.chooseNextLineNumGen()
        for l in self.lineList:
            self.processLine(l)
        logit("%d lines to %d commands" %(len(self.lineList),len(self.commandList)))
        map(self.persister.do, self.commandList)
        self.finalized = True        

    # below: non-public functions
    def buildComplexStructs(self):
        self.valids = ScriptingEnv.META_COMMANDS + ScriptingEnv.NCO_COMMANDS
        

    def chooseNextLineNumGen(self):
        """choose a good line number for assigning to generated
        commands.
        For now, take largest number divisible by 100 >= totallines*2 """
        llen = 2 * len(self.lineList)
        return llen + (100-(llen % 100))

    def execMeta(self, cmd, argv):
        metamap = {"ssd_initsave" : self.cmdInitSave,
                   "ssd_save" : None, #not implemented
                   "ssd_poll" : self.cmdPoll,
                   "ssd_retrieve" : self.cmdRetrieveOrBlock,
                   "ssd_retrievenoblock" : self.cmdRetrieve }
        logit("exec'ing meta cmd " + cmd)
        metamap[cmd](argv)
        pass

        
    def processLine(self, linestring):
        lineNum = self.nextLineNum
        self.nextLineNum += 1
        #parseOriginalLine()
        argv = shlex.split(linestring, True) # True--> shlex eats the comments

        if len(argv) < 1:
            return # nothing to parse
        rawname = argv[0]
        if rawname not in self.valids:
            logit("WARN: bad script line: %d: %s" %(lineNum, linestring))
            return
        if rawname in ScriptingEnv.META_COMMANDS:
            # don't need to construct Command for meta
            newcmds = self.execMeta(rawname, argv[1:])
            # does not need command-line or deferred exec.
            # but file retrieve needs a command
            ## fixme: add new filename.
            return # FIXME?
        else:
            newcmds = self.buildNormalCmd(rawname, lineNum, argv[1:], linestring)
        #for c in newcmds: # boilerplate
        #    c.original = linestring
            
        pass

    def cmdInitSave(self, argv):
        self.isAsync = True
        return
    def cmdPoll(self, argv):
        privatedb = JobPersistence(local.dbFilename)
        i = None
        try:
            poll = privatedb.newPollingTransaction()
            i = poll.pollFileStateById(int(argv[0]))
            poll = None
            i = int(i)
            text = JobPersistence.fileStateMap[i]
            print "State of %s is %d (%s)" % (argv[0], i, text)
        except (TypeError,ValueError):
            print "FileId not found", argv[0]
        privatedb.close()
        return
    def cmdRetrieveOrBlock(self, argv):
        return self.cmdRetrieve(argv, True)
    def cmdRetrieve(self, argv, shouldBlock = False):
        privatedb = JobPersistence(local.dbFilename)
        n = None
        sleeptime = 5  ## want to make this configurable
        logit("Trying to retrieve "+ argv[0])
        try:
            while True:
                poll = privatedb.newPollingTransaction()
                i = poll.pollFileStateById(int(argv[0]))

                # FIXME: this logic is faulty.
                if int(i) != 3: ## 3==saved.  better way to do this magic const?
                    logit("not ready, state is "+str(i))
                    time.sleep(sleeptime)
                    continue
                else:
                    break
                    # this logic is broken... but we are rewriting anyway.
                    print "File id", argv[0], " is nonexistent."
                    raise StandardError("Bad FileId to retrieve")
            logit("pass okay")
            return
            n = poll.pollFilenameById(int(argv[0]))
            # patch output filename (use bogus handle)
            # this is rather backwards.
            #FIXME!!!!!
            self.env.fileMap[ScriptingEnv.SCRIPT_OUTFILE] = (0,n)
        #except TypeError:
        except (TypeError,ValueError,StandardError):
            import traceback
            print "Problem retrieving file for id ", argv[0]
            logit("Exception while retriving file id %s: %s" %
                  (argv[0], str(traceback.format_exc())) )    
        privatedb.close()
        pass
        
    def buildNormalCmd(self, cmd, lineNum, argv, original):
        #print "building ",cmd,argv
        # FIXME: factory will add linenum later. (what about origline and env?)
        newcmd = Command(original,lineNum,self.env) # bogus stuff because we will fill them later.
        newcmd.isMeta = False
        newcmd.lineNum = lineNum
        newcmd.myName = cmd + str(lineNum)

        self.env.lineToCommand[lineNum] = newcmd
        try:
            (arglist, leftover) = SsdapCommon.specialGetOpt(cmd, argv)
        except getopt.GetoptError:
            import traceback
            logit("Error parsing(getopt) command: " + cmd + str(argv))
            logit(traceback.format_exc())
            raise ValueError, 'Command.parse error'
        argdict = dict(arglist)
        newcmd.setOutputModifiers(argdict, arglist, leftover)
        newcmd.acceptInOut(argdict, arglist, leftover)

        newcmd.arguments = arglist
        newcmd.cmd = cmd
        #buildCommandLine()
        line = ""
        needsDap = False
        if newcmd == None: # don't build cmdline for non-cmds
            logit("ineffectual cmd: %d:%s" % (lineNum, linestring))
            return
        

        for (k,v) in newcmd.arguments:
             #special value handling for --op_typ='-'
             safe = string.letters + string.digits + "%"
             #needsProt = False
             #for x in v: needsProt |= (x not in safe)
             needsProt = reduce(lambda x,y: x|(y not in safe), v, False)

             # not necessary to add = for long opts             
             if needsProt and not ("'" == v[0] == v[-1]):
                 line += " " + k + " '" + v + "'"
                 #only protect the non-protected
             elif len(v) > 0: line += " " + k + " " + v
             else:            line += " " + k
             pass

        # then pass commands
        def dapMapper(name):
            if self.env.requiresDap(name):
                return self.dapToLocal(name)
            else: return name
        newlist = map(dapMapper, newcmd.absInputs)
        newcmd.absInputs = newlist

        def fileGen(cmd):
            for name in cmd.absInputs:
                yield name
            for name in cmd.absOutputs:
                if name in [ ScriptingEnv.SCRIPT_STDOUTERR,
                             ScriptingEnv.SCRIPT_STDOUT ]:
                    continue
                yield name

        for name in fileGen(newcmd):
            remaptype = self.env.shouldRemap(name)
            if remaptype:
                remapped = self.env.addAbsName(name, remaptype)
                # want to move saferemove to factory class
                newcmd.safeRemove(remapped) # kill the file to let nco write.
                assert remapped != None
                line += " " + remapped
            elif self.env.possibleFilename(name):
                # add defpath for now.
                newname = newcmd.addInfilePrefix(name)
                line += " " + newname
            else:
                needsDap |= self.env.requiresDap(name)
                
                line += " " + name
        if needsDap:
             newcmd.cmdLine = ScriptingEnv.VALID_DAPCOMMANDS[cmd] + " " + line
        elif newcmd.isMeta:
            newcmd.cmdLine = "##echo Placeholder: meta: " + cmd
        else:
            newcmd.cmdLine = ScriptingEnv.VALID_COMMANDS[cmd] + " " + line
        #logit( "real cmdline: " + self.cmdLine)
        newcmd.fixupDependencies() # we use the deptree to reduce db load
        self.commandList.append(newcmd)
        pass
    def dapToLocal(self, name):
        # digest dap line into a shortname
        if name in self.dapMap: # check to see if it's already been marked for fetch
            return dapMap[name]
        # otherwise, generate a fetch command, and name the tempfile.
        lineNum = self.nextLineNumGen
        self.nextLineNumGen += 1
        absname = "%stempf_remote%d%s" % ("%", lineNum, "%")
        self.addFetchCommand(name, lineNum, absname)
        return absname
    def addFetchCommand(self, dapurl, lineNum, absname):
        rawcmd = "ncks"
        original = "ncks -O -q -M %s %s" % (dapurl, absname)
        # FIXME: don't forget to map rfetch to a real commandline.
        newcmd = Command(original, lineNum, self.env)
        newcmd.myName = "ncks%d" % lineNum
        newcmd.isMeta = False
        newcmd.absOutputs = [absname]
        newcmd.absInputs = []
        newcmd.popen = os.popen2
        newcmd.saveOutput = False
        newcmd.cmd = rawcmd
        self.env.lineToCommand[lineNum] = newcmd
        newcmd.fixupDependencies()
        remapped = self.env.addAbsName(absname, "temp")
        args = " -O -q -M %s %s" % (dapurl, remapped)
        newcmd.cmdLine = ScriptingEnv.VALID_DAPCOMMANDS[rawcmd] + args
        self.commandList.append(newcmd)
        return
    pass

class Popen3:
    """
    This is a deadlock-safe version of popen that returns
    an object with errorlevel, out (a string) and err (a string).
    (capturestderr may not work under windows.)
    Example: print Popen3('grep spam','\n\nhere spam\n\n').out

    Modified to ignore stdout (Daniel Wang 9-28-06)

    FIXES vs. the FAQ original:
    - tempfile.mktemp() (name-only, race-prone) replaced with the secure
      tempfile.mkstemp(), which atomically creates the file.
    - all temp file handles are now closed explicitly.

    Source credit:
    http://www.python.org/doc/faq/library.html
    Date:	<Date: 2005-12-16 19:21:20 -0700 (Fri, 16 Dec 2005) >
    Version:	<Revision: 8684 >
    """
    def __init__(self, command, input=None, capturestderr=None):
        # stdout is deliberately discarded (see class docstring).
        outfile = "/dev/null"
        command = "( %s ) > %s" % (command, outfile)
        infile = errfile = None
        if input:
            # mkstemp creates the file securely; write the input and close.
            (fd, infile) = tempfile.mkstemp()
            os.close(fd)
            f = open(infile, "w")
            try:
                f.write(input)
            finally:
                f.close()
            command = command + " <" + infile
        if capturestderr:
            (fd, errfile) = tempfile.mkstemp()
            os.close(fd)
            command = command + " 2>" + errfile
        # os.system returns the wait status; shift to get the exit code
        # (POSIX encoding).
        self.errorlevel = os.system(command) >> 8
        self.out = None  # stdout intentionally not captured
        if input:
            os.remove(infile)
        if capturestderr:
            f = open(errfile, "r")
            try:
                self.err = f.read()
            finally:
                f.close()
            os.remove(errfile)





def execute():
    """A "main" function that serves to set things in motion.
    This function is called if the script is run from the shell."""
    prestartLogging()
    readConfigFile() # config must be loaded before full logging can start
    startLogging()

    frontend = FrontEnd(sys.argv)
    logit("Mode is " + frontend.accessmode())
    logit("python profile if available (parent only) at: "
          + "/tmp/%s.pyprofile" % str(os.getpid()))
    frontend.execute()

    logit("handler hack log closed at " + time.ctime()+ "\n")
    pass

def doProfile():
    """Profile profilePass() over bench03.ssdap and return a pstats.Stats
    object for interactive inspection."""
    import profile
    import pstats

    basename = "bench03"
    statsfile = basename + '.pyprofile'
    profile.run('profilePass("' + basename + '.ssdap")', statsfile)
    return pstats.Stats(statsfile)
def profilePass(filename):
    """profile the parser over a ssdap file"""
    prestartLogging()
    readConfigFile()
    startLogging()
    argv = ["./dummy.py", "--dryparse", filename, "--with-db"]
    FrontEnd(argv).execute()
    
if __name__ == '__main__':
    # Debug toggle: flip 'False' to True to profile the entire run,
    # dumping stats to /tmp/<pid>.pyprofile.
    if False:
        import profile
        profile.run('execute()', "/tmp/%s.pyprofile" % str(os.getpid()))
    else:
        execute()

#########################################
# notes for adding async support
#########################################
# usage model:
#
# initial script submission
# blah -c -h somefile somefile
# blah blah blah
# blah blah blah
# %async%
#
# The server should respond with something like
# 5 jobs submitted, your job id: 200
# %async-resp% yourhost, url, 5, 200
##################################################
#
# Tracking async jobs: use a sqlite db
# task table:
# # taskid | line# | cmdid
#
# cmd table:
# cmdid | cmd | cmdline
#
# file inputs and outputs (select 1 for output file):
# cmdid | output(0/1) | logicalname | actualname 
# filemap:
# (not needed?)
##################################################
# Lex/Parse sequence: (11.13.06)
# 1) meta or normal cmd
# 2) shlex the line
# 3) figure out output redirection (needed?)
# 4) detect inputs and outputs
# 5) connect cmd together with dependencies
#
# To handle command-splitting, we should shift command building to a
#command factory, which could generate multiple commands from a single
#input line.  We also need to figure out how to map commands, if the
#original command line number can't be used to identify backend
#commands.
# Interim solution for line numbering:  Since the backend schedules
#according to dependencies, the actual line number doesn't matter
#*that* much.  So, assign unused line numbers to the generated lines.

#
#234567890123456789012345678901234567890123456789012345678901234567890123456789
#0       1         2         3         4         5         6         7
#

# oldsystem usecase for command building (from ScriptingEnv)
#          for line in scriptlines:
#              lineNum += 1
#              c = Command(line, lineNum, self)
#              self.isAsync = self.env.has_key("async") and self.env["async"] == True
#              self.lineToCommand[lineNum] = c
#              if self.isAsync and (c.cmd is not None) and not c.isMeta:
#                  if pop is None:
#                      j = self.instanceJobPersistence()
#                      pop = j.newPopulationTransaction()
#                      assert pop is not None
#                  self.persistCommand(lineNum, c, pop)
#              self.commands.append(c) # append command, regardless of parsing
#              if c.commandLine() != None:
#                  self.commandLine.append(c)

# factory usecase:
# class Persister: # use this to wrap access to persistence since
#                  # python doesn't have real function closures
#     def __init__(self, instfunc, persFunc):
#         self.instFunc = instfunc
#         self.trans = None
#     def do(command):
#         if self.trans is None:
#             self.trans = instFunc()
#         persFunc(command, self.trans)
# p = Persister(self.instanceJobPersistence, self.persistCommand)
# cf = CommandFactory(p)
# cf.addScriptLines(scriptlines)
# cf.finalize()
# self.commandLine = cf.commandLines
# self.commands = cf.commandList
# self.isAsync = cf.isAsync

# contention ratio.  Given d = #disks and p = #processes, and assuming that each process uses any particular disk with probability 1/d (i.e. the files it needs are spread uniformly at random among the disks), we have:
# p(zero contention) = ???
# is it:  1- ((d-1)!/(d-p-1)!)/(d^(p-1))?  but this doesn't seem to work in matlab?
#
#
# will need to *edit* originals to disambiguate filenames. simple method, prepend original, non path'd filename with the line number?
#
# filename remapping (when names are aliased) is like register renaming:
# ----
# http://ieeexplore.ieee.org/xpls/abs_all.jsp?arnumber=877952&isnumber=18997
# The design space of register renaming techniques
#
# http://ieeexplore.ieee.org/xpl/RecentCon.jsp?punumber=937
# Register renaming and dynamic speculation: an alternative approach
# http://portal.acm.org/citation.cfm?id=255235.255288

# R.M. Tomasulo. An efficient algorithm for exploiting multiple arithmetic units. IBM Journal of Research and Development, 11(1):25-33, January 1967.

# Look-Ahead Processors
# http://portal.acm.org/citation.cfm?id=356657&dl=GUIDE&coll=GUIDE&CFID=11024650&CFTOKEN=91719170
# article{356657,
#  author = {Robert M. Keller},
#  title = {Look-Ahead Processors},
#  journal = {ACM Comput. Surv.},
#  volume = {7},
#  number = {4},
#  year = {1975},
#  issn = {0360-0300},
#  pages = {177--195},
#  doi = {http://doi.acm.org/10.1145/356654.356657},
#  publisher = {ACM Press},
#  address = {New York, NY, USA},
#  }


