// This file is part of PUMA.
// Copyright (C) 1999-2003  The PUMA developer team.
//                                                                
// This program is free software;  you can redistribute it and/or 
// modify it under the terms of the GNU General Public License as 
// published by the Free Software Foundation; either version 2 of 
// the License, or (at your option) any later version.            
//                                                                
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of 
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the  
// GNU General Public License for more details.                   
//                                                                
// You should have received a copy of the GNU General Public      
// License along with this program; if not, write to the Free     
// Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, 
// MA  02111-1307  USA                                            

#include "Puma/CScanner.h"
#include "Puma/SB_Sequential.h"
#include "Puma/SB_WholeFile.h"
#include "Puma/SB_String.h"
#include "Puma/ErrorStream.h"
#include "Puma/Source.h"
#include "Puma/Token.h"
#include "Puma/Location.h"
#include "Puma/Unit.h"
#include "Puma/CTokens.h"

namespace Puma {


// Apply the language selection options of the given configuration.
//
// All options are visited in order. Each of "--lang-c", "--lang-c++" and
// "--lang-ac++" overwrites the three language switches, so if several of
// them are present the one visited last determines the final setting.
// Options with any other name are ignored.
void CScanner::configure (const Config &c) {
  for (unsigned idx = 0; idx < c.Options (); idx++) {
    const char *name = c.Option (idx)->Name ();

    const bool want_c   = strcmp (name, "--lang-c") == 0;
    const bool want_cpp = strcmp (name, "--lang-c++") == 0;
    const bool want_ac  = strcmp (name, "--lang-ac++") == 0;

    // not a language option
    if (! (want_c || want_cpp || want_ac))
      continue;

    // standard C is enabled by every language option; each further
    // level additionally enables the next language extension
    allow_std_c (true);
    allow_std_cplusplus (want_cpp || want_ac);
    allow_aspectc (want_ac);
  }
}


// Scan the next token from the current scan buffer.
//
// Returns a token allocated with `new`:
//  - an ID_END_OF_FILE token if the buffer is in its end state,
//  - an ID_ERROR token if the buffer is in its error state or the
//    recognizer reports -1,
//  - otherwise the token built by make_token() from the recognizer result.
// Returns 0 if neither the buffer state nor the recognizer result is one
// of the values handled below.
Token *CScanner::scan () {
  switch (buffer ().state ()) {
    case ScanBuffer::STATE_END:
      return new Token (Token::ID_END_OF_FILE);

    case ScanBuffer::STATE_ERROR:
      return new Token (Token::ID_ERROR);

    case ScanBuffer::STATE_OK: {
      // filled in by the recognizer
      CRecognizer::Lang lang;
      int expr, len;

      switch (recognize (lang, expr, len)) {
        case -1:
          return new Token (Token::ID_ERROR);
        case 0:
        case 1:
          return make_token (lang, expr, len);
      }
      break;
    }
  }

  // nothing matched
  return 0;
}


// Translate the language class reported by the recognizer into the
// language id that is stored in a token.
//
// String literals and core language tokens share the same id. UNKNOWN and
// any value not listed below yield LanguageID (0).
LanguageID CScanner::map_lang (CRecognizer::Lang lang) {
  LanguageID id (0);

  switch (lang) {
    case CRecognizer::COMMENT:  id = Token::comment_id;    break;
    case CRecognizer::PRE_DIR:  id = Token::pre_id;        break;
    case CRecognizer::COMP_DIR: id = Token::dir_id;        break;
    case CRecognizer::WHITE:    id = Token::white_id;      break;
    case CRecognizer::PRE:      id = Token::macro_op_id;   break;
    case CRecognizer::STRING:
    case CRecognizer::CORE:     id = Token::cpp_id;        break;
    case CRecognizer::KEYWORD:  id = Token::keyword_id;    break;
    case CRecognizer::ID:       id = Token::identifier_id; break;
    case CRecognizer::WILDCARD: id = Token::wildcard_id;   break;
    case CRecognizer::UNKNOWN:  break;
  }

  return id;
}


// Build a token from the first `len` characters of the scan buffer's
// current token text and consume these characters from the buffer.
//
//  lang  language class determined by the recognizer
//  expr  token type determined by the recognizer; replaced by
//        Token::ID_UNKNOWN if `lang` is CRecognizer::UNKNOWN
//  len   number of characters belonging to the token
//
// Line continuations (a backslash directly followed by "\n" or "\r\n") are
// removed from the token text. For each removed continuation the number of
// characters copied since the previous continuation (or since the token
// start) is recorded and attached to the token. The token gets the current
// location `loc`; afterwards `loc` is advanced by the number of line ends
// found in the token, continuations included.
Token *CScanner::make_token (CRecognizer::Lang lang, int expr, int len) {
  // small tokens are assembled on the stack, huge ones on the heap
  char local_text[512];
  const bool on_heap = len >= (int)sizeof (local_text);
  char *text = on_heap ? new char[len + 1] : local_text;

  char *in = buffer ().token ();
  char *const in_end = in + len;
  char *out = text;

  long line_ends = 0L;     // line ends seen inside the token
  int copied = 0;          // characters copied since the last continuation
  Array<int> *marks = 0;   // continuation marks, created on demand

  while (in < in_end) {
    // length of a line continuation starting at `in`, 0 if there is none
    int skip = 0;
    if (*in == '\\') {
      if (in + 1 < in_end && in[1] == '\n')
        skip = 2;
      else if (in + 2 < in_end && in[1] == '\x0d' && in[2] == '\n')
        skip = 3;
    }

    if (skip) {
      // drop the continuation, but remember where it was
      in += skip;
      line_ends++;
      if (!marks)
        marks = new Array<int>(10,10);
      marks->append (copied);
      copied = 0;
    } else {
      if (*in == '\n')
        line_ends++;
      *out++ = *in++;
      copied++;
    }
  }
  *out = '\0';

  buffer ().accept (len);

  if (lang == CRecognizer::UNKNOWN)
    expr = Token::ID_UNKNOWN;

  // select the correct language id: inside a compiler directive everything
  // except comments belongs to the directive; round brackets and commas of
  // the core language have ids of their own
  const bool core = (lang == CRecognizer::CORE);
  LanguageID lid;

  if (mode () == CRecognizer::IN_COMP_DIR && lang != CRecognizer::COMMENT)
    lid = Token::dir_id;
  else if (core && expr == TOK_OPEN_ROUND)
    lid = Token::open_id;
  else if (core && expr == TOK_COMMA)
    lid = Token::comma_id;
  else if (core && expr == TOK_CLOSE_ROUND)
    lid = Token::close_id;
  else
    lid = map_lang (lang);

  // NOTE(review): `text` is released below, so Token presumably keeps its
  // own copy of the text -- confirm against Token's constructor.
  Token *tok = new Token (expr, lid, text);
  tok->location (loc);

  // set the next token location
  if (line_ends > 0L)
    loc.setup (loc.filename (), loc.line () + line_ends);

  // attach the continuation line marks
  if (marks)
    tok->cont_lines (marks);

  // free a dynamically allocated huge buffer
  if (on_heap)
    delete[] text;

  return tok;
}


// Scan the whole input of the current scan buffer and append the tokens
// to `unit`.
//
// The location counter is reset to line 1 of the unit's name (or of
// "<anonymous unit>" if the unit has no name). Scanning stops at the end
// of the input or at the first error token. Unknown tokens and warning
// tokens are reported on the error stream but are still appended to the
// unit. The token that terminates the scan (end-of-file or error) is not
// appended; it is deleted here.
void CScanner::scan_all (Unit &unit) {
  loc.setup (unit.name () ? unit.name () : "<anonymous unit>", 1L);

  // token that has been scanned but not (yet) appended to the unit
  Token *token = 0;

  for (;;) {
    token = scan ();

    // scan() returns 0 if it can handle neither the buffer state nor the
    // recognizer result; report this instead of dereferencing the null
    // pointer
    if (! token) {
      err << sev_error << loc
          << "Error while scanning tokens" << endMessage;
      break;
    }

    if (token->type () == Token::ID_END_OF_FILE)
      break;

    if (token->type () == Token::ID_UNKNOWN)
      err << sev_error << token->location ()
          << "Unknown token" << endMessage;
    else if (token->type () == Token::ID_ERROR) {
      err << sev_error << token->location ()
          << "Error while scanning tokens" << endMessage;
      break;
    } else if (token->type () == Token::ID_WARNING)
      err << sev_warning << token->location ()
          << "Warning while scanning tokens" << endMessage;

    unit.append (*token);
    // the token is now referenced by the unit (which presumably manages
    // it from here on), so it must not be deleted below
    token = 0;
  }

  // The terminating token was never appended to the unit. Deleting only
  // the end-of-file token would leak the error token on the error path.
  delete token;
}


// Scan the given source and append all tokens to `unit`.
//
// If the source reports a size greater than zero it is scanned through a
// whole-file buffer, otherwise through a sequential buffer. The buffer is
// a local object that only lives for the duration of this call.
void CScanner::fill_unit (Source &in, Unit &unit) {
  if (! (in.size () > 0)) {
    // no size reported: scan sequentially
    SB_Sequential sequential;

    sequential.init (in);
    setup (sequential);
    scan_all (unit);
    return;
  }

  SB_WholeFile whole_file;

  whole_file.init (err, in);
  setup (whole_file);
  scan_all (unit);
}


void CScanner::fill_unit (const char *in, Unit &unit) {
  SB_String string_buffer;

  string_buffer.init (in);
  setup (string_buffer);
  scan_all (unit);
}

   
} // namespace Puma
