// ----------------------------------------------------------------------
// File: StringTokenizer.cc
// Author: Andreas-Joachim Peters - CERN
// ----------------------------------------------------------------------

/************************************************************************
 * EOS - the CERN Disk Storage System                                   *
 * Copyright (C) 2011 CERN/Switzerland                                  *
 *                                                                      *
 * This program is free software: you can redistribute it and/or modify *
 * it under the terms of the GNU General Public License as published by *
 * the Free Software Foundation, either version 3 of the License, or    *
 * (at your option) any later version.                                  *
 *                                                                      *
 * This program is distributed in the hope that it will be useful,      *
 * but WITHOUT ANY WARRANTY; without even the implied warranty of       *
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the        *
 * GNU General Public License for more details.                         *
 *                                                                      *
 * You should have received a copy of the GNU General Public License    *
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.*
 ************************************************************************/

#include "common/StringTokenizer.hh"
#include <cstring>
#include <sstream>
#include <iomanip>
#include <algorithm>

EOSCOMMONNAMESPACE_BEGIN

//------------------------------------------------------------------------------
// Constructor
//
// Takes a private copy of the input string and records the offset at which
// every line starts. A newline only ends a line when it is outside of a
// double-quoted section; a quote preceded by a backslash does not open or
// close such a section. Splitting a line into tokens is done later by
// GetLine().
//
// @param s string to tokenize, may be null in which case no lines are found
//------------------------------------------------------------------------------
StringTokenizer::StringTokenizer(const char* s):
  fCurrentLine(-1), fCurrentArg(-1)
{
  // the constructor just parses lines, not tokens within a line
  fBuffer = s ? strdup(s) : nullptr;

  if (!fBuffer) {
    // null input or failed allocation: behave like an empty tokenizer
    return;
  }

  if (fBuffer[0] != 0) {
    // set the first pointer to offset 0
    fLineStart.push_back(0);
  }

  bool inquote = false;
  // the buffer is not modified while scanning, compute its length only once
  const size_t len = std::strlen(fBuffer);

  // intelligent parsing considering quoting
  for (size_t i = 0; i < len; i++) {
    const bool escaped = (i > 0) && (fBuffer[i - 1] == '\\');

    if ((fBuffer[i] == '"') && !escaped) {
      inquote = !inquote;
    }

    if ((!inquote) && (fBuffer[i] == '\n')) {
      // the next line starts right behind the newline
      fLineStart.push_back(i + 1);
    }
  }
}

//------------------------------------------------------------------------------
// Destructor
//
// Releases the private copy of the input string made by the constructor.
//------------------------------------------------------------------------------
StringTokenizer::~StringTokenizer()
{
  // free() accepts a null pointer, so no explicit guard is needed
  free(fBuffer);
  fBuffer = nullptr;
}

//------------------------------------------------------------------------------
// Return the next parsed line
//
// Advances to the next line recorded by the constructor, terminates it in
// place inside the internal buffer and appends its blank separated words to
// fLineArgs, from where GetToken() / GetTokenUnquoted() hand them out.
//
// Word splitting rules:
//  - a blank inside a double-quoted section does not split a word
//  - a blank directly preceded by a backslash does not split a word
//  - the end of the line (end of buffer or unquoted newline) always ends the
//    current word, unless a quoted section is still open
//
// Only the current line is scanned: the words of the following lines are added
// when those lines are fetched, so they are not added twice.
//
// @return pointer to the null terminated line inside the internal buffer or 0
//         if there are no more lines
//------------------------------------------------------------------------------
const char*
StringTokenizer::GetLine()
{
  fCurrentLine++;

  if (fCurrentLine >= (int) fLineStart.size()) {
    return 0;
  }

  char* line = fBuffer + fLineStart[fCurrentLine];
  char* wordptr = line;
  bool inquote = false;

  for (size_t i = 0; ; i++) {
    const char val = line[i];
    // a character counts as escaped if it directly follows a backslash
    const bool escaped = (i > 0) && (line[i - 1] == '\\');

    if ((val == '"') && !escaped) {
      inquote = !inquote;
    }

    // the line ends at the end of the buffer or at an unquoted newline
    const bool eol = (val == 0) || ((!inquote) && (val == '\n'));
    // only blanks can be escaped, a line end always terminates the word
    const bool separator = eol || ((val == ' ') && !escaped);

    if ((!inquote) && separator) {
      // terminate the word temporarily to copy it into the argument list
      line[i] = 0;
      fLineArgs.push_back(wordptr);
      line[i] = val;
      // start a new word here
      wordptr = line + i + 1;
    }

    if (eol) {
      // cut the returned line at its end and leave the rest of the buffer
      // to the following GetLine() calls
      line[i] = 0;
      break;
    }
  }

  return line;
}

//------------------------------------------------------------------------------
// Return next parsed space separated token taking into account escaped
// blanks and quoted strings.
//
// Note: Quotes enclosing the token are removed, but other type of quotes
//       are left untouched
//------------------------------------------------------------------------------
const char*
StringTokenizer::GetToken(bool escapeand)
{
  fCurrentArg++;

  if (fCurrentArg < (int) fLineArgs.size()) {
    // patch out quotes
    XrdOucString item = fLineArgs[fCurrentArg].c_str();

    if (item.beginswith("\"")) {
      item.erase(0, 1);
    }

    if (item.endswith("\"") &&
        (!item.endswith("\\\""))) {
      item.erase(item.length() - 1);
    }

    if (escapeand) {
      int pos = 0;

      while ((pos = item.find("&", pos)) != STR_NPOS) {
        if ((pos == 0) || (item[pos - 1] != '\\')) {
          item.erase(pos, 1);
          item.insert("#AND#", pos);
        }

        pos++;
      }
    }

    fLineArgs[fCurrentArg] = item.c_str();
    return fLineArgs[fCurrentArg].c_str();
  } else {
    return 0;
  }
}

//------------------------------------------------------------------------------
// Return next parsed space separated token taking into account escaped
// blanks and quoted strings.
//
// Note: Quotes enclosing the token are removed, while any other
//       type of quotes will be unescaped
//
// The dequoting is delegated to std::quoted: a token which does not start
// with a double quote is extracted like a plain string, i.e. only up to the
// first whitespace character.
//
// @param escapeand if true, every '&' which is not preceded by a backslash is
//                  replaced by the string "#AND#"
//
// @return pointer to the processed token, which is stored back into the
//         argument list, or 0 if there are no more tokens
//------------------------------------------------------------------------------
const char*
StringTokenizer::GetTokenUnquoted(bool escapeand)
{
  ++fCurrentArg;

  if (fCurrentArg >= static_cast<int>(fLineArgs.size())) {
    return 0;
  }

  // Dequote token
  std::string unquoted;
  std::stringstream stream;
  stream << fLineArgs[fCurrentArg].c_str();
  stream >> std::quoted(unquoted);

  if (escapeand) {
    for (size_t idx = unquoted.find("&", 0); idx != std::string::npos;
         idx = unquoted.find("&", idx + 1)) {
      const bool escaped = (idx != 0) && (unquoted[idx - 1] == '\\');

      if (!escaped) {
        unquoted.replace(idx, 1, "#AND#");
      }
    }
  }

  // keep the processed token alive inside the argument list
  fLineArgs[fCurrentArg] = unquoted.c_str();
  return fLineArgs[fCurrentArg].c_str();
}

//------------------------------------------------------------------------------
// Check if the given string is an unsigned decimal number
//
// @param str string to check
//
// @return true if the string is not empty, consists only of the characters
//         '0' to '9' and has no leading zero (the single digit "0" is
//         accepted), otherwise false
//------------------------------------------------------------------------------
bool
StringTokenizer::IsUnsignedNumber(const std::string& str)
{
  if (str.empty()) {
    return false;
  }

  // reject leading zeros, but accept the plain "0"
  if ((str.size() > 1) && (str[0] == '0')) {
    return false;
  }

  return std::all_of(str.begin(), str.end(), [](char c) {
    return (c >= '0') && (c <= '9');
  });
}

EOSCOMMONNAMESPACE_END