/* -*- Mode: C++; c-default-style: "k&r"; indent-tabs-mode: nil; tab-width: 2; c-basic-offset: 2 -*- */

/* libmwaw
* Version: MPL 2.0 / LGPLv2+
*
* The contents of this file are subject to the Mozilla Public License Version
* 2.0 (the "License"); you may not use this file except in compliance with
* the License or as specified alternatively below. You may obtain a copy of
* the License at http://www.mozilla.org/MPL/
*
* Software distributed under the License is distributed on an "AS IS" basis,
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
* for the specific language governing rights and limitations under the
* License.
*
* Major Contributor(s):
* Copyright (C) 2002 William Lachance (wrlach@gmail.com)
* Copyright (C) 2002,2004 Marc Maurer (uwog@uwog.net)
* Copyright (C) 2004-2006 Fridrich Strba (fridrich.strba@bluewin.ch)
* Copyright (C) 2006, 2007 Andrew Ziem
* Copyright (C) 2011, 2012 Alonso Laurent (alonso@loria.fr)
*
*
* All Rights Reserved.
*
* For minor contributions see the git repository.
*
* Alternatively, the contents of this file may be used under the terms of
* the GNU Lesser General Public License Version 2 or later (the "LGPLv2+"),
* in which case the provisions of the LGPLv2+ are applicable
* instead of those above.
*/

#include <set>
#include <vector>

#include "MWAWStringStream.hxx"

#include "Canvas5Structure.hxx"

namespace Canvas5Structure
{
//! a basic Unpack decoder
/** The input is read as a list of (count, value) byte pairs; each pair
    produces \a count copies of \a value in the output.
 */
struct UnpackDecoder {
  //! constructor: the decoder only keeps a pointer on data, it does not copy it
  UnpackDecoder(unsigned char const *data, unsigned long len)
    : m_data(data)
    , m_len(len)
    , m_pos(0)
  {
  }

  /** expands the remaining pairs in output (which is cleared first).

      \return true if the input is consumed and the output contains exactly
      expectedLength bytes, false if a pair would make the output longer than
      expectedLength or if the output is finally too short.
      \note a trailing byte which does not form a complete pair is ignored
   */
  bool decode(unsigned long expectedLength, std::vector<unsigned char> &output)
  {
    output.clear();
    // expectedLength is not checked by the caller: do not reserve more than 0x8000 bytes
    unsigned long const toReserve = expectedLength < 0x8000 ? expectedLength : 0x8000;
    output.reserve(toReserve);

    while (m_pos+2<=m_len) {
      auto const count=size_t(m_data[m_pos]);
      unsigned char const value=m_data[m_pos+1];
      // the pair is consumed even if it is rejected just below
      m_pos+=2;
      if (output.size()+count>expectedLength)
        return false;
      output.insert(output.end(), count, value);
    }
    return output.size()==expectedLength;
  }
protected:

  //! the input data
  unsigned char const *m_data;
  //! the input length
  unsigned long m_len;
  //! the current position in the input
  mutable unsigned long m_pos;
};

//! a basic NIB decoder
/** The data begin with a 30 bytes dictionary, followed by a stream which is
    read nibble by nibble (high nibble first):
    - a non null nibble n codes the dictionary value dict[n-1],
    - a null nibble followed by a non null nibble n codes dict[15+n-1],
    - two null nibbles are followed by two nibbles which define a literal
      byte; finding a literal byte which is also a dictionary value stops
      the decoding.
 */
struct NIBDecoder {
  //! constructor: the decoder only keeps a pointer on data, it does not copy it
  NIBDecoder(unsigned char const *data, unsigned long len)
    : m_data(data)
    , m_len(len)
    , m_pos(0)
  {
  }

  /** reads the dictionary and decodes the following nibbles in output
      (which is cleared first).

      \return true if the decoded data contain exactly expectedLength bytes,
      false if the dictionary can not be read or if the decoded size differs.
   */
  bool decode(unsigned long expectedLength, std::vector<unsigned char> &output)
  {
    output.clear();
    // expectedLength is not checked by the caller: do not reserve more than 0x8000 bytes
    output.reserve(expectedLength > 0x8000 ? 0x8000 : expectedLength);
    unsigned char dict[30];
    std::set<unsigned char> dictKeys;

    if (m_pos+30>m_len) {
      MWAW_DEBUG_MSG(("Canvas5Structure::NIBDecoder::can not read a dictionary at pos=%lx\n", m_pos));
      return false;
    }
    for (auto &c : dict) c=m_data[m_pos++];
    for (auto const &c : dict) dictKeys.insert(c);

    int newC=0;
    // true when the low nibble of c has not been used yet
    bool readC=false;
    unsigned char c=0;
    // each iteration decodes one output byte; the loop ends when the input is
    // exhausted, when a bad literal is found or when the output is complete
    while (true) {
      bool ok=true;
      for (int st=0; st<4; ++st) {
        int val;
        if (!readC) {
          // m_data[m_len] is already outside the buffer: stop as soon as
          // m_pos reaches m_len
          if (m_pos>=m_len) {
            ok=false;
            break;
          }
          c=m_data[m_pos++];
          val=int(c>>4);
        }
        else
          val=int(c&0xf);
        readC=!readC;

        if (val && st<2) {
          // dictionary value: first half if st==0, second half if st==1
          output.push_back(dict[15*st+val-1]);
          break;
        }
        newC=(newC<<4)|val;
        if (st==3) {
          // literal byte: it must not be a value stored in the dictionary
          if (dictKeys.find(static_cast<unsigned char>(newC))!=dictKeys.end()) {
            ok=false;
            break;
          }
          output.push_back(static_cast<unsigned char>(newC));
          newC=0;
        }
      }
      if (!ok)
        break;
      // accept the end when at most one byte (or a pending nibble) remains
      if (m_pos+1>=m_len && output.size()==expectedLength)
        break;
    }
    return output.size()==expectedLength;
  }
protected:

  //! the input data
  unsigned char const *m_data;
  //! the input length
  unsigned long m_len;
  //! the current position in the input
  mutable unsigned long m_pos;
};

/** a basic LWZ decoder

    The input is read as a stream of 12 bits code words (most significant bit
    first): a code less than 0x100 is a literal byte, 0x100 resets the
    dictionary, 0x101 ends the stream and a greater code refers to the
    dictionary entry of index code-0x100.

    \note this code is freely inspired from https://github.com/MichaelDipperstein/lzw GLP 3
 */
struct LWZDecoder {
  //! the first code which is not a literal byte
  static int const e_firstCode=(1<<8);
  //! the number of bits of a code word
  static int const e_maxCodeLen=12;
  //! the number of different code words
  static int const e_maxCode=(1<<e_maxCodeLen);

  //! constructor: the decoder only keeps a pointer on data, it does not copy it
  LWZDecoder(unsigned char const *data, unsigned long len)
    : m_data(data)
    , m_len(len)
    , m_pos(0)
    , m_bit(0)
    , m_dictionary()
  {
    initDictionary();
  }

  /** decodes the input and appends the result to output.

      \return false if the input ends before the end code 0x101 or if a code
      refers to an unknown dictionary entry (any exception raised while
      decoding is caught and reported as false), true otherwise.
      \note output is not cleared and can contain partial data when the
      function returns false
   */
  bool decode(std::vector<unsigned char> &output);

protected:
  //! resets the dictionary to its two reserved entries
  void initDictionary()
  {
    m_dictionary.resize(2); // 100 and 101
    m_dictionary.reserve(e_maxCode - e_firstCode); // one entry by code in [0x100,0x1000[
  }

  //! reads the next bit, throws a libmwaw::ParseException if no data remain
  unsigned getBit() const
  {
    if (m_pos>=m_len)
      throw libmwaw::ParseException();
    unsigned val=(m_data[m_pos]>>(7-m_bit++))&1;
    if (m_bit==8) {
      ++m_pos;
      m_bit=0;
    }
    return val;
  }
  //! reads a code word of codeLen bits, throws if the input is too short
  unsigned getCodeWord(unsigned codeLen) const
  {
    unsigned code=0;
    for (unsigned i=0; i<codeLen;) {
      // fast path: read a whole byte when the position is byte aligned
      if (m_bit==0 && (codeLen-i)>=8 && m_pos<m_len) {
        code = (code<<8) | unsigned(m_data[m_pos++]);
        i+=8;
        continue;
      }
      code = (code<<1) | getBit();
      ++i;
    }
    return code;
  }

  //! a dictionary entry: a string defined by its prefix and its last char
  struct LWZEntry {
    //! constructor
    LWZEntry(unsigned int prefixCode=0, unsigned char suffix=0)
      : m_suffix(suffix)
      , m_prefixCode(prefixCode)
    {
    }
    /** last char in encoded string */
    unsigned char m_suffix;
    /** code for remaining chars in string */
    unsigned int m_prefixCode;
  };

  /** appends the string coded by code to output and returns its first char.

      The prefix chain is followed iteratively, so that a long chain (the
      dictionary can contain thousands of linked entries) does not consume
      one stack frame by entry. This relies on decode() which only stores
      entries whose prefix is an older code: a chain always ends on a
      literal byte.

      Throws a libmwaw::ParseException, leaving output unchanged, if the
      chain refers to an entry which is not in the dictionary.
   */
  unsigned char decodeRec(unsigned int code, std::vector<unsigned char> &output)
  {
    size_t const start=output.size();
    while (code >= e_firstCode) {
      if (code-e_firstCode >= m_dictionary.size()) {
        MWAW_DEBUG_MSG(("Canvas5Structure::LWZDecoder::decodeRec: bad id=%x/%x\n", code, unsigned(m_dictionary.size())));
        output.resize(start);
        throw libmwaw::ParseException();
      }
      /* code word is string + c */
      auto const &entry=m_dictionary[code - e_firstCode];
      output.push_back(entry.m_suffix);
      /* continue with the code word of the remaining string */
      code = entry.m_prefixCode;
    }
    /* code word is just a char: the first char of the string */
    auto const firstChar=static_cast<unsigned char>(code);
    output.push_back(firstChar);

    // the chars were stored from the last to the first: reverse them
    for (size_t lo=start, hi=output.size()-1; lo<hi; ++lo, --hi) {
      unsigned char const tmp=output[lo];
      output[lo]=output[hi];
      output[hi]=tmp;
    }
    return firstChar;
  }
  LWZDecoder(LWZDecoder const &)=delete;
  LWZDecoder &operator=(LWZDecoder const &)=delete;
  //! the input data
  unsigned char const *m_data;
  //! the input length
  unsigned long m_len;
  //! the current byte position and the current bit in this byte
  mutable unsigned long m_pos, m_bit;

  //! the dictionary: m_dictionary[i] corresponds to the code e_firstCode+i
  std::vector<LWZEntry> m_dictionary;
};

bool LWZDecoder::decode(std::vector<unsigned char> &output)
try
{
  output.reserve(0x8000);

  // the code words have a fixed length
  unsigned int const currentCodeLen = unsigned(e_maxCodeLen);
  // only the codes in [e_firstCode, e_maxCode[ can be read: storing more
  // entries would be useless
  size_t const maxEntries = size_t(e_maxCode - e_firstCode);
  unsigned lastCode=0;
  unsigned char c=0;
  // true until a first code is decoded (at the beginning or after a reset)
  bool first=true;

  while (true) {
    unsigned code=getCodeWord(currentCodeLen);
    if (code==0x100) { // reset the dictionary
      initDictionary();
      first=true;
      continue;
    }
    if (code==0x101) // end of code
      break;
    if (code < e_firstCode+m_dictionary.size())
      /* we have a known code.  decode it */
      c = decodeRec(code, output);
    else {
      /***************************************************************
       * We got a code that's not in our dictionary.  This must be due
       * to the string + char + string + char + string exception.
       * Build the decoded string using the last character + the
       * string from the last code.
       ***************************************************************/
      unsigned char tmp = c;
      c = decodeRec(lastCode, output);
      output.push_back(tmp);
    }

    /* if room, add new code to the dictionary */
    if (!first && m_dictionary.size() < maxEntries) {
      if (lastCode>=e_firstCode+m_dictionary.size()) {
        // the new entry would refer to itself or to a future entry: stop here
        // (the caller is expected to check the size of the decoded data)
        MWAW_DEBUG_MSG(("Canvas5Structure::LWZDecoder::decode: oops a loop with %x/%x\n", lastCode, unsigned(m_dictionary.size())));
        break;
      }
      m_dictionary.push_back(LWZEntry(lastCode, c));
    }

    /* save character and code for use in unknown code word case */
    lastCode = code;
    first=false;
  }
  return true;
}
catch (...)
{
  // the input is truncated or a code is invalid
  return false;
}


/** reads the data stored between the current position of input and endPos,
    decodes them according to type and appends the result to stream.

    Depending on type, the zone begins with 0, 2 or 3 big-endian 32 bits
    lengths (see nExtraLength); the remaining data are then decoded by a
    sequence of decoders: LWZ if type is 2, 4 or 8, then NIB if type is 3, 4,
    7 or 8, then Unpack if type is 6, 7 or 8. A decoder is skipped when its
    expected length is 0xFFFFFFFF or is equal to the size of its input.

    \return false, without modifying stream, if type is not in [0,8], if the
    lengths or the data can not be read, if a decoder fails or if the final
    size differs from finalLength. On success, the input position is set to
    endPos.
    \note input and stream are used without checking that they are not null
 */
bool decodeZone(MWAWInputStreamPtr input, long endPos, int type, unsigned long finalLength,
                std::shared_ptr<MWAWStringStream> &stream)
{
  if (type<0 || type>8) {
    MWAW_DEBUG_MSG(("Canvas5Structure::decodeZone: unknown type\n"));
    return false;
  }
  // checkme this code is only tested when type==0, 7, 8
  int const nExtraLength[]= {
    0, 0, 0, 0, 2, // _, _, Z, N, N+Z
    0, 0, 2, 3 // _, P, P+N, P+N+Z
  };
  int const numExtra=nExtraLength[type];
  long pos=input->tell();
  if (pos+4*numExtra>endPos) {
    MWAW_DEBUG_MSG(("Canvas5Structure::decodeZone: can not read the extra length\n"));
    return false;
  }

  // the successive lengths, stored from the final one to the raw one
  std::vector<unsigned long> lengths(1, finalLength);
  bool const wasInverted=input->readInverted();
  input->setReadInverted(false);
  for (int n=0; n<numExtra; ++n)
    lengths.push_back(input->readULong(4));
  // no extra length: the raw data fill the zone
  if (numExtra==0)
    lengths.push_back(static_cast<unsigned long>(endPos-pos));
  input->setReadInverted(wasInverted);

  // removes the last length and returns it; if it is the marker 0xFFFFFFFF,
  // returns instead the nearest remaining length which is not the marker
  // (or the marker itself if all the remaining lengths are markers)
  auto popLength=[&lengths]() {
    unsigned long len=lengths.back();
    lengths.pop_back();
    for (size_t i=lengths.size(); i>0 && len==0xFFFFFFFF; --i)
      len=lengths[i-1];
    return len;
  };

  // first retrieve the raw data
  unsigned long len=popLength();
  pos=input->tell();
  unsigned long numRead=0;
  unsigned char const *raw=nullptr;
  if (len<=static_cast<unsigned long>(endPos-pos))
    raw=input->read(len, numRead);
  if (!raw || numRead != len) {
    MWAW_DEBUG_MSG(("Canvas5Structure::decodeZone: can not read some data\n"));
    return false;
  }
  std::vector<unsigned char> data(raw, raw+len);

  bool const useLWZ = type==2 || type==4 || type==8;
  bool const useNIB = type==3 || type==4 || type==7 || type==8;
  bool const usePack = type==6 || type==7 || type==8;

  if (useLWZ) {
    len=popLength();
    bool const needDecoding = len!=0xffffffff && len!=data.size();
    if (needDecoding) {
      LWZDecoder decoder(data.data(), data.size());
      std::vector<unsigned char> decoded;
      // the LWZ decoder does not know the expected length: check it here
      if (!decoder.decode(decoded) || decoded.size()!=len) {
        MWAW_DEBUG_MSG(("Canvas5Structure::decodeZone[LWZ]: can not decode some data\n"));
        return false;
      }
      data.swap(decoded);
    }
  }

  if (useNIB) {
    len=popLength();
    bool const needDecoding = len!=0xffffffff && len!=data.size();
    if (needDecoding) {
      NIBDecoder decoder(data.data(), data.size());
      std::vector<unsigned char> decoded;
      if (!decoder.decode(len, decoded)) {
        MWAW_DEBUG_MSG(("Canvas5Structure::decodeZone[NIB]: can not decode some data\n"));
        return false;
      }
      data.swap(decoded);
    }
  }

  if (usePack) {
    len=popLength();
    bool const needDecoding = len!=0xffffffff && len!=data.size();
    if (needDecoding) {
      UnpackDecoder decoder(data.data(), data.size());
      std::vector<unsigned char> decoded;
      if (!decoder.decode(len, decoded)) {
        MWAW_DEBUG_MSG(("Canvas5Structure::decodeZone[pack]: can not decode some data\n"));
        return false;
      }
      data.swap(decoded);
    }
  }

  if (data.size()!=finalLength) {
    MWAW_DEBUG_MSG(("Canvas5Structure::decodeZone[pack]: problem decoding data %lx/%lx\n", (unsigned long)data.size(), finalLength));
    return false;
  }

  stream->append(data.data(), unsigned(data.size()));

  // the raw data can be followed by some unused bytes: skip them
  if (input->tell()!=endPos) {
    MWAW_DEBUG_MSG(("Canvas5Structure::decodeZone: find extra data\n"));
    input->seek(endPos, librevenge::RVNG_SEEK_SET);
  }
  return true;
}


}
// vim: set filetype=cpp tabstop=2 shiftwidth=2 cindent autoindent smartindent noexpandtab:

