/*
 * LIUS - Lucene Index Update and Search
 * http://sourceforge.net/projects/lius/
 *
 * Copyright (c) 2004, Laval University Library.  All rights reserved.
 *
 * This program is a free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation; either version 2 of
 * the License, or (at your option) any later version.
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */
package ca.ulaval.bibl.lius.index.PDF;

//import java.io.ByteArrayInputStream;
//import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;
//import java.io.FileNotFoundException;
import java.io.IOException;
//import java.io.InputStreamReader;
//import java.io.OutputStreamWriter;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Iterator;

import org.apache.lucene.document.Document;
import org.pdfbox.exceptions.CryptographyException;
import org.pdfbox.exceptions.InvalidPasswordException;
//import org.pdfbox.pdfparser.PDFParser;
import org.pdfbox.pdmodel.PDDocument;
import org.pdfbox.pdmodel.PDDocumentInformation;
import org.pdfbox.util.PDFTextStripper;

import ca.ulaval.bibl.lius.Lucene.LuceneActions;
import ca.ulaval.bibl.lius.config.LiusConfig;
import ca.ulaval.bibl.lius.config.LiusField;
import ca.ulaval.bibl.lius.index.Indexer;
import ca.ulaval.bibl.lius.config.LiusConfigBuilder;
//import java.io.InputStream;
import java.io.StringWriter;

/**
 * Class for indexing PDF documents, based on PDFBox and inspired by
 * LucenePDFDocument.
 * <br/><br/>
 * An instance keeps the metadata of the most recently parsed document in a
 * field between {@link #parse(Object)} and the field population step, so a
 * single instance must not be used to index several files concurrently.
 *
 * @author Rida Benjelloun (rida.benjelloun@bibl.ulaval.ca)
 */
public class PdfIndexer
    extends Indexer {

  /** Maximum number of characters of extracted text copied into a "summary" field. */
  private static final int SUMMARY_MAX_LENGTH = 500;

  /**
   * Metadata of the document handled by the last call to
   * {@link #parse(Object)}; <code>null</code> when that document could not
   * be loaded or decrypted.
   */
  private PDDocumentInformation documentInformation = null;

  /**
   * Extracts the text of a PDF file and remembers its document information
   * for the subsequent field population.
   * <br/><br/>
   * Encrypted documents are decrypted with the empty password. Failures
   * (I/O, cryptography, invalid password) are printed to the standard error
   * stream and are not propagated; the text extracted up to that point
   * (the empty string if nothing was extracted) is returned.
   *
   * @param file path of the PDF file, as a <code>String</code>
   * @return the extracted text as a <code>String</code>, never <code>null</code>
   */
  public Object parse(Object file) {
    String contents = "";
    // Forget the previous document so that a failed load can never expose
    // the metadata of an earlier file.
    documentInformation = null;
    FileInputStream input = null;
    PDDocument pdfDocument = null;

    try {
      input = new FileInputStream(new File( (String) file));
      pdfDocument = PDDocument.load(input);
      if (pdfDocument.isEncrypted()) {
        pdfDocument.decrypt("");
      }
      // Grab the metadata while the document is still open (and after
      // decryption); the document is closed before this method returns.
      documentInformation = pdfDocument.getDocumentInformation();

      StringWriter writer = new StringWriter();
      PDFTextStripper stripper = new PDFTextStripper();
      stripper.writeText(pdfDocument, writer);
      contents = writer.getBuffer().toString();
    }
    catch (CryptographyException e) {
      e.printStackTrace();
    }
    catch (IOException e) {
      e.printStackTrace();
    }
    catch (InvalidPasswordException e) {
      e.printStackTrace();
    }
    finally {
      if (pdfDocument != null) {
        try {
          pdfDocument.close();
        }
        catch (IOException ex) {
          ex.printStackTrace();
        }
      }
      // Close the stream explicitly: it was opened here, and closing it a
      // second time is harmless if the parser already did so.
      if (input != null) {
        try {
          input.close();
        }
        catch (IOException ex) {
          ex.printStackTrace();
        }
      }
    }
    return contents;
  }

  /**
   * Returns a Lucene Document for the file to index, using the PDF fields
   * of the given Lius configuration.
   *
   * @param file path of the PDF file
   * @param lc Lius configuration providing the PDF field definitions
   * @return the document built by the <code>createLuceneDocument(String,
   *         Collection)</code> overload (not defined in this class;
   *         presumably inherited from <code>Indexer</code>)
   */
  public Document createLuceneDocument(String file, LiusConfig lc) {
    return createLuceneDocument(file, lc.getPdfFields());
  }

  /**
   * Gets the Lius fields used for PDF indexing from the configuration.
   *
   * @param lc Lius configuration
   * @return the PDF field definitions of <code>lc</code>
   */
  public Collection getLiusFields(LiusConfig lc) {
    return lc.getPdfFields();
  }

  /**
   * Parses the file and fills the given LiusField objects with the values
   * they ask for through their "get" attribute.
   * <br/><br/>
   * Recognised "get" names (case-insensitive): <code>content</code> (full
   * text), <code>summary</code> (first 500 characters of the text),
   * <code>title</code>, <code>author</code>, <code>creator</code>,
   * <code>keywords</code>, <code>producer</code>, <code>subject</code>,
   * <code>trapped</code>, <code>creationDate</code> and
   * <code>modificationDate</code>. Fields without a "get" attribute, with an
   * unknown name, or whose metadata value is absent are left out of the
   * result. The LiusField instances passed in are modified in place and
   * returned, not copied.
   *
   * @param file path of the PDF file, as a <code>String</code>
   * @param liusFields collection of LiusField objects to populate
   * @return a new collection holding the LiusField objects that received a value
   */
  public Collection getPopulatedCollection(Object file, Collection liusFields) {
    Collection coll = new ArrayList();
    String contents = (String) parse(file);
    // null when the document could not be loaded or decrypted; metadata
    // fields are then skipped instead of failing.
    PDDocumentInformation metaData = documentInformation;

    Iterator i = liusFields.iterator();
    while (i.hasNext()) {
      LiusField lf = (LiusField) i.next();
      String get = lf.getGet();
      if (get == null) {
        continue;
      }
      if (get.equalsIgnoreCase("content")) {
        lf.setValue(contents);
        coll.add(lf);
      }
      else if (get.equalsIgnoreCase("summary")) {
        int summarySize = Math.min(contents.length(), SUMMARY_MAX_LENGTH);
        lf.setValue(contents.substring(0, summarySize));
        coll.add(lf);
      }
      else if (metaData != null && populateFromMetadata(lf, get, metaData)) {
        coll.add(lf);
      }
    }

    return coll;
  }

  /**
   * Copies the metadata entry named by <code>get</code> into the field.
   *
   * @param lf field to fill
   * @param get non-null "get" attribute of the field
   * @param metaData document information to read from
   * @return <code>true</code> if a value was set on <code>lf</code>;
   *         <code>false</code> if the name is not a known metadata entry or
   *         the document has no value for it
   */
  private boolean populateFromMetadata(LiusField lf, String get,
                                       PDDocumentInformation metaData) {
    if (get.equalsIgnoreCase("creationDate")) {
      if (metaData.getCreationDate() == null) {
        return false;
      }
      lf.setDate(metaData.getCreationDate().getTime());
      return true;
    }
    if (get.equalsIgnoreCase("modificationDate")) {
      if (metaData.getModificationDate() == null) {
        return false;
      }
      lf.setDate(metaData.getModificationDate().getTime());
      return true;
    }

    String value = null;
    if (get.equalsIgnoreCase("title")) {
      value = metaData.getTitle();
    }
    else if (get.equalsIgnoreCase("author")) {
      value = metaData.getAuthor();
    }
    else if (get.equalsIgnoreCase("creator")) {
      value = metaData.getCreator();
    }
    else if (get.equalsIgnoreCase("keywords")) {
      value = metaData.getKeywords();
    }
    else if (get.equalsIgnoreCase("producer")) {
      value = metaData.getProducer();
    }
    else if (get.equalsIgnoreCase("subject")) {
      value = metaData.getSubject();
    }
    else if (get.equalsIgnoreCase("trapped")) {
      value = metaData.getTrapped();
    }

    if (value == null) {
      return false;
    }
    lf.setValue(value);
    return true;
  }

  /**
   * Same as {@link #getPopulatedCollection(Object, LiusConfig)}, with the
   * configuration obtained from the LiusConfigBuilder singleton.
   *
   * @param file path of the PDF file, as a <code>String</code>
   * @param liusConfig value handed to
   *        <code>LiusConfigBuilder.getLiusConfig</code> (presumably the
   *        location of the Lius configuration file)
   * @return the populated LiusField objects
   */
  public Collection getPopulatedCollection(Object file, String liusConfig) {
    LiusConfig lc = LiusConfigBuilder.getSingletonInstance().getLiusConfig(
        liusConfig);
    return getPopulatedCollection(file, lc);
  }

  /**
   * Same as {@link #getPopulatedCollection(Object, Collection)}, using the
   * PDF fields of the given configuration.
   *
   * @param file path of the PDF file, as a <code>String</code>
   * @param lc Lius configuration providing the PDF field definitions
   * @return the populated LiusField objects
   */
  public Collection getPopulatedCollection(Object file, LiusConfig lc) {
    return getPopulatedCollection(file, lc.getPdfFields());
  }

}