/*
 * LIUS - Lucene Index Update and Search
 * http://sourceforge.net/projects/lius/
 *
 * Copyright (c) 2004, Laval University Library.  All rights reserved.
 *
 * This program is a free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation; either version 2 of
 * the License, or (at your option) any later version.
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */
package ca.ulaval.bibl.lius.index.XML;


import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Iterator;
import java.util.List;

import org.jdom.Attribute;
import org.jdom.CDATA;
import org.jdom.Comment;
import org.jdom.Element;
import org.jdom.EntityRef;
import org.jdom.JDOMException;
import org.jdom.ProcessingInstruction;
import org.jdom.Text;
import org.jdom.input.SAXBuilder;
import org.jaxen.jdom.JDOMXPath;

import ca.ulaval.bibl.lius.Lucene.LuceneActions;
import ca.ulaval.bibl.lius.config.LiusConfig;
import ca.ulaval.bibl.lius.config.LiusField;
import ca.ulaval.bibl.lius.index.Indexer;
import org.jdom.Namespace;
import java.util.Map;
import org.jaxen.SimpleNamespaceContext;
import org.jaxen.JaxenException;
import ca.ulaval.bibl.lius.config.LiusConfigBuilder;

/**
 * Classe se basant sur JDOM et XPATH pour indexer des fichiers XML.
 * <br/><br/>
 * Class based on JDOM and XPATH for indexing XML files.
 * @author Rida Benjelloun (rida.benjelloun@bibl.ulaval.ca)
 */
public class XmlFileIndexer
    extends Indexer {
  private SimpleNamespaceContext nsc = new SimpleNamespaceContext();
  private String toIndex = null;

  /**
   * Permet de parser un fichier XML et de retourner un objet de type JDOM Document.
   * <br/><br/>
   * Parse an XML file and returns a JDOM object.
   */
  public Object parse(Object file) {
    org.jdom.Document xmlDoc = new org.jdom.Document();
    try {
      SAXBuilder builder = new SAXBuilder();
      builder.setValidation(false);
      xmlDoc = builder.build(new File( (String) file));
    }
    catch (JDOMException e) {
      e.printStackTrace();
    }
    catch (IOException e) {
      e.printStackTrace();
    }
    return xmlDoc;
  }

  /**
   * Mthode retournant un objet de type "Lucene document"  partir du fichier
   * XML  indexer et du fichier de configuration de Lius exprim sous forme
   * d'objet de type "LiusConfig".
   * <br/><br/>
   * Method that returns Lucene Document object from an XML file to index and
   * the Lius configuration file as a LiusConfig object.
   *
   */
  public org.apache.lucene.document.Document createLuceneDocument(String
      xmlFile, LiusConfig lc) {

    Collection liusFields = lc.getXmlFileFields();
    org.apache.lucene.document.Document luceneDoc = createLuceneDocument(
        xmlFile,
        liusFields);
    return luceneDoc;
  }

  /**
   * Permet de placer un noeud XML dans un document Lucene. Chaque lment du
       * noeud est index en se basant sur une collection d'objets de type "LiusField"
   * qui contient le nom du champs dans lucene, l'expression Xpath pour slctionner
   * le noeud et le sparateur d'occurences si on veut placer les diffrentes
       * occurences d'un mme lment dans le mme champs; dans la cas contraire chaque
   * lment sera plac dans son propre champs.
   * <br/><br/>
       * Place an XML node in a Lucene document. Each element of the node is indexed
       * based on a collection of type LiusField containing the name of the field in
   * Lucene, the XPath expression to select the node and the hits separator if we want
       * differents hits of a same element in the same field; in the other case each
   * element is placed in is own field.
   */

  public org.apache.lucene.document.Document storeNodeInLuceneDocument(Object
      xmlDoc, Collection liusFields) {

    Collection resColl = getPopulatedCollection(xmlDoc, liusFields);
    org.apache.lucene.document.Document luceneDoc = LuceneActions.
        getSingletonInstance().populateLuceneDoc(resColl);
    return luceneDoc;
  }

  private void populateField(LiusField newField, LiusField lf) {
    newField.setName(lf.getName());
    newField.setType(lf.getType());
    newField.setXpathSelect(lf.getXpathSelect());
    newField.setDateFormat(lf.getDateFormat());
  }

  /**
   * Mthode permettant de concatner les occurences multiples d'un lment qui
   * vont tre stockes dans le mme document Lucene.
   * <br/><br/>
       * Method that concatenates multiple hist of an element which will be saved in
   * the same Lucene document.
   */
  public String concatOccurance(Object xmlDoc,
                                String xpath,
                                String concatSep) {
    StringBuffer chaineConcat = new StringBuffer();
    try {
      JDOMXPath xp = new JDOMXPath(xpath);
      xp.setNamespaceContext(nsc);
      List ls = xp.selectNodes(xmlDoc);
      Iterator i = ls.iterator();
      int j = 0;
      while (i.hasNext()) {
        j++;
        String text = "";
        Object obj = (Object) i.next();
        if (obj instanceof Element) {
          Element elem = (Element) obj;
          text = elem.getText().trim();
        }
        else if (obj instanceof Attribute) {
          Attribute att = (Attribute) obj;
          text = att.getValue().trim();
        }
        else if (obj instanceof Text) {
          Text txt = (Text) obj;
          text = txt.getText().trim();

        }
        else if (obj instanceof CDATA) {
          CDATA cdata = (CDATA) obj;
          text = cdata.getText().trim();

        }
        else if (obj instanceof Comment) {
          Comment com = (Comment) obj;
          text = com.getText().trim();

        }
        else if (obj instanceof ProcessingInstruction) {
          ProcessingInstruction pi = (ProcessingInstruction) obj;
          text = pi.getData().trim();

        }
        else if (obj instanceof EntityRef) {
          EntityRef er = (EntityRef) obj;
          text = er.toString().trim();

        }
        if (text != "") {
          if (ls.size() == 1) {
            chaineConcat.append(text);
            return chaineConcat.toString().trim();
          }
          else {
            if (ls.size() == j)
              chaineConcat.append(text);
            else
              chaineConcat.append(text + " " + concatSep + " ");
          }
        }
      }
    }
    catch (JaxenException j) {
      j.printStackTrace();
    }

    return chaineConcat.toString().trim();
  }

  public void setFileName(String toIndex) {
    this.toIndex = toIndex;
  }

  public String getFileName() {
    return toIndex;
  }

  /**
   * Retourne une collection contenant les champs avec les valeurs  indexer
   * comme par exemple: le texte integral, titre etc.
   * <br/><br/>
       * Returns a collection containing the fields with the values to index; like :
   * full text, title, etc.
   */
  public Collection getPopulatedCollection(Object xmlFile, Collection liusField) {
    Object xmlDoc = null;
    List documentNs = null;
    Map hm = null;
    boolean nsTrouve = false;
    boolean isMap = false;
    Collection resColl = new ArrayList();
    if (xmlFile instanceof org.jdom.Document ||
        xmlFile instanceof org.jdom.Element)
      xmlDoc = xmlFile;
    else
      xmlDoc = parse(xmlFile);
    if (xmlDoc instanceof org.jdom.Document) {
      documentNs = getAllDocumentNs( (org.jdom.Document) xmlDoc);
    }
    Iterator itColl = liusField.iterator();
    while (itColl.hasNext()) {
      Object colElem = itColl.next();
      if (colElem instanceof Map) {
        isMap = true;
        hm = (Map) colElem;
        for (int j = 0; j < documentNs.size(); j++) {
          Collection liusFields = (Collection) hm.get(documentNs.get(j));
          if (liusFields != null) {
            nsTrouve = true;
            extractDataFromElements(xmlDoc, liusFields, resColl);
          }
        }
      }
      if (nsTrouve == false && (colElem instanceof Map)) {
        extractDataFromElements(xmlDoc, (Collection) hm.get("default"), resColl);
      }
    }
    if (isMap == false)
      extractDataFromElements(xmlDoc, liusField, resColl);
    return resColl;
  }

  public Collection getPopulatedCollection(Object file, String liusConfig) {
    LiusConfig lc = LiusConfigBuilder.getSingletonInstance().getLiusConfig(
        liusConfig);
    return getPopulatedCollection(file, lc);
  }

  public Collection getPopulatedCollection(Object file, LiusConfig lc) {
    return getPopulatedCollection(file, lc.getXmlFileFields());
  }

  private void extractDataFromElements(Object xmlDoc, Collection liusFields,
                                       Collection resColl) {
    Iterator it = liusFields.iterator();
    while (it.hasNext()) {
      LiusField lf = (LiusField) it.next();
      if (lf.getOcurSep() != null) {
        String cont = concatOccurance(xmlDoc, lf.getXpathSelect(),
                                      lf.getOcurSep());
        lf.setValue(cont);
        resColl.add(lf);
      }
      else {
        try {
          JDOMXPath xp = new JDOMXPath(lf.getXpathSelect());
          xp.setNamespaceContext(nsc);
          List selectList = xp.selectNodes(xmlDoc);
          Iterator i = selectList.iterator();
          while (i.hasNext()) {
            LiusField lfoccur = new LiusField();
            populateField(lfoccur, lf);
            Object selection = (Object) i.next();
            if (selection instanceof Element) {
              Element elem = (Element) selection;
              if (elem.getText().trim() != null &&
                  elem.getText().trim() != "") {
                lfoccur.setValue(elem.getText());
                resColl.add(lfoccur);
              }
            }
            else if (selection instanceof Attribute) {
              Attribute att = (Attribute) selection;
              lf.setValue(att.getValue());
              resColl.add(lf);
            }
            else if (selection instanceof Text) {
              Text text = (Text) selection;
              lf.setValue(text.getText());
              resColl.add(lf);
            }
            else if (selection instanceof CDATA) {
              CDATA cdata = (CDATA) selection;
              lf.setValue(cdata.getText());
              resColl.add(lf);
            }
            else if (selection instanceof Comment) {
              Comment com = (Comment) selection;
              lf.setValue(com.getText());
              resColl.add(lf);
            }
            else if (selection instanceof ProcessingInstruction) {
              ProcessingInstruction pi = (ProcessingInstruction)
                  selection;
              lf.setValue(pi.getData());
              resColl.add(lf);
            }
            else if (selection instanceof EntityRef) {
              EntityRef er = (EntityRef) selection;
              lf.setValue(er.toString());
              resColl.add(lf);
            }
          }
        }
        catch (JaxenException e) {
          e.printStackTrace();
        }

      }
    }

  }

  public List getAllDocumentNs(org.jdom.Document doc) {
    List ls = new ArrayList();
    processChildren(doc.getRootElement(), ls);
    return ls;
  }

  private boolean exist(List nsLs, String nsUri) {
    if (nsLs.isEmpty())
      return false;
    for (int i = 0; i < nsLs.size(); i++) {
      if ( ( (String) nsLs.get(i)).equals(nsUri)) {
        return true;
      }
    }
    return false;
  }

  private void processChildren(Element elem, List ns) {
    Namespace nsCourent = (Namespace) elem.getNamespace();
    String nsUri = (nsCourent.getURI());
    if (!exist(ns, nsUri)) {
      ns.add(nsUri.trim());
      nsc.addNamespace(nsCourent.getPrefix(), nsCourent.getURI());
    }

    List additionalNs = elem.getAdditionalNamespaces();
    if (!additionalNs.isEmpty())
      copyNsList(additionalNs, ns);
    
    
    
    
   List elemChildren = elem.getChildren();
   if (!elemChildren.isEmpty()) {
      for (int i = 0; i < elemChildren.size(); i++) {
        processChildren( (Element) elemChildren.get(i), ns);
      }
    }
   
   
   
   
  }

  private void copyNsList(List nsElem, List nsRes) {
    for (int i = 0; i < nsElem.size(); i++) {
      Namespace ns = (Namespace) nsElem.get(i);
      nsc.addNamespace(ns.getPrefix(), ns.getURI());
      nsRes.add(ns.getURI().trim());
    }
  }

  /**
       * Permet de rcuprer les champs de Lius  partir du fichier de configuration
   * pour effectuer l'indexation.
   * <br/><br/>
   * Get Lius fields from the configuration file for indexing.
   */
  public Collection getLiusFields(LiusConfig lc) {
    return lc.getXmlFileFields();
  }

}