/*
 * $Header: /home/cvspublic/jakarta-slide/src/stores/org/apache/slide/index/lucene/Index.java,v 1.12 2005/04/04 13:53:55 luetzkendorf Exp $
 * $Revision: 1.12 $
 * $Date: 2005/04/04 13:53:55 $
 *
 * ====================================================================
 *
 * Copyright 1999-2004 The Apache Software Foundation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 */
package org.apache.slide.index.lucene;

import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.Reader;
import java.text.DecimalFormat;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.Enumeration;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Locale;
import java.util.Set;
import java.util.StringTokenizer;

import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

import org.apache.slide.common.PropertyName;
import org.apache.slide.common.Uri;
import org.apache.slide.content.NodeProperty;
import org.apache.slide.content.NodeRevisionDescriptor;
import org.apache.slide.content.NodeRevisionNumber;
import org.apache.slide.extractor.ContentExtractor;
import org.apache.slide.extractor.ExtractorException;
import org.apache.slide.extractor.ExtractorManager;
import org.apache.slide.search.IndexException;
import org.apache.slide.util.logger.Logger;

/**
 * Wrapper for Lucene index.
 */
public class Index
{
    // ---- Names of the internal (non-property) Lucene fields ----

    /** Field holding the unique document key produced by <code>configuration.generateKey(uri, revision)</code>; also used to delete documents. */
    public static final String KEY_FIELD_NAME = "SLIDE_KEY";
    /** Stored field holding the URI of the indexed resource. */
    public static final String URI_FIELD_NAME = "SLIDE_URI";
    /** Multi-valued field: "/" plus every path prefix of the resource URI. */
    public static final String SCOPE_FIELD_NAME = "SLIDE_SCOPE";
    /** Field holding the number of path segments of the resource URI, as an index string. */
    public static final String DEPTH_FIELD_NAME = "SLIDE_DEPTH";
    /** Not referenced in this file; presumably used by query code - verify against callers. */
    public static final String VERSION_FIELD_NAME = "SLIDE_VERSION";
    /** Multi-valued field listing the field names of all properties defined on the resource. */
    public static final String IS_DEFINED_FIELD_NAME = "SLIDE_ISDEFINED";
    /** Field holding the text extracted from the resource content. */
    public static final String CONTENT_FIELD_NAME = "SLIDE_CONTENT";
    /** Not referenced in this file; presumably used by query code - verify against callers. */
    public static final String NULL_FIELD_NAME = "SLIDE_NULL";
    
    /**
     * Date format with minute resolution. No time zone is set, so the JVM
     * default zone applies.
     * NOTE(review): SimpleDateFormat is not thread-safe and this instance is
     * shared; concurrent use from other classes would need synchronization.
     */
    protected static final SimpleDateFormat DATE_INDEX_FORMAT = 
        new SimpleDateFormat("yyyy-MM-dd HH:mm", Locale.UK);
    /**
     * Number format producing 19 zero-padded digits with the literal prefix
     * "b" for non-negative and "a" for negative values, so that negative
     * values sort before positive ones when compared as strings.
     */
    protected static final DecimalFormat INT_INDEX_FORMAT = 
        new DecimalFormat("b0000000000000000000;a0000000000000000000");
    
    // ---- Smallest/largest index strings per value type ----
    public static final String DATE_LOWER_BOUND = DATE_INDEX_FORMAT.format(new Date(0));
    public static final String DATE_UPPER_BOUND = DATE_INDEX_FORMAT.format(new Date(Long.MAX_VALUE));
    public static final String INT_LOWER_BOUND = INT_INDEX_FORMAT.format(Long.MIN_VALUE);
    public static final String INT_UPPER_BOUND = INT_INDEX_FORMAT.format(Long.MAX_VALUE);
    public static final String STRING_UPPER_BOUND = "\uffff\uffff";
    public static final String STRING_LOWER_BOUND = "";

    /** Log channel name: the fully qualified name of this class. */
    protected static final String LOG_CHANNEL = Index.class.getName();
    
    /** Index settings (path, analyzer, property types, thresholds). */
    protected IndexConfiguration configuration;
    /** Background indexing thread; created by start() when asynchronous indexing is enabled. */
    protected JobRunner indexThread;
    /** Name of this index; used to label the indexing thread. */
    protected String indexName;
    protected Logger logger;
    
    /**
     * Queue of pending IndexTransaction objects. All accesses that add,
     * remove or wait are done while holding the monitor of this list.
     */
    protected LinkedList txnQueue = new LinkedList();
    
    /**
     * Counter for recently executed index jobs (insertions, deletions).
     * Will be reset after optimization.
     */
    private int jobCounter = 0;
    
    /** Set to true by the constructor when no index existed and an empty one was created. */
    private boolean needsInitialization = false;

    /**
     * Prepares the Lucene index located at the configured index path.
     * <p>
     * The index directory is created if it is missing. If an index already
     * exists and is locked, the lock is removed; if no index exists, an
     * empty one is created and {@link #needsInitialization()} will
     * return <code>true</code>.
     *
     * @param configuration settings of this index
     * @param logger logger used for all messages of this index
     * @param name name of the index (used to label the indexing thread)
     * @throws IndexException if the directory cannot be created or the
     *         index cannot be opened or created
     */
    public Index(IndexConfiguration configuration, Logger logger, String name) 
        throws IndexException
    {
        this.logger = logger;
        this.configuration = configuration;
        this.indexName = name;

        File indexDir = new File(this.configuration.getIndexPath());
        boolean dirAvailable = indexDir.exists() || indexDir.mkdirs();
        if (!dirAvailable) {
            throw new IndexException(
                    "Error can't find or create index directory: " +
                    this.configuration.getIndexPath());
        }

        try {
            Directory directory = getDirectory();
            if (!IndexReader.indexExists(directory)) {
                // no index yet: create an empty one and remember that it
                // still has to be filled
                this.needsInitialization = true;
                IndexWriter initialWriter = new IndexWriter(directory, 
                        configuration.getAnalyzer(), true);
                initialWriter.close();
            } else if (IndexReader.isLocked(directory)) {
                // existing index that is still locked: remove the lock
                IndexReader.unlock(directory);
            }
        } catch (IOException e) {
            throw new IndexException("Error while creating index: " +
                    this.configuration.getIndexPath(), e);
        }

        // TODO make configurable
        BooleanQuery.setMaxClauseCount(10000);
    }
    
    /**
     * Returns the configuration this index was created with.
     */
    public IndexConfiguration getConfiguration() {
        return configuration;
    }
    
    /**
     * Returns the logger this index writes its messages to.
     */
    public Logger getLogger() {
        return logger;
    }

    /**
     * Tells whether the constructor found no existing index and therefore
     * created a new, empty one.
     */
    public boolean needsInitialization() {
        return needsInitialization;
    }
    
    /**
     * Starts the background indexing thread if asynchronous indexing is
     * enabled. Does nothing if indexing is synchronous or if an indexing
     * thread is already running.
     */
    public void start() {
        // use the same accessor as scheduleIndexTransaction(), so that jobs
        // are only queued when a thread exists to consume them
        if (!configuration.isIndexAsynchron()) {
            return;
        }
        // a second start() must not orphan a running thread
        if (this.indexThread != null && this.indexThread.isAlive()) {
            return;
        }
        JobRunner runner = new JobRunner();
        runner.setName("Indexing Thread (" + this.indexName + ")");
        runner.setPriority(configuration.getPriority());
        this.indexThread = runner;
        runner.start();
    }
    /**
     * Stops the background indexing thread (if any) and then executes all
     * transactions that are still waiting in the queue.
     * <p>
     * Each leftover transaction is removed from the queue before it is
     * executed, so it cannot run twice. A failing transaction is logged and
     * does not prevent the remaining ones from being executed. If the
     * calling thread is interrupted while waiting for the indexing thread,
     * its interrupt status is restored before this method returns.
     */
    public void stop() {
        JobRunner runner = this.indexThread;
        if (runner == null) {
            return;
        }

        // stop the indexing thread
        boolean interrupted = false;
        runner.interrupt();
        synchronized (this.txnQueue) {
            this.txnQueue.notify();
        }
        try {
            runner.join();
        } catch (InterruptedException e) {
            // remember the interruption, but still flush pending work
            interrupted = true;
        }
        this.indexThread = null;

        // execute remaining indexing jobs; take them off the queue under
        // the lock so that concurrent producers (or a runner that is still
        // winding down) cannot interfere with the traversal
        while (true) {
            IndexTransaction txn;
            synchronized (this.txnQueue) {
                if (this.txnQueue.isEmpty()) {
                    break;
                }
                txn = (IndexTransaction)this.txnQueue.removeFirst();
            }
            try {
                executeIndexTransaction(txn);
            } catch (IndexException e) {
                logger.log("Error while executing job", e, LOG_CHANNEL, 
                        Logger.ERROR);
            }
        }

        if (interrupted) {
            Thread.currentThread().interrupt();
        }
    }
    
    /**
     * Creates a new searcher on the index directory. Every call opens a
     * fresh searcher.
     *
     * @throws IOException if the index cannot be opened
     */
    public IndexSearcher getSearcher() throws IOException {
        // TODO can this be reused?
        String indexPath = this.configuration.getIndexPath();
        return new IndexSearcher(indexPath);
    }


    /**
     * Returns the file system based Lucene directory of this index without
     * (re-)creating it.
     */
    private Directory getDirectory() throws IOException
    {
        final boolean create = false;
        String indexPath = this.configuration.getIndexPath();
        return FSDirectory.getDirectory(indexPath, create);
    }
    
    /** Creates a field that is indexed as a single term but not stored. */
    private Field unstoredString(String fieldName, String value) {
        final boolean store = false;
        final boolean index = true;
        final boolean tokenize = false;
        return new Field(fieldName, value, store, index, tokenize);
    }
    /** Creates a field that is stored and indexed as a single term. */
    private Field storedString(String fieldName, String value) {
        final boolean store = true;
        final boolean index = true;
        final boolean tokenize = false;
        return new Field(fieldName, value, store, index, tokenize);
    }
    /** Creates a field that is tokenized and indexed but not stored. */
    private Field textField(String fieldName, String value) {
        final boolean store = false;
        final boolean index = true;
        final boolean tokenize = true;
        return new Field(fieldName, value, store, index, tokenize);
    }
    /** Creates a text field whose value is supplied by a reader. */
    private Field textField(String fieldName, Reader value) {
        Field readerField = Field.Text(fieldName, value);
        return readerField;
    }
    
    
    /**
     * Creates a lucene index document for a properties indexer.
     * <p>
     * The document contains the key, URI, scope and depth fields, one
     * resourcetype field per known resource type found in the descriptor,
     * and one field per indexed property according to its configured type
     * (string, date, int, text, is-defined). Properties without a value or
     * not configured for indexing are skipped. An int property whose value
     * is not a number is skipped with a warning.
     *
     * @param uri resource 
     * @param descriptor properties to be indexed
     * @return the document to be added to the index
     */
    private Document createLuceneDocument(String uri, NodeRevisionDescriptor descriptor) {
        
        Document doc = new Document();
        
        doc.add(unstoredString(Index.KEY_FIELD_NAME, 
                configuration.generateKey(uri, descriptor.getRevisionNumber())));
        doc.add(storedString(Index.URI_FIELD_NAME, uri));
        
        // scopes: "/" plus every path prefix of the uri
        StringTokenizer tokenizer = new StringTokenizer(uri, "/");
        StringBuffer buffer = new StringBuffer(uri.length());
        doc.add(unstoredString(Index.SCOPE_FIELD_NAME, "/"));
        int depth = 0;
        while (tokenizer.hasMoreTokens()) {
            buffer.append("/").append(tokenizer.nextToken());
            doc.add(unstoredString(Index.SCOPE_FIELD_NAME, buffer.toString()));
            depth++;
        }
        doc.add(unstoredString(Index.DEPTH_FIELD_NAME, 
                configuration.intToIndexString(depth)));
        
        // resource type (guarded: a descriptor without a resource type
        // must not abort indexing of its other properties)
        String rtype = descriptor.getResourceType();
        if (rtype != null) {
            for(Iterator i = configuration.knownResourceTypes(); i.hasNext();) {
                String name = (String)i.next();
                if (rtype.indexOf(name) != -1) {
                    doc.add(unstoredString(configuration.generateFieldName(
                        NodeProperty.DEFAULT_NAMESPACE, "resourcetype"), 
                        name));
                }
            }
        }
        
        // all other properties
        for(Enumeration e = descriptor.enumerateProperties(); e.hasMoreElements();) {
            NodeProperty property = (NodeProperty)e.nextElement();
            PropertyName propertyName = property.getPropertyName();
            Object value = property.getValue();
            
            if (value == null) continue;
            if (!configuration.isIndexedProperty(propertyName)) continue;
            
            // the field name and string form are the same for every type
            String fieldName = configuration.generateFieldName(
                    property.getNamespace(), property.getName());
            String text = value.toString();
            
            if (configuration.isStringProperty(propertyName)) {
                doc.add(unstoredString(fieldName, text));
            }
            if (configuration.isDateProperty(propertyName)) {
                Date date = IndexConfiguration.getDateValue(value);
                if (date != null) {
                    doc.add(unstoredString(fieldName, 
                            configuration.dateToIndexString(date)));
                }
            }
            if (configuration.isIntProperty(propertyName)) {
                try {
                    doc.add(unstoredString(fieldName,
                            configuration.intToIndexString(
                                    Long.parseLong(text))));
                } catch (NumberFormatException ex)  {
                    // the property is left out of the index, but the
                    // reason is made visible
                    logger.log("Int property " + fieldName + " of " + uri +
                            " is not a number and will not be indexed: " + 
                            text, LOG_CHANNEL, Logger.WARNING);
                }
            }
            if (configuration.isTextProperty(propertyName)) {
                doc.add(textField(fieldName, text));
            }
            if (configuration.supportsIsDefined(propertyName)) {
                doc.add(unstoredString(Index.IS_DEFINED_FIELD_NAME, fieldName));
            }
        }
        
        return doc;
    }
    
    /**
     * Creates a lucene document for content indexing.
     * <p>
     * The document contains the key, URI, scope and depth fields, plus one
     * content field per content extractor registered for the resource.
     *
     * @param uri resource
     * @param descriptor descriptor of the revision whose content is indexed
     * @param content the content to extract text from
     * @return the document to be added to the index
     */
    private Document createLuceneDocument(String uri, 
            NodeRevisionDescriptor descriptor, InputStream content) 
        throws IndexException, ExtractorException
    {
        Document document = new Document();
        
        String key = configuration.generateKey(uri, descriptor.getRevisionNumber());
        document.add(unstoredString(Index.KEY_FIELD_NAME, key));
        document.add(storedString(Index.URI_FIELD_NAME, uri));
        
        // scopes: the root plus every path prefix of the uri
        StringTokenizer segments = new StringTokenizer(uri, "/");
        StringBuffer scope = new StringBuffer(uri.length());
        document.add(unstoredString(Index.SCOPE_FIELD_NAME, "/"));
        int segmentCount = 0;
        while (segments.hasMoreTokens()) {
            scope.append("/").append(segments.nextToken());
            document.add(unstoredString(Index.SCOPE_FIELD_NAME, scope.toString()));
            segmentCount++;
        }
        String depthValue = configuration.intToIndexString(segmentCount);
        document.add(unstoredString(Index.DEPTH_FIELD_NAME, depthValue));
        
        // NOTE(review): every extractor is handed the same stream; if more
        // than one extractor is registered, later ones presumably see
        // whatever the earlier ones left unread - confirm.
        List extractors = ExtractorManager.getInstance().getContentExtractors(
                configuration.getNamespaceName(), uri, descriptor);
        Iterator each = extractors.iterator();
        while (each.hasNext()) {
            ContentExtractor extractor = (ContentExtractor)each.next();
            document.add(textField(Index.CONTENT_FIELD_NAME, 
                    extractor.extract(content)));
        }
        
        return document;
    }

    /**
     * Schedules an index transaction. With asynchronous indexing enabled
     * the transaction is appended to the indexing queue and a waiting
     * thread is notified; otherwise it is executed immediately in the
     * calling thread.
     * @param removeJobs Set of jobs for deleting objects from the index.
     * @param addJobs Set of jobs for adding new objects to the index.
     * @throws IndexException if synchronous execution fails
     */
    public void scheduleIndexTransaction(Set removeJobs, Set addJobs)
        throws IndexException
    {
        if (!configuration.isIndexAsynchron()) {
            executeIndexTransaction(removeJobs, addJobs);
            return;
        }

        IndexTransaction pending = new IndexTransaction(removeJobs, addJobs);
        synchronized (this.txnQueue) {
            this.txnQueue.addLast(pending);
            this.txnQueue.notify();
        }
    }
    
    /**
     * Executes the remove and add jobs of the given transaction.
     *
     * @throws IndexException if updating the index fails
     */
    synchronized void executeIndexTransaction(IndexTransaction txn)
        throws IndexException 
    {
        Set toRemove = txn.removeJobs;
        Set toAdd = txn.addJobs;
        executeIndexTransaction(toRemove, toAdd);
    }
    
    /**
     * Executes one index transaction: first all remove jobs, then all add
     * jobs, and finally an index optimization once the number of jobs
     * executed since the last optimization has reached the configured
     * threshold.
     * <p>
     * A content job whose text extraction fails is logged as a warning and
     * skipped; every other failure aborts the transaction.
     *
     * @param removeJobs Set of IndexJob objects to delete from the index.
     * @param addJobs Set of IndexJob objects to add to the index.
     * @throws IndexException if reading or writing the index fails
     */
    synchronized void executeIndexTransaction(Set removeJobs, Set addJobs) 
        throws IndexException  {
        
        IndexWriter writer = null;
        IndexReader reader = null;
        try {
            // execute delete jobs
            if (removeJobs.size() > 0) {
                reader = IndexReader.open(getDirectory());
                for(Iterator i = removeJobs.iterator(); i.hasNext();) {
                    IndexJob job = (IndexJob)i.next();
                    if (logger.isEnabled(LOG_CHANNEL, Logger.DEBUG)) {
                        logger.log("remove: " + job.getKey(), LOG_CHANNEL, Logger.DEBUG);
                    }
                    reader.delete(new Term(Index.KEY_FIELD_NAME, job.getKey()));
                    this.jobCounter++;
                }
                // clear the variable first, so that a failing close() is
                // not attempted a second time in the finally block
                IndexReader closing = reader;
                reader = null;
                closing.close();
            }
    
            // execute index jobs; the writer is also opened when only an
            // optimization is due
            int threshold = configuration.getOptimizeThreshold();
            if (addJobs.size() > 0 || this.jobCounter >= threshold) {
                writer = new IndexWriter(getDirectory(), 
                        configuration.getAnalyzer(), false);
                
                for(Iterator i = addJobs.iterator(); i.hasNext(); ) {
                    IndexJob job = (IndexJob)i.next();
                    if (job.content != null) {
                        if (logger.isEnabled(LOG_CHANNEL, Logger.DEBUG)) {
                            logger.log("index content: " + job.getKey(), LOG_CHANNEL, Logger.DEBUG);
                        }
                        try {
                            writer.addDocument(createLuceneDocument(
                                    job.uri, job.descriptor, job.content));
                        } catch(ExtractorException e) {
                            logger.log("Error while extracting content: " + job.uri +
                                    " (" + e.toString() + ")", 
                                    LOG_CHANNEL, Logger.WARNING);
                        }
                    } else {
                        if (logger.isEnabled(LOG_CHANNEL, Logger.DEBUG)) {
                            logger.log("index properties: " + job.getKey(), LOG_CHANNEL, Logger.DEBUG);
                        }
                        writer.addDocument(createLuceneDocument(
                                job.uri, job.descriptor));
                    }
                    this.jobCounter++;
                }
                // same comparison as the one that opened the writer, so a
                // writer opened only for optimizing really optimizes
                if (this.jobCounter >= threshold) {
                    writer.optimize();
                    logger.log("optimize", LOG_CHANNEL, Logger.DEBUG);
                    this.jobCounter = 0;
                }
                IndexWriter closing = writer;
                writer = null;
                closing.close();
            }
        } catch (IndexException e) {
            // already the declared type, do not wrap it a second time
            throw e;
        } catch (Exception e) {
            throw new IndexException(e);
        } finally {
            // close both independently: a failing reader must not keep the
            // writer (and its write lock) open
            closeQuietly(reader);
            closeQuietly(writer);
        }
    }

    /** Closes a reader left open by a failed transaction; errors are only logged. */
    private void closeQuietly(IndexReader reader) {
        if (reader == null) return;
        try {
            reader.close();
        } catch (IOException e) {
            logger.log("Exception after executeIndexTransaction", e, LOG_CHANNEL, Logger.ERROR);
        }
    }

    /** Closes a writer left open by a failed transaction; errors are only logged. */
    private void closeQuietly(IndexWriter writer) {
        if (writer == null) return;
        try {
            writer.close();
        } catch (IOException e) {
            logger.log("Exception after executeIndexTransaction", e, LOG_CHANNEL, Logger.ERROR);
        }
    }
    
    /**
     * Builds a job that indexes the <em>properties</em> of a resource
     * revision.
     *
     * @param uri the resource
     * @param descriptor the revision descriptor holding the properties
     */
    public IndexJob createIndexJob(Uri uri, NodeRevisionDescriptor descriptor) {
        IndexJob propertyJob = new IndexJob(uri, descriptor);
        return propertyJob;
    }

    /**
     * Builds a job that indexes the <em>content</em> of a resource
     * revision.
     *
     * @param uri the resource
     * @param descriptor the revision descriptor of the content
     * @param content stream the text is extracted from
     */
    public IndexJob createIndexJob(Uri uri, 
            NodeRevisionDescriptor descriptor, InputStream content) {
        IndexJob contentJob = new IndexJob(uri, descriptor, content);
        return contentJob;
    }
    /**
     * Builds a job that <em>removes</em> a resource revision from the
     * index.
     *
     * @param uri the resource
     * @param number the revision to remove
     */
    public IndexJob createDeleteJob(Uri uri, NodeRevisionNumber number) {
        IndexJob deleteJob = new IndexJob(uri, number);
        return deleteJob;
    }
    
    /**
     * A single unit of index work for one resource revision: a delete
     * (no descriptor), a property indexing (descriptor only) or a content
     * indexing (descriptor and content). Two jobs are equal if their keys
     * are equal.
     */
    public class IndexJob {
        protected String key;
        protected String uri;
        protected NodeRevisionDescriptor descriptor;
        protected InputStream content;
        
        protected String getKey() {
            return this.key;
        }

        /** Creates a delete job for the given revision. */
        protected IndexJob(Uri uri, NodeRevisionNumber number) {
            this.uri = uri.toString();
            this.descriptor = null;
            this.content = null;
            this.key = configuration.generateKey(this.uri, number);
        }

        /** Creates a property indexing job (no content). */
        protected IndexJob(Uri uri, NodeRevisionDescriptor descriptor) {
            this(uri, descriptor, null);
        }

        /** Creates a content indexing job. */
        protected IndexJob(Uri uri, NodeRevisionDescriptor descriptor, InputStream content) {
            this.uri = uri.toString();
            this.descriptor = descriptor;
            this.content = content;
            this.key = configuration.generateKey(this.uri, descriptor.getRevisionNumber());
        }

        public boolean equals(Object obj)
        {
            if (obj == this) {
                return true;
            }
            if (!(obj instanceof IndexJob)) {
                return false;
            }
            IndexJob other = (IndexJob)obj;
            return this.key.equals(other.key);
        }

        public int hashCode()
        {
            return this.key.hashCode();
        }

        public String toString() {
            return key;
        }
    }
    
    /**
     * Pairs the remove jobs and the add jobs that are executed together as
     * one index transaction.
     */
    private static class IndexTransaction {
        /** Jobs deleting documents from the index. */
        Set removeJobs;
        /** Jobs adding documents to the index. */
        Set addJobs;
        
        IndexTransaction(Set removeJobs, Set addJobs) {
            this.addJobs = addJobs;
            this.removeJobs = removeJobs;
        }
    }
    
    /**
     * Thread for executing index transactions scheduled in the queue.
     * It takes transactions off the queue one by one and terminates when
     * it is interrupted while the queue is empty.
     */
    private class JobRunner extends Thread {
       public void run() 
       {
          for (;;) {
             IndexTransaction next;
             
             synchronized (txnQueue) {
                while (txnQueue.isEmpty()) {
                   // nothing to do: leave if the runner was interrupted,
                   // otherwise sleep until a transaction is scheduled
                   if (isInterrupted()) {
                      return;
                   }
                   try {
                      txnQueue.wait();
                   }
                   catch (InterruptedException e) {
                      return;
                   }
                }
                // the queue holds at least one transaction here
                next = (IndexTransaction)txnQueue.removeFirst();
             }

             // executed outside the queue lock, so that new transactions
             // can be scheduled meanwhile
             try {
                 executeIndexTransaction(next);
             }
             catch(Exception e) {
                logger.log("Error while executing job", e, LOG_CHANNEL, 
                        Logger.ERROR);
             }
          }
       }
    }
}
