2014-01-27 10 views
8

Ich habe den obigen Code für IndexWriter hinzugefügt.Warum verursacht Lucene.NET OutOfMemoryException beim Indizieren großer Dateien?

Ich habe zur Vermeidung von OutOfMemoryException(OOMException)

writer.SetRAMBufferSizeMB(32); 
writer.MergeFactor = 1000; 
writer.SetMaxFieldLength(Int32.MaxValue); 
writer.UseCompoundFile = false; 

alle Eigenschaft.

Hier in diesem Code in Zeile writer.AddDocument(document); zeigt OOM Ausnahme.

Können Sie mich führen, warum ich diesen Fehler bin?
Kann mir jemand helfen, das zu lösen?

Meine Gerätekonfiguration:
Systemtyp: 64-Bit-Betriebssystem.
RAM: 4 GB (3,86 GB nutzbar)
Prozessor: Intel i5 - 3230M CPU @ 2.60GHz

using System; 
using System.Data.SqlClient; 
using Lucene.Net.Documents; 
using System.Data; 
using Lucene.Net.Analysis.Standard; 
using Lucene.Net.Search; 
using Lucene.Net.Store; 
using Lucene.Net.QueryParsers; 

namespace ConsoleApplication1 
{ 
    class Program 
    { 
     static String searchTerm = ""; 
     static void Main(string[] args) { 
      /** 
      * This will create dataset according to 
      * connectingString and query 
      **/ 
      Console.WriteLine("Connecting to Sql database server."); 
      String connectionString = "Data Source=proxy-pc;Initial Catalog=Snomed; User   ID=SA;password=admin"; 
      String query = "SELECT * FROM DESCRIPTION"; 
      String INDEX_DIRECTORY = "c:\\DatabaseIndex"; 

      Console.WriteLine("Creating dataset."); 
      DataSet dataSet = createDataset(connectionString, query); 
      Console.WriteLine("Created dataset successfully."); 

      Console.WriteLine("Creating document."); 
      Document document = createDocument(dataSet); 
      Console.WriteLine("Created document successfully."); 

      var version = Lucene.Net.Util.Version.LUCENE_30; 
      var length = Lucene.Net.Index.IndexWriter.MaxFieldLength.LIMITED; 
      Lucene.Net.Analysis.Standard.StandardAnalyzer analyzer = new Lucene.Net.Analysis.Standard.StandardAnalyzer(version); 
      Lucene.Net.Store.Directory directory = Lucene.Net.Store.FSDirectory.Open(new System.IO.DirectoryInfo(@INDEX_DIRECTORY)); 
      Lucene.Net.Index.IndexWriter writer = new Lucene.Net.Index.IndexWriter(directory, analyzer, length); 
      writer.SetMergeScheduler(new Lucene.Net.Index.SerialMergeScheduler()); 
      writer.SetRAMBufferSizeMB(32); 
      writer.MergeFactor = 1000; 
      writer.SetMaxFieldLength(Int32.MaxValue); 
      writer.UseCompoundFile = false; 
      Console.WriteLine("Before Adding document"); 
      **writer.AddDocument(document); ** 
      Console.WriteLine("Indexing..."); 
      writer.Optimize(); 
      writer.Dispose(); 
      Console.WriteLine("Indexing finished"); 

      if (searchTerm == "") 
      { 
       searchTerm = "(keyword)"; 
      } 

      Console.WriteLine("Searching '" + searchTerm + "'..."); 

      var occurance = searchKeyword(INDEX_DIRECTORY, version, searchTerm); 

      if (occurance != -1) 
      { 
       Console.WriteLine("Your search found : " + occurance); 
      } 
      else 
      { 
       Console.WriteLine("Invalid index directory."); 
      } 

      Console.Read(); 
     } 

     /** 
     * Method works as a searcher 
     **/ 
     private static int searchKeyword(String index_Directory_Path, Lucene.Net.Util.Version version, String searchWord) { 
      if (index_Directory_Path != null) 
      { 
       var standAnalyzer = new StandardAnalyzer(version); 
       IndexSearcher searcher = new IndexSearcher(FSDirectory.Open(index_Directory_Path)); 

       // parse the query, "term" is the default field to search 
       var parser = new QueryParser(version, "term", standAnalyzer); 
       Query searchQuery = parser.Parse(searchWord); 

       // search 
       TopDocs hits = searcher.Search(searchQuery, 100); 
       var total = hits.TotalHits; 
       return total; 
      } 

      else 
      { 
       return -1; 
      } 
     } 

     static DataSet createDataset(String connectionString, String query) { 
      DataSet ds = new DataSet(); 

      using (SqlConnection connection = new SqlConnection(connectionString)) 
      using (SqlCommand command = new SqlCommand(query, connection)) 
      using (SqlDataAdapter adapter = new SqlDataAdapter(command)) 
      { 
       adapter.Fill(ds); 
      } 

      return ds; 
     } 

     static Lucene.Net.Documents.Document createDocument(DataSet dataSet) { 
      Lucene.Net.Documents.Document doc = new Lucene.Net.Documents.Document(); 
      using (dataSet) 
      { 
       foreach (DataTable table in dataSet.Tables) 
       { 
        foreach (DataRow row in table.Rows) 
        { 
         String id = row["id"].ToString(); 
         String rTime = row["rTime"].ToString(); 
         String active = row["active"].ToString(); 
         String mId = row["mId"].ToString(); 
         String cId = row["cId"].ToString(); 
         String lCode = row["lCode"].ToString(); 
         String tId = row["tId"].ToString(); 
         String detail = row["detail"].ToString(); 
         String sId = row["sId"].ToString(); 

         Field idField = new Field("id", id, Field.Store.YES, Field.Index.ANALYZED); 
         Field rTimeField = new Field("rTime", rTime, Field.Store.YES, Field.Index.ANALYZED); 
         Field activeField = new Field("active", active, Field.Store.YES, Field.Index.ANALYZED); 
         Field mIdField = new Field("mId", mId, Field.Store.YES, Field.Index.ANALYZED); 
         Field cIdField = new Field("cId", cId, Field.Store.YES, Field.Index.ANALYZED); 
         Field lCodeField = new Field("lCode", lCode, Field.Store.YES, Field.Index.ANALYZED); 
         Field tIdField = new Field("tId", tId, Field.Store.YES, Field.Index.ANALYZED); 
         Field detailField = new Field("detail", detail, Field.Store.YES, Field.Index.ANALYZED); 
         Field sIdField = new Field("sId", sId, Field.Store.YES, Field.Index.ANALYZED); 

         doc.Add(idField); 
         doc.Add(rTimeField); 
         doc.Add(activeField); 
         doc.Add(mIdField); 
         doc.Add(cIdField); 
         doc.Add(lCodeField); 
         doc.Add(tIdField); 
         doc.Add(detailField); 
         doc.Add(sIdField); 
        } 
       } 
      } 

      return doc; 
     } 
    } 
} 

Antwort

1

Es sieht aus wie Sie die gesamte Datenbank als ein einziges Dokument hinzugefügt sind.

Haben Sie versucht, jede Zeile als separates Dokument hinzuzufügen? Sie könnten "createDocument" möglicherweise in "createDocuments" ändern und ein einzelnes Lucene.Net-Dokument pro Zeile ausgeben. Das würde die meisten Ihrer aktuellen Code unverändert lassen ...

Hope hilft,

+0

Danke Adrian Conlon! –