SphinxMirrorAction.java

Go to the documentation of this file.
00001 /*
00002  * WebSphinx web-crawling toolkit
00003  *
00004  * Copyright (c) 1998-2002 Carnegie Mellon University.  All rights
00005  * reserved.
00006  *
00007  * Redistribution and use in source and binary forms, with or without
00008  * modification, are permitted provided that the following conditions
00009  * are met:
00010  *
00011  * 1. Redistributions of source code must retain the above copyright
00012  *    notice, this list of conditions and the following disclaimer.
00013  *
00014  * 2. Redistributions in binary form must reproduce the above copyright
00015  *    notice, this list of conditions and the following disclaimer in
00016  *    the documentation and/or other materials provided with the
00017  *    distribution.
00018  *
00019  * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
00020  * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
00021  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
00022  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
00023  * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
00024  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
00025  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
00026  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
00027  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
00028  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
00029  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00030  *
00031  */
00032 
00033 package org.net2map.pov.documentsManager;
00034 
00035 import websphinx.workbench.*;
00036 import websphinx.*;
00037 
00038 import java.io.File;
00039 import java.io.IOException;
00040 import java.net.URL;
00041 import java.net.MalformedURLException;
00042 
00043 public class SphinxMirrorAction extends MirrorAction
00044 {
00045     transient File dir;
00046     transient Mirror mirror;
00047     
00048     static org.apache.log4j.Logger logger = org.apache.log4j.Logger.getLogger( SphinxMirrorAction.class.getName() );
00049     //private static PovIndexer pIndexer;
00050     //private static boolean bCreate = true;
00051     //Runtime theRuntime = Runtime.getRuntime();
00052       
00053 /*    static int garbageCollectorCycle = 30;
00054     static int garbageCollectorCycleCounter=0;
00055     static long count = 0;
00056   */  
00057     static PovUrl uRoot;
00058     //static PovUrlsSet usUrls;
00059     static PovCrawler pCrawler;
00060     
00061     public SphinxMirrorAction( String directory, PovCrawler crawler, PovUrl rooturl )
00062     {
00063         super( directory, false );
00064         logger.setLevel( ( org.apache.log4j.Level )org.apache.log4j.Priority.WARN );
00065         //pIndexer = indexer;
00066         //bCreate = create;
00067         uRoot = rooturl;
00068         //usUrls = urls;
00069         //usUrls = new PovUrlsSet();
00070         pCrawler = crawler;
00071     }
00072     
00073     public boolean equals ( Object object )
00074     {
00075         if ( !( object instanceof MirrorAction ) )
00076             return false;
00077         MirrorAction a = ( MirrorAction )object;
00078         return same ( a.getDirectory(), getDirectory() )
00079             && a.getUseBrowser() == getUseBrowser();
00080     }
00081 
00082     private boolean same ( String s1, String s2 )
00083     {
00084         if ( s1 == null || s2 == null )
00085             return s1 == s2;
00086         else
00087             return s1.equals ( s2 );
00088     }
00089 
00090     private void showit()
00091     {
00092         Browser browser = Context.getBrowser();
00093         if ( browser != null )
00094             try
00095             {
00096                 browser.show ( Link.FileToURL( dir ) );
00097             }
00098             catch ( MalformedURLException e)
00099             {
00100             }
00101     }
00102 
00103     /*public PovUrlsSet getUrls()
00104     {
00105         return usUrls;
00106     }*/
00107     
00108     public synchronized void visit (Page page)
00109     {
00110         try
00111         {
00112             mirror.writePage( page );
00113             logger.warn( "Lookup " + mirror.lookup( new URL( "file:" ), page.getURL() ) );
00114             //PovMappedFile file = new PovMappedFile( mirror.lookup( new URL( "file:" ), page.getURL() ), page.getURL() );
00115             PovUrl url = new PovUrl( page.getURL().toString(), mirror.lookup( new URL( "file:" ), page.getURL() ), uRoot.getDepth() - 1 );
00116             //pIndexer.indexDocs( file, bCreate );
00117             //file = null;
00118             if ( url.getUrl() != uRoot.getUrl() )
00119             {
00120                 pCrawler.addCachedUrl( url );
00121             }
00122             //usUrls.put( url.getUrl(),  url );
00123             page.discardContent();
00124             /*count++;
00125             if( garbageCollectorCycleCounter >= garbageCollectorCycle )
00126             {
00127                 logger.warn("Garbage collection!");
00128                 long memoryBeforeGarbageCollection = theRuntime.freeMemory();
00129                 theRuntime.gc();
00130                 long memoryAfterGarbageCollection = theRuntime.freeMemory();
00131                 logger.warn( "Raised free memory from " + memoryBeforeGarbageCollection +
00132                              " to "  + memoryAfterGarbageCollection + 
00133                              " freeing " + (memoryAfterGarbageCollection - memoryBeforeGarbageCollection) + " bytes");
00134                 logger.warn( "Documents count = " + count );
00135                 garbageCollectorCycleCounter=0;
00136             }
00137             else
00138             {
00139                 garbageCollectorCycleCounter++;
00140             }*/
00141         }
00142         catch ( IOException e )
00143         {
00144             throw new RuntimeException ( e.toString() );
00145         }
00146         catch ( Exception f )
00147         {
00148             logger.warn( f.getMessage() );
00149         }
00150     }
00151 
00152     public void connected ( Crawler crawler )
00153     {
00154         crawler.addCrawlListener( this );
00155     }
00156 
00157     public void disconnected( Crawler crawler )
00158     {
00159         crawler.removeCrawlListener( this );
00160     }
00161 
00165     public void started( CrawlEvent event ){
00166         if ( mirror == null ) {
00167             try
00168             {
00169                 dir = ( getDirectory() != null )
00170                   ? new File ( getDirectory() )
00171                   : Access.getAccess().makeTemporaryFile( "mirror", "" );
00172                 mirror = new Mirror( dir.toString() );
00173 
00174                 Crawler crawler = event.getCrawler ();
00175                 Link[] roots = crawler.getRoots ();
00176                 for ( int i=0; i<roots.length; ++i )
00177                     mirror.mapDir( roots[i].getURL(), dir.toString() );
00178             }
00179             catch ( IOException e )
00180             {
00181                 logger.warn( e ); // FIX: use GUI when available
00182             }
00183         }
00184         //pIndexer.init( bCreate );
00185         logger.warn( "Sphinx mirror manager started..." );
00186     }
00187 
00191     public void stopped( CrawlEvent event )
00192     {
00193         try
00194         {
00195             if ( mirror != null )
00196             {
00197                 mirror.close();
00198                 mirror = null;
00199                 
00200                 if ( getUseBrowser() )
00201                     showit();
00202             }
00203         }
00204         catch ( IOException e )
00205         {
00206             System.err.println( e ); // FIX: use GUI when available
00207         }
00208         //pIndexer.stop();
00209         logger.warn( "Sphinx mirror manager stoped..." );
00210     }
00211 
00215     public void cleared( CrawlEvent event )
00216     {
00217         try
00218         {
00219             if ( mirror != null )
00220             {
00221                 mirror.close();
00222                 mirror = null;
00223                 
00224                 if ( getUseBrowser() )
00225                     showit();
00226             }
00227         }
00228         catch ( IOException e )
00229         {
00230             logger.warn( e ); // FIX: use GUI when available
00231         }
00232     }
00233 
00237     public void timedOut( CrawlEvent event )
00238     {
00239         try
00240         {
00241             if ( mirror != null )
00242             {
00243                 mirror.close();
00244                 mirror = null;
00245                 
00246                 if ( getUseBrowser() )
00247                     showit();
00248             }
00249         }
00250         catch ( IOException e )
00251         {
00252             logger.warn( e ); // FIX: use GUI when available
00253         }
00254     }
00255 
00259     public void paused( CrawlEvent event )
00260     {
00261         try
00262         {
00263             if ( mirror != null )
00264             {
00265                 mirror.rewrite();
00266                 if ( getUseBrowser() )
00267                     showit();
00268             }
00269         }
00270         catch ( IOException e )
00271         {
00272             logger.warn( e ); // FIX: use GUI when available
00273         }
00274     }
00275 }
Accueil | Téléchargement | Manuel | Doc. technique | Sources CVS | Faq | Nous contacter
©2003 - All Rights Reserved