![]() |
|||
SphinxMirrorAction.javaGo to the documentation of this file.00001 /* 00002 * WebSphinx web-crawling toolkit 00003 * 00004 * Copyright (c) 1998-2002 Carnegie Mellon University. All rights 00005 * reserved. 00006 * 00007 * Redistribution and use in source and binary forms, with or without 00008 * modification, are permitted provided that the following conditions 00009 * are met: 00010 * 00011 * 1. Redistributions of source code must retain the above copyright 00012 * notice, this list of conditions and the following disclaimer. 00013 * 00014 * 2. Redistributions in binary form must reproduce the above copyright 00015 * notice, this list of conditions and the following disclaimer in 00016 * the documentation and/or other materials provided with the 00017 * distribution. 00018 * 00019 * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND 00020 * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 00021 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 00022 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY 00023 * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 00024 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 00025 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 00026 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 00027 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 00028 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 00029 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 00030 * 00031 */ 00032 00033 package org.net2map.pov.documentsManager; 00034 00035 import websphinx.workbench.*; 00036 import websphinx.*; 00037 00038 import java.io.File; 00039 import java.io.IOException; 00040 import java.net.URL; 00041 import java.net.MalformedURLException; 00042 00043 public class SphinxMirrorAction extends MirrorAction 00044 { 00045 transient File dir; 00046 transient Mirror mirror; 00047 00048 static org.apache.log4j.Logger logger = org.apache.log4j.Logger.getLogger( SphinxMirrorAction.class.getName() ); 00049 //private static PovIndexer pIndexer; 00050 //private static boolean bCreate = true; 00051 //Runtime theRuntime = Runtime.getRuntime(); 00052 00053 /* static int garbageCollectorCycle = 30; 00054 static int garbageCollectorCycleCounter=0; 00055 static long count = 0; 00056 */ 00057 static PovUrl uRoot; 00058 //static PovUrlsSet usUrls; 00059 static PovCrawler pCrawler; 00060 00061 public SphinxMirrorAction( String directory, PovCrawler crawler, PovUrl rooturl ) 00062 { 00063 super( directory, false ); 00064 logger.setLevel( ( org.apache.log4j.Level )org.apache.log4j.Priority.WARN ); 00065 //pIndexer = indexer; 00066 //bCreate = create; 00067 uRoot = rooturl; 00068 //usUrls = urls; 00069 //usUrls = new PovUrlsSet(); 00070 pCrawler = crawler; 00071 } 00072 00073 public boolean equals ( Object object ) 00074 { 00075 if ( !( object instanceof MirrorAction ) ) 00076 return false; 00077 MirrorAction a = ( MirrorAction )object; 00078 return same ( a.getDirectory(), getDirectory() ) 00079 && a.getUseBrowser() == getUseBrowser(); 00080 } 00081 00082 private boolean same ( String s1, String s2 ) 00083 { 00084 if ( s1 == null || s2 == null ) 00085 return s1 == s2; 00086 else 00087 return s1.equals ( s2 ); 00088 } 00089 00090 private void showit() 00091 { 00092 Browser browser = Context.getBrowser(); 00093 if ( browser != null ) 00094 try 00095 { 00096 browser.show ( Link.FileToURL( dir ) ); 00097 } 00098 catch ( MalformedURLException e) 00099 { 00100 } 00101 } 00102 00103 /*public PovUrlsSet getUrls() 00104 { 00105 return usUrls; 00106 }*/ 00107 00108 public synchronized void visit (Page page) 00109 { 00110 try 00111 { 00112 mirror.writePage( page ); 00113 logger.warn( "Lookup " + mirror.lookup( new URL( "file:" ), page.getURL() ) ); 00114 //PovMappedFile file = new PovMappedFile( mirror.lookup( new URL( "file:" ), page.getURL() ), page.getURL() ); 00115 PovUrl url = new PovUrl( page.getURL().toString(), mirror.lookup( new URL( "file:" ), page.getURL() ), uRoot.getDepth() - 1 ); 00116 //pIndexer.indexDocs( file, bCreate ); 00117 //file = null; 00118 if ( url.getUrl() != uRoot.getUrl() ) 00119 { 00120 pCrawler.addCachedUrl( url ); 00121 } 00122 //usUrls.put( url.getUrl(), url ); 00123 page.discardContent(); 00124 /*count++; 00125 if( garbageCollectorCycleCounter >= garbageCollectorCycle ) 00126 { 00127 logger.warn("Garbage collection!"); 00128 long memoryBeforeGarbageCollection = theRuntime.freeMemory(); 00129 theRuntime.gc(); 00130 long memoryAfterGarbageCollection = theRuntime.freeMemory(); 00131 logger.warn( "Raised free memory from " + memoryBeforeGarbageCollection + 00132 " to " + memoryAfterGarbageCollection + 00133 " freeing " + (memoryAfterGarbageCollection - memoryBeforeGarbageCollection) + " bytes"); 00134 logger.warn( "Documents count = " + count ); 00135 garbageCollectorCycleCounter=0; 00136 } 00137 else 00138 { 00139 garbageCollectorCycleCounter++; 00140 }*/ 00141 } 00142 catch ( IOException e ) 00143 { 00144 throw new RuntimeException ( e.toString() ); 00145 } 00146 catch ( Exception f ) 00147 { 00148 logger.warn( f.getMessage() ); 00149 } 00150 } 00151 00152 public void connected ( Crawler crawler ) 00153 { 00154 crawler.addCrawlListener( this ); 00155 } 00156 00157 public void disconnected( Crawler crawler ) 00158 { 00159 crawler.removeCrawlListener( this ); 00160 } 00161 00165 public void started( CrawlEvent event ){ 00166 if ( mirror == null ) { 00167 try 00168 { 00169 dir = ( getDirectory() != null ) 00170 ? new File ( getDirectory() ) 00171 : Access.getAccess().makeTemporaryFile( "mirror", "" ); 00172 mirror = new Mirror( dir.toString() ); 00173 00174 Crawler crawler = event.getCrawler (); 00175 Link[] roots = crawler.getRoots (); 00176 for ( int i=0; i<roots.length; ++i ) 00177 mirror.mapDir( roots[i].getURL(), dir.toString() ); 00178 } 00179 catch ( IOException e ) 00180 { 00181 logger.warn( e ); // FIX: use GUI when available 00182 } 00183 } 00184 //pIndexer.init( bCreate ); 00185 logger.warn( "Sphinx mirror manager started..." ); 00186 } 00187 00191 public void stopped( CrawlEvent event ) 00192 { 00193 try 00194 { 00195 if ( mirror != null ) 00196 { 00197 mirror.close(); 00198 mirror = null; 00199 00200 if ( getUseBrowser() ) 00201 showit(); 00202 } 00203 } 00204 catch ( IOException e ) 00205 { 00206 System.err.println( e ); // FIX: use GUI when available 00207 } 00208 //pIndexer.stop(); 00209 logger.warn( "Sphinx mirror manager stoped..." ); 00210 } 00211 00215 public void cleared( CrawlEvent event ) 00216 { 00217 try 00218 { 00219 if ( mirror != null ) 00220 { 00221 mirror.close(); 00222 mirror = null; 00223 00224 if ( getUseBrowser() ) 00225 showit(); 00226 } 00227 } 00228 catch ( IOException e ) 00229 { 00230 logger.warn( e ); // FIX: use GUI when available 00231 } 00232 } 00233 00237 public void timedOut( CrawlEvent event ) 00238 { 00239 try 00240 { 00241 if ( mirror != null ) 00242 { 00243 mirror.close(); 00244 mirror = null; 00245 00246 if ( getUseBrowser() ) 00247 showit(); 00248 } 00249 } 00250 catch ( IOException e ) 00251 { 00252 logger.warn( e ); // FIX: use GUI when available 00253 } 00254 } 00255 00259 public void paused( CrawlEvent event ) 00260 { 00261 try 00262 { 00263 if ( mirror != null ) 00264 { 00265 mirror.rewrite(); 00266 if ( getUseBrowser() ) 00267 showit(); 00268 } 00269 } 00270 catch ( IOException e ) 00271 { 00272 logger.warn( e ); // FIX: use GUI when available 00273 } 00274 } 00275 } |
|||
|
Accueil | Téléchargement | Manuel
| Doc. technique | Sources CVS |
Faq | Nous contacter
©2003 - All Rights Reserved |
|||