View Javadoc

1   
2   /*
3    * SmartCrawler
4    *
5    * $Id: ImprovedLinksProvider.java,v 1.4 2005/07/08 12:09:08 vincool Exp $
6    * Copyright 2005 Davide Pozza
7    *
8    * This program is free software; you can redistribute it
9    * and/or modify it under the terms of the GNU General Public
10   * License as published by the Free Software Foundation;
11   * either version 2 of the License, or (at your option) any
12   * later version.
13   *
14   * This program is distributed in the hope that it will be
15   * useful, but WITHOUT ANY WARRANTY; without even the implied
16   * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
17   * PURPOSE. See the GNU General Public License for more
18   * details.
19   *
20   * You should have received a copy of the GNU General Public
21   * License along with this program; if not, write to the Free
22   * Software Foundation, Inc., 59 Temple Place, Suite 330,
23   * Boston, MA 02111-1307 USA
24   *
25   */
26  
27  package org.smartcrawler.common;
28  
29  import java.util.HashSet;
30  import java.util.LinkedList;
31  import org.apache.log4j.Logger;
32  
33  
34  
35  /***
36   *
37   *
38   * @author <a href="mailto:pozzad@alice.it">Davide Pozza</a>
39   * @version <tt>$Revision: 1.4 $</tt>
40   */
41  public class ImprovedLinksProvider implements Provider {
42  
43      private static ImprovedLinksProvider instance;
44      private LinkedList<Link> queue;
45      private HashSet<Link> retrievedLinks;
46      private HashSet<Link> toBeConfirmedLinks;
47      private int size;
48      private static Logger log = SCLogger.getLogger(ImprovedLinksProvider.class);
49      private static Logger logProv = SCLogger.getProviderLogger();
50      private Object lock;
51  
52      /*** Creates a new instance of ImprovedLinksProvider */
53      private ImprovedLinksProvider() {
54          queue = new LinkedList<Link>();
55          retrievedLinks = new HashSet<Link>();
56          toBeConfirmedLinks = new HashSet<Link>();
57          lock = new Object();
58      }
59  
60      /***
61       *
62       * @return
63       */
64      public synchronized static ImprovedLinksProvider instance() {
65          if (instance == null) {
66              instance = new ImprovedLinksProvider();
67          }
68          return instance;
69      }
70  
71      /***
72       *
73       * @return
74       */
75      public synchronized Link next() {
76          log.debug("next(): BEGIN");
77  
78          while (toBeConfirmedLinks.size() > 0 && size() == 0){
79              try{
80                  log.debug("next(): waiting: toBeConfirmedLinks="
81                          + toBeConfirmedLinks);
82                  wait();
83              }catch(Exception e){}
84          }
85  
86          Link next = null;
87          if (toBeConfirmedLinks.size() == 0 && size() == 0) {
88              log.debug("next(): Bad call: the provider is empty");
89          } else {
90              //get next link, register it as processed and remove it from the queue
91              log.debug("next(): queue size is " + queue.size());
92  
93              next = queue.poll();
94              log.debug("next(): retrieving " + next);
95              toBeConfirmedLinks.add(next);
96          }
97          log.debug("next(): END");
98          logProv.info("provided: " + next + " queue size: " + queue.size()
99              + " retrieved TOT.:" + retrievedLinks.size()
100             + " to be confirmed TOT.:" + toBeConfirmedLinks.size());
101         return next;
102     }
103 
104     /***
105      *
106      * @param link
107      */
108     public synchronized void store(Link link) {
109         log.debug("store(): BEGIN");
110         //if already exists the file, ignore the put
111         String key = link.toString();
112         if (key != null &&
113                 !retrievedLinks.contains(link) &&
114                 !toBeConfirmedLinks.contains(link) &&
115                 !queue.contains(link)) {
116 
117             log.debug("store(): storing " + link);
118             queue.add(link);
119             notifyAll();
120             logProv.info("stored: " + link + " queue size: " + queue.size()
121             + " retrieved TOT.:" + retrievedLinks.size()
122             + " to be confirmed TOT.:" + toBeConfirmedLinks.size());
123         }
124         log.debug("store(): END");
125     }
126 
127     /***
128      *
129      * @param link
130      */
131     public synchronized void confirm(Link link) {
132         log.debug("confirm(): BEGIN");
133         //if already exists the file, ignore the put
134         //retrievedLinks.add(link.toString());
135         retrievedLinks.add(link);
136         toBeConfirmedLinks.remove(link);
137         notifyAll();
138         logProv.info("confirmed: " + link + " queue size: " + queue.size()
139             + " retrieved TOT.:" + retrievedLinks.size()
140             + " to be confirmed TOT.:" + toBeConfirmedLinks.size());
141         log.debug("confirm(): END");
142     }
143 
144     /***
145      *
146      * @return
147      */
148     public synchronized int size(){
149         return queue.size();
150     }
151 
152     /***
153      *
154      * @return
155      */
156     public synchronized boolean isEmpty() {
157         while (toBeConfirmedLinks.size() > 0 && queue.size() == 0) {
158             try{
159                 log.debug("isEmpty(): waiting: toBeConfirmedLinks="
160                         + toBeConfirmedLinks);
161                 wait();
162             }catch(Exception e){}
163         }
164         log.debug("isEmpty(): returning queue.size()=" + queue.size()
165             + " toBeConfirmedLinks.size()=" + toBeConfirmedLinks.size());
166         return (queue.size() == 0 && toBeConfirmedLinks.size() == 0);
167     }
168 }