1
2 /*
3 * SmartCrawler
4 *
5 * $Id: LinkFilter.java,v 1.4 2005/08/05 15:55:53 vincool Exp $
6 * Copyright 2005 Davide Pozza
7 *
8 * This program is free software; you can redistribute it
9 * and/or modify it under the terms of the GNU General Public
10 * License as published by the Free Software Foundation;
11 * either version 2 of the License, or (at your option) any
12 * later version.
13 *
14 * This program is distributed in the hope that it will be
15 * useful, but WITHOUT ANY WARRANTY; without even the implied
16 * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
17 * PURPOSE. See the GNU General Public License for more
18 * details.
19 *
20 * You should have received a copy of the GNU General Public
21 * License along with this program; if not, write to the Free
22 * Software Foundation, Inc., 59 Temple Place, Suite 330,
23 * Boston, MA 02111-1307 USA
24 *
25 */
26
27 package org.smartcrawler.filter;
28 import org.apache.commons.lang.StringUtils;
29 import org.apache.log4j.Logger;
30 import org.smartcrawler.common.AbstractParametrizableComponent;
31 import org.smartcrawler.common.Context;
32 import org.smartcrawler.common.Link;
33 import org.smartcrawler.common.SCLogger;
34
35
36 /***
37 *
38 *
39 * @author <a href="mailto:pozzad@alice.it">Davide Pozza</a>
40 * @version <tt>$Revision: 1.4 $</tt>
41 */
42 public class LinkFilter extends AbstractParametrizableComponent implements PrecFilterLink {
43
44 private static Logger log = SCLogger.getLogger(LinkFilter.class);
45
46 /***
47 *
48 * @param link
49 * @return
50 */
51 /* public boolean isPermitted(Context conf, Link link) {
52 log.debug("isPermitted() BEGIN");
53 String linksStr = getParameter("links");
54 StringTokenizer st = new StringTokenizer(linksStr);
55 String[] links = new String[st.countTokens()];
56 int i = 0;
57 boolean res = false;
58 while (st.hasMoreTokens()) {
59 links[i] = st.nextToken();
60 if (link.toString().indexOf(links[i]) >= 0) {
61 res = true;
62 log.debug("Checking link: " + link.toString()
63 + " VS " + links[i] + " res="+res);
64 break;
65 } else {
66 log.debug("Checking link: " + link.toString()
67 + " VS " + links[i] + " res="+res);
68 }
69 }
70 log.debug("isPermitted() END");
71 return res;
72 }*/
73 public boolean isPermitted(Context conf, Link link) {
74 log.debug("isPermitted() BEGIN");
75 boolean res = false;
76 try {
77 String[] items = getParameters("links");
78 log.debug("isPermitted() items.length=" + items.length);
79
80 for (int i = 0; i < items.length; i++) {
81 items[i] = StringUtils.replace(items[i], ".", "//.");
82 items[i] = StringUtils.replace(items[i], "*", ".*");
83 if (link.toString().matches(items[i])) {
84 res = true;
85 log.debug("Checking link: " + link.toString()
86 + " VS " + items[i] + " res="+res);
87 break;
88 } else {
89 log.debug("Checking link: " + link.toString()
90 + " VS " + items[i] + " res="+res);
91 }
92 }
93 }catch (Exception e) {
94 log.warn("Filter error", e);
95 }
96 log.debug("isPermitted() END");
97 return res;
98 }
99 }