1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27 package org.smartcrawler.retriever;
28
29 import java.io.IOException;
30 import java.util.zip.GZIPInputStream;
31
32 import org.apache.commons.httpclient.*;
33 import org.apache.commons.httpclient.methods.GetMethod;
34 /***
35 *
36 *
37 * @author <a href="mailto:pozzad@alice.it">Davide Pozza</a>
38 * @version <tt>$Revision: 1.2 $</tt>
39 */
40
41 public class SmartGetMethod extends GetMethod {
42
43 public SmartGetMethod() {
44 super();
45 }
46
47 /***
48 * Constructor specifying a URI.
49 *
50 * @param uri either an absolute or relative URI
51 *
52 * @since 1.0
53 */
54 public SmartGetMethod(String uri) {
55 super(uri);
56 }
57
58
59 /***
60 * Overrides method in {@link HttpMethodBase}.
61 *
62 * Notifies the server that we can process a GZIP-compressed response before
63 * sending the request.
64 *
65 */
66 public int execute(HttpState state, HttpConnection conn)
67 throws HttpException, HttpRecoverableException, IOException {
68
69
70 addRequestHeader("Accept-Encoding", "gzip");
71 addRequestHeader("Accept", "text/xml,application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5");
72
73 return super.execute(state, conn);
74 }
75
76 /***
77 * Overrides method in {@link GetMethod} to set the responseStream variable appropriately.
78 *
79 * If the response body was GZIP-compressed, responseStream will be set to a GZIPInputStream
80 * wrapping the original InputStream used by the superclass.
81 *
82 */
83 protected void readResponseBody(HttpState state, HttpConnection conn) throws IOException, HttpException {
84 super.readResponseBody(state, conn);
85
86 Header contentEncodingHeader = getResponseHeader("Content-Encoding");
87
88 if (contentEncodingHeader != null && contentEncodingHeader.getValue().equalsIgnoreCase("gzip"))
89 setResponseStream(new GZIPInputStream(getResponseStream()));
90 }
91
92 }