View Javadoc

1   package org.sourceforge.vlibrary.util;
2   
3   import org.apache.axis2.client.ServiceClient;
4   import org.apache.axis2.client.OperationClient;
5   import org.apache.axiom.om.OMElement;
6   import org.apache.axiom.om.OMNamespace;
7   import org.apache.axiom.om.OMAbstractFactory;
8   import org.apache.axis2.client.Options;
9   import org.apache.axis2.addressing.EndpointReference;
10  import org.apache.axiom.soap.SOAPBody;
11  import org.apache.axiom.soap.SOAPEnvelope;
12  import org.apache.axiom.soap.SOAPFactory;
13  import org.apache.axis2.Constants;
14  import org.apache.axis2.context.MessageContext;
15  
16  import org.apache.axis2.transport.http.HTTPConstants;
17  import org.apache.axis2.transport.http.HttpTransportProperties;
18  import org.apache.commons.lang.StringUtils;
19  
20  import org.apache.log4j.Logger;
21  import java.util.Iterator;
22  import java.util.Map;
23  import java.util.HashMap;
24  import java.util.ArrayList;
25  
26  /**
27   *
28   * Class to allow the lookup of books by identifiers (ISBN, etc) 
29   * with SRU/SRW compliant service providers, to obtain:
30   *     - Author
31   *     - Title
32   *     - Publisher
33   *     - PublishedDate
34   *     - SubjectsList
35   *
36   */
37  public class SruSrwClientImpl implements SruSrwClientInterface {
38  
39      private static Logger logger =
40          Logger.getLogger(SruSrwClientImpl.class.getName());
41  
42      String endPointURL = "http://z3950.loc.gov:7090/voyager";
43      String proxyUserId = "anonymous";
44      String proxyPasswd = "anonymous";
45      String proxyHostName = "";
46      String proxyPort = "8080";
47  
48      /**
49      * Sets the URI for the SRU/SRW service provider, default to http://z3950.loc.gov:7090/voyager
50      */
51      public void setEndPointURL( String endPointURL ) {
52          this.endPointURL = endPointURL;
53      }
54  
55      /**
56       * Returns the the SRU/SRW service provider URI
57      * @return The URI for the SRU/SRW service provider, default to http://z3950.loc.gov:7090/voyager
58      * Required for any request
59      */
60      public String getEndPointURL() {
61          return this.endPointURL;
62      }
63  
64      /**
65      * Set the proxy name if a proxy is required (ex. "127.0.0.1" )
66      * Default is ""
67      */
68      public void setProxyHostName( String proxyHostName ) {
69          this.proxyHostName= proxyHostName;
70      }
71  
72      /**
73       * Returns the proxy hostname
74      * @return the proxy name 
75      */
76      public String getProxyHostName() {
77          return this.proxyHostName;
78      }
79  
80      /**
81      * Set the proxy userid if proxy is required and configured (non-empty), ignored otherwise
82      * Default is "anonymous", if a non-authenticate proxy is used this value must be the default
83      */
84      public void setProxyUserId( String proxyUserId) {
85          this.proxyUserId = proxyUserId;
86      }
87  
88      /**
89       * Returns the proxy userid
90      * @return the proxy userid 
91      */
92      public String getProxyUserId() {
93          return this.proxyUserId;
94      }
95  
96      /**
97      * Set the proxy password if proxy is required and configured (non-empty), ignored otherwise
98      * Default is "anonymous", if a non-authenticate proxy is used this value must be the default
99      */
100     public void setProxyPasswd( String proxyPasswd) {
101         this.proxyPasswd = proxyPasswd;
102     }
103 
104     /**
105      * Returns the proxy userid
106     * @return the proxy password
107     */
108     public String getProxyPasswd() {
109         return this.proxyPasswd;
110     }
111 
112     /**
113     * Set the proxy port if proxy is required and configured (non-empty), ignored otherwise
114     * Content of the string must be between 1 and 65556
115     * Default value is 8080
116     */
117     public void setProxyPort( String proxyPort) {
118         this.proxyPort= proxyPort;
119     }
120 
121     /**
122      * Returns the proxy port
123     * @return the proxy port
124     */
125     public String getProxyPort() {
126         return this.proxyPort;
127     }
128 
129     /**
130     * Looks up using SRU/SRW defined endpoint an identifier (normally ISBN)
131     * and returns the first record returned by SRU/SRW as a HashMap of fields
132     * 
133     * @param identifier Normally the isbn of the book to search, 
134     *             expressed in the "xxxxxxxxxx" format
135     *             <br>ISBN can be in 10 or 13 digit format, no spaces or dashes </br>
136     *             <br>If something else is passed instead of an (unique) ISBN,
137     *             the first record (of potentially many) is always returned. </br>
138     *             <br>This is a limitation due to the fact that the main 
139     *             SRU/SRW SP (LOC.GOV) does not seem to have implemented fully
140     *             the dc (Dublin Core) context (missing especially the dc.identifier index). </br>
141     *             <br>For example, a search for Web Services will return:
142     *             recordMap={TITLE=Introduction to Semantic Web and Semantic Web services /, PUBLISHER=Boca Raton, FL : CRC Press,, AUTHOR=Yu, Liyang., DATE=2007., SUBJECTSLIST=[Semantic Web., Web services.]} </br>
143     *             Note that any dentifier strings containing spaces must be escaped (ex: "%22Web%20Services%22").
144     * @see <a href="http://www.loc.gov/standards/sru/"> SRU/SRW Specification for more details </a>
145     *
146     * @return Map containing the elements described above
147     *         using the names:   AUTHOR, TITLE, PUBLISHER, PUBLISHEDDATE, SUBJECTSLIST
148     *         and String values,  with the exception of SubjectsList whose value 
149     *         is a List of string subjects
150     *
151     * @throws Exception if either parameters are incorrect, the SRU/SRW enpoint
152     *         cannot be called, if the returned XML cannot be parsed or
153     *         if the record identified by identifier could not be found by provider
154     */
155     public Map callSruSrwProvider( String identifier ) throws Exception {
156 
157         if (endPointURL == null || endPointURL.equals("") ) {
158             throw new Exception("Invalid endPointURL=" + endPointURL);
159         }
160 
161         if ( identifier == null || identifier.equals("") ) {
162             throw new Exception("Invalid identifier=" + identifier);
163         }
164 
165         ServiceClient client = new ServiceClient();
166 	OperationClient operationClient = client.createClient( ServiceClient.ANON_OUT_IN_OP );
167 
168         MessageContext outMsgCtx = new MessageContext();
169         Options options = outMsgCtx.getOptions();
170         options.setTo(new EndpointReference( endPointURL ));
171         options.setTransportInProtocol(Constants.TRANSPORT_HTTP);
172         options.setProperty(Constants.Configuration.ENABLE_REST, Constants.VALUE_TRUE);
173         options.setProperty(Constants.Configuration.HTTP_METHOD, Constants.Configuration.HTTP_METHOD_GET);
174         options.setProperty(Constants.Configuration.CONTENT_TYPE, HTTPConstants.MEDIA_TYPE_X_WWW_FORM);
175         options.setProperty(HTTPConstants.REUSE_HTTP_CLIENT, "true");
176 
177         if (proxyHostName != null && !proxyHostName.equals("") ) {
178             int port;
179             
180             try {
181                 port = new Integer(proxyPort).intValue();
182             } catch( Exception ex) {
183                 throw new Exception("Invalid port=" + proxyPort);
184             }
185             
186             HttpTransportProperties.ProxyProperties proxyProperties = new HttpTransportProperties.ProxyProperties();
187             proxyProperties.setProxyPort( port );
188             proxyProperties.setProxyName( proxyHostName );
189             options.setProperty(HTTPConstants.PROXY, proxyProperties );
190 
191             HttpTransportProperties.Authenticator auth = new HttpTransportProperties.Authenticator();
192             auth.setUsername(proxyUserId);
193             auth.setPassword(proxyPasswd);
194             auth.setHost( endPointURL );
195             options.setProperty(org.apache.axis2.transport.http.HTTPConstants.AUTHENTICATE,auth);
196         }
197 
198         SOAPEnvelope request =  getSOAPPayload( identifier) ;
199         outMsgCtx.setEnvelope( request ); 
200 
201         operationClient.addMessageContext(outMsgCtx);
202 
203         try {
204             operationClient.execute(true);
205         } catch (Exception ex) {
206             logger.error(ex);
207             throw new Exception("Exception while accessing endPointURL= " + endPointURL, ex );
208         }
209 
210 
211         MessageContext inMsgtCtx = operationClient.getMessageContext("In");  
212         SOAPEnvelope response = inMsgtCtx.getEnvelope();
213 
214         try {
215             return getRecordFields( response );	
216         } catch( RecordNotFoundException ex) {
217             throw new Exception("Record not found for request: " + request + 
218                                                    ", response= " + response, ex);
219         } catch( Exception exx ) {
220             throw new Exception("Unparsable response for request: " + request +
221                                                    ", response= " + response, exx);
222         }    
223 
224     }
225 
226     /**
227     *
228     * Creates the full SOAPEnvelope to be sent to LOC, based on SRW schemas
229     * @param isbn The isbn of the book to search, 
230     *             expressed in the "xxxxxxxxxx" format
231     *             ISBN can be in 10 or 13 digit format, no spaces or dashes
232     *
233     * @return SOAPEnvelope containing fully built payload to be passed to LOC
234     *
235     */
236     public SOAPEnvelope getSOAPPayload( String isbn ) {
237 
238         SOAPFactory omFactory = OMAbstractFactory.getSOAP11Factory();
239         OMNamespace ns = omFactory.createOMNamespace( "http://www.loc.gov/zing/srw/", "SRW");
240 
241         SOAPEnvelope envelope = omFactory.getDefaultEnvelope();
242         SOAPBody body = envelope.getBody();
243 
244         OMElement request = omFactory.createOMElement("request", ns);
245         request.setText("searchRetrieve");
246         body.addChild( request);
247 
248         OMElement version= omFactory.createOMElement("version", ns);
249         version.setText("1.1");
250         request.addChild( version );
251 
252         OMElement operation = omFactory.createOMElement("operation", ns);
253         operation.setText("searchRetrieve");
254         request.addChild( operation );
255 
256         OMElement maximumRecords = omFactory.createOMElement("maximumRecords", ns);
257         maximumRecords.setText("1");
258         request.addChild( maximumRecords );
259 
260         OMElement recordSchema = omFactory.createOMElement("recordSchema", ns);
261         recordSchema.setText("dc");
262         request.addChild( recordSchema );
263 
264         OMElement query = omFactory.createOMElement("query", ns);
265         query.setText( isbn );
266         request.addChild( query );
267 
268         if (logger.isDebugEnabled()) {
269             logger.debug("envelope=" + envelope);
270         }
271 
272         return envelope;
273     }
274 
275     /**
276     *
277     * Parses the response envelope looking for:
278     *     - Author
279     *     - Title
280     *     - Publisher
281     *     - PublishedDate
282     *     - SubjectsList
283     *
284     * @param envelope Containing the xml with record data
285     * @return HashMap containing the elements described above
286     *         using the all-capital names listed above and String values, 
287     *         with the exception of SubjectsList whose value 
288     *         is an ArrayList of string subjects
289     * @throws RecordNotFoundException if the record was not 
290     *         found (service provider returned 0 records, etc
291     * @throws Exception if any other parsing error
292     */
293     private HashMap getRecordFields( SOAPEnvelope envelope ) 
294       throws RecordNotFoundException, Exception {
295 
296         HashMap recordMap = new HashMap();
297         ArrayList subjectsList = new ArrayList();
298 
299         recordMap.put("SUBJECTSLIST", subjectsList);
300 	
301         OMElement searchRetrieveResponse = envelope.getBody().getFirstElement();  // this should always be searchRetrieveResponse
302         
303         OMElement record = null;
304 
305         Iterator iterator = searchRetrieveResponse.getChildElements();
306 
307         while( iterator.hasNext() ) {
308             OMElement elem = (OMElement)iterator.next();
309             if ( elem.getLocalName().equalsIgnoreCase("numberOfRecords") && 
310                  elem.getText().equals("0") ) {
311                     logger.error("Throwing RecordNotFoundException: " +
312                             "number of records is 0");
313                     throw new RecordNotFoundException();
314             } else if ( elem.getLocalName().equalsIgnoreCase("records") ) {
315                 record = elem.getFirstElement();
316                 break;
317             }
318         }
319 
320         if (record == null) {
321             logger.error("Throwing RecordNotFoundException: " +
322                     "no first element");
323             throw new RecordNotFoundException();
324         }
325 
326         OMElement recordData = null;
327 
328         iterator = record.getChildElements();
329 
330         while( iterator.hasNext() ) {
331             OMElement elem = (OMElement)iterator.next();
332             if ( elem.getLocalName().equalsIgnoreCase("recordData") ) {
333                 recordData = elem.getFirstElement();  //only one, the awkwardly called srw_dc:dc
334 
335                 break;
336             }
337         }
338 
339         if ( recordData == null ) {
340             logger.error("Throwing RecordNotFoundException: " +
341                     "no recordData");
342                     throw new RecordNotFoundException();
343         }
344 
345         iterator = recordData.getChildElements();
346 
347         while( iterator.hasNext() ) {
348             OMElement elem = (OMElement)iterator.next();
349 
350             if ( elem.getLocalName().equalsIgnoreCase("title") ) {
351                 // Remove trailing "/" from title
352                 recordMap.put("TITLE", StringUtils.removeEnd(elem.getText(), "/"));
353             } else if ( elem.getLocalName().equalsIgnoreCase("creator") ) {
354                 recordMap.put("AUTHOR", elem.getText());
355             } else if ( elem.getLocalName().equalsIgnoreCase("publisher") ) {
356                 recordMap.put("PUBLISHER", elem.getText());
357             } else if ( elem.getLocalName().equalsIgnoreCase("date") ) {
358                 recordMap.put("DATE", elem.getText());
359             } else if ( elem.getLocalName().equalsIgnoreCase("subject") ) {
360                 subjectsList.add(elem.getText());
361             }
362         }
363         if (logger.isDebugEnabled()) {
364             logger.debug("recordMap=" + recordMap);
365         }
366 
367 	return recordMap;
368     }
369 
370     /* thrown if requested record was not found by the service provider */
371     class RecordNotFoundException extends Exception {
372 
373         public RecordNotFoundException() {
374             super("searchRetrieveResponse: record not found");
375         }
376     }
377 }
378