import java.util.Vector; import java.util.Date; import java.net.*; import java.io.*; /** An AltaVistaFacade defines an Alta Vista search engine. This engine * submits queries to the AltaVista site and parses the results into a * SearchResults Collection * * @version 1.0 * @author Deric Bertrand - October 1999 */ public class AltaVistaFacade extends SearchEngineFacade { /** S_AltaVistaURLString Defines the AltaVista URL */ private static final String S_AltaVistaURLString = "http://www.altavista.com/cgi-bin/query"; /** * Submits a query request to the AdditionalResults.ASP and unmarshalls the * searialized results into a SearchResult collection * @param queryText The query string sent to the AltaVista search engine for processing * @param listingIndex Starting index of the query * @param theCollection The Vector of SearchResult's * @return The number of results obtained from the current search request */ public int SubmitQuery(String queryText, int listingIndex, Vector theCollection) throws java.net.MalformedURLException, java.io.IOException, Exception { int resultCount, startidx; HttpURLConnection theConnection; BufferedReader in; SearchResult searchResult; URL theURL; String resultLine; StringBuffer resultString = new StringBuffer();; // Connect to Alta Vista and retrieve the HTML page of results theURL = new URL( this.buildURLString( queryText, listingIndex)); theConnection = (HttpURLConnection) theURL.openConnection(); // added exception handling - 11/11/99 - Deric Bertrand if (( theConnection.getResponseCode() == -1) ||(theConnection.getResponseCode() != HttpURLConnection.HTTP_OK)) throw new IOException("Bad response from URLconnect"); in = new BufferedReader(new InputStreamReader(theConnection.getInputStream())); while ((resultLine = in.readLine()) != null) resultString.append(resultLine); // Find where the result set starts startidx = 0; resultCount = 0; while ((startidx = resultString.toString().indexOf("
", startidx)) != -1) { startidx += 11; // Move past the
token searchResult = parseHTML(resultString.toString(), startidx); // Are we looping - 11/09/99 - Deric Bertrand if ( searchResult.getRank() < listingIndex) throw new Exception( "Invalid Search Result retrieved for listingIndex" ); theCollection.addElement( searchResult); resultCount++; } // Close the connection - 11/11/99 - Deric Bertrand in.close(); return resultCount; } /** * Formats a query to send to AltaVista (URL and the CGI Query String) * @param queryText The query string sent to the AltaVista search engine for processing * @param listingIndex Starting index of the query * @return The formatted AltaVista URL and Query String */ private String buildURLString( String queryText, int listingIndex) { StringBuffer queryString = new StringBuffer(); queryString.append(S_AltaVistaURLString + "?"); // AltaVista URL queryString.append("pg=aq"); // Advanced Query queryString.append("&kl=XX"); // Language Type (CODE VALUE) queryString.append("&q=" + queryText); // Query Text queryString.append("&r="); // Sort By queryString.append("&d0="); // Start Date (DATE FORMAT: DD%2FMM%2FYY) queryString.append("&d1="); // End Date // Is there a specific starting Item if ( listingIndex > 0) queryString.append("&stq=" + listingIndex + "&c9k"); return(queryString.toString()); } /** * Parses the AltaVista Search Results (HTML page) into SearchResult instances * @param htmlPage The HTML page of search results returned from the AltaVista query * @param startidx Starting index of parse * @return A SearchResult instance * deprecated Date(string) */ private SearchResult parseHTML( String htmlPage, int startidx) throws MalformedURLException { SearchResult searchResult = null; String urlString = null; String title = null; String description = null; Date lastModified = null; int rank = 0; int estimatedSize = -1; int endidx = 0; // Rank endidx = htmlPage.indexOf(".", startidx); rank = (Integer.parseInt(htmlPage.substring(startidx, endidx))); startidx = endidx + 1; // URL endidx = htmlPage.indexOf("", startidx); urlString = htmlPage.substring(startidx, endidx - 1); startidx = endidx + 4; // Title endidx = htmlPage.indexOf("", startidx); title = htmlPage.substring(startidx, endidx); startidx = endidx + 8; // Description endidx = htmlPage.indexOf("
", startidx); startidx = endidx + 4; endidx = htmlPage.indexOf("
", startidx); description = htmlPage.substring(startidx, endidx); startidx = endidx + 5; // LastModified endidx = htmlPage.indexOf("Last modified on:", startidx); startidx = endidx + 17; endidx = htmlPage.indexOf(" - ", startidx); lastModified = new Date( htmlPage.substring(startidx, endidx)); startidx = endidx + 3; // Find the PageSize endidx = htmlPage.indexOf("K bytes", startidx); estimatedSize = (Integer.parseInt(htmlPage.substring(startidx, endidx)) * 1024); // Build the search result instance searchResult = new SearchResult(urlString, title, rank, description, lastModified, estimatedSize); return(searchResult); } }