113d69e
Index: lib/alexandria/book_providers/worldcat.rb
113d69e
===================================================================
113d69e
--- lib/alexandria/book_providers/worldcat.rb	(revision 1088)
113d69e
+++ lib/alexandria/book_providers/worldcat.rb	(revision 1089)
113d69e
@@ -23,7 +23,11 @@
113d69e
 # New WorldCat provider, taken from the Palatina MetaDataSource and
113d69e
 # modified to fit the structure of Alexandria book providers.
113d69e
 # (25 Feb 2009)
113d69e
+#
113d69e
+# Updated from Palatina, to reflect changes in the WorldCat website.
113d69e
+# (1 Sep 2009)
113d69e
 
113d69e
+
113d69e
 require 'cgi'
113d69e
 require 'alexandria/net'
113d69e
 
113d69e
@@ -136,7 +140,17 @@
113d69e
           raise NoResultsError
113d69e
         end
113d69e
 
113d69e
-        title_header = doc%'h1.item-title'
113d69e
+
113d69e
+        if doc % 'table.table-results'
113d69e
+          log.info { "Found multiple results for lookup: fetching first result only" }
113d69e
+          search_results = parse_search_result_data(html)
113d69e
+          first = search_results.first
113d69e
+          rslt2 = transport.get_response(URI.parse(first[:url]))
113d69e
+          html2 = rslt2.body
113d69e
+          doc = Hpricot(html2)
113d69e
+        end
113d69e
+
113d69e
+        title_header = doc%'h1.title'
113d69e
         title = title_header.inner_text if title_header
113d69e
         unless title
113d69e
           log.warn { "Unexpected lack of title from WorldCat lookup" }
113d69e
@@ -145,18 +159,25 @@
113d69e
         log.info { "Found book #{title} at WorldCat" }
113d69e
 
113d69e
         authors = []
113d69e
-        authors_div = doc%'div.item-author'
113d69e
-        if authors_div
113d69e
-          (authors_div/:a).each do |a|
113d69e
+        authors_tr = doc%'tr#details-allauthors'
113d69e
+        if authors_tr
113d69e
+          (authors_tr/:a).each do |a|
113d69e
             authors << a.inner_text
113d69e
           end
113d69e
         end
113d69e
 
113d69e
         # can we do better? get the City name?? or multiple publishers?
113d69e
-        publisher_row = doc%'td.label[text()*=Publisher]/..'
113d69e
+        bibdata = doc % 'div#bibdata'
113d69e
+        bibdata_table = bibdata % :table
113d69e
+        publisher_row = bibdata_table % 'th[text()*=Publisher]/..'
113d69e
+
113d69e
         if publisher_row
113d69e
           publication_info = (publisher_row/'td').last.inner_text
113d69e
-          publication_info =~ /:*([^;,]+)/
113d69e
+          if publication_info.index(':')
113d69e
+            publication_info =~ /:[\s]*([^;,]+)/
113d69e
+          else
113d69e
+            publication_info =~ /([^;,]+)/
113d69e
+          end
113d69e
           publisher = $1
113d69e
           publication_info =~ /([12][0-9]{3})/
113d69e
           year = $1.to_i if $1
113d69e
@@ -167,7 +188,7 @@
113d69e
 
113d69e
         isbn = search_isbn
113d69e
         unless isbn
113d69e
-          isbn_row = doc%'td.label[text()*=ISBN]/..'
113d69e
+          isbn_row = doc % 'tr#details-standardno' ##bibdata_table % 'th[text()*=ISBN]/..'
113d69e
           if isbn_row
113d69e
             isbns = (isbn_row/'td').last.inner_text.split
113d69e
             isbn = Library.canonicalise_isbn(isbns.first)