Index: lib/alexandria/book_providers/worldcat.rb
===================================================================
--- lib/alexandria/book_providers/worldcat.rb (revision 1088)
+++ lib/alexandria/book_providers/worldcat.rb (revision 1089)
@@ -23,7 +23,11 @@
# New WorldCat provider, taken from the Palatina MetaDataSource and
# modified to fit the structure of Alexandria book providers.
# (25 Feb 2009)
+#
+# Updated from Palatina to reflect changes in the WorldCat website.
+# (1 Sep 2009)
+
require 'cgi'
require 'alexandria/net'
@@ -136,7 +140,17 @@
raise NoResultsError
end
- title_header = doc%'h1.item-title'
+
+ if doc % 'table.table-results'
+ log.info { "Found multiple results for lookup: fetching first result only" }
+ search_results = parse_search_result_data(html)
+ first = search_results.first
+ rslt2 = transport.get_response(URI.parse(first[:url]))
+ html2 = rslt2.body
+ doc = Hpricot(html2)
+ end
+
+ title_header = doc%'h1.title'
title = title_header.inner_text if title_header
unless title
log.warn { "Unexpected lack of title from WorldCat lookup" }
@@ -145,18 +159,25 @@
log.info { "Found book #{title} at WorldCat" }
authors = []
- authors_div = doc%'div.item-author'
- if authors_div
- (authors_div/:a).each do |a|
+ authors_tr = doc%'tr#details-allauthors'
+ if authors_tr
+ (authors_tr/:a).each do |a|
authors << a.inner_text
end
end
# can we do better? get the City name?? or multiple publishers?
- publisher_row = doc%'td.label[text()*=Publisher]/..'
+ bibdata = doc % 'div#bibdata'
+ bibdata_table = bibdata % :table
+ publisher_row = bibdata_table % 'th[text()*=Publisher]/..'
+
if publisher_row
publication_info = (publisher_row/'td').last.inner_text
- publication_info =~ /:*([^;,]+)/
+ if publication_info.index(':')
+ publication_info =~ /:[\s]*([^;,]+)/
+ else
+ publication_info =~ /([^;,]+)/
+ end
publisher = $1
publication_info =~ /([12][0-9]{3})/
year = $1.to_i if $1
@@ -167,7 +188,7 @@
isbn = search_isbn
unless isbn
- isbn_row = doc%'td.label[text()*=ISBN]/..'
+ isbn_row = doc % 'tr#details-standardno' ##bibdata_table % 'th[text()*=ISBN]/..'
if isbn_row
isbns = (isbn_row/'td').last.inner_text.split
isbn = Library.canonicalise_isbn(isbns.first)