Index: lib/alexandria/book_providers/deastore.rb =================================================================== --- lib/alexandria/book_providers/deastore.rb (revision 1086) +++ lib/alexandria/book_providers/deastore.rb (revision 1087) @@ -49,12 +49,18 @@ def get_book_from_search_result(result) log.debug { "Fetching book from #{result[:url]}" } html_data = agent.get(result[:url]) + #File.open("rsltflarn#{Time.now().usec()}.html", 'wb') do |f| + # f.write(html_data.body) + #end parse_result_data(html_data.body) end def search(criterion, type) criterion = criterion.convert("ISO-8859-1", "UTF-8") # still needed?? html_data = agent.get(create_search_uri(type, criterion)) + #File.open("flarn#{Time.now().usec()}.html", 'wb') do |f| + # f.write(html_data.body) + #end results = parse_search_result_data(html_data.body) raise NoResultsError if results.empty? @@ -89,7 +95,9 @@ search_term_encoded = CGI.escape(search_term) end - BASE_SEARCH_URL % [search_type_code, search_term_encoded] + uri = BASE_SEARCH_URL % [search_type_code, search_term_encoded] + log.debug { uri } + uri end def parse_search_result_data(html) @@ -109,13 +117,21 @@ # p Data di pubblicazione: \n 2009 # p.prezzo (price) - cover_url = '' - cover_images = div/'a/img' - unless cover_images.empty? - img = cover_images.first - image_url = img['src'] - cover_url = "#{SITE}#{image_url}" - end +# cover_url = '' +# cover_images = div/'a/img' +# unless cover_images.empty? +# img = cover_images.first +# image_url = img['src'] +# if image_url =~ /^http/ +# cover_url = '' # image_url +# elsif image_url[0..0] != '/' +# cover_url = "#{SITE}/#{image_url}" +# else +# cover_url = "#{SITE}#{image_url}" +# end +# log.debug { "Search Cover Image URL #{cover_url}" } + +# end content = div/'div.scheda_content' title_link = (content/:a).first @@ -167,12 +183,17 @@ end # author(s) + authors = [] author_span = data%'span.int_scheda[text()*=Autore]' - author_links = author_span/'a.info' - authors = [] - author_links.each do |link| - authors << normalize(link.inner_text) + unless author_span + author_span = data%'span.int_scheda[text()*=cura]' # editor end + if author_span + author_links = author_span/'a.info' + author_links.each do |link| + authors << normalize(link.inner_text) + end + end # publisher publisher_par = data%'span.int_scheda[text()*=Editore]/..' @@ -231,7 +252,19 @@ #cover image_url = nil if cover_link - image_url = "#{SITE}#{cover_link}" + if cover_link =~ /^http/ + # e.g. http://images.btol.com/ContentCafe/Jacket.aspx?\ + # Return=1&Type=M&Value=9788873641803&password=\ + # CC70580&userID=DEA40305 + # seems not to work, or to be blank anyway, so set to nil + image_url = nil + elsif cover_link[0..0] != '/' + image_url = "#{SITE}/#{cover_link}" + else + image_url = "#{SITE}#{cover_link}" + end + + log.debug { "Cover Image URL:: #{image_url}" } end book = Book.new(title, authors, isbn, publisher, publish_year, binding) @@ -239,7 +272,7 @@ return [book, image_url] rescue Exception => ex trace = ex.backtrace.join("\n> ") - log.error { "Failed parsing Siciliano product page #{ex.message}\n#{trace}" } + log.error { "Failed parsing DeaStore product page #{ex.message}\n#{trace}" } return nil end end