From 31e74d82f85e1b4a5d2aca813dd571cc14fe15b1 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Sat, 10 Aug 2019 07:15:35 +0530
Subject: [PATCH 48/71] PML Input: Modernize the generated HTML a bit. Fixes
 #1839689 [Some PML conversions cause FlightCrew
 errors](https://bugs.launchpad.net/calibre/+bug/1839689)

---
 .../ebooks/conversion/plugins/pml_input.py    | 20 +++++++++++++++++++
 src/calibre/ebooks/pml/pmlconverter.py        |  4 ++--
 2 files changed, 22 insertions(+), 2 deletions(-)

diff --git a/src/calibre/ebooks/conversion/plugins/pml_input.py b/src/calibre/ebooks/conversion/plugins/pml_input.py
index 31809ab659..927b7747df 100644
--- a/src/calibre/ebooks/conversion/plugins/pml_input.py
+++ b/src/calibre/ebooks/conversion/plugins/pml_input.py
@@ -143,3 +143,23 @@ class PMLInput(InputFormatPlugin):
                 opf.render(opffile, tocfile, 'toc.ncx')
 
         return os.path.join(getcwd(), 'metadata.opf')
+
+    def postprocess_book(self, oeb, opts, log):  # simplify heading markup in each spine item; opts and log are not used here
+        from calibre.ebooks.oeb.base import XHTML, barename  # NOTE(review): XHTML presumably namespace-qualifies a tag name and barename strips it - confirm
+        for item in oeb.spine:
+            if hasattr(item.data, 'xpath'):  # skip items whose data has no xpath attribute
+                for heading in item.data.iterdescendants(*map(XHTML, 'h1 h2 h3 h4 h5 h6'.split())):
+                    if not len(heading):  # no child elements: nothing to unwrap
+                        continue
+                    span = heading[0]  # first child; the next test checks that it is an empty <span>
+                    if not heading.text and not span.text and not len(span) and barename(span.tag) == 'span':
+                        if not heading.get('id') and span.get('id'):  # heading has no id but the span does: move it up
+                            heading.set('id', span.get('id'))
+                            heading.text = span.tail  # text after the span becomes the heading text
+                            heading.remove(span)
+                    if len(heading) == 1 and heading[0].get('style') == 'text-align: center; margin: auto;':
+                        div = heading[0]  # sole child, carrying exactly the centering style tested above
+                        if barename(div.tag) == 'div' and not len(div) and not div.get('id') and not heading.get('style'):
+                            heading.text = (heading.text or '') + (div.text or '') + (div.tail or '')  # merge div text and tail into the heading
+                            heading.remove(div)
+                            heading.set('style', 'text-align: center')  # centering is now set on the heading itself
diff --git a/src/calibre/ebooks/pml/pmlconverter.py b/src/calibre/ebooks/pml/pmlconverter.py
index 4bef9a306b..23ee41ba07 100644
--- a/src/calibre/ebooks/pml/pmlconverter.py
+++ b/src/calibre/ebooks/pml/pmlconverter.py
@@ -220,7 +220,7 @@ class PML_HTMLizer(object):
         return html
 
     def cleanup_html_remove_redundant(self, html):
-        for key in self.STATES_TAGS.keys():
+        for key in self.STATES_TAGS:
             open, close = self.STATES_TAGS[key]
             if key in self.STATES_VALUE_REQ:
                 html = re.sub(r'(?u)%s\s*%s' % (open % '.*?', close), '', html)
@@ -618,7 +618,7 @@ class PML_HTMLizer(object):
                         pass
                     elif c == 'w':
                         empty = False
-                        text = '<hr width="%s" />' % self.code_value(line)
+                        text = '<hr style="width: %s" />' % self.code_value(line)
                     elif c == 't':
                         indent_state['t'] = not indent_state['t']
                     elif c == 'T':