From 31e74d82f85e1b4a5d2aca813dd571cc14fe15b1 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Sat, 10 Aug 2019 07:15:35 +0530
Subject: [PATCH 48/71] PML Input: Modernize the generated HTML a bit. Fixes
#1839689 [Some PML conversions cause FlightCrew
errors](https://bugs.launchpad.net/calibre/+bug/1839689)
---
.../ebooks/conversion/plugins/pml_input.py | 20 +++++++++++++++++++
src/calibre/ebooks/pml/pmlconverter.py | 4 ++--
2 files changed, 22 insertions(+), 2 deletions(-)
diff --git a/src/calibre/ebooks/conversion/plugins/pml_input.py b/src/calibre/ebooks/conversion/plugins/pml_input.py
index 31809ab659..927b7747df 100644
--- a/src/calibre/ebooks/conversion/plugins/pml_input.py
+++ b/src/calibre/ebooks/conversion/plugins/pml_input.py
@@ -143,3 +143,23 @@ class PMLInput(InputFormatPlugin):
opf.render(opffile, tocfile, 'toc.ncx')
return os.path.join(getcwd(), 'metadata.opf')
+
+ def postprocess_book(self, oeb, opts, log):  # Simplify heading markup in every spine item of oeb; opts and log are not used in this body.
+ from calibre.ebooks.oeb.base import XHTML, barename  # function-scope import; XHTML presumably namespace-qualifies a tag name and barename strips it -- confirm in oeb.base
+ for item in oeb.spine:
+ if hasattr(item.data, 'xpath'):  # only handle items whose data exposes .xpath (presumably parsed lxml trees)
+ for heading in item.data.iterdescendants(*map(XHTML, 'h1 h2 h3 h4 h5 h6'.split())):  # visit every h1-h6 descendant
+ if not len(heading):  # heading has no child elements, so there is nothing to fold into it
+ continue
+ span = heading[0]  # first child element; its tag is only checked on the next line
+ if not heading.text and not span.text and not len(span) and barename(span.tag) == 'span':  # empty leading <span> (no text, no children) with no heading text before it
+ if not heading.get('id') and span.get('id'):  # never overwrite an id the heading already has
+ heading.set('id', span.get('id'))  # NOTE(review): indentation is lost in this copy of the patch; confirm whether the next two statements belong to the id check or to the span check
+ heading.text = span.tail  # text that followed the span becomes the heading's leading text
+ heading.remove(span)  # tail was saved above because lxml's remove() drops the child's tail along with the child
+ if len(heading) == 1 and heading[0].get('style') == 'text-align: center; margin: auto;':  # sole child carries exactly this centering style string
+ div = heading[0]
+ if barename(div.tag) == 'div' and not len(div) and not div.get('id') and not heading.get('style'):  # fold only a childless, id-less <div>, and only when the heading has no style to overwrite
+ heading.text = (heading.text or '') + (div.text or '') + (div.tail or '')  # merge heading text, div text and div tail, treating None as ''
+ heading.remove(div)
+ heading.set('style', 'text-align: center')  # centering moves onto the heading; 'margin: auto' is not carried over
diff --git a/src/calibre/ebooks/pml/pmlconverter.py b/src/calibre/ebooks/pml/pmlconverter.py
index 4bef9a306b..23ee41ba07 100644
--- a/src/calibre/ebooks/pml/pmlconverter.py
+++ b/src/calibre/ebooks/pml/pmlconverter.py
@@ -220,7 +220,7 @@ class PML_HTMLizer(object):
return html
def cleanup_html_remove_redundant(self, html):
- for key in self.STATES_TAGS.keys():
+ for key in self.STATES_TAGS:
open, close = self.STATES_TAGS[key]
if key in self.STATES_VALUE_REQ:
html = re.sub(r'(?u)%s\s*%s' % (open % '.*?', close), '', html)
@@ -618,7 +618,7 @@ class PML_HTMLizer(object):
pass
elif c == 'w':
empty = False
- text = '<hr width="%s" />' % self.code_value(line)
+ text = '<hr style="width: %s" />' % self.code_value(line)
elif c == 't':
indent_state['t'] = not indent_state['t']
elif c == 'T':