zbyszek / rpms / pyhunspell

Forked from rpms/pyhunspell 6 years ago
Clone
Blob Blame History Raw
From d83879975432cc1dda2e143a6d85bb4121c3d2ce Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Beno=C3=AEt=20Latinier?= <benoit@latinier.fr>
Date: Tue, 20 Feb 2018 22:50:48 +0100
Subject: [PATCH 12/12] fix encoding problem (#32)

---
 .gitignore             |  2 +-
 CHANGELOG.md           |  1 +
 hunspell.cpp           | 15 +++++++++++----
 setup.py               |  2 +-
 tests/test_hunspell.py |  4 ++++
 5 files changed, 18 insertions(+), 6 deletions(-)

diff --git a/.gitignore b/.gitignore
index b0ded7e..b961bfb 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,5 +1,5 @@
 .pyc
-venv
+venv*
 __pycache__
 MANIFEST
 dist
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 357f217..4a1ba5f 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -3,6 +3,7 @@
 ## 0.5.4 (???)
 ### Bug fixes
 - Fix a memory leak at instanciation (issue #39 fixed by @Far3t)
+- Fix an encoding problem (issue #32)
 
 ## 0.5.3 (2018-02-02)
 ### Improvements
diff --git a/hunspell.cpp b/hunspell.cpp
index 6650641..7c5cb71 100644
--- a/hunspell.cpp
+++ b/hunspell.cpp
@@ -152,8 +152,9 @@ static PyObject *
 HunSpell_suggest(HunSpell * self, PyObject *args)
 {
     char *word, **slist;
-    int i, num_slist, ret;
+    int i, num_slist, ret, str_size;
     PyObject *slist_list, *pystr;
+    PyObject *etype, *evalue, *etrace;
 
     if (!PyArg_ParseTuple(args, "et", self->encoding, &word))
         return NULL;
@@ -166,9 +167,15 @@ HunSpell_suggest(HunSpell * self, PyObject *args)
     PyMem_Free(word);
 
     for (i = 0, ret = 0; !ret && i < num_slist; i++) {
-        pystr = PyUnicode_FromString(slist[i]);
-        if (!pystr)
-            break;
+        str_size = strlen(slist[i]);
+        pystr = PyUnicode_DecodeUTF8(slist[i], str_size, "strict");
+        if (!pystr) {
+            PyErr_Fetch(&etype, &evalue, &etrace);
+            Py_DECREF(etype);
+            pystr = PyUnicode_DecodeLatin1(slist[i], str_size, "strict");
+            if (!pystr)
+                break;
+        }
         ret = PyList_Append(slist_list, pystr);
         Py_DECREF(pystr);
     }
diff --git a/setup.py b/setup.py
index 656c938..1b65ab5 100755
--- a/setup.py
+++ b/setup.py
@@ -43,7 +43,7 @@ else:
 main = Extension('hunspell', **main_module_kwargs)
 
 setup(name="hunspell",
-      version="0.5.3",
+      version="0.5.4",
       description="Module for the Hunspell spellchecker engine",
       author="Benoît Latinier",
       author_email="benoit@latinier.fr",
diff --git a/tests/test_hunspell.py b/tests/test_hunspell.py
index 320f8ad..cdfd08d 100644
--- a/tests/test_hunspell.py
+++ b/tests/test_hunspell.py
@@ -1,3 +1,4 @@
+#-*- coding: utf-8 -*-
 import os
 import unittest
 from hunspell import HunSpell, HunSpellError
@@ -28,6 +29,9 @@ class HunSpellTest(unittest.TestCase):
         self.assertEqual(self.hunspell.suggest('spookie'),
                          ['spookier', 'spookiness', 'spook', 'cookie',
                           'bookie', 'Spokane', 'spoken'])
+        self.assertEqual(self.hunspell.suggest('Eelysa'),
+                         ['Elyssa', 'Elysees', 'Elysha', 'Elysia',
+                          'Elissa', 'Elysée'])
 
     def test_hunspell_stem(self):
         self.assertEqual(self.hunspell.stem('dog'), [b'dog'])
-- 
2.14.3