From d83879975432cc1dda2e143a6d85bb4121c3d2ce Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Beno=C3=AEt=20Latinier?= <benoit@latinier.fr>
Date: Tue, 20 Feb 2018 22:50:48 +0100
Subject: [PATCH 12/12] fix encoding problem (#32)
---
.gitignore | 2 +-
CHANGELOG.md | 1 +
hunspell.cpp | 15 +++++++++++----
setup.py | 2 +-
tests/test_hunspell.py | 4 ++++
5 files changed, 18 insertions(+), 6 deletions(-)
diff --git a/.gitignore b/.gitignore
index b0ded7e..b961bfb 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,5 +1,5 @@
.pyc
-venv
+venv*
__pycache__
MANIFEST
dist
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 357f217..4a1ba5f 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -3,6 +3,7 @@
## 0.5.4 (???)
### Bug fixes
- Fix a memory leak at instanciation (issue #39 fixed by @Far3t)
+- Fix an encoding problem (issue #32)
## 0.5.3 (2018-02-02)
### Improvements
diff --git a/hunspell.cpp b/hunspell.cpp
index 6650641..7c5cb71 100644
--- a/hunspell.cpp
+++ b/hunspell.cpp
@@ -152,8 +152,9 @@ static PyObject *
HunSpell_suggest(HunSpell * self, PyObject *args)
{
char *word, **slist;
- int i, num_slist, ret;
+ int i, num_slist, ret, str_size;
PyObject *slist_list, *pystr;
+ PyObject *etype, *evalue, *etrace;
if (!PyArg_ParseTuple(args, "et", self->encoding, &word))
return NULL;
@@ -166,9 +167,15 @@ HunSpell_suggest(HunSpell * self, PyObject *args)
PyMem_Free(word);
for (i = 0, ret = 0; !ret && i < num_slist; i++) {
- pystr = PyUnicode_FromString(slist[i]);
- if (!pystr)
- break;
+ str_size = strlen(slist[i]);
+ pystr = PyUnicode_DecodeUTF8(slist[i], str_size, "strict"); /* try UTF-8 first */
+ if (!pystr) { /* not valid UTF-8: discard that error and retry as Latin-1 */
+ PyErr_Fetch(&etype, &evalue, &etrace); /* clears the error; we now own all three refs */
+ Py_XDECREF(etype); Py_XDECREF(evalue); Py_XDECREF(etrace); /* any of them may be NULL */
+ pystr = PyUnicode_DecodeLatin1(slist[i], str_size, "strict");
+ if (!pystr)
+ break;
+ }
ret = PyList_Append(slist_list, pystr);
Py_DECREF(pystr);
}
diff --git a/setup.py b/setup.py
index 656c938..1b65ab5 100755
--- a/setup.py
+++ b/setup.py
@@ -43,7 +43,7 @@ else:
main = Extension('hunspell', **main_module_kwargs)
setup(name="hunspell",
- version="0.5.3",
+ version="0.5.4",
description="Module for the Hunspell spellchecker engine",
author="Benoît Latinier",
author_email="benoit@latinier.fr",
diff --git a/tests/test_hunspell.py b/tests/test_hunspell.py
index 320f8ad..cdfd08d 100644
--- a/tests/test_hunspell.py
+++ b/tests/test_hunspell.py
@@ -1,3 +1,4 @@
+#-*- coding: utf-8 -*-
import os
import unittest
from hunspell import HunSpell, HunSpellError
@@ -28,6 +29,9 @@ class HunSpellTest(unittest.TestCase):
self.assertEqual(self.hunspell.suggest('spookie'),
['spookier', 'spookiness', 'spook', 'cookie',
'bookie', 'Spokane', 'spoken'])
+ self.assertEqual(self.hunspell.suggest('Eelysa'),
+ ['Elyssa', 'Elysees', 'Elysha', 'Elysia',
+ 'Elissa', 'Elysée'])
def test_hunspell_stem(self):
self.assertEqual(self.hunspell.stem('dog'), [b'dog'])
--
2.14.3