From d83879975432cc1dda2e143a6d85bb4121c3d2ce Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Beno=C3=AEt=20Latinier?= Date: Tue, 20 Feb 2018 22:50:48 +0100 Subject: [PATCH 12/12] fix encoding problem (#32) --- .gitignore | 2 +- CHANGELOG.md | 1 + hunspell.cpp | 15 +++++++++++---- setup.py | 2 +- tests/test_hunspell.py | 4 ++++ 5 files changed, 18 insertions(+), 6 deletions(-) diff --git a/.gitignore b/.gitignore index b0ded7e..b961bfb 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,5 @@ .pyc -venv +venv* __pycache__ MANIFEST dist diff --git a/CHANGELOG.md b/CHANGELOG.md index 357f217..4a1ba5f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,7 @@ ## 0.5.4 (???) ### Bug fixes - Fix a memory leak at instanciation (issue #39 fixed by @Far3t) +- Fix an encoding problem (issue #32) ## 0.5.3 (2018-02-02) ### Improvements diff --git a/hunspell.cpp b/hunspell.cpp index 6650641..7c5cb71 100644 --- a/hunspell.cpp +++ b/hunspell.cpp @@ -152,8 +152,9 @@ static PyObject * HunSpell_suggest(HunSpell * self, PyObject *args) { char *word, **slist; - int i, num_slist, ret; + int i, num_slist, ret, str_size; PyObject *slist_list, *pystr; + PyObject *etype, *evalue, *etrace; if (!PyArg_ParseTuple(args, "et", self->encoding, &word)) return NULL; @@ -166,9 +167,15 @@ HunSpell_suggest(HunSpell * self, PyObject *args) PyMem_Free(word); for (i = 0, ret = 0; !ret && i < num_slist; i++) { - pystr = PyUnicode_FromString(slist[i]); - if (!pystr) - break; + str_size = strlen(slist[i]); + pystr = PyUnicode_DecodeUTF8(slist[i], str_size, "strict"); + if (!pystr) { + PyErr_Fetch(&etype, &evalue, &etrace); + Py_DECREF(etype); + pystr = PyUnicode_DecodeLatin1(slist[i], str_size, "strict"); + if (!pystr) + break; + } ret = PyList_Append(slist_list, pystr); Py_DECREF(pystr); } diff --git a/setup.py b/setup.py index 656c938..1b65ab5 100755 --- a/setup.py +++ b/setup.py @@ -43,7 +43,7 @@ else: main = Extension('hunspell', **main_module_kwargs) setup(name="hunspell", - version="0.5.3", + version="0.5.4", description="Module for the Hunspell spellchecker engine", author="Benoît Latinier", author_email="benoit@latinier.fr", diff --git a/tests/test_hunspell.py b/tests/test_hunspell.py index 320f8ad..cdfd08d 100644 --- a/tests/test_hunspell.py +++ b/tests/test_hunspell.py @@ -1,3 +1,4 @@ +#-*- coding: utf-8 -*- import os import unittest from hunspell import HunSpell, HunSpellError @@ -28,6 +29,9 @@ class HunSpellTest(unittest.TestCase): self.assertEqual(self.hunspell.suggest('spookie'), ['spookier', 'spookiness', 'spook', 'cookie', 'bookie', 'Spokane', 'spoken']) + self.assertEqual(self.hunspell.suggest('Eelysa'), + ['Elyssa', 'Elysees', 'Elysha', 'Elysia', + 'Elissa', 'Elysée']) def test_hunspell_stem(self): self.assertEqual(self.hunspell.stem('dog'), [b'dog']) -- 2.14.3