diff -ur grep-2.5.1.orig/lib/regex.c grep-2.5.1/lib/regex.c --- grep-2.5.1.orig/lib/regex.c 2001-04-02 20:04:45.000000000 +0200 +++ grep-2.5.1/lib/regex.c 2002-08-14 12:27:01.000000000 +0200 @@ -60,6 +60,10 @@ #ifdef MBS_SUPPORT # define CHAR_TYPE wchar_t # define US_CHAR_TYPE wchar_t/* unsigned character type */ +# define CHAR_T_SIGN (1 << (sizeof(CHAR_TYPE) * 8 - 1)) +# if defined _AIX +# define WCHAR_T_NEED_SIGNEXTEND 1 +# endif /* _AIX */ # define COMPILED_BUFFER_VAR wc_buffer # define OFFSET_ADDRESS_SIZE 1 /* the size which STORE_NUMBER macro use */ # define CHAR_CLASS_SIZE ((__alignof__(wctype_t)+sizeof(wctype_t))/sizeof(CHAR_TYPE)+1) @@ -618,10 +622,13 @@ /* ifdef MBS_SUPPORT, we store NUMBER in 1 element. */ #ifdef MBS_SUPPORT -# define EXTRACT_NUMBER(destination, source) \ - do { \ - (destination) = *(source); \ - } while (0) +# ifdef WCHAR_T_NEED_SIGNEXTEND +# define EXTRACT_NUMBER(destination, source) \ + (destination) = (*(source) ^ CHAR_T_SIGN) - CHAR_T_SIGN; +# else +# define EXTRACT_NUMBER(destination, source) \ + (destination) = *(source) +# endif /* WCHAR_T_NEED_SIGNEXTEND */ #else # define EXTRACT_NUMBER(destination, source) \ do { \ @@ -638,7 +645,11 @@ US_CHAR_TYPE *source; { #ifdef MBS_SUPPORT +# ifdef WCHAR_T_NEED_SIGNEXTEND + *dest = (*source ^ CHAR_T_SIGN) - CHAR_T_SIGN; +# else *dest = *source; +# endif /* WCHAR_T_NEED_SIGNEXTEND */ #else int temp = SIGN_EXTEND_CHAR (*(source + 1)); *dest = *source & 0377; diff -ur grep-2.5.1.orig/src/dfa.c grep-2.5.1/src/dfa.c --- grep-2.5.1.orig/src/dfa.c 2001-09-26 18:57:55.000000000 +0200 +++ grep-2.5.1/src/dfa.c 2002-08-14 12:27:01.000000000 +0200 @@ -414,7 +414,7 @@ /* This function fetch a wide character, and update cur_mb_len, used only if the current locale is a multibyte environment. */ -static wchar_t +static wint_t fetch_wc (char const *eoferr) { wchar_t wc; @@ -423,7 +423,7 @@ if (eoferr != 0) dfaerror (eoferr); else - return -1; + return WEOF; } cur_mb_len = mbrtowc(&wc, lexptr, lexleft, &mbs); @@ -459,7 +459,7 @@ static void parse_bracket_exp_mb () { - wchar_t wc, wc1, wc2; + wint_t wc, wc1, wc2; /* Work area to build a mb_char_classes. */ struct mb_char_classes *work_mbc; @@ -496,7 +496,7 @@ work_mbc->invert = 0; do { - wc1 = -1; /* mark wc1 is not initialized". */ + wc1 = WEOF; /* mark wc1 is not initialized". */ /* Note that if we're looking at some other [:...:] construct, we just treat it as a bunch of ordinary characters. We can do @@ -586,7 +586,7 @@ work_mbc->coll_elems[work_mbc->ncoll_elems++] = elem; } } - wc = -1; + wc = WEOF; } else /* We treat '[' as a normal character here. */ @@ -600,7 +600,7 @@ wc = fetch_wc(("Unbalanced [")); } - if (wc1 == -1) + if (wc1 == WEOF) wc1 = fetch_wc(_("Unbalanced [")); if (wc1 == L'-') @@ -630,17 +630,17 @@ } REALLOC_IF_NECESSARY(work_mbc->range_sts, wchar_t, range_sts_al, work_mbc->nranges + 1); - work_mbc->range_sts[work_mbc->nranges] = wc; + work_mbc->range_sts[work_mbc->nranges] = (wchar_t)wc; REALLOC_IF_NECESSARY(work_mbc->range_ends, wchar_t, range_ends_al, work_mbc->nranges + 1); - work_mbc->range_ends[work_mbc->nranges++] = wc2; + work_mbc->range_ends[work_mbc->nranges++] = (wchar_t)wc2; } - else if (wc != -1) + else if (wc != WEOF) /* build normal characters. */ { REALLOC_IF_NECESSARY(work_mbc->chars, wchar_t, chars_al, work_mbc->nchars + 1); - work_mbc->chars[work_mbc->nchars++] = wc; + work_mbc->chars[work_mbc->nchars++] = (wchar_t)wc; } } while ((wc = wc1) != L']'); diff -ur grep-2.5.1.orig/src/grep.c grep-2.5.1/src/grep.c --- grep-2.5.1.orig/src/grep.c 2002-03-26 16:54:12.000000000 +0100 +++ grep-2.5.1/src/grep.c 2002-08-14 12:27:01.000000000 +0200 @@ -30,6 +30,12 @@ # include # include #endif +#if defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H && defined HAVE_MBRTOWC +/* We can handle multibyte string. */ +# define MBS_SUPPORT +# include +# include +#endif #include #include "system.h" #include "getopt.h" @@ -1697,6 +1703,37 @@ if (!install_matcher (matcher) && !install_matcher ("default")) abort (); +#ifdef MBS_SUPPORT + if (MB_CUR_MAX != 1 && match_icase) + { + wchar_t wc; + mbstate_t cur_state, prev_state; + int i, len = strlen(keys); + + memset(&cur_state, 0, sizeof(mbstate_t)); + for (i = 0; i <= len ;) + { + size_t mbclen; + mbclen = mbrtowc(&wc, keys + i, len - i, &cur_state); + if (mbclen == (size_t) -1 || mbclen == (size_t) -2 || mbclen == 0) + { + /* An invalid sequence, or a truncated multibyte character. + We treat it as a singlebyte character. */ + mbclen = 1; + } + else + { + if (iswupper((wint_t)wc)) + { + wc = towlower((wint_t)wc); + wcrtomb(keys + i, wc, &cur_state); + } + } + i += mbclen; + } + } +#endif /* MBS_SUPPORT */ + (*compile)(keys, keycc); if ((argc - optind > 1 && !no_filenames) || with_filenames) diff -ur grep-2.5.1.orig/src/search.c grep-2.5.1/src/search.c --- grep-2.5.1.orig/src/search.c 2001-04-19 05:42:14.000000000 +0200 +++ grep-2.5.1/src/search.c 2002-08-14 12:27:51.000000000 +0200 @@ -149,15 +149,16 @@ static char* check_multibyte_string(char const *buf, size_t size) { - char *mb_properties = malloc(size); + char *mb_properties = xmalloc(size); mbstate_t cur_state; + wchar_t wc; int i; memset(&cur_state, 0, sizeof(mbstate_t)); memset(mb_properties, 0, sizeof(char)*size); for (i = 0; i < size ;) { size_t mbclen; - mbclen = mbrlen(buf + i, size - i, &cur_state); + mbclen = mbrtowc(&wc, buf + i, size - i, &cur_state); if (mbclen == (size_t) -1 || mbclen == (size_t) -2 || mbclen == 0) { @@ -165,6 +166,14 @@ We treat it as a singlebyte character. */ mbclen = 1; } + else if (match_icase) + { + if (iswupper((wint_t)wc)) + { + wc = towlower((wint_t)wc); + wcrtomb(buf + i, wc, &cur_state); + } + } mb_properties[i] = mbclen; i += mbclen; } @@ -233,7 +242,7 @@ static char const line_end[] = "\\)$"; static char const word_beg[] = "\\(^\\|[^[:alnum:]_]\\)\\("; static char const word_end[] = "\\)\\([^[:alnum:]_]\\|$\\)"; - char *n = malloc (sizeof word_beg - 1 + size + sizeof word_end); + char *n = xmalloc (sizeof word_beg - 1 + size + sizeof word_end); size_t i; strcpy (n, match_lines ? line_beg : word_beg); i = strlen (n); @@ -316,7 +325,7 @@ static char const line_end[] = ")$"; static char const word_beg[] = "(^|[^[:alnum:]_])("; static char const word_end[] = ")([^[:alnum:]_]|$)"; - char *n = malloc (sizeof word_beg - 1 + size + sizeof word_end); + char *n = xmalloc (sizeof word_beg - 1 + size + sizeof word_end); size_t i; strcpy (n, match_lines ? line_beg : word_beg); i = strlen(n); @@ -339,14 +348,20 @@ char eol = eolbyte; int backref, start, len; struct kwsmatch kwsm; - size_t i; + size_t i, ret_val; #ifdef MBS_SUPPORT char *mb_properties = NULL; -#endif /* MBS_SUPPORT */ - -#ifdef MBS_SUPPORT - if (MB_CUR_MAX > 1 && kwset) - mb_properties = check_multibyte_string(buf, size); + if (MB_CUR_MAX > 1) + { + if (match_icase) + { + char *case_buf = xmalloc(size); + memcpy(case_buf, buf, size); + buf = case_buf; + } + if (kwset) + mb_properties = check_multibyte_string(buf, size); + } #endif /* MBS_SUPPORT */ buflim = buf + size; @@ -363,8 +378,12 @@ { #ifdef MBS_SUPPORT if (MB_CUR_MAX > 1) - free(mb_properties); -#endif + { + if (match_icase) + free ((char*)buf); + free(mb_properties); + } +#endif /* MBS_SUPPORT */ return (size_t)-1; } beg += offset; @@ -462,18 +481,29 @@ } /* for Regex patterns. */ } /* for (beg = end ..) */ #ifdef MBS_SUPPORT - if (MB_CUR_MAX > 1 && mb_properties) - free (mb_properties); + if (MB_CUR_MAX > 1) + { + if (match_icase) + free((char*)buf); + if (mb_properties) + free(mb_properties); + } #endif /* MBS_SUPPORT */ return (size_t) -1; success: + ret_val = beg - buf; #ifdef MBS_SUPPORT - if (MB_CUR_MAX > 1 && mb_properties) - free (mb_properties); + if (MB_CUR_MAX > 1) + { + if (match_icase) + free((char*)buf); + if (mb_properties) + free(mb_properties); + } #endif /* MBS_SUPPORT */ *match_size = end - beg; - return beg - buf; + return ret_val; } static void @@ -506,10 +536,19 @@ register size_t len; char eol = eolbyte; struct kwsmatch kwsmatch; + size_t ret_val; #ifdef MBS_SUPPORT - char *mb_properties; + char *mb_properties = NULL; if (MB_CUR_MAX > 1) - mb_properties = check_multibyte_string (buf, size); + { + if (match_icase) + { + char *case_buf = xmalloc(size); + memcpy(case_buf, buf, size); + buf = case_buf; + } + mb_properties = check_multibyte_string(buf, size); + } #endif /* MBS_SUPPORT */ for (beg = buf; beg <= buf + size; ++beg) @@ -518,8 +557,12 @@ if (offset == (size_t) -1) { #ifdef MBS_SUPPORT - if (MB_CUR_MAX > 1) - free(mb_properties); + if (MB_CUR_MAX > 1) + { + if (match_icase) + free ((char*)buf); + free(mb_properties); + } #endif /* MBS_SUPPORT */ return offset; } @@ -532,11 +575,16 @@ if (exact) { *match_size = len; + ret_val = beg - buf; #ifdef MBS_SUPPORT - if (MB_CUR_MAX > 1) - free (mb_properties); + if (MB_CUR_MAX > 1) + { + if (match_icase) + free ((char*)buf); + free(mb_properties); + } #endif /* MBS_SUPPORT */ - return beg - buf; + return ret_val; } if (match_lines) { @@ -557,8 +605,12 @@ if (offset == (size_t) -1) { #ifdef MBS_SUPPORT - if (MB_CUR_MAX > 1) - free (mb_properties); + if (MB_CUR_MAX > 1) + { + if (match_icase) + free ((char*)buf); + free(mb_properties); + } #endif /* MBS_SUPPORT */ return offset; } @@ -574,7 +626,12 @@ #ifdef MBS_SUPPORT if (MB_CUR_MAX > 1) - free (mb_properties); + { + if (match_icase) + free((char*)buf); + if (mb_properties) + free(mb_properties); + } #endif /* MBS_SUPPORT */ return -1; @@ -584,11 +641,17 @@ while (buf < beg && beg[-1] != eol) --beg; *match_size = end - beg; + ret_val = beg - buf; #ifdef MBS_SUPPORT if (MB_CUR_MAX > 1) - free (mb_properties); + { + if (match_icase) + free((char*)buf); + if (mb_properties) + free(mb_properties); + } #endif /* MBS_SUPPORT */ - return beg - buf; + return ret_val; } #if HAVE_LIBPCRE