bkabrda / rpms / grep

Forked from rpms/grep 6 years ago
Clone

Blame grep-2.6.3-dfa-speedup-digit-xdigit.patch

6ccb107
From ebca24d6c9eb12f91eed3993de65945ee97dd467 Mon Sep 17 00:00:00 2001
6ccb107
From: Paolo Bonzini <bonzini@gnu.org>
6ccb107
Date: Tue, 4 May 2010 18:07:28 +0200
6ccb107
Subject: [PATCH] dfa: speed up [[:digit:]] and [[:xdigit:]]
6ccb107
6ccb107
There's no "multibyte pain" in these two classes, since POSIX
6ccb107
and ISO C99 mandate their contents.
6ccb107
6ccb107
Time for "./grep -x '[[:digit:]]' /usr/share/dict/linux.words"
6ccb107
Before: 1.5s, after: 0.07s.  (sed manages only 0.5s).
6ccb107
6ccb107
* src/dfa.c (prednames): Declare struct dfa_ctype separately
6ccb107
from definition.  Add sb_only.
6ccb107
(find_pred): Return const struct dfa_ctype *.
6ccb107
(parse_bracket_exp): Use const struct dfa_ctype *.  Do
6ccb107
not fill MBCSET for sb_only character types.
6ccb107
---
6ccb107
 src/dfa.c |   55 ++++++++++++++++++++++++++++---------------------------
6ccb107
 1 files changed, 28 insertions(+), 27 deletions(-)
6ccb107
6ccb107
diff --git a/src/dfa.c b/src/dfa.c
6ccb107
index 4dd26c9..da5a306 100644
6ccb107
--- a/src/dfa.c
6ccb107
+++ b/src/dfa.c
6ccb107
@@ -429,26 +429,29 @@ typedef int predicate (int);
6ccb107
 /* The following list maps the names of the Posix named character classes
6ccb107
    to predicate functions that determine whether a given character is in
6ccb107
    the class.  The leading [ has already been eaten by the lexical analyzer. */
6ccb107
-static struct {
6ccb107
+struct dfa_ctype {
6ccb107
   const char *name;
6ccb107
-  predicate *pred;
6ccb107
-} const prednames[] = {
6ccb107
-  { "alpha", is_alpha },
6ccb107
-  { "upper", is_upper },
6ccb107
-  { "lower", is_lower },
6ccb107
-  { "digit", is_digit },
6ccb107
-  { "xdigit", is_xdigit },
6ccb107
-  { "space", is_space },
6ccb107
-  { "punct", is_punct },
6ccb107
-  { "alnum", is_alnum },
6ccb107
-  { "print", is_print },
6ccb107
-  { "graph", is_graph },
6ccb107
-  { "cntrl", is_cntrl },
6ccb107
-  { "blank", is_blank },
6ccb107
-  { NULL, NULL }
6ccb107
+  predicate *func;
6ccb107
+  bool sb_only;
6ccb107
 };
6ccb107
 
6ccb107
-static predicate *
6ccb107
+static const struct dfa_ctype prednames[] = {
6ccb107
+  { "alpha", isalpha, false },
6ccb107
+  { "upper", isupper, false },
6ccb107
+  { "lower", islower, false },
6ccb107
+  { "digit", isdigit, true },
6ccb107
+  { "xdigit", isxdigit, true },
6ccb107
+  { "space", isspace, false },
6ccb107
+  { "punct", ispunct, false },
6ccb107
+  { "alnum", isalnum, false },
6ccb107
+  { "print", isprint, false },
6ccb107
+  { "graph", isgraph, false },
6ccb107
+  { "cntrl", iscntrl, false },
6ccb107
+  { "blank", isblank, false },
6ccb107
+  { NULL, NULL, false }
6ccb107
+};
6ccb107
+
6ccb107
+static const struct dfa_ctype *
6ccb107
 find_pred (const char *str)
6ccb107
 {
6ccb107
   unsigned int i;
6ccb107
@@ -456,7 +459,7 @@ find_pred (const char *str)
6ccb107
     if (!strcmp(str, prednames[i].name))
6ccb107
       break;
6ccb107
 
6ccb107
-  return prednames[i].pred;
6ccb107
+  return &prednames[i];
6ccb107
 }
6ccb107
 
6ccb107
 /* Multibyte character handling sub-routine for lex.
6ccb107
@@ -553,8 +556,11 @@ parse_bracket_exp (void)
6ccb107
 				     || !strcmp (str, "lower"))
6ccb107
 				       ? "alpha"
6ccb107
 				       : str);
6ccb107
+                  const struct dfa_ctype *pred = find_pred (class);
6ccb107
+                  if (!pred)
6ccb107
+                    dfaerror(_("invalid character class"));
6ccb107
 #ifdef MBS_SUPPORT
6ccb107
-                  if (MB_CUR_MAX > 1)
6ccb107
+                  if (MB_CUR_MAX > 1 && !pred->sb_only)
6ccb107
                     {
6ccb107
 		      /* Store the character class as wctype_t.  */
6ccb107
                       wctype_t wt = wctype (class);
6ccb107
@@ -568,14 +574,9 @@ parse_bracket_exp (void)
6ccb107
                     }
6ccb107
 #endif
6ccb107
 
6ccb107
-                  {
6ccb107
-                    predicate *pred = find_pred (class);
6ccb107
-                    if (!pred)
6ccb107
-                      dfaerror(_("invalid character class"));
6ccb107
-                    for (c2 = 0; c2 < NOTCHAR; ++c2)
6ccb107
-                      if ((*pred)(c2))
6ccb107
-                        setbit_case_fold (c2, ccl);
6ccb107
-                  }
6ccb107
+                  for (c2 = 0; c2 < NOTCHAR; ++c2)
6ccb107
+                    if (pred->func(c2))
6ccb107
+                      setbit_case_fold (c2, ccl);
6ccb107
                 }
6ccb107
 
6ccb107
 #ifdef MBS_SUPPORT
6ccb107
-- 
6ccb107
1.6.6.1
6ccb107