bkabrda / rpms / grep

Forked from rpms/grep 6 years ago
Clone

Blame grep-2.22-better-encoding-errors-handling.patch

40d7c13
diff --git a/doc/grep.texi b/doc/grep.texi
40d7c13
index e3495bb..41f4fa5 100644
40d7c13
--- a/doc/grep.texi
40d7c13
+++ b/doc/grep.texi
40d7c13
@@ -596,13 +596,13 @@ If a file's allocation metadata,
40d7c13
 or if its data read before a line is selected for output,
40d7c13
 indicate that the file contains binary data,
40d7c13
 assume that the file is of type @var{type}.
40d7c13
-Non-text bytes indicate binary data; these are either data bytes
40d7c13
-improperly encoded for the current locale, or null bytes when the
40d7c13
+Non-text bytes indicate binary data; these are either output bytes that are
40d7c13
+improperly encoded for the current locale, or null input bytes when the
40d7c13
 @option{-z} (@option{--null-data}) option is not given (@pxref{Other
40d7c13
 Options}).
40d7c13
 
40d7c13
-By default, @var{type} is @samp{binary},
40d7c13
-and @command{grep} normally outputs either
40d7c13
+By default, @var{type} is @samp{binary}, and when @command{grep}
40d7c13
+discovers that a file is binary it normally outputs either
40d7c13
 a one-line message saying that a binary file matches,
40d7c13
 or no message if there is no match.
40d7c13
 When processing binary data, @command{grep} may treat non-text bytes
40d7c13
@@ -611,7 +611,8 @@ not match a null byte, as the null byte might be treated as a line
40d7c13
 terminator even without the @option{-z} (@option{--null-data}) option.
40d7c13
 
40d7c13
 If @var{type} is @samp{without-match},
40d7c13
-@command{grep} assumes that a binary file does not match;
40d7c13
+when @command{grep} discovers that a file is binary
40d7c13
+it assumes that the rest of the file does not match;
40d7c13
 this is equivalent to the @option{-I} option.
40d7c13
 
40d7c13
 If @var{type} is @samp{text},
40d7c13
diff --git a/src/grep.c b/src/grep.c
97a0815
index 2748fd3..eac540a 100644
40d7c13
--- a/src/grep.c
40d7c13
+++ b/src/grep.c
40d7c13
@@ -377,7 +377,6 @@ bool match_icase;
40d7c13
 bool match_words;
40d7c13
 bool match_lines;
40d7c13
 char eolbyte;
40d7c13
-enum textbin input_textbin;
40d7c13
 
40d7c13
 static char const *matcher;
40d7c13
 
40d7c13
@@ -389,6 +388,10 @@ static bool omit_dot_slash;
40d7c13
 static bool errseen;
40d7c13
 static bool write_error_seen;
40d7c13
 
40d7c13
+/* True if output from the current input file has been suppressed
40d7c13
+   because an output line had an encoding error.  */
40d7c13
+static bool encoding_error_output;
40d7c13
+
40d7c13
 enum directories_type
40d7c13
   {
40d7c13
     READ_DIRECTORIES = 2,
40d7c13
@@ -481,12 +484,6 @@ clean_up_stdout (void)
40d7c13
     close_stdout ();
40d7c13
 }
40d7c13
 
40d7c13
-static bool
40d7c13
-textbin_is_binary (enum textbin textbin)
40d7c13
-{
40d7c13
-  return textbin < TEXTBIN_UNKNOWN;
40d7c13
-}
40d7c13
-
40d7c13
 /* The high-order bit of a byte.  */
40d7c13
 enum { HIBYTE = 0x80 };
40d7c13
 
40d7c13
@@ -551,58 +548,60 @@ skip_easy_bytes (char const *buf)
40d7c13
   return p;
40d7c13
 }
40d7c13
 
40d7c13
-/* Return the text type of data in BUF, of size SIZE.
40d7c13
+/* Return true if BUF, of size SIZE, has an encoding error.
40d7c13
    BUF must be followed by at least sizeof (uword) bytes,
40d7c13
-   which may be arbitrarily written to or read from.  */
40d7c13
-static enum textbin
40d7c13
-buffer_textbin (char *buf, size_t size)
40d7c13
+   the first of which may be modified.  */
40d7c13
+static bool
40d7c13
+buf_has_encoding_errors (char *buf, size_t size)
40d7c13
 {
40d7c13
-  if (eolbyte && memchr (buf, '\0', size))
40d7c13
-    return TEXTBIN_BINARY;
40d7c13
+  if (MB_CUR_MAX <= 1)
40d7c13
+    return false;
40d7c13
 
40d7c13
-  if (1 < MB_CUR_MAX)
40d7c13
-    {
40d7c13
-      mbstate_t mbs = { 0 };
40d7c13
-      size_t clen;
40d7c13
-      char const *p;
40d7c13
+  mbstate_t mbs = { 0 };
40d7c13
+  size_t clen;
40d7c13
 
40d7c13
-      buf[size] = -1;
40d7c13
-      for (p = buf; (p = skip_easy_bytes (p)) < buf + size; p += clen)
40d7c13
-        {
40d7c13
-          clen = mbrlen (p, buf + size - p, &mbs);
40d7c13
-          if ((size_t) -2 <= clen)
40d7c13
-            return clen == (size_t) -2 ? TEXTBIN_UNKNOWN : TEXTBIN_BINARY;
40d7c13
-        }
40d7c13
+  buf[size] = -1;
40d7c13
+  for (char const *p = buf; (p = skip_easy_bytes (p)) < buf + size; p += clen)
40d7c13
+    {
40d7c13
+      clen = mbrlen (p, buf + size - p, &mbs);
40d7c13
+      if ((size_t) -2 <= clen)
40d7c13
+        return true;
40d7c13
     }
40d7c13
 
40d7c13
-  return TEXTBIN_TEXT;
40d7c13
+  return false;
40d7c13
 }
40d7c13
 
40d7c13
-/* Return the text type of a file.  BUF, of size SIZE, is the initial
40d7c13
-   buffer read from the file with descriptor FD and status ST.
40d7c13
-   BUF must be followed by at least sizeof (uword) bytes,
40d7c13
+
40d7c13
+/* Return true if BUF, of size SIZE, has a null byte.
40d7c13
+   BUF must be followed by at least one byte,
40d7c13
    which may be arbitrarily written to or read from.  */
40d7c13
-static enum textbin
40d7c13
-file_textbin (char *buf, size_t size, int fd, struct stat const *st)
40d7c13
+static bool
40d7c13
+buf_has_nulls (char *buf, size_t size)
40d7c13
 {
40d7c13
-  enum textbin textbin = buffer_textbin (buf, size);
40d7c13
-  if (textbin_is_binary (textbin))
40d7c13
-    return textbin;
40d7c13
+  buf[size] = 0;
40d7c13
+  return strlen (buf) != size;
40d7c13
+}
40d7c13
 
40d7c13
+/* Return true if a file is known to contain null bytes.
40d7c13
+   SIZE bytes have already been read from the file
40d7c13
+   with descriptor FD and status ST.  */
40d7c13
+static bool
40d7c13
+file_must_have_nulls (size_t size, int fd, struct stat const *st)
40d7c13
+{
40d7c13
   if (usable_st_size (st))
40d7c13
     {
40d7c13
       if (st->st_size <= size)
40d7c13
-        return textbin == TEXTBIN_UNKNOWN ? TEXTBIN_BINARY : textbin;
40d7c13
+        return false;
40d7c13
 
40d7c13
       /* If the file has holes, it must contain a null byte somewhere.  */
40d7c13
-      if (SEEK_HOLE != SEEK_SET && eolbyte)
40d7c13
+      if (SEEK_HOLE != SEEK_SET)
40d7c13
         {
40d7c13
           off_t cur = size;
40d7c13
           if (O_BINARY || fd == STDIN_FILENO)
40d7c13
             {
40d7c13
               cur = lseek (fd, 0, SEEK_CUR);
40d7c13
               if (cur < 0)
40d7c13
-                return TEXTBIN_UNKNOWN;
40d7c13
+                return false;
40d7c13
             }
40d7c13
 
40d7c13
           /* Look for a hole after the current location.  */
40d7c13
@@ -612,12 +611,12 @@ file_textbin (char *buf, size_t size, int fd, struct stat const *st)
40d7c13
               if (lseek (fd, cur, SEEK_SET) < 0)
40d7c13
                 suppressible_error (filename, errno);
40d7c13
               if (hole_start < st->st_size)
40d7c13
-                return TEXTBIN_BINARY;
40d7c13
+                return true;
40d7c13
             }
40d7c13
         }
40d7c13
     }
40d7c13
 
40d7c13
-  return TEXTBIN_UNKNOWN;
40d7c13
+  return false;
40d7c13
 }
40d7c13
 
40d7c13
 /* Convert STR to a nonnegative integer, storing the result in *OUT.
40d7c13
@@ -888,7 +887,7 @@ static char *label = NULL;      /* Fake filename for stdin */
40d7c13
 /* Internal variables to keep track of byte count, context, etc. */
40d7c13
 static uintmax_t totalcc;	/* Total character count before bufbeg. */
40d7c13
 static char const *lastnl;	/* Pointer after last newline counted. */
40d7c13
-static char const *lastout;	/* Pointer after last character output;
40d7c13
+static char *lastout;		/* Pointer after last character output;
40d7c13
                                    NULL if no character has been output
40d7c13
                                    or if it's conceptually before bufbeg. */
40d7c13
 static intmax_t outleft;	/* Maximum number of lines to be output.  */
40d7c13
@@ -960,10 +959,31 @@ print_offset (uintmax_t pos, int min_width, const char *color)
40d7c13
   pr_sgr_end_if (color);
40d7c13
 }
40d7c13
 
40d7c13
-/* Print a whole line head (filename, line, byte).  */
40d7c13
-static void
40d7c13
-print_line_head (char const *beg, char const *lim, char sep)
40d7c13
+/* Print a whole line head (filename, line, byte).  The output data
40d7c13
+   starts at BEG and contains LEN bytes; it is followed by at least
40d7c13
+   sizeof (uword) bytes, the first of which may be temporarily modified.
40d7c13
+   The output data comes from what is perhaps a larger input line that
40d7c13
+   goes until LIM, where LIM[-1] is an end-of-line byte.  Use SEP as
40d7c13
+   the separator on output.
40d7c13
+
40d7c13
+   Return true unless the line was suppressed due to an encoding error.  */
40d7c13
+
40d7c13
+static bool
40d7c13
+print_line_head (char *beg, size_t len, char const *lim, char sep)
40d7c13
 {
40d7c13
+  bool encoding_errors = false;
40d7c13
+  if (binary_files != TEXT_BINARY_FILES)
40d7c13
+    {
40d7c13
+      char ch = beg[len];
40d7c13
+      encoding_errors = buf_has_encoding_errors (beg, len);
40d7c13
+      beg[len] = ch;
40d7c13
+    }
40d7c13
+  if (encoding_errors)
40d7c13
+    {
40d7c13
+      encoding_error_output = done_on_match = out_quiet = true;
40d7c13
+      return false;
40d7c13
+    }
40d7c13
+
40d7c13
   bool pending_sep = false;
40d7c13
 
40d7c13
   if (out_file)
40d7c13
@@ -1010,22 +1030,27 @@ print_line_head (char const *beg, char const *lim, char sep)
40d7c13
 
40d7c13
       print_sep (sep);
40d7c13
     }
40d7c13
+
40d7c13
+  return true;
40d7c13
 }
40d7c13
 
40d7c13
-static const char *
40d7c13
-print_line_middle (const char *beg, const char *lim,
40d7c13
+static char *
40d7c13
+print_line_middle (char *beg, char *lim,
40d7c13
                    const char *line_color, const char *match_color)
40d7c13
 {
40d7c13
   size_t match_size;
40d7c13
   size_t match_offset;
40d7c13
-  const char *cur = beg;
40d7c13
-  const char *mid = NULL;
40d7c13
-
40d7c13
-  while (cur < lim
40d7c13
-         && ((match_offset = execute (beg, lim - beg, &match_size, cur))
40d7c13
-             != (size_t) -1))
40d7c13
+  char *cur = beg;
40d7c13
+  char *mid = NULL;
40d7c13
+  char *b;
40d7c13
+
40d7c13
+  for (cur = beg;
40d7c13
+       (cur < lim
40d7c13
+        && ((match_offset = execute (beg, lim - beg, &match_size, cur))
40d7c13
+            != (size_t) -1));
40d7c13
+       cur = b + match_size)
40d7c13
     {
40d7c13
-      char const *b = beg + match_offset;
40d7c13
+      b = beg + match_offset;
40d7c13
 
40d7c13
       /* Avoid matching the empty line at the end of the buffer. */
40d7c13
       if (b == lim)
40d7c13
@@ -1045,8 +1070,11 @@ print_line_middle (const char *beg, const char *lim,
40d7c13
           /* This function is called on a matching line only,
40d7c13
              but is it selected or rejected/context?  */
40d7c13
           if (only_matching)
40d7c13
-            print_line_head (b, lim, (out_invert ? SEP_CHAR_REJECTED
40d7c13
-                                      : SEP_CHAR_SELECTED));
40d7c13
+            {
40d7c13
+              char sep = out_invert ? SEP_CHAR_REJECTED : SEP_CHAR_SELECTED;
40d7c13
+              if (! print_line_head (b, match_size, lim, sep))
40d7c13
+                return NULL;
40d7c13
+            }
40d7c13
           else
40d7c13
             {
40d7c13
               pr_sgr_start (line_color);
40d7c13
@@ -1064,7 +1092,6 @@ print_line_middle (const char *beg, const char *lim,
40d7c13
           if (only_matching)
40d7c13
             fputs ("\n", stdout);
40d7c13
         }
40d7c13
-      cur = b + match_size;
40d7c13
     }
40d7c13
 
40d7c13
   if (only_matching)
40d7c13
@@ -1075,8 +1102,8 @@ print_line_middle (const char *beg, const char *lim,
40d7c13
   return cur;
40d7c13
 }
40d7c13
 
40d7c13
-static const char *
40d7c13
-print_line_tail (const char *beg, const char *lim, const char *line_color)
40d7c13
+static char *
40d7c13
+print_line_tail (char *beg, const char *lim, const char *line_color)
40d7c13
 {
40d7c13
   size_t eol_size;
40d7c13
   size_t tail_size;
40d7c13
@@ -1097,14 +1124,15 @@ print_line_tail (const char *beg, const char *lim, const char *line_color)
40d7c13
 }
40d7c13
 
40d7c13
 static void
40d7c13
-prline (char const *beg, char const *lim, char sep)
40d7c13
+prline (char *beg, char *lim, char sep)
40d7c13
 {
40d7c13
   bool matching;
40d7c13
   const char *line_color;
40d7c13
   const char *match_color;
40d7c13
 
40d7c13
   if (!only_matching)
40d7c13
-    print_line_head (beg, lim, sep);
40d7c13
+    if (! print_line_head (beg, lim - beg - 1, lim, sep))
40d7c13
+      return;
40d7c13
 
40d7c13
   matching = (sep == SEP_CHAR_SELECTED) ^ out_invert;
40d7c13
 
40d7c13
@@ -1124,7 +1152,11 @@ prline (char const *beg, char const *lim, char sep)
40d7c13
     {
40d7c13
       /* We already know that non-matching lines have no match (to colorize). */
40d7c13
       if (matching && (only_matching || *match_color))
40d7c13
-        beg = print_line_middle (beg, lim, line_color, match_color);
40d7c13
+        {
40d7c13
+          beg = print_line_middle (beg, lim, line_color, match_color);
40d7c13
+          if (! beg)
40d7c13
+            return;
40d7c13
+        }
40d7c13
 
40d7c13
       if (!only_matching && *line_color)
40d7c13
         {
40d7c13
@@ -1158,7 +1190,7 @@ prpending (char const *lim)
40d7c13
     lastout = bufbeg;
40d7c13
   while (pending > 0 && lastout < lim)
40d7c13
     {
40d7c13
-      char const *nl = memchr (lastout, eolbyte, lim - lastout);
40d7c13
+      char *nl = memchr (lastout, eolbyte, lim - lastout);
40d7c13
       size_t match_size;
40d7c13
       --pending;
40d7c13
       if (outleft
40d7c13
@@ -1173,7 +1205,7 @@ prpending (char const *lim)
40d7c13
 
40d7c13
 /* Output the lines between BEG and LIM.  Deal with context.  */
40d7c13
 static void
40d7c13
-prtext (char const *beg, char const *lim)
40d7c13
+prtext (char *beg, char *lim)
40d7c13
 {
40d7c13
   static bool used;	/* Avoid printing SEP_STR_GROUP before any output.  */
40d7c13
   char eol = eolbyte;
40d7c13
@@ -1181,7 +1213,7 @@ prtext (char const *beg, char const *lim)
40d7c13
   if (!out_quiet && pending > 0)
40d7c13
     prpending (beg);
40d7c13
 
40d7c13
-  char const *p = beg;
40d7c13
+  char *p = beg;
40d7c13
 
40d7c13
   if (!out_quiet)
40d7c13
     {
40d7c13
@@ -1207,7 +1239,7 @@ prtext (char const *beg, char const *lim)
40d7c13
 
40d7c13
       while (p < beg)
40d7c13
         {
40d7c13
-          char const *nl = memchr (p, eol, beg - p);
40d7c13
+          char *nl = memchr (p, eol, beg - p);
40d7c13
           nl++;
40d7c13
           prline (p, nl, SEP_CHAR_REJECTED);
40d7c13
           p = nl;
40d7c13
@@ -1220,7 +1252,7 @@ prtext (char const *beg, char const *lim)
40d7c13
       /* One or more lines are output.  */
40d7c13
       for (n = 0; p < lim && n < outleft; n++)
40d7c13
         {
40d7c13
-          char const *nl = memchr (p, eol, lim - p);
40d7c13
+          char *nl = memchr (p, eol, lim - p);
40d7c13
           nl++;
40d7c13
           if (!out_quiet)
40d7c13
             prline (p, nl, SEP_CHAR_SELECTED);
40d7c13
@@ -1267,13 +1299,12 @@ zap_nuls (char *p, char *lim, char eol)
40d7c13
    between matching lines if OUT_INVERT is true).  Return a count of
40d7c13
    lines printed.  Replace all NUL bytes with NUL_ZAPPER as we go.  */
40d7c13
 static intmax_t
40d7c13
-grepbuf (char const *beg, char const *lim)
40d7c13
+grepbuf (char *beg, char const *lim)
40d7c13
 {
40d7c13
   intmax_t outleft0 = outleft;
40d7c13
-  char const *p;
40d7c13
-  char const *endp;
40d7c13
+  char *endp;
40d7c13
 
40d7c13
-  for (p = beg; p < lim; p = endp)
40d7c13
+  for (char *p = beg; p < lim; p = endp)
40d7c13
     {
40d7c13
       size_t match_size;
40d7c13
       size_t match_offset = execute (p, lim - p, &match_size, NULL);
40d7c13
@@ -1284,15 +1315,15 @@ grepbuf (char const *beg, char const *lim)
40d7c13
           match_offset = lim - p;
40d7c13
           match_size = 0;
40d7c13
         }
40d7c13
-      char const *b = p + match_offset;
40d7c13
+      char *b = p + match_offset;
40d7c13
       endp = b + match_size;
40d7c13
       /* Avoid matching the empty line at the end of the buffer. */
40d7c13
       if (!out_invert && b == lim)
40d7c13
         break;
40d7c13
       if (!out_invert || p < b)
40d7c13
         {
40d7c13
-          char const *prbeg = out_invert ? p : b;
40d7c13
-          char const *prend = out_invert ? b : endp;
40d7c13
+          char *prbeg = out_invert ? p : b;
40d7c13
+          char *prend = out_invert ? b : endp;
40d7c13
           prtext (prbeg, prend);
40d7c13
           if (!outleft || done_on_match)
40d7c13
             {
40d7c13
@@ -1313,7 +1344,6 @@ static intmax_t
40d7c13
 grep (int fd, struct stat const *st)
40d7c13
 {
40d7c13
   intmax_t nlines, i;
40d7c13
-  enum textbin textbin;
40d7c13
   size_t residue, save;
40d7c13
   char oldc;
40d7c13
   char *beg;
40d7c13
@@ -1322,6 +1352,7 @@ grep (int fd, struct stat const *st)
40d7c13
   char nul_zapper = '\0';
40d7c13
   bool done_on_match_0 = done_on_match;
40d7c13
   bool out_quiet_0 = out_quiet;
40d7c13
+  bool has_nulls = false;
40d7c13
 
40d7c13
   if (! reset (fd, st))
40d7c13
     return 0;
40d7c13
@@ -1333,6 +1364,7 @@ grep (int fd, struct stat const *st)
40d7c13
   after_last_match = 0;
40d7c13
   pending = 0;
40d7c13
   skip_nuls = skip_empty_lines && !eol;
40d7c13
+  encoding_error_output = false;
40d7c13
   seek_data_failed = false;
40d7c13
 
40d7c13
   nlines = 0;
40d7c13
@@ -1345,26 +1377,20 @@ grep (int fd, struct stat const *st)
40d7c13
       return 0;
40d7c13
     }
40d7c13
 
40d7c13
-  if (binary_files == TEXT_BINARY_FILES)
40d7c13
-    textbin = TEXTBIN_TEXT;
40d7c13
-  else
40d7c13
+  for (bool firsttime = true; ; firsttime = false)
40d7c13
     {
40d7c13
-      textbin = file_textbin (bufbeg, buflim - bufbeg, fd, st);
40d7c13
-      if (textbin_is_binary (textbin))
40d7c13
+      if (!has_nulls && eol && binary_files != TEXT_BINARY_FILES
40d7c13
+          && (buf_has_nulls (bufbeg, buflim - bufbeg)
40d7c13
+              || (firsttime && file_must_have_nulls (buflim - bufbeg, fd, st))))
40d7c13
         {
40d7c13
+          has_nulls = true;
40d7c13
           if (binary_files == WITHOUT_MATCH_BINARY_FILES)
40d7c13
             return 0;
40d7c13
           done_on_match = out_quiet = true;
40d7c13
           nul_zapper = eol;
40d7c13
           skip_nuls = skip_empty_lines;
40d7c13
         }
40d7c13
-      else if (execute != Pexecute)
40d7c13
-        textbin = TEXTBIN_TEXT;
40d7c13
-    }
40d7c13
 
40d7c13
-  for (;;)
40d7c13
-    {
40d7c13
-      input_textbin = textbin;
40d7c13
       lastnl = bufbeg;
40d7c13
       if (lastout)
40d7c13
         lastout = bufbeg;
97a0815
@@ -1415,13 +1441,8 @@ grep (int fd, struct stat const *st)
97a0815
         }
97a0815
 
97a0815
       /* Detect whether leading context is adjacent to previous output.  */
97a0815
-      if (lastout)
97a0815
-        {
97a0815
-          if (textbin == TEXTBIN_UNKNOWN)
97a0815
-            textbin = TEXTBIN_TEXT;
97a0815
-          if (beg != lastout)
97a0815
-            lastout = 0;
97a0815
-        }
97a0815
+      if (beg != lastout)
97a0815
+        lastout = 0;
40d7c13
 
40d7c13
       /* Handle some details and read more data to scan.  */
40d7c13
       save = residue + lim - beg;
40d7c13
@@ -1434,22 +1455,6 @@ grep (int fd, struct stat const *st)
40d7c13
           suppressible_error (filename, errno);
40d7c13
           goto finish_grep;
40d7c13
         }
40d7c13
-
40d7c13
-      /* If the file's textbin has not been determined yet, assume
40d7c13
-         it's binary if the next input buffer suggests so.  */
40d7c13
-      if (textbin == TEXTBIN_UNKNOWN)
40d7c13
-        {
40d7c13
-          enum textbin tb = buffer_textbin (bufbeg, buflim - bufbeg);
40d7c13
-          if (textbin_is_binary (tb))
40d7c13
-            {
40d7c13
-              if (binary_files == WITHOUT_MATCH_BINARY_FILES)
40d7c13
-                return 0;
40d7c13
-              textbin = tb;
40d7c13
-              done_on_match = out_quiet = true;
40d7c13
-              nul_zapper = eol;
40d7c13
-              skip_nuls = skip_empty_lines;
40d7c13
-            }
40d7c13
-        }
40d7c13
     }
40d7c13
   if (residue)
40d7c13
     {
40d7c13
@@ -1463,7 +1468,7 @@ grep (int fd, struct stat const *st)
40d7c13
  finish_grep:
40d7c13
   done_on_match = done_on_match_0;
40d7c13
   out_quiet = out_quiet_0;
40d7c13
-  if (textbin_is_binary (textbin) && !out_quiet && nlines != 0)
40d7c13
+  if ((has_nulls || encoding_error_output) && !out_quiet && nlines != 0)
40d7c13
     printf (_("Binary file %s matches\n"), filename);
40d7c13
   return nlines;
40d7c13
 }
40d7c13
diff --git a/src/grep.h b/src/grep.h
40d7c13
index 580eb11..2e4527c 100644
40d7c13
--- a/src/grep.h
40d7c13
+++ b/src/grep.h
40d7c13
@@ -29,22 +29,4 @@ extern bool match_words;	/* -w */
40d7c13
 extern bool match_lines;	/* -x */
40d7c13
 extern char eolbyte;		/* -z */
40d7c13
 
40d7c13
-/* An enum textbin describes the file's type, inferred from data read
40d7c13
-   before the first line is selected for output.  */
40d7c13
-enum textbin
40d7c13
-  {
40d7c13
-    /* Binary, as it contains null bytes and the -z option is not in effect,
40d7c13
-       or it contains encoding errors.  */
40d7c13
-    TEXTBIN_BINARY = -1,
40d7c13
-
40d7c13
-    /* Not known yet.  Only text has been seen so far.  */
40d7c13
-    TEXTBIN_UNKNOWN = 0,
40d7c13
-
40d7c13
-    /* Text.  */
40d7c13
-    TEXTBIN_TEXT = 1
40d7c13
-  };
40d7c13
-
40d7c13
-/* Input file type.  */
40d7c13
-extern enum textbin input_textbin;
40d7c13
-
40d7c13
 #endif
40d7c13
diff --git a/src/pcresearch.c b/src/pcresearch.c
40d7c13
index b1f8310..a446b2c 100644
40d7c13
--- a/src/pcresearch.c
40d7c13
+++ b/src/pcresearch.c
40d7c13
@@ -194,32 +194,13 @@ Pexecute (char const *buf, size_t size, size_t *match_size,
40d7c13
      error.  */
40d7c13
   char const *subject = buf;
40d7c13
 
40d7c13
-  /* If the input type is unknown, the caller is still testing the
40d7c13
-     input, which means the current buffer cannot contain encoding
40d7c13
-     errors and a multiline search is typically more efficient.
40d7c13
-     Otherwise, a single-line search is typically faster, so that
40d7c13
-     pcre_exec doesn't waste time validating the entire input
40d7c13
-     buffer.  */
40d7c13
-  bool multiline = input_textbin == TEXTBIN_UNKNOWN;
40d7c13
-
40d7c13
   for (; p < buf + size; p = line_start = line_end + 1)
40d7c13
     {
40d7c13
-      bool too_big;
40d7c13
-
40d7c13
-      if (multiline)
40d7c13
-        {
40d7c13
-          size_t pcre_size_max = MIN (INT_MAX, SIZE_MAX - 1);
40d7c13
-          size_t scan_size = MIN (pcre_size_max + 1, buf + size - p);
40d7c13
-          line_end = memrchr (p, eolbyte, scan_size);
40d7c13
-          too_big = ! line_end;
40d7c13
-        }
40d7c13
-      else
40d7c13
-        {
40d7c13
-          line_end = memchr (p, eolbyte, buf + size - p);
40d7c13
-          too_big = INT_MAX < line_end - p;
40d7c13
-        }
40d7c13
-
40d7c13
-      if (too_big)
40d7c13
+      /* A single-line search is typically faster, so that
40d7c13
+         pcre_exec doesn't waste time validating the entire input
40d7c13
+         buffer.  */
40d7c13
+      line_end = memchr (p, eolbyte, buf + size - p);
40d7c13
+      if (INT_MAX < line_end - p)
40d7c13
         error (EXIT_TROUBLE, 0, _("exceeded PCRE's line length limit"));
40d7c13
 
40d7c13
       for (;;)
40d7c13
@@ -247,27 +228,11 @@ Pexecute (char const *buf, size_t size, size_t *match_size,
40d7c13
           int options = 0;
40d7c13
           if (!bol)
40d7c13
             options |= PCRE_NOTBOL;
40d7c13
-          if (multiline)
40d7c13
-            options |= PCRE_NO_UTF8_CHECK;
40d7c13
 
40d7c13
           e = jit_exec (subject, line_end - subject, search_offset,
40d7c13
                         options, sub);
40d7c13
           if (e != PCRE_ERROR_BADUTF8)
40d7c13
-            {
40d7c13
-              if (0 < e && multiline && sub[1] - sub[0] != 0)
40d7c13
-                {
40d7c13
-                  char const *nl = memchr (subject + sub[0], eolbyte,
40d7c13
-                                           sub[1] - sub[0]);
40d7c13
-                  if (nl)
40d7c13
-                    {
40d7c13
-                      /* This match crosses a line boundary; reject it.  */
40d7c13
-                      p = subject + sub[0];
40d7c13
-                      line_end = nl;
40d7c13
-                      continue;
40d7c13
-                    }
40d7c13
-                }
40d7c13
-              break;
40d7c13
-            }
40d7c13
+            break;
40d7c13
           int valid_bytes = sub[0];
40d7c13
 
40d7c13
           /* Try to match the string before the encoding error.  */
40d7c13
@@ -337,15 +302,6 @@ Pexecute (char const *buf, size_t size, size_t *match_size,
40d7c13
           beg = matchbeg;
40d7c13
           end = matchend;
40d7c13
         }
40d7c13
-      else if (multiline)
40d7c13
-        {
40d7c13
-          char const *prev_nl = memrchr (line_start - 1, eolbyte,
40d7c13
-                                         matchbeg - (line_start - 1));
40d7c13
-          char const *next_nl = memchr (matchend, eolbyte,
40d7c13
-                                        line_end + 1 - matchend);
40d7c13
-          beg = prev_nl + 1;
40d7c13
-          end = next_nl + 1;
40d7c13
-        }
40d7c13
       else
40d7c13
         {
40d7c13
           beg = line_start;
40d7c13
diff --git a/tests/Makefile.am b/tests/Makefile.am
40d7c13
index d379821..2ade5be 100644
40d7c13
--- a/tests/Makefile.am
40d7c13
+++ b/tests/Makefile.am
40d7c13
@@ -70,6 +70,7 @@ TESTS =						\
40d7c13
   empty						\
40d7c13
   empty-line					\
40d7c13
   empty-line-mb					\
40d7c13
+  encoding-error				\
40d7c13
   epipe						\
40d7c13
   equiv-classes					\
40d7c13
   ere						\
40d7c13
diff --git a/tests/Makefile.in b/tests/Makefile.in
40d7c13
index 6de6f49..b5bd7b5 100644
40d7c13
--- a/tests/Makefile.in
40d7c13
+++ b/tests/Makefile.in
40d7c13
@@ -1406,6 +1406,7 @@ TESTS = \
40d7c13
   empty						\
40d7c13
   empty-line					\
40d7c13
   empty-line-mb					\
40d7c13
+  encoding-error				\
40d7c13
   epipe						\
40d7c13
   equiv-classes					\
40d7c13
   ere						\
40d7c13
@@ -1997,6 +1998,13 @@ empty-line-mb.log: empty-line-mb
40d7c13
 	--log-file $$b.log --trs-file $$b.trs \
40d7c13
 	$(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \
40d7c13
 	"$$tst" $(AM_TESTS_FD_REDIRECT)
40d7c13
+encoding-error.log: encoding-error
40d7c13
+	@p='encoding-error'; \
40d7c13
+	b='encoding-error'; \
40d7c13
+	$(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \
40d7c13
+	--log-file $$b.log --trs-file $$b.trs \
40d7c13
+	$(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \
40d7c13
+	"$$tst" $(AM_TESTS_FD_REDIRECT)
40d7c13
 epipe.log: epipe
40d7c13
 	@p='epipe'; \
40d7c13
 	b='epipe'; \
40d7c13
diff --git a/tests/encoding-error b/tests/encoding-error
40d7c13
new file mode 100755
40d7c13
index 0000000..fe52de2
40d7c13
--- /dev/null
40d7c13
+++ b/tests/encoding-error
40d7c13
@@ -0,0 +1,41 @@
40d7c13
+#! /bin/sh
40d7c13
+# Test grep's behavior on encoding errors.
40d7c13
+#
40d7c13
+# Copyright 2015 Free Software Foundation, Inc.
40d7c13
+#
40d7c13
+# Copying and distribution of this file, with or without modification,
40d7c13
+# are permitted in any medium without royalty provided the copyright
40d7c13
+# notice and this notice are preserved.
40d7c13
+
40d7c13
+. "${srcdir=.}/init.sh"; path_prepend_ ../src
40d7c13
+
40d7c13
+require_en_utf8_locale_
40d7c13
+
40d7c13
+LC_ALL=en_US.UTF-8
40d7c13
+export LC_ALL
40d7c13
+
40d7c13
+printf 'Alfred Jones\n' > a || framework_failure_
40d7c13
+printf 'John Smith\n' >j || framework_failure_
40d7c13
+printf 'Pedro P\xe9rez\n' >p || framework_failure_
40d7c13
+cat a p j >in || framework_failure_
40d7c13
+
40d7c13
+fail=0
40d7c13
+
40d7c13
+grep '^A' in >out || fail=1
40d7c13
+compare a out || fail=1
40d7c13
+
40d7c13
+grep '^P' in >out || fail=1
40d7c13
+printf 'Binary file in matches\n' >exp || framework_failure_
40d7c13
+compare exp out || fail=1
40d7c13
+
40d7c13
+grep '^J' in >out || fail=1
40d7c13
+compare j out || fail=1
40d7c13
+
40d7c13
+grep '^X' in >out
40d7c13
+test $? = 1 || fail=1
40d7c13
+compare /dev/null out || fail=1
40d7c13
+
40d7c13
+grep -a . in >out || fail=1
40d7c13
+compare in out || fail=1
40d7c13
+
40d7c13
+Exit $fail