Blob Blame History Raw
To: vim_dev@googlegroups.com
Subject: Patch 7.3.427
Fcc: outbox
From: Bram Moolenaar <Bram@moolenaar.net>
Mime-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
------------

Patch 7.3.427
Problem:    readfile() can be slow with long lines.
Solution:   Use realloc() instead of alloc(). (John Little)
Files:      src/eval.c


*** ../vim-7.3.426/src/eval.c	2012-01-26 14:32:26.000000000 +0100
--- src/eval.c	2012-02-05 00:25:39.000000000 +0100
***************
*** 14325,14346 ****
      typval_T	*rettv;
  {
      int		binary = FALSE;
      char_u	*fname;
      FILE	*fd;
!     listitem_T	*li;
! #define FREAD_SIZE 200	    /* optimized for text lines */
!     char_u	buf[FREAD_SIZE];
!     int		readlen;    /* size of last fread() */
!     int		buflen;	    /* nr of valid chars in buf[] */
!     int		filtd;	    /* how much in buf[] was NUL -> '\n' filtered */
!     int		tolist;	    /* first byte in buf[] still to be put in list */
!     int		chop;	    /* how many CR to chop off */
!     char_u	*prev = NULL;	/* previously read bytes, if any */
!     int		prevlen = 0;    /* length of "prev" if not NULL */
!     char_u	*s;
!     int		len;
!     long	maxline = MAXLNUM;
!     long	cnt = 0;
  
      if (argvars[1].v_type != VAR_UNKNOWN)
      {
--- 14325,14343 ----
      typval_T	*rettv;
  {
      int		binary = FALSE;
+     int		failed = FALSE;
      char_u	*fname;
      FILE	*fd;
!     char_u	buf[(IOSIZE/256)*256];	/* rounded to avoid odd + 1 */
!     int		io_size = sizeof(buf);
!     int		readlen;		/* size of last fread() */
!     char_u	*prev	 = NULL;	/* previously read bytes, if any */
!     long	prevlen  = 0;		/* length of data in prev */
!     long	prevsize = 0;		/* size of prev buffer */
!     long	maxline  = MAXLNUM;
!     long	cnt	 = 0;
!     char_u	*p;			/* position in buf */
!     char_u	*start;			/* start of current line */
  
      if (argvars[1].v_type != VAR_UNKNOWN)
      {
***************
*** 14362,14410 ****
  	return;
      }
  
-     filtd = 0;
      while (cnt < maxline || maxline < 0)
      {
! 	readlen = (int)fread(buf + filtd, 1, FREAD_SIZE - filtd, fd);
! 	buflen = filtd + readlen;
! 	tolist = 0;
! 	for ( ; filtd < buflen || readlen <= 0; ++filtd)
! 	{
! 	    if (readlen <= 0 || buf[filtd] == '\n')
! 	    {
! 		/* In binary mode add an empty list item when the last
! 		 * non-empty line ends in a '\n'. */
! 		if (!binary && readlen == 0 && filtd == 0 && prev == NULL)
! 		    break;
  
! 		/* Found end-of-line or end-of-file: add a text line to the
! 		 * list. */
! 		chop = 0;
! 		if (!binary)
! 		    while (filtd - chop - 1 >= tolist
! 					  && buf[filtd - chop - 1] == '\r')
! 			++chop;
! 		len = filtd - tolist - chop;
! 		if (prev == NULL)
! 		    s = vim_strnsave(buf + tolist, len);
  		else
  		{
! 		    s = alloc((unsigned)(prevlen + len + 1));
! 		    if (s != NULL)
  		    {
! 			mch_memmove(s, prev, prevlen);
! 			vim_free(prev);
! 			prev = NULL;
! 			mch_memmove(s + prevlen, buf + tolist, len);
  			s[prevlen + len] = NUL;
  		    }
  		}
! 		tolist = filtd + 1;
  
! 		li = listitem_alloc();
! 		if (li == NULL)
  		{
  		    vim_free(s);
  		    break;
  		}
  		li->li_tv.v_type = VAR_STRING;
--- 14359,14419 ----
  	return;
      }
  
      while (cnt < maxline || maxline < 0)
      {
! 	readlen = (int)fread(buf, 1, io_size, fd);
! 
! 	/* This for loop processes what was read, but is also entered at end
! 	 * of file so that either:
! 	 * - an incomplete line gets written
! 	 * - a "binary" file gets an empty line at the end if it ends in a
! 	 *   newline.  */
! 	for (p = buf, start = buf;
! 		p < buf + readlen || (readlen <= 0 && (prevlen > 0 || binary));
! 		++p)
! 	{
! 	    if (*p == '\n' || readlen <= 0)
! 	    {
! 		listitem_T  *li;
! 		char_u	    *s	= NULL;
! 		long_u	    len = p - start;
  
! 		/* Finished a line.  Remove CRs before NL. */
! 		if (readlen > 0 && !binary)
! 		{
! 		    while (len > 0 && start[len - 1] == '\r')
! 			--len;
! 		    /* removal may cross back to the "prev" string */
! 		    if (len == 0)
! 			while (prevlen > 0 && prev[prevlen - 1] == '\r')
! 			    --prevlen;
! 		}
! 		if (prevlen == 0)
! 		    s = vim_strnsave(start, len);
  		else
  		{
! 		    /* Change "prev" buffer to be the right size.  This way
! 		     * the bytes are only copied once, and very long lines are
! 		     * allocated only once.  */
! 		    if ((s = vim_realloc(prev, prevlen + len + 1)) != NULL)
  		    {
! 			mch_memmove(s + prevlen, start, len);
  			s[prevlen + len] = NUL;
+ 			prev = NULL; /* the list will own the string */
+ 			prevlen = prevsize = 0;
  		    }
  		}
! 		if (s == NULL)
! 		{
! 		    do_outofmem_msg((long_u) prevlen + len + 1);
! 		    failed = TRUE;
! 		    break;
! 		}
  
! 		if ((li = listitem_alloc()) == NULL)
  		{
  		    vim_free(s);
+ 		    failed = TRUE;
  		    break;
  		}
  		li->li_tv.v_type = VAR_STRING;
***************
*** 14412,14485 ****
  		li->li_tv.vval.v_string = s;
  		list_append(rettv->vval.v_list, li);
  
! 		if (++cnt >= maxline && maxline >= 0)
! 		    break;
! 		if (readlen <= 0)
  		    break;
  	    }
! 	    else if (buf[filtd] == NUL)
! 		buf[filtd] = '\n';
  #ifdef FEAT_MBYTE
! 	    else if (buf[filtd] == 0xef
! 		    && enc_utf8
! 		    && filtd + 2 < buflen
! 		    && !binary
! 		    && buf[filtd + 1] == 0xbb
! 		    && buf[filtd + 2] == 0xbf)
! 	    {
! 		/* remove utf-8 byte order mark */
! 		mch_memmove(buf + filtd, buf + filtd + 3, buflen - filtd - 3);
! 		--filtd;
! 		buflen -= 3;
  	    }
  #endif
! 	}
! 	if (readlen <= 0)
! 	    break;
  
! 	if (tolist == 0)
  	{
! 	    if (buflen >= FREAD_SIZE / 2)
  	    {
! 		/* "buf" is full, need to move text to an allocated buffer */
! 		if (prev == NULL)
  		{
! 		    prev = vim_strnsave(buf, buflen);
! 		    prevlen = buflen;
  		}
! 		else
  		{
! 		    s = alloc((unsigned)(prevlen + buflen));
! 		    if (s != NULL)
! 		    {
! 			mch_memmove(s, prev, prevlen);
! 			mch_memmove(s + prevlen, buf, buflen);
! 			vim_free(prev);
! 			prev = s;
! 			prevlen += buflen;
! 		    }
  		}
! 		filtd = 0;
  	    }
  	}
! 	else
! 	{
! 	    mch_memmove(buf, buf + tolist, buflen - tolist);
! 	    filtd -= tolist;
! 	}
!     }
  
      /*
       * For a negative line count use only the lines at the end of the file,
       * free the rest.
       */
!     if (maxline < 0)
  	while (cnt > -maxline)
  	{
  	    listitem_remove(rettv->vval.v_list, rettv->vval.v_list->lv_first);
  	    --cnt;
  	}
  
      vim_free(prev);
      fclose(fd);
  }
--- 14421,14529 ----
  		li->li_tv.vval.v_string = s;
  		list_append(rettv->vval.v_list, li);
  
! 		start = p + 1; /* step over newline */
! 		if ((++cnt >= maxline && maxline >= 0) || readlen <= 0)
  		    break;
  	    }
! 	    else if (*p == NUL)
! 		*p = '\n';
  #ifdef FEAT_MBYTE
! 	    /* Check for utf8 "bom"; U+FEFF is encoded as EF BB BF.  Do this
! 	     * when finding the BF and check the previous two bytes. */
! 	    else if (*p == 0xbf && enc_utf8 && !binary)
! 	    {
! 		/* Find the two bytes before the 0xbf.	If p is at buf, or buf
! 		 * + 1, these may be in the "prev" string. */
! 		char_u back1 = p >= buf + 1 ? p[-1]
! 				     : prevlen >= 1 ? prev[prevlen - 1] : NUL;
! 		char_u back2 = p >= buf + 2 ? p[-2]
! 			  : p == buf + 1 && prevlen >= 1 ? prev[prevlen - 1]
! 			  : prevlen >= 2 ? prev[prevlen - 2] : NUL;
! 
! 		if (back2 == 0xef && back1 == 0xbb)
! 		{
! 		    char_u *dest = p - 2;
! 
! 		    /* Usually a BOM is at the beginning of a file, and so at
! 		     * the beginning of a line; then we can just step over it.
! 		     */
! 		    if (start == dest)
! 			start = p + 1;
! 		    else
! 		    {
! 			/* have to shuffle buf to close gap */
! 			int adjust_prevlen = 0;
! 
! 			if (dest < buf)
! 			{
! 			    adjust_prevlen = buf - dest; /* must be 1 or 2 */
! 			    dest = buf;
! 			}
! 			if (readlen > p - buf + 1)
! 			    mch_memmove(dest, p + 1, readlen - (p - buf) - 1);
! 			readlen -= 3 - adjust_prevlen;
! 			prevlen -= adjust_prevlen;
! 			p = dest - 1;
! 		    }
! 		}
  	    }
  #endif
! 	} /* for */
  
! 	if (failed || (cnt >= maxline && maxline >= 0) || readlen <= 0)
! 	    break;
! 	if (start < p)
  	{
! 	    /* There's part of a line in buf, store it in "prev". */
! 	    if (p - start + prevlen >= prevsize)
  	    {
! 		/* need bigger "prev" buffer */
! 		char_u *newprev;
! 
! 		/* A common use case is ordinary text files and "prev" gets a
! 		 * fragment of a line, so the first allocation is made
! 		 * small, to avoid repeatedly 'allocing' large and
! 		 * 'reallocing' small. */
! 		if (prevsize == 0)
! 		    prevsize = p - start;
! 		else
  		{
! 		    long grow50pc = (prevsize * 3) / 2;
! 		    long growmin  = (p - start) * 2 + prevlen;
! 		    prevsize = grow50pc > growmin ? grow50pc : growmin;
  		}
! 		if ((newprev = vim_realloc(prev, prevsize)) == NULL)
  		{
! 		    do_outofmem_msg((long_u)prevsize);
! 		    failed = TRUE;
! 		    break;
  		}
! 		prev = newprev;
  	    }
+ 	    /* Add the line part to end of "prev". */
+ 	    mch_memmove(prev + prevlen, start, p - start);
+ 	    prevlen += p - start;
  	}
!     } /* while */
  
      /*
       * For a negative line count use only the lines at the end of the file,
       * free the rest.
       */
!     if (!failed && maxline < 0)
  	while (cnt > -maxline)
  	{
  	    listitem_remove(rettv->vval.v_list, rettv->vval.v_list->lv_first);
  	    --cnt;
  	}
  
+     if (failed)
+     {
+ 	list_free(rettv->vval.v_list, TRUE);
+ 	/* readfile doc says an empty list is returned on error */
+ 	rettv->vval.v_list = list_alloc();
+     }
+ 
      vim_free(prev);
      fclose(fd);
  }
***************
*** 21802,21808 ****
  	fp = HI2UF(hi);
  
  	if (fp->uf_flags & FC_DICT)
! 	    return ""; /* don't show dict functions */
  
  	if (STRLEN(fp->uf_name) + 4 >= IOSIZE)
  	    return fp->uf_name;	/* prevents overflow */
--- 21846,21852 ----
  	fp = HI2UF(hi);
  
  	if (fp->uf_flags & FC_DICT)
! 	    return (char_u *)""; /* don't show dict functions */
  
  	if (STRLEN(fp->uf_name) + 4 >= IOSIZE)
  	    return fp->uf_name;	/* prevents overflow */
*** ../vim-7.3.426/src/version.c	2012-02-04 23:34:57.000000000 +0100
--- src/version.c	2012-02-05 00:38:34.000000000 +0100
***************
*** 716,717 ****
--- 716,719 ----
  {   /* Add new patch number below this line */
+ /**/
+     427,
  /**/

-- 
One difference between a man and a machine is that a machine is quiet
when well oiled.

 /// Bram Moolenaar -- Bram@Moolenaar.net -- http://www.Moolenaar.net   \\\
///        sponsor Vim, vote for features -- http://www.Vim.org/sponsor/ \\\
\\\  an exciting new programming language -- http://www.Zimbu.org        ///
 \\\            help me help AIDS victims -- http://ICCF-Holland.org    ///