diff -urNp coreutils-7.2-orig/lib/gnulib.mk coreutils-7.2/lib/gnulib.mk --- coreutils-7.2-orig/lib/gnulib.mk 2009-03-26 09:12:33.000000000 +0100 +++ coreutils-7.2/lib/gnulib.mk 2009-09-08 10:08:09.000000000 +0200 @@ -1600,6 +1600,12 @@ EXTRA_DIST += str-kmp.h ## end gnulib module mbsstr +## begin gnulib module mbsalign + +libcoreutils_a_SOURCES += mbsalign.c mbsalign.h + +## end gnulib module mbsalign + ## begin gnulib module mbswidth libcoreutils_a_SOURCES += mbswidth.h mbswidth.c diff -urNp coreutils-7.2-orig/lib/mbsalign.c coreutils-7.2/lib/mbsalign.c --- coreutils-7.2-orig/lib/mbsalign.c 1970-01-01 01:00:00.000000000 +0100 +++ coreutils-7.2/lib/mbsalign.c 2009-08-15 17:25:32.000000000 +0200 @@ -0,0 +1,236 @@ +/* Align/Truncate a string in a given screen width + Copyright (C) 2009 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. */ + +/* Written by Pádraig Brady. */ + +#include <config.h> +#include "mbsalign.h" + +#include <stdlib.h> +#include <string.h> +#include <stdio.h> +#include <stdbool.h> +#include <limits.h> +#include <wchar.h> +#include <wctype.h> + +#ifndef MIN +# define MIN(a, b) ((a) < (b) ? (a) : (b)) +#endif + +/* Replace non printable chars. + Return 1 if replacement made, 0 otherwise.
*/
+
+static bool
+wc_ensure_printable (wchar_t *wchars)
+{
+  bool replaced = false;
+  wchar_t *wc = wchars;
+  while (*wc)
+    {
+      if (!iswprint ((wint_t) *wc))
+        {
+          *wc = 0xFFFD; /* L'\uFFFD' (replacement char) */
+          replaced = true;
+        }
+      wc++;
+    }
+  return replaced;
+}
+
+/* Truncate the wide string WC, in place, to at most WIDTH cells; a char of
+   unknown width becomes U+FFFD (1 cell).  Returns number of cells used.  */
+
+static size_t
+wc_truncate (wchar_t *wc, size_t width)
+{
+  size_t cells = 0;
+  int next_cells = 0;
+
+  while (*wc)
+    {
+      next_cells = wcwidth (*wc);
+      if (next_cells == -1) /* non printable */
+        {
+          *wc = 0xFFFD; /* L'\uFFFD' (replacement char) */
+          next_cells = 1;
+        }
+      if (cells + next_cells > width)
+        break;
+      cells += next_cells;
+      wc++;
+    }
+  *wc = L'\0';
+  return cells;
+}
+
+/* FIXME: move this function to gnulib as it's missing on:
+   OpenBSD 3.8, IRIX 5.3, Solaris 2.5.1, mingw, BeOS  */
+
+static int
+rpl_wcswidth (const wchar_t *s, size_t n)
+{
+  int ret = 0;
+  /* Sum wcwidth over at most N chars of S, stopping early at a NUL.  */
+  while (n-- > 0 && *s != L'\0')
+    {
+      int nwidth = wcwidth (*s++);
+      if (nwidth == -1) /* non printable */
+        return -1;
+      if (ret > (INT_MAX - nwidth)) /* overflow */
+        return -1;
+      ret += nwidth;
+    }
+
+  return ret;
+}
+
+/* Write N_SPACES space characters to DEST while ensuring
+   nothing is written beyond DEST_END.  A terminating NUL
+   is always added to DEST, at DEST_END itself if the padding was cut short.
+   A pointer to the terminating NUL is returned.  */
+
+static char*
+mbs_align_pad (char *dest, const char* dest_end, size_t n_spaces)
+{
+  /* FIXME: Should we pad with "figure space" (\u2007)
+     if non ascii data present?  */
+  while (n_spaces-- && (dest < dest_end))
+    *dest++ = ' ';
+  *dest = '\0';
+  return dest;
+}
+
+/* Align a string, SRC, in a field of *WIDTH columns, handling multi-byte
+   characters; write the result into the DEST_SIZE-byte buffer, DEST.
+   ALIGN specifies whether to left- or right-justify or to center.
+   If SRC requires more than *WIDTH columns, truncate it to fit.
+   When centering, the number of trailing spaces may be one less than the
+   number of leading spaces.
The FLAGS parameter is unused at present.
+   Return the length in bytes required for the final result, not counting
+   the trailing NUL.  A return value of DEST_SIZE or larger means there
+   wasn't enough space.  Unless DEST_SIZE is 0, DEST is NUL terminated on success.
+   Return (size_t) -1 upon error (invalid multi-byte sequence in SRC, a character
+   that cannot be converted back to multi-byte form, or malloc failure).
+   Update *WIDTH to indicate how many columns were used before padding.  */
+
+size_t
+mbsalign (const char *src, char *dest, size_t dest_size,
+          size_t *width, mbs_align_t align, int flags)
+{
+  size_t ret = -1;
+  size_t src_size = strlen (src) + 1;
+  char *newstr = NULL;
+  wchar_t *str_wc = NULL;
+  const char *str_to_print = src;
+  size_t n_cols = src_size - 1;
+  size_t n_used_bytes = n_cols; /* Not including NUL */
+  size_t n_spaces = 0;
+  bool conversion = false;
+  bool wc_enabled = false;
+
+  /* In multi-byte locales convert to wide characters
+     to allow easy truncation. Also determine number
+     of screen columns used.  */
+  if (MB_CUR_MAX > 1)
+    {
+      size_t src_chars = mbstowcs (NULL, src, 0);
+      if (src_chars == (size_t) -1)
+        goto mbsalign_cleanup;
+      src_chars += 1; /* make space for NUL */
+      str_wc = malloc (src_chars * sizeof (wchar_t));
+      if (str_wc == NULL)
+        goto mbsalign_cleanup;
+      if (mbstowcs (str_wc, src, src_chars) > 0)
+        {
+          str_wc[src_chars - 1] = L'\0';
+          wc_enabled = true;
+          conversion = wc_ensure_printable (str_wc);
+          n_cols = rpl_wcswidth (str_wc, src_chars);
+        }
+    }
+
+  /* If we transformed or need to truncate the source string then create a
+     modified copy of it; U+FFFD replacements can make it longer than SRC.  */
+  if (conversion || (n_cols > *width))
+    {
+      if (wc_enabled)
+        {
+          n_cols = wc_truncate (str_wc, *width);
+          src_size = wcstombs (NULL, str_wc, 0) + 1; /* 0 if unconvertible */
+        }
+      newstr = src_size ? malloc (src_size) : NULL;
+      if (newstr == NULL)
+        goto mbsalign_cleanup;
+      str_to_print = newstr;
+      if (wc_enabled)
+        n_used_bytes = wcstombs (newstr, str_wc, src_size);
+      else
+        {
+          n_cols = *width;
+          n_used_bytes = n_cols;
+          memcpy (newstr, src, n_cols);
+          newstr[n_cols] = '\0';
+        }
+    }
+
+  if (*width > n_cols)
+    n_spaces = *width - n_cols;
+
+  /* indicate to caller how many cells needed (not including padding).  */
+  *width = n_cols;
+
+  /* indicate to caller how many bytes needed (not including NUL).  */
+  ret = n_used_bytes + (n_spaces * 1);
+
+  /* Write as much NUL terminated output to DEST as possible.  */
+  if (dest_size != 0)
+    {
+      char *dest_end = dest + dest_size - 1;
+      size_t start_spaces = n_spaces / 2 + n_spaces % 2;
+      size_t end_spaces = n_spaces / 2;
+
+      switch (align)
+        {
+        case MBS_ALIGN_CENTER: /* the centred defaults above already apply */
+          break;
+        case MBS_ALIGN_LEFT:
+          start_spaces = 0;
+          end_spaces = n_spaces;
+          break;
+        case MBS_ALIGN_RIGHT:
+          start_spaces = n_spaces;
+          end_spaces = 0;
+          break;
+        }
+
+      dest = mbs_align_pad (dest, dest_end, start_spaces);
+      dest = mempcpy (dest, str_to_print, MIN (n_used_bytes, dest_end - dest));
+      dest = mbs_align_pad (dest, dest_end, end_spaces);
+    }
+
+mbsalign_cleanup:
+
+  free (str_wc);
+  free (newstr);
+
+  return ret;
+}
+/*
+ * Local variables:
+ * indent-tabs-mode: nil
+ * End:
+ */
diff -urNp coreutils-7.2-orig/lib/mbsalign.h coreutils-7.2/lib/mbsalign.h
--- coreutils-7.2-orig/lib/mbsalign.h 1970-01-01 01:00:00.000000000 +0100
+++ coreutils-7.2/lib/mbsalign.h 2009-08-15 17:25:32.000000000 +0200
@@ -0,0 +1,23 @@
+/* Align/Truncate a string in a given screen width
+   Copyright (C) 2009 Free Software Foundation, Inc.
+ + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. */ + +#include <stddef.h> + +typedef enum { MBS_ALIGN_LEFT, MBS_ALIGN_RIGHT, MBS_ALIGN_CENTER } mbs_align_t; + +size_t +mbsalign (const char *src, char *dest, size_t dest_size, + size_t *width, mbs_align_t align, int flags); diff -urNp coreutils-7.2-orig/src/ls.c coreutils-7.2/src/ls.c --- coreutils-7.2-orig/src/ls.c 2009-09-08 10:00:39.000000000 +0200 +++ coreutils-7.2/src/ls.c 2009-09-08 10:13:41.000000000 +0200 @@ -63,6 +63,10 @@ #include <selinux/selinux.h> #include <wchar.h> +#if HAVE_LANGINFO_CODESET +# include <langinfo.h> +#endif + /* Use SA_NOCLDSTOP as a proxy for whether the sigaction machinery is present. */ #ifndef SA_NOCLDSTOP @@ -105,6 +109,7 @@ #include "strftime.h" #include "xstrtol.h" #include "areadlink.h" +#include "mbsalign.h" #define PROGRAM_NAME (ls_mode == LS_LS ? "ls" \ : (ls_mode == LS_MULTI_COL \ @@ -719,6 +724,11 @@ static char const *long_time_format[2] = screen columns small, because many people work in windows with only 80 columns. But make this as wide as the other string below, for recent files. */ + /* TRANSLATORS: ls output needs to be aligned for ease of reading, + so be wary of using variable width fields from the locale. + Note %b is handled specially by ls and aligned correctly. + Note also that specifying a width as in %5b is erroneous as strftime + will count bytes rather than characters in multibyte locales.
*/ N_("%b %e %Y"), /* strftime format for recent files (younger than 6 months), in -l output. This should contain the month, day and time (at @@ -727,6 +737,11 @@ static char const *long_time_format[2] = screen columns small, because many people work in windows with only 80 columns. But make this as wide as the other string above, for non-recent files. */ + /* TRANSLATORS: ls output needs to be aligned for ease of reading, + so be wary of using variable width fields from the locale. + Note %b is handled specially by ls and aligned correctly. + Note also that specifying a width as in %5b is erroneous as strftime + will count bytes rather than characters in multibyte locales. */ N_("%b %e %H:%M") }; @@ -1007,6 +1022,57 @@ dired_dump_obstack (const char *prefix, } } +/* Read the abbreviated month names from the locale so that they can be aligned: + determine the max width of the field and truncate any names + wider than our max allowed. + Note that even though this handles multibyte locales correctly, + it's not restricted to them, as single byte locales can have + variable width abbreviated months; also, precomputing/caching + the names was seen to increase the performance of ls significantly. */ + +/* Max number of display cells to use for an abbreviated month name. */ +enum { MAX_MON_WIDTH = 5 }; +/* Cache of the 12 aligned month names. In the unlikely event that the abmon[] storage is not big enough + an error message will be displayed, and we revert to using + unmodified abbreviated month names from the locale database. */ +static char abmon[12][MAX_MON_WIDTH * 2 * MB_LEN_MAX + 1]; +/* minimum width needed to align %b, 0 => don't use precomputed values.
*/
+static size_t required_mon_width;
+/* Fill abmon[] with equal-width month names; return that width, or 0 if unusable.  */
+static size_t
+abmon_init (void)
+{
+#ifdef HAVE_NL_LANGINFO
+  required_mon_width = MAX_MON_WIDTH;
+  size_t curr_max_width;
+  do
+    {
+      curr_max_width = required_mon_width;
+      required_mon_width = 0;
+      for (int i = 0; i < 12; i++)
+        {
+          size_t width = curr_max_width;
+          /* WIDTH is in/out: the column limit going in, columns used coming out.  */
+          size_t req = mbsalign (nl_langinfo (ABMON_1 + i),
+                                 abmon[i], sizeof (abmon[i]),
+                                 &width, MBS_ALIGN_LEFT, 0);
+
+          if (req == (size_t) -1 || req >= sizeof (abmon[i]))
+            {
+              required_mon_width = 0; /* ignore precomputed strings.  */
+              return required_mon_width;
+            }
+
+          required_mon_width = MAX (required_mon_width, width);
+        }
+    }
+  while (curr_max_width > required_mon_width);
+#endif
+
+  return required_mon_width;
+}
+
+
 static size_t
 dev_ino_hash (void const *x, size_t table_size)
 {
@@ -1997,6 +2063,10 @@ decode_switches (int argc, char **argv)
                 }
             }
         }
+      /* Note we leave %5b etc. alone so user widths/flags are honoured.  */
+      if (strstr (long_time_format[0], "%b") || strstr (long_time_format[1], "%b"))
+        if (!abmon_init ())
+          error (0, 0, _("error initializing month strings"));
     }
 
   return optind;
@@ -3375,6 +3445,35 @@ print_current_files (void)
     }
 }
 
+/* Like nstrftime, but first replace the first %b in FMT with the precomputed,
+   aligned month name when one is cached.  With glibc-2.7 on linux at least this
+   speeds up the whole `ls -lU` process by around 17% versus strftime's own %b.  */
+
+static size_t
+align_nstrftime (char *buf, size_t size, char const *fmt, struct tm const *tm,
+                 int utc, int ns)
+{
+  const char *nfmt = fmt;
+  /* In the unlikely event that rpl_fmt below is not large enough,
+     the replacement is not done.  A malloc here slows ls down by 2%  */
+  char rpl_fmt[sizeof (abmon[0]) + 100];
+  char const *pb = NULL;
+  if (required_mon_width && (pb = strstr (fmt, "%b")))
+    {
+      if (strlen (fmt) < (sizeof (rpl_fmt) - sizeof (abmon[0]) + 2))
+        {
+          char *pfmt = rpl_fmt;
+          nfmt = rpl_fmt;
+
+          pfmt = mempcpy (pfmt, fmt, pb - fmt);
+          pfmt = stpcpy (pfmt, abmon[tm->tm_mon]);
+          strcpy (pfmt, pb + 2);
+        }
+    }
+  size_t ret = nstrftime (buf, size, nfmt, tm, utc, ns);
+  return ret;
+}
+
 /* Return the expected number of columns in a long-format time stamp,
    or zero if it cannot be calculated.  */
 
@@ -3399,7 +3498,7 @@ long_time_expected_width (void)
       if (tm)
         {
           size_t len =
-            nstrftime (buf, sizeof buf, long_time_format[0], tm, 0, 0);
+            align_nstrftime (buf, sizeof buf, long_time_format[0], tm, 0, 0);
           if (len != 0)
             width = mbsnwidth (buf, len, 0);
         }
@@ -3740,8 +3839,8 @@ print_long_format (const struct fileinfo
 
       /* We assume here that all time zones are offset from UTC by a
          whole number of seconds.  */
-      s = nstrftime (p, TIME_STAMP_LEN_MAXIMUM + 1, fmt,
-                     when_local, 0, when_timespec.tv_nsec);
+      s = align_nstrftime (p, TIME_STAMP_LEN_MAXIMUM + 1, fmt,
+                           when_local, 0, when_timespec.tv_nsec);
     }
 
   if (s || !*p)