diff options
Diffstat (limited to 'yocto-poky/meta/recipes-extended/grep')
12 files changed, 1840 insertions, 0 deletions
diff --git a/yocto-poky/meta/recipes-extended/grep/grep-2.5.1a/Makevars b/yocto-poky/meta/recipes-extended/grep/grep-2.5.1a/Makevars new file mode 100644 index 000000000..8b09f53b0 --- /dev/null +++ b/yocto-poky/meta/recipes-extended/grep/grep-2.5.1a/Makevars @@ -0,0 +1,25 @@ +# Makefile variables for PO directory in any package using GNU gettext. + +# Usually the message domain is the same as the package name. +DOMAIN = $(PACKAGE) + +# These two variables depend on the location of this directory. +subdir = po +top_builddir = .. + +# These options get passed to xgettext. +XGETTEXT_OPTIONS = --keyword=_ --keyword=N_ + +# This is the copyright holder that gets inserted into the header of the +# $(DOMAIN).pot file. Set this to the copyright holder of the surrounding +# package. (Note that the msgstr strings, extracted from the package's +# sources, belong to the copyright holder of the package.) Translators are +# expected to transfer the copyright for their translations to this person +# or entity, or to disclaim their copyright. The empty string stands for +# the public domain; in this case the translators are expected to disclaim +# their copyright. +COPYRIGHT_HOLDER = Free Software Foundation, Inc. + +# This is the list of locale categories, beyond LC_MESSAGES, for which the +# message catalogs shall be used. It is usually empty. +EXTRA_LOCALE_CATEGORIES = diff --git a/yocto-poky/meta/recipes-extended/grep/grep-2.5.1a/fix-for-texinfo-5.1.patch b/yocto-poky/meta/recipes-extended/grep/grep-2.5.1a/fix-for-texinfo-5.1.patch new file mode 100644 index 000000000..5a4149cfc --- /dev/null +++ b/yocto-poky/meta/recipes-extended/grep/grep-2.5.1a/fix-for-texinfo-5.1.patch @@ -0,0 +1,17 @@ +Upstream-Status: Inappropriate [Poky Specific this is gplv2 version] + +Signed-off-by Saul Wold <sgw@linux.intel.com> + +Index: grep-2.5.1a/doc/grep.texi +=================================================================== +--- grep-2.5.1a.orig/doc/grep.texi ++++ grep-2.5.1a/doc/grep.texi +@@ -288,7 +288,7 @@ This version number should be included i + Print a usage message briefly summarizing these command-line options + and the bug-reporting address, then exit. + +-@itemx --binary-files=@var{type} ++@item --binary-files=@var{type} + @opindex --binary-files + @cindex binary files + If the first few bytes of a file indicate that the file contains binary diff --git a/yocto-poky/meta/recipes-extended/grep/grep-2.5.1a/fix64-int-to-pointer.patch b/yocto-poky/meta/recipes-extended/grep/grep-2.5.1a/fix64-int-to-pointer.patch new file mode 100644 index 000000000..3b91520fb --- /dev/null +++ b/yocto-poky/meta/recipes-extended/grep/grep-2.5.1a/fix64-int-to-pointer.patch @@ -0,0 +1,17 @@ +Always use locale.h as HAVE_LOCALE_H is no longer handled by ./configure +Upstream-Status: Inappropriate [ old version that will not be maintained ] +Signed-off-by: Alex DAMIAN <alexandru.damian@intel.com> + +diff --recursive --unified grep-2.5.1a-orig/lib/hard-locale.c grep-2.5.1a/lib/hard-locale.c +--- grep-2.5.1a-orig/lib/hard-locale.c 2001-03-04 07:33:12.000000000 +0200 ++++ grep-2.5.1a/lib/hard-locale.c 2013-03-11 17:05:52.086444891 +0200 +@@ -38,9 +38,7 @@ + # endif + #endif + +-#if HAVE_LOCALE_H + # include <locale.h> +-#endif + + #if HAVE_STRING_H + # include <string.h> diff --git a/yocto-poky/meta/recipes-extended/grep/grep-2.5.1a/gettext.patch b/yocto-poky/meta/recipes-extended/grep/grep-2.5.1a/gettext.patch new file mode 100644 index 000000000..57463355a --- /dev/null +++ b/yocto-poky/meta/recipes-extended/grep/grep-2.5.1a/gettext.patch @@ -0,0 +1,15 @@ +Enable operation with later versions of gettext. + +Upstream-Status: Inappropriate +RP 2012/10/19 + +Index: grep-2.5.1a/configure.in +=================================================================== +--- grep-2.5.1a.orig/configure.in 2012-10-19 12:57:51.646970204 +0000 ++++ grep-2.5.1a/configure.in 2012-10-19 12:59:49.946968803 +0000 +@@ -140,4 +140,4 @@ + AC_CHECK_LIB(pcre, pcre_exec) + fi + +-AC_OUTPUT(Makefile lib/Makefile lib/posix/Makefile src/Makefile tests/Makefile po/Makefile.in intl/Makefile doc/Makefile m4/Makefile vms/Makefile bootstrap/Makefile, [sed -e "/POTFILES =/r po/POTFILES" po/Makefile.in > po/Makefile; echo timestamp > stamp-h]) ++AC_OUTPUT(Makefile lib/Makefile lib/posix/Makefile src/Makefile tests/Makefile po/Makefile.in intl/Makefile doc/Makefile m4/Makefile vms/Makefile bootstrap/Makefile, [echo timestamp > stamp-h]) diff --git a/yocto-poky/meta/recipes-extended/grep/grep-2.5.1a/grep-CVE-2012-5667.patch b/yocto-poky/meta/recipes-extended/grep/grep-2.5.1a/grep-CVE-2012-5667.patch new file mode 100644 index 000000000..059d0687b --- /dev/null +++ b/yocto-poky/meta/recipes-extended/grep/grep-2.5.1a/grep-CVE-2012-5667.patch @@ -0,0 +1,32 @@ +The patch to fix CVE-2012-5667 +Reference: https://bugzilla.redhat.com/attachment.cgi?id=686605&action=diff + +Multiple integer overflows in GNU Grep before 2.11 might allow +context-dependent attackers to execute arbitrary code via vectors +involving a long input line that triggers a heap-based buffer overflow. + +http://web.nvd.nist.gov/view/vuln/detail?vulnId=CVE-2012-5667 + +Upstream-Status: Inappropriate [other] +This version of GNU Grep has been abandoned upstream and they are no longer +accepting patches. This is not a backport. + +Signed-off-by Ming Liu <ming.liu@windriver.com> +--- + grep.c | 7 +++---- + 1 file changed, 3 insertions(+), 4 deletions(-) + +--- a/src/grep.c 2013-05-15 13:39:33.359191769 +0800 ++++ a/src/grep.c 2013-05-15 13:50:22.609191882 +0800 +@@ -306,6 +306,11 @@ fillbuf (size_t save, struct stats const + int cc = 1; + char *readbuf; + size_t readsize; ++ const size_t max_save = INT_MAX / 2; ++ ++ /* Limit the amount of saved data to INT_MAX to fix CVE-2012-5667 */ ++ if (save > max_save) ++ error (2, 0, _("line too long")); + + /* Offset from start of buffer to start of old stuff + that we want to save. */ diff --git a/yocto-poky/meta/recipes-extended/grep/grep-2.5.1a/grep-egrep-fgrep-Fix-LSB-NG-cases.patch b/yocto-poky/meta/recipes-extended/grep/grep-2.5.1a/grep-egrep-fgrep-Fix-LSB-NG-cases.patch new file mode 100644 index 000000000..327ee5640 --- /dev/null +++ b/yocto-poky/meta/recipes-extended/grep/grep-2.5.1a/grep-egrep-fgrep-Fix-LSB-NG-cases.patch @@ -0,0 +1,1342 @@ +From c884dd12ec062569335702848fc5f29f436c28fa Mon Sep 17 00:00:00 2001 +From: Li xin <lixin.fnst@cn.fujitsu.com> +Date: Mon, 25 May 2015 10:15:57 +0900 +Subject: [PATCH] grep egrep fgrep: Fix LSB NG cases. + +The LSB core test requires grep egrep and fgrep can +perform pattern matching in searches without regard +to case if -i option is specified. + +Upstream-Status: backport. + +Signed-off-by: Li Xin <lixin.fnst@cn.fujitsu.com> +--- + lib/posix/regex.h | 4 + + src/dfa.c | 22 +- + src/grep.c | 96 ++++--- + src/search.c | 833 +++++++++++++++++++++++++++++++++++++++++++++--------- + 4 files changed, 768 insertions(+), 187 deletions(-) + +diff --git a/lib/posix/regex.h b/lib/posix/regex.h +index 63c2fef..7bb2b0e 100644 +--- a/lib/posix/regex.h ++++ b/lib/posix/regex.h +@@ -109,6 +109,10 @@ typedef unsigned long int reg_syntax_t; + If not set, \{, \}, {, and } are literals. */ + #define RE_INTERVALS (RE_HAT_LISTS_NOT_NEWLINE << 1) + ++/* If this bit is set, then ignore case when matching. ++ If not set, then case is significant. */ ++#define RE_ICASE (RE_INVALID_INTERVAL_ORD << 1) ++ + /* If this bit is set, +, ? and | aren't recognized as operators. + If not set, they are. */ + #define RE_LIMITED_OPS (RE_INTERVALS << 1) +diff --git a/src/dfa.c b/src/dfa.c +index 590bfa7..27c876a 100644 +--- a/src/dfa.c ++++ b/src/dfa.c +@@ -414,7 +414,7 @@ update_mb_len_index (unsigned char const *p, int len) + + /* This function fetch a wide character, and update cur_mb_len, + used only if the current locale is a multibyte environment. */ +-static wchar_t ++static wint_t + fetch_wc (char const *eoferr) + { + wchar_t wc; +@@ -423,7 +423,7 @@ fetch_wc (char const *eoferr) + if (eoferr != 0) + dfaerror (eoferr); + else +- return -1; ++ return WEOF; + } + + cur_mb_len = mbrtowc(&wc, lexptr, lexleft, &mbs); +@@ -459,7 +459,7 @@ fetch_wc (char const *eoferr) + static void + parse_bracket_exp_mb () + { +- wchar_t wc, wc1, wc2; ++ wint_t wc, wc1, wc2; + + /* Work area to build a mb_char_classes. */ + struct mb_char_classes *work_mbc; +@@ -496,7 +496,7 @@ parse_bracket_exp_mb () + work_mbc->invert = 0; + do + { +- wc1 = -1; /* mark wc1 is not initialized". */ ++ wc1 = WEOF; /* mark wc1 is not initialized". */ + + /* Note that if we're looking at some other [:...:] construct, + we just treat it as a bunch of ordinary characters. We can do +@@ -586,7 +586,7 @@ parse_bracket_exp_mb () + work_mbc->coll_elems[work_mbc->ncoll_elems++] = elem; + } + } +- wc = -1; ++ wc1 = wc = WEOF; + } + else + /* We treat '[' as a normal character here. */ +@@ -600,7 +600,7 @@ parse_bracket_exp_mb () + wc = fetch_wc(("Unbalanced [")); + } + +- if (wc1 == -1) ++ if (wc1 == WEOF) + wc1 = fetch_wc(_("Unbalanced [")); + + if (wc1 == L'-') +@@ -630,17 +630,17 @@ parse_bracket_exp_mb () + } + REALLOC_IF_NECESSARY(work_mbc->range_sts, wchar_t, + range_sts_al, work_mbc->nranges + 1); +- work_mbc->range_sts[work_mbc->nranges] = wc; ++ work_mbc->range_sts[work_mbc->nranges] = (wchar_t)wc; + REALLOC_IF_NECESSARY(work_mbc->range_ends, wchar_t, + range_ends_al, work_mbc->nranges + 1); +- work_mbc->range_ends[work_mbc->nranges++] = wc2; ++ work_mbc->range_ends[work_mbc->nranges++] = (wchar_t)wc2; + } +- else if (wc != -1) ++ else if (wc != WEOF) + /* build normal characters. */ + { + REALLOC_IF_NECESSARY(work_mbc->chars, wchar_t, chars_al, + work_mbc->nchars + 1); +- work_mbc->chars[work_mbc->nchars++] = wc; ++ work_mbc->chars[work_mbc->nchars++] = (wchar_t)wc; + } + } + while ((wc = wc1) != L']'); +@@ -2552,6 +2552,8 @@ match_mb_charset (struct dfa *d, int s, position pos, int index) + } + + /* match with a character? */ ++ if (case_fold) ++ wc = towlower (wc); + for (i = 0; i<work_mbc->nchars; i++) + { + if (wc == work_mbc->chars[i]) +diff --git a/src/grep.c b/src/grep.c +index 2fb2fac..3fd4b47 100644 +--- a/src/grep.c ++++ b/src/grep.c +@@ -30,6 +30,12 @@ + # include <sys/time.h> + # include <sys/resource.h> + #endif ++#if defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H && defined HAVE_MBRTOWC ++/* We can handle multibyte string. */ ++# define MBS_SUPPORT ++# include <wchar.h> ++# include <wctype.h> ++#endif + #include <stdio.h> + #include "system.h" + #include "getopt.h" +@@ -255,19 +261,6 @@ reset (int fd, char const *file, struct stats *stats) + bufbeg[-1] = eolbyte; + bufdesc = fd; + +- if (fstat (fd, &stats->stat) != 0) +- { +- error (0, errno, "fstat"); +- return 0; +- } +- if (directories == SKIP_DIRECTORIES && S_ISDIR (stats->stat.st_mode)) +- return 0; +-#ifndef DJGPP +- if (devices == SKIP_DEVICES && (S_ISCHR(stats->stat.st_mode) || S_ISBLK(stats->stat.st_mode) || S_ISSOCK(stats->stat.st_mode))) +-#else +- if (devices == SKIP_DEVICES && (S_ISCHR(stats->stat.st_mode) || S_ISBLK(stats->stat.st_mode))) +-#endif +- return 0; + if (S_ISREG (stats->stat.st_mode)) + { + if (file) +@@ -558,33 +551,6 @@ prline (char const *beg, char const *lim, int sep) + { + size_t match_size; + size_t match_offset; +- if(match_icase) +- { +- /* Yuck, this is tricky */ +- char *buf = (char*) xmalloc (lim - beg); +- char *ibeg = buf; +- char *ilim = ibeg + (lim - beg); +- int i; +- for (i = 0; i < lim - beg; i++) +- ibeg[i] = tolower (beg[i]); +- while ((match_offset = (*execute) (ibeg, ilim-ibeg, &match_size, 1)) +- != (size_t) -1) +- { +- char const *b = beg + match_offset; +- if (b == lim) +- break; +- fwrite (beg, sizeof (char), match_offset, stdout); +- printf ("\33[%sm", grep_color); +- fwrite (b, sizeof (char), match_size, stdout); +- fputs ("\33[00m", stdout); +- beg = b + match_size; +- ibeg = ibeg + match_offset + match_size; +- } +- fwrite (beg, 1, lim - beg, stdout); +- free (buf); +- lastout = lim; +- return; +- } + while (lim-beg && (match_offset = (*execute) (beg, lim - beg, &match_size, 1)) + != (size_t) -1) + { +@@ -601,6 +567,7 @@ prline (char const *beg, char const *lim, int sep) + fputs ("\33[00m", stdout); + beg = b + match_size; + } ++ fputs ("\33[K", stdout); + } + fwrite (beg, 1, lim - beg, stdout); + if (ferror (stdout)) +@@ -623,7 +590,7 @@ prpending (char const *lim) + size_t match_size; + --pending; + if (outleft +- || (((*execute) (lastout, nl - lastout, &match_size, 0) == (size_t) -1) ++ || (((*execute) (lastout, nl + 1 - lastout, &match_size, 0) == (size_t) -1) + == !out_invert)) + prline (lastout, nl + 1, '-'); + else +@@ -895,6 +862,19 @@ grepfile (char const *file, struct stats *stats) + } + else + { ++ if (stat (file, &stats->stat) != 0) ++ { ++ suppressible_error (file, errno); ++ return 1; ++ } ++ if (directories == SKIP_DIRECTORIES && S_ISDIR (stats->stat.st_mode)) ++ return 1; ++#ifndef DJGPP ++ if (devices == SKIP_DEVICES && (S_ISCHR(stats->stat.st_mode) || S_ISBLK(stats->stat.st_mode) || S_ISSOCK(stats->stat.st_mode) || S_ISFIFO(stats->stat.st_mode))) ++#else ++ if (devices == SKIP_DEVICES && (S_ISCHR(stats->stat.st_mode) || S_ISBLK(stats->stat.st_mode))) ++#endif ++ return 1; + while ((desc = open (file, O_RDONLY)) < 0 && errno == EINTR) + continue; + +@@ -1681,9 +1661,6 @@ warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n")) + out_invert ^= 1; + match_lines = match_words = 0; + } +- else +- /* Strip trailing newline. */ +- --keycc; + } + else + if (optind < argc) +@@ -1697,6 +1674,37 @@ warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n")) + if (!install_matcher (matcher) && !install_matcher ("default")) + abort (); + ++#ifdef MBS_SUPPORT ++ if (MB_CUR_MAX != 1 && match_icase) ++ { ++ wchar_t wc; ++ mbstate_t cur_state, prev_state; ++ int i, len = strlen(keys); ++ ++ memset(&cur_state, 0, sizeof(mbstate_t)); ++ for (i = 0; i <= len ;) ++ { ++ size_t mbclen; ++ mbclen = mbrtowc(&wc, keys + i, len - i, &cur_state); ++ if (mbclen == (size_t) -1 || mbclen == (size_t) -2 || mbclen == 0) ++ { ++ /* An invalid sequence, or a truncated multibyte character. ++ We treat it as a singlebyte character. */ ++ mbclen = 1; ++ } ++ else ++ { ++ if (iswupper((wint_t)wc)) ++ { ++ wc = towlower((wint_t)wc); ++ wcrtomb(keys + i, wc, &cur_state); ++ } ++ } ++ i += mbclen; ++ } ++ } ++#endif /* MBS_SUPPORT */ ++ + (*compile)(keys, keycc); + + if ((argc - optind > 1 && !no_filenames) || with_filenames) +diff --git a/src/search.c b/src/search.c +index 7bd233f..3c6a485 100644 +--- a/src/search.c ++++ b/src/search.c +@@ -18,9 +18,13 @@ + + /* Written August 1992 by Mike Haertel. */ + ++#ifndef _GNU_SOURCE ++# define _GNU_SOURCE 1 ++#endif + #ifdef HAVE_CONFIG_H + # include <config.h> + #endif ++#include <assert.h> + #include <sys/types.h> + #if defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H && defined HAVE_MBRTOWC + /* We can handle multibyte string. */ +@@ -31,7 +35,7 @@ + + #include "system.h" + #include "grep.h" +-#include "regex.h" ++#include <regex.h> + #include "dfa.h" + #include "kwset.h" + #include "error.h" +@@ -39,6 +43,9 @@ + #ifdef HAVE_LIBPCRE + # include <pcre.h> + #endif ++#ifdef HAVE_LANGINFO_CODESET ++# include <langinfo.h> ++#endif + + #define NCHAR (UCHAR_MAX + 1) + +@@ -70,9 +77,10 @@ static kwset_t kwset; + call the regexp matcher at all. */ + static int kwset_exact_matches; + +-#if defined(MBS_SUPPORT) +-static char* check_multibyte_string PARAMS ((char const *buf, size_t size)); +-#endif ++/* UTF-8 encoding allows some optimizations that we can't otherwise ++ assume in a multibyte encoding. */ ++static int using_utf8; ++ + static void kwsinit PARAMS ((void)); + static void kwsmusts PARAMS ((void)); + static void Gcompile PARAMS ((char const *, size_t)); +@@ -84,6 +92,15 @@ static void Pcompile PARAMS ((char const *, size_t )); + static size_t Pexecute PARAMS ((char const *, size_t, size_t *, int)); + + void ++check_utf8 (void) ++{ ++#ifdef HAVE_LANGINFO_CODESET ++ if (strcmp (nl_langinfo (CODESET), "UTF-8") == 0) ++ using_utf8 = 1; ++#endif ++} ++ ++void + dfaerror (char const *mesg) + { + error (2, 0, mesg); +@@ -141,38 +158,6 @@ kwsmusts (void) + } + } + +-#ifdef MBS_SUPPORT +-/* This function allocate the array which correspond to "buf". +- Then this check multibyte string and mark on the positions which +- are not singlebyte character nor the first byte of a multibyte +- character. Caller must free the array. */ +-static char* +-check_multibyte_string(char const *buf, size_t size) +-{ +- char *mb_properties = malloc(size); +- mbstate_t cur_state; +- int i; +- memset(&cur_state, 0, sizeof(mbstate_t)); +- memset(mb_properties, 0, sizeof(char)*size); +- for (i = 0; i < size ;) +- { +- size_t mbclen; +- mbclen = mbrlen(buf + i, size - i, &cur_state); +- +- if (mbclen == (size_t) -1 || mbclen == (size_t) -2 || mbclen == 0) +- { +- /* An invalid sequence, or a truncated multibyte character. +- We treat it as a singlebyte character. */ +- mbclen = 1; +- } +- mb_properties[i] = mbclen; +- i += mbclen; +- } +- +- return mb_properties; +-} +-#endif +- + static void + Gcompile (char const *pattern, size_t size) + { +@@ -181,7 +166,8 @@ Gcompile (char const *pattern, size_t size) + size_t total = size; + char const *motif = pattern; + +- re_set_syntax (RE_SYNTAX_GREP | RE_HAT_LISTS_NOT_NEWLINE); ++ check_utf8 (); ++ re_set_syntax (RE_SYNTAX_GREP | RE_HAT_LISTS_NOT_NEWLINE | (match_icase ? RE_ICASE : 0)); + dfasyntax (RE_SYNTAX_GREP | RE_HAT_LISTS_NOT_NEWLINE, match_icase, eolbyte); + + /* For GNU regex compiler we have to pass the patterns separately to detect +@@ -218,6 +204,10 @@ Gcompile (char const *pattern, size_t size) + motif = sep; + } while (sep && total != 0); + ++ /* Strip trailing newline. */ ++ if (size && pattern[size - 1] == '\n') ++ size--; ++ + /* In the match_words and match_lines cases, we use a different pattern + for the DFA matcher that will quickly throw out cases that won't work. + Then if DFA succeeds we do some hairy stuff using the regex matcher +@@ -233,7 +223,7 @@ Gcompile (char const *pattern, size_t size) + static char const line_end[] = "\\)$"; + static char const word_beg[] = "\\(^\\|[^[:alnum:]_]\\)\\("; + static char const word_end[] = "\\)\\([^[:alnum:]_]\\|$\\)"; +- char *n = malloc (sizeof word_beg - 1 + size + sizeof word_end); ++ char *n = xmalloc (sizeof word_beg - 1 + size + sizeof word_end); + size_t i; + strcpy (n, match_lines ? line_beg : word_beg); + i = strlen (n); +@@ -257,14 +247,15 @@ Ecompile (char const *pattern, size_t size) + size_t total = size; + char const *motif = pattern; + ++ check_utf8 (); + if (strcmp (matcher, "awk") == 0) + { +- re_set_syntax (RE_SYNTAX_AWK); ++ re_set_syntax (RE_SYNTAX_AWK | (match_icase ? RE_ICASE : 0)); + dfasyntax (RE_SYNTAX_AWK, match_icase, eolbyte); + } + else + { +- re_set_syntax (RE_SYNTAX_POSIX_EGREP); ++ re_set_syntax (RE_SYNTAX_POSIX_EGREP | (match_icase ? RE_ICASE : 0)); + dfasyntax (RE_SYNTAX_POSIX_EGREP, match_icase, eolbyte); + } + +@@ -301,6 +292,10 @@ Ecompile (char const *pattern, size_t size) + motif = sep; + } while (sep && total != 0); + ++ /* Strip trailing newline. */ ++ if (size && pattern[size - 1] == '\n') ++ size--; ++ + /* In the match_words and match_lines cases, we use a different pattern + for the DFA matcher that will quickly throw out cases that won't work. + Then if DFA succeeds we do some hairy stuff using the regex matcher +@@ -316,7 +311,7 @@ Ecompile (char const *pattern, size_t size) + static char const line_end[] = ")$"; + static char const word_beg[] = "(^|[^[:alnum:]_])("; + static char const word_end[] = ")([^[:alnum:]_]|$)"; +- char *n = malloc (sizeof word_beg - 1 + size + sizeof word_end); ++ char *n = xmalloc (sizeof word_beg - 1 + size + sizeof word_end); + size_t i; + strcpy (n, match_lines ? line_beg : word_beg); + i = strlen(n); +@@ -339,15 +334,34 @@ EGexecute (char const *buf, size_t size, size_t *match_size, int exact) + char eol = eolbyte; + int backref, start, len; + struct kwsmatch kwsm; +- size_t i; ++ size_t i, ret_val; ++ static int use_dfa; ++ static int use_dfa_checked = 0; + #ifdef MBS_SUPPORT +- char *mb_properties = NULL; ++ int mb_cur_max = MB_CUR_MAX; ++ mbstate_t mbs; ++ memset (&mbs, '\0', sizeof (mbstate_t)); + #endif /* MBS_SUPPORT */ + ++ if (!use_dfa_checked) ++ { ++ char *grep_use_dfa = getenv ("GREP_USE_DFA"); ++ if (!grep_use_dfa) ++ { + #ifdef MBS_SUPPORT +- if (MB_CUR_MAX > 1 && kwset) +- mb_properties = check_multibyte_string(buf, size); ++ /* Turn off DFA when processing multibyte input. */ ++ use_dfa = (MB_CUR_MAX == 1); ++#else ++ use_dfa = 1; + #endif /* MBS_SUPPORT */ ++ } ++ else ++ { ++ use_dfa = atoi (grep_use_dfa); ++ } ++ ++ use_dfa_checked = 1; ++ } + + buflim = buf + size; + +@@ -358,47 +372,120 @@ EGexecute (char const *buf, size_t size, size_t *match_size, int exact) + if (kwset) + { + /* Find a possible match using the KWset matcher. */ +- size_t offset = kwsexec (kwset, beg, buflim - beg, &kwsm); ++#ifdef MBS_SUPPORT ++ size_t bytes_left = 0; ++#endif /* MBS_SUPPORT */ ++ size_t offset; ++#ifdef MBS_SUPPORT ++ /* kwsexec doesn't work with match_icase and multibyte input. */ ++ if (match_icase && mb_cur_max > 1) ++ /* Avoid kwset */ ++ offset = 0; ++ else ++#endif /* MBS_SUPPORT */ ++ offset = kwsexec (kwset, beg, buflim - beg, &kwsm); + if (offset == (size_t) -1) +- { ++ goto failure; + #ifdef MBS_SUPPORT +- if (MB_CUR_MAX > 1) +- free(mb_properties); +-#endif +- return (size_t)-1; ++ if (mb_cur_max > 1 && !using_utf8) ++ { ++ bytes_left = offset; ++ while (bytes_left) ++ { ++ size_t mlen = mbrlen (beg, bytes_left, &mbs); ++ if (mlen == (size_t) -1 || mlen == 0) ++ { ++ /* Incomplete character: treat as single-byte. */ ++ memset (&mbs, '\0', sizeof (mbstate_t)); ++ beg++; ++ bytes_left--; ++ continue; ++ } ++ ++ if (mlen == (size_t) -2) ++ /* Offset points inside multibyte character: ++ * no good. */ ++ break; ++ ++ beg += mlen; ++ bytes_left -= mlen; ++ } + } ++ else ++#endif /* MBS_SUPPORT */ + beg += offset; + /* Narrow down to the line containing the candidate, and + run it through DFA. */ + end = memchr(beg, eol, buflim - beg); + end++; + #ifdef MBS_SUPPORT +- if (MB_CUR_MAX > 1 && mb_properties[beg - buf] == 0) ++ if (mb_cur_max > 1 && bytes_left) + continue; +-#endif ++#endif /* MBS_SUPPORT */ + while (beg > buf && beg[-1] != eol) + --beg; +- if (kwsm.index < kwset_exact_matches) +- goto success; +- if (dfaexec (&dfa, beg, end - beg, &backref) == (size_t) -1) ++ if ( ++#ifdef MBS_SUPPORT ++ !(match_icase && mb_cur_max > 1) && ++#endif /* MBS_SUPPORT */ ++ (kwsm.index < kwset_exact_matches)) ++ goto success_in_beg_and_end; ++ if (use_dfa && ++ dfaexec (&dfa, beg, end - beg, &backref) == (size_t) -1) + continue; + } + else + { + /* No good fixed strings; start with DFA. */ +- size_t offset = dfaexec (&dfa, beg, buflim - beg, &backref); ++#ifdef MBS_SUPPORT ++ size_t bytes_left = 0; ++#endif /* MBS_SUPPORT */ ++ size_t offset = 0; ++ if (use_dfa) ++ offset = dfaexec (&dfa, beg, buflim - beg, &backref); + if (offset == (size_t) -1) + break; + /* Narrow down to the line we've found. */ ++#ifdef MBS_SUPPORT ++ if (mb_cur_max > 1 && !using_utf8) ++ { ++ bytes_left = offset; ++ while (bytes_left) ++ { ++ size_t mlen = mbrlen (beg, bytes_left, &mbs); ++ if (mlen == (size_t) -1 || mlen == 0) ++ { ++ /* Incomplete character: treat as single-byte. */ ++ memset (&mbs, '\0', sizeof (mbstate_t)); ++ beg++; ++ bytes_left--; ++ continue; ++ } ++ ++ if (mlen == (size_t) -2) ++ /* Offset points inside multibyte character: ++ * no good. */ ++ break; ++ ++ beg += mlen; ++ bytes_left -= mlen; ++ } ++ } ++ else ++#endif /* MBS_SUPPORT */ + beg += offset; + end = memchr (beg, eol, buflim - beg); + end++; ++#ifdef MBS_SUPPORT ++ if (mb_cur_max > 1 && bytes_left) ++ continue; ++#endif /* MBS_SUPPORT */ + while (beg > buf && beg[-1] != eol) + --beg; + } + /* Successful, no backreferences encountered! */ +- if (!backref) +- goto success; ++ if (use_dfa && !backref) ++ goto success_in_beg_and_end; + } + else + end = beg + size; +@@ -413,14 +500,11 @@ EGexecute (char const *buf, size_t size, size_t *match_size, int exact) + end - beg - 1, &(patterns[i].regs)))) + { + len = patterns[i].regs.end[0] - start; +- if (exact) +- { +- *match_size = len; +- return start; +- } ++ if (exact && !match_words) ++ goto success_in_start_and_len; + if ((!match_lines && !match_words) + || (match_lines && len == end - beg - 1)) +- goto success; ++ goto success_in_beg_and_end; + /* If -w, check if the match aligns with word boundaries. + We do this iteratively because: + (a) the line may contain more than one occurence of the +@@ -431,10 +515,114 @@ EGexecute (char const *buf, size_t size, size_t *match_size, int exact) + if (match_words) + while (start >= 0) + { +- if ((start == 0 || !WCHAR ((unsigned char) beg[start - 1])) +- && (len == end - beg - 1 +- || !WCHAR ((unsigned char) beg[start + len]))) +- goto success; ++ int lword_match = 0; ++ if (start == 0) ++ lword_match = 1; ++ else ++ { ++ assert (start > 0); ++#ifdef MBS_SUPPORT ++ if (mb_cur_max > 1) ++ { ++ const char *s; ++ size_t mr; ++ wchar_t pwc; ++ ++ /* Locate the start of the multibyte character ++ before the match position (== beg + start). */ ++ if (using_utf8) ++ { ++ /* UTF-8 is a special case: scan backwards ++ until we find a 7-bit character or a ++ lead byte. */ ++ s = beg + start - 1; ++ while (s > buf ++ && (unsigned char) *s >= 0x80 ++ && (unsigned char) *s <= 0xbf) ++ --s; ++ } ++ else ++ { ++ /* Scan forwards to find the start of the ++ last complete character before the ++ match position. */ ++ size_t bytes_left = start - 1; ++ s = beg; ++ while (bytes_left > 0) ++ { ++ mr = mbrlen (s, bytes_left, &mbs); ++ if (mr == (size_t) -1 || mr == 0) ++ { ++ memset (&mbs, '\0', sizeof (mbs)); ++ s++; ++ bytes_left--; ++ continue; ++ } ++ if (mr == (size_t) -2) ++ { ++ memset (&mbs, '\0', sizeof (mbs)); ++ break; ++ } ++ s += mr; ++ bytes_left -= mr; ++ } ++ } ++ mr = mbrtowc (&pwc, s, beg + start - s, &mbs); ++ if (mr == (size_t) -2 || mr == (size_t) -1 || ++ mr == 0) ++ { ++ memset (&mbs, '\0', sizeof (mbstate_t)); ++ lword_match = 1; ++ } ++ else if (!(iswalnum (pwc) || pwc == L'_') ++ && mr == beg + start - s) ++ lword_match = 1; ++ } ++ else ++#endif /* MBS_SUPPORT */ ++ if (!WCHAR ((unsigned char) beg[start - 1])) ++ lword_match = 1; ++ } ++ ++ if (lword_match) ++ { ++ int rword_match = 0; ++ if (start + len == end - beg - 1) ++ rword_match = 1; ++ else ++ { ++#ifdef MBS_SUPPORT ++ if (mb_cur_max > 1) ++ { ++ wchar_t nwc; ++ int mr; ++ ++ mr = mbtowc (&nwc, beg + start + len, ++ end - beg - start - len - 1); ++ if (mr <= 0) ++ { ++ memset (&mbs, '\0', sizeof (mbstate_t)); ++ rword_match = 1; ++ } ++ else if (!iswalnum (nwc) && nwc != L'_') ++ rword_match = 1; ++ } ++ else ++#endif /* MBS_SUPPORT */ ++ if (!WCHAR ((unsigned char) beg[start + len])) ++ rword_match = 1; ++ } ++ ++ if (rword_match) ++ { ++ if (!exact) ++ /* Returns the whole line. */ ++ goto success_in_beg_and_end; ++ else ++ /* Returns just this word match. */ ++ goto success_in_start_and_len; ++ } ++ } + if (len > 0) + { + /* Try a shorter length anchored at the same place. */ +@@ -461,26 +649,154 @@ EGexecute (char const *buf, size_t size, size_t *match_size, int exact) + } + } /* for Regex patterns. */ + } /* for (beg = end ..) */ +-#ifdef MBS_SUPPORT +- if (MB_CUR_MAX > 1 && mb_properties) +- free (mb_properties); +-#endif /* MBS_SUPPORT */ ++ ++ failure: + return (size_t) -1; + +- success: +-#ifdef MBS_SUPPORT +- if (MB_CUR_MAX > 1 && mb_properties) +- free (mb_properties); +-#endif /* MBS_SUPPORT */ +- *match_size = end - beg; +- return beg - buf; ++ success_in_beg_and_end: ++ len = end - beg; ++ start = beg - buf; ++ /* FALLTHROUGH */ ++ ++ success_in_start_and_len: ++ *match_size = len; ++ return start; + } + ++#ifdef MBS_SUPPORT ++static int f_i_multibyte; /* whether we're using the new -Fi MB method */ ++static struct ++{ ++ wchar_t **patterns; ++ size_t count, maxlen; ++ unsigned char *match; ++} Fimb; ++#endif ++ + static void + Fcompile (char const *pattern, size_t size) + { ++ int mb_cur_max = MB_CUR_MAX; + char const *beg, *lim, *err; + ++ check_utf8 (); ++#ifdef MBS_SUPPORT ++ /* Support -F -i for UTF-8 input. */ ++ if (match_icase && mb_cur_max > 1) ++ { ++ mbstate_t mbs; ++ wchar_t *wcpattern = xmalloc ((size + 1) * sizeof (wchar_t)); ++ const char *patternend = pattern; ++ size_t wcsize; ++ kwset_t fimb_kwset = NULL; ++ char *starts = NULL; ++ wchar_t *wcbeg, *wclim; ++ size_t allocated = 0; ++ ++ memset (&mbs, '\0', sizeof (mbs)); ++# ifdef __GNU_LIBRARY__ ++ wcsize = mbsnrtowcs (wcpattern, &patternend, size, size, &mbs); ++ if (patternend != pattern + size) ++ wcsize = (size_t) -1; ++# else ++ { ++ char *patterncopy = xmalloc (size + 1); ++ ++ memcpy (patterncopy, pattern, size); ++ patterncopy[size] = '\0'; ++ patternend = patterncopy; ++ wcsize = mbsrtowcs (wcpattern, &patternend, size, &mbs); ++ if (patternend != patterncopy + size) ++ wcsize = (size_t) -1; ++ free (patterncopy); ++ } ++# endif ++ if (wcsize + 2 <= 2) ++ { ++fimb_fail: ++ free (wcpattern); ++ free (starts); ++ if (fimb_kwset) ++ kwsfree (fimb_kwset); ++ free (Fimb.patterns); ++ Fimb.patterns = NULL; ++ } ++ else ++ { ++ if (!(fimb_kwset = kwsalloc (NULL))) ++ error (2, 0, _("memory exhausted")); ++ ++ starts = xmalloc (mb_cur_max * 3); ++ wcbeg = wcpattern; ++ do ++ { ++ int i; ++ size_t wclen; ++ ++ if (Fimb.count >= allocated) ++ { ++ if (allocated == 0) ++ allocated = 128; ++ else ++ allocated *= 2; ++ Fimb.patterns = xrealloc (Fimb.patterns, ++ sizeof (wchar_t *) * allocated); ++ } ++ Fimb.patterns[Fimb.count++] = wcbeg; ++ for (wclim = wcbeg; ++ wclim < wcpattern + wcsize && *wclim != L'\n'; ++wclim) ++ *wclim = towlower (*wclim); ++ *wclim = L'\0'; ++ wclen = wclim - wcbeg; ++ if (wclen > Fimb.maxlen) ++ Fimb.maxlen = wclen; ++ if (wclen > 3) ++ wclen = 3; ++ if (wclen == 0) ++ { ++ if ((err = kwsincr (fimb_kwset, "", 0)) != 0) ++ error (2, 0, err); ++ } ++ else ++ for (i = 0; i < (1 << wclen); i++) ++ { ++ char *p = starts; ++ int j, k; ++ ++ for (j = 0; j < wclen; ++j) ++ { ++ wchar_t wc = wcbeg[j]; ++ if (i & (1 << j)) ++ { ++ wc = towupper (wc); ++ if (wc == wcbeg[j]) ++ continue; ++ } ++ k = wctomb (p, wc); ++ if (k <= 0) ++ goto fimb_fail; ++ p += k; ++ } ++ if ((err = kwsincr (fimb_kwset, starts, p - starts)) != 0) ++ error (2, 0, err); ++ } ++ if (wclim < wcpattern + wcsize) ++ ++wclim; ++ wcbeg = wclim; ++ } ++ while (wcbeg < wcpattern + wcsize); ++ f_i_multibyte = 1; ++ kwset = fimb_kwset; ++ free (starts); ++ Fimb.match = xmalloc (Fimb.count); ++ if ((err = kwsprep (kwset)) != 0) ++ error (2, 0, err); ++ return; ++ } ++ } ++#endif /* MBS_SUPPORT */ ++ ++ + kwsinit (); + beg = pattern; + do +@@ -499,6 +815,76 @@ Fcompile (char const *pattern, size_t size) + error (2, 0, err); + } + ++#ifdef MBS_SUPPORT ++static int ++Fimbexec (const char *buf, size_t size, size_t *plen, int exact) ++{ ++ size_t len, letter, i; ++ int ret = -1; ++ mbstate_t mbs; ++ wchar_t wc; ++ int patterns_left; ++ ++ assert (match_icase && f_i_multibyte == 1); ++ assert (MB_CUR_MAX > 1); ++ ++ memset (&mbs, '\0', sizeof (mbs)); ++ memset (Fimb.match, '\1', Fimb.count); ++ letter = len = 0; ++ patterns_left = 1; ++ while (patterns_left && len <= size) ++ { ++ size_t c; ++ ++ patterns_left = 0; ++ if (len < size) ++ { ++ c = mbrtowc (&wc, buf + len, size - len, &mbs); ++ if (c + 2 <= 2) ++ return ret; ++ ++ wc = towlower (wc); ++ } ++ else ++ { ++ c = 1; ++ wc = L'\0'; ++ } ++ ++ for (i = 0; i < Fimb.count; i++) ++ { ++ if (Fimb.match[i]) ++ { ++ if (Fimb.patterns[i][letter] == L'\0') ++ { ++ /* Found a match. */ ++ *plen = len; ++ if (!exact && !match_words) ++ return 0; ++ else ++ { ++ /* For -w or exact look for longest match. */ ++ ret = 0; ++ Fimb.match[i] = '\0'; ++ continue; ++ } ++ } ++ ++ if (Fimb.patterns[i][letter] == wc) ++ patterns_left = 1; ++ else ++ Fimb.match[i] = '\0'; ++ } ++ } ++ ++ len += c; ++ letter++; ++ } ++ ++ return ret; ++} ++#endif /* MBS_SUPPORT */ ++ + static size_t + Fexecute (char const *buf, size_t size, size_t *match_size, int exact) + { +@@ -506,88 +892,268 @@ Fexecute (char const *buf, size_t size, size_t *match_size, int exact) + register size_t len; + char eol = eolbyte; + struct kwsmatch kwsmatch; ++ size_t ret_val; + #ifdef MBS_SUPPORT +- char *mb_properties; +- if (MB_CUR_MAX > 1) +- mb_properties = check_multibyte_string (buf, size); ++ int mb_cur_max = MB_CUR_MAX; ++ mbstate_t mbs; ++ memset (&mbs, '\0', sizeof (mbstate_t)); ++ const char *last_char = NULL; + #endif /* MBS_SUPPORT */ + +- for (beg = buf; beg <= buf + size; ++beg) ++ for (beg = buf; beg < buf + size; ++beg) + { +- size_t offset = kwsexec (kwset, beg, buf + size - beg, &kwsmatch); ++ size_t offset; ++ offset = kwsexec (kwset, beg, buf + size - beg, &kwsmatch); ++ + if (offset == (size_t) -1) +- { ++ goto failure; + #ifdef MBS_SUPPORT +- if (MB_CUR_MAX > 1) +- free(mb_properties); +-#endif /* MBS_SUPPORT */ +- return offset; ++ if (mb_cur_max > 1 && !using_utf8) ++ { ++ size_t bytes_left = offset; ++ while (bytes_left) ++ { ++ size_t mlen = mbrlen (beg, bytes_left, &mbs); ++ ++ last_char = beg; ++ if (mlen == (size_t) -1 || mlen == 0) ++ { ++ /* Incomplete character: treat as single-byte. */ ++ memset (&mbs, '\0', sizeof (mbstate_t)); ++ beg++; ++ bytes_left--; ++ continue; ++ } ++ ++ if (mlen == (size_t) -2) ++ /* Offset points inside multibyte character: no good. */ ++ break; ++ ++ beg += mlen; ++ bytes_left -= mlen; ++ } ++ ++ if (bytes_left) ++ continue; + } +-#ifdef MBS_SUPPORT +- if (MB_CUR_MAX > 1 && mb_properties[offset+beg-buf] == 0) +- continue; /* It is a part of multibyte character. */ ++ else + #endif /* MBS_SUPPORT */ + beg += offset; +- len = kwsmatch.size[0]; +- if (exact) +- { +- *match_size = len; + #ifdef MBS_SUPPORT +- if (MB_CUR_MAX > 1) +- free (mb_properties); ++ /* For f_i_multibyte, the string at beg now matches first 3 chars of ++ one of the search strings (less if there are shorter search strings). ++ See if this is a real match. */ ++ if (f_i_multibyte ++ && Fimbexec (beg, buf + size - beg, &kwsmatch.size[0], exact)) ++ goto next_char; + #endif /* MBS_SUPPORT */ +- return beg - buf; +- } ++ len = kwsmatch.size[0]; ++ if (exact && !match_words) ++ goto success_in_beg_and_len; + if (match_lines) + { + if (beg > buf && beg[-1] != eol) +- continue; ++ goto next_char; + if (beg + len < buf + size && beg[len] != eol) +- continue; ++ goto next_char; + goto success; + } + else if (match_words) +- for (try = beg; len; ) +- { +- if (try > buf && WCHAR((unsigned char) try[-1])) +- break; +- if (try + len < buf + size && WCHAR((unsigned char) try[len])) +- { +- offset = kwsexec (kwset, beg, --len, &kwsmatch); +- if (offset == (size_t) -1) +- { ++ { ++ while (len) ++ { ++ int word_match = 0; ++ if (beg > buf) ++ { + #ifdef MBS_SUPPORT +- if (MB_CUR_MAX > 1) +- free (mb_properties); ++ if (mb_cur_max > 1) ++ { ++ const char *s; ++ int mr; ++ wchar_t pwc; ++ ++ if (using_utf8) ++ { ++ s = beg - 1; ++ while (s > buf ++ && (unsigned char) *s >= 0x80 ++ && (unsigned char) *s <= 0xbf) ++ --s; ++ } ++ else ++ s = last_char; ++ mr = mbtowc (&pwc, s, beg - s); ++ if (mr <= 0) ++ memset (&mbs, '\0', sizeof (mbstate_t)); ++ else if ((iswalnum (pwc) || pwc == L'_') ++ && mr == (int) (beg - s)) ++ goto next_char; ++ } ++ else + #endif /* MBS_SUPPORT */ +- return offset; +- } +- try = beg + offset; +- len = kwsmatch.size[0]; +- } +- else +- goto success; +- } ++ if (WCHAR ((unsigned char) beg[-1])) ++ goto next_char; ++ } ++#ifdef MBS_SUPPORT ++ if (mb_cur_max > 1) ++ { ++ wchar_t nwc; ++ int mr; ++ ++ mr = mbtowc (&nwc, beg + len, buf + size - beg - len); ++ if (mr <= 0) ++ { ++ memset (&mbs, '\0', sizeof (mbstate_t)); ++ word_match = 1; ++ } ++ else if (!iswalnum (nwc) && nwc != L'_') ++ word_match = 1; ++ } ++ else ++#endif /* MBS_SUPPORT */ ++ if (beg + len >= buf + size || !WCHAR ((unsigned char) beg[len])) ++ word_match = 1; ++ if (word_match) ++ { ++ if (!exact) ++ /* Returns the whole line now we know there's a word match. */ ++ goto success; ++ else ++ /* Returns just this word match. */ ++ goto success_in_beg_and_len; ++ } ++ if (len > 0) ++ { ++ /* Try a shorter length anchored at the same place. */ ++ --len; ++ offset = kwsexec (kwset, beg, len, &kwsmatch); ++ ++ if (offset == -1) ++ goto next_char; /* Try a different anchor. */ ++#ifdef MBS_SUPPORT ++ if (mb_cur_max > 1 && !using_utf8) ++ { ++ size_t bytes_left = offset; ++ while (bytes_left) ++ { ++ size_t mlen = mbrlen (beg, bytes_left, &mbs); ++ ++ last_char = beg; ++ if (mlen == (size_t) -1 || mlen == 0) ++ { ++ /* Incomplete character: treat as single-byte. */ ++ memset (&mbs, '\0', sizeof (mbstate_t)); ++ beg++; ++ bytes_left--; ++ continue; ++ } ++ ++ if (mlen == (size_t) -2) ++ { ++ /* Offset points inside multibyte character: ++ * no good. */ ++ break; ++ } ++ ++ beg += mlen; ++ bytes_left -= mlen; ++ } ++ ++ if (bytes_left) ++ { ++ memset (&mbs, '\0', sizeof (mbstate_t)); ++ goto next_char; /* Try a different anchor. */ ++ } ++ } ++ else ++#endif /* MBS_SUPPORT */ ++ beg += offset; ++#ifdef MBS_SUPPORT ++ /* The string at beg now matches first 3 chars of one of ++ the search strings (less if there are shorter search ++ strings). See if this is a real match. */ ++ if (f_i_multibyte ++ && Fimbexec (beg, len - offset, &kwsmatch.size[0], ++ exact)) ++ goto next_char; ++#endif /* MBS_SUPPORT */ ++ len = kwsmatch.size[0]; ++ } ++ } ++ } + else + goto success; +- } +- ++next_char:; + #ifdef MBS_SUPPORT +- if (MB_CUR_MAX > 1) +- free (mb_properties); ++ /* Advance to next character. For MB_CUR_MAX == 1 case this is handled ++ by ++beg above. */ ++ if (mb_cur_max > 1) ++ { ++ if (using_utf8) ++ { ++ unsigned char c = *beg; ++ if (c >= 0xc2) ++ { ++ if (c < 0xe0) ++ ++beg; ++ else if (c < 0xf0) ++ beg += 2; ++ else if (c < 0xf8) ++ beg += 3; ++ else if (c < 0xfc) ++ beg += 4; ++ else if (c < 0xfe) ++ beg += 5; ++ } ++ } ++ else ++ { ++ size_t l = mbrlen (beg, buf + size - beg, &mbs); ++ ++ last_char = beg; ++ if (l + 2 >= 2) ++ beg += l - 1; ++ else ++ memset (&mbs, '\0', sizeof (mbstate_t)); ++ } ++ } + #endif /* MBS_SUPPORT */ ++ } ++ ++ failure: + return -1; + + success: ++#ifdef MBS_SUPPORT ++ if (mb_cur_max > 1 && !using_utf8) ++ { ++ end = beg + len; ++ while (end < buf + size) ++ { ++ size_t mlen = mbrlen (end, buf + size - end, &mbs); ++ if (mlen == (size_t) -1 || mlen == (size_t) -2 || mlen == 0) ++ { ++ memset (&mbs, '\0', sizeof (mbstate_t)); ++ mlen = 1; ++ } ++ if (mlen == 1 && *end == eol) ++ break; ++ ++ end += mlen; ++ } ++ } ++ else ++#endif /* MBS_SUPPORT */ + end = memchr (beg + len, eol, (buf + size) - (beg + len)); ++ + end++; + while (buf < beg && beg[-1] != eol) + --beg; +- *match_size = end - beg; +-#ifdef MBS_SUPPORT +- if (MB_CUR_MAX > 1) +- free (mb_properties); +-#endif /* MBS_SUPPORT */ ++ len = end - beg; ++ /* FALLTHROUGH */ ++ ++ success_in_beg_and_len: ++ *match_size = len; + return beg - buf; + } + +@@ -701,8 +1267,9 @@ Pexecute (char const *buf, size_t size, size_t *match_size, int exact) + char eol = eolbyte; + if (!exact) + { +- end = memchr (end, eol, buflim - end); +- end++; ++ while (end < buflim) ++ if (*end++ == eol) ++ break; + while (buf < beg && beg[-1] != eol) + --beg; + } +-- +1.8.4.2 + diff --git a/yocto-poky/meta/recipes-extended/grep/grep-2.5.1a/grep_fix_for_automake-1.12.patch b/yocto-poky/meta/recipes-extended/grep/grep-2.5.1a/grep_fix_for_automake-1.12.patch new file mode 100644 index 000000000..3ccce5fc3 --- /dev/null +++ b/yocto-poky/meta/recipes-extended/grep/grep-2.5.1a/grep_fix_for_automake-1.12.patch @@ -0,0 +1,52 @@ +Upstream-Status: Pending + +automake 1.12 has depricated automatic de-ANSI-fication support + +this patch avoids these kinds of errors: + +| configure.in:33: error: automatic de-ANSI-fication support has been removed +| /srv/home/nitin/builds/build-gcc47/tmp/sysroots/x86_64-linux/usr/share/aclocal-1.12/protos.m4:12: AM_C_PROTOTYPES is expanded from... +| configure.in:33: the top level +| autom4te: m4 failed with exit status: 1 +... +| lib/Makefile.am:2: error: automatic de-ANSI-fication support has been removed +| src/Makefile.am:2: error: automatic de-ANSI-fication support has been removed +| autoreconf: automake failed with exit status: 1 + +Signed-Off-By: Nitin A Kamble <nitin.a.kamble@intel.com> +2012/05/04 + +Index: grep-2.5.1a/configure.in +=================================================================== +--- grep-2.5.1a.orig/configure.in ++++ grep-2.5.1a/configure.in +@@ -30,7 +30,6 @@ AC_PROG_RANLIB + + dnl Checks for typedefs, structures, and compiler characteristics. + AC_SYS_LARGEFILE +-AM_C_PROTOTYPES + AC_TYPE_SIZE_T + AC_CHECK_TYPE(ssize_t, int) + AC_C_CONST +Index: grep-2.5.1a/lib/Makefile.am +=================================================================== +--- grep-2.5.1a.orig/lib/Makefile.am ++++ grep-2.5.1a/lib/Makefile.am +@@ -1,5 +1,5 @@ + # +-AUTOMAKE_OPTIONS = ../src/ansi2knr ++AUTOMAKE_OPTIONS = + + SUBDIRS = posix + +Index: grep-2.5.1a/src/Makefile.am +=================================================================== +--- grep-2.5.1a.orig/src/Makefile.am ++++ grep-2.5.1a/src/Makefile.am +@@ -1,5 +1,5 @@ + ## Process this file with automake to create Makefile.in +-AUTOMAKE_OPTIONS = ansi2knr no-dependencies ++AUTOMAKE_OPTIONS = no-dependencies + + LN = ln + diff --git a/yocto-poky/meta/recipes-extended/grep/grep-2.5.1a/uclibc-fix.patch b/yocto-poky/meta/recipes-extended/grep/grep-2.5.1a/uclibc-fix.patch new file mode 100644 index 000000000..de054fc75 --- /dev/null +++ b/yocto-poky/meta/recipes-extended/grep/grep-2.5.1a/uclibc-fix.patch @@ -0,0 +1,55 @@ +Upstream-Status: Inappropriate [licensing] + +# Fix to use mempcpy instead of __mempcpy. This is needed for uclibc which +# doesn't define __mempcpy, only mempcpy. Since both uclibc and glibc have +# mempcpy, we'll just use that instead. +# Patch source: OpenEmbedded + +Index: grep-2.5.1/intl/localealias.c +=================================================================== +--- grep-2.5.1.orig/intl/localealias.c 2002-03-14 00:39:06.000000000 +1100 ++++ grep-2.5.1/intl/localealias.c 2007-05-17 13:53:58.000000000 +1000 +@@ -65,7 +65,7 @@ + # define strcasecmp __strcasecmp + + # ifndef mempcpy +-# define mempcpy __mempcpy ++# error "mempcpy not detected" + # endif + # define HAVE_MEMPCPY 1 + # define HAVE___FSETLOCKING 1 +Index: grep-2.5.1/lib/getopt.c +=================================================================== +--- grep-2.5.1.orig/lib/getopt.c 2001-03-04 16:33:12.000000000 +1100 ++++ grep-2.5.1/lib/getopt.c 2007-05-17 13:51:44.000000000 +1000 +@@ -326,7 +326,7 @@ + nonoption_flags_len = nonoption_flags_max_len = 0; + else + { +- memset (__mempcpy (new_str, __getopt_nonoption_flags, ++ memset (mempcpy (new_str, __getopt_nonoption_flags, + nonoption_flags_max_len), + '\0', top + 1 - nonoption_flags_max_len); + nonoption_flags_max_len = top + 1; +@@ -437,7 +437,7 @@ + if (__getopt_nonoption_flags == NULL) + nonoption_flags_max_len = -1; + else +- memset (__mempcpy (__getopt_nonoption_flags, orig_str, len), ++ memset (mempcpy (__getopt_nonoption_flags, orig_str, len), + '\0', nonoption_flags_max_len - len); + } + } +Index: grep-2.5.1/lib/regex.c +=================================================================== +--- grep-2.5.1.orig/lib/regex.c 2001-04-03 04:04:45.000000000 +1000 ++++ grep-2.5.1/lib/regex.c 2007-05-17 13:51:48.000000000 +1000 +@@ -7842,7 +7842,7 @@ + if (msg_size > errbuf_size) + { + #if defined HAVE_MEMPCPY || defined _LIBC +- *((char *) __mempcpy (errbuf, msg, errbuf_size - 1)) = '\0'; ++ *((char *) mempcpy (errbuf, msg, errbuf_size - 1)) = '\0'; + #else + memcpy (errbuf, msg, errbuf_size - 1); + errbuf[errbuf_size - 1] = 0; diff --git a/yocto-poky/meta/recipes-extended/grep/grep/0001-Unset-need_charset_alias-when-building-for-musl.patch b/yocto-poky/meta/recipes-extended/grep/grep/0001-Unset-need_charset_alias-when-building-for-musl.patch new file mode 100644 index 000000000..ba1a4bab4 --- /dev/null +++ b/yocto-poky/meta/recipes-extended/grep/grep/0001-Unset-need_charset_alias-when-building-for-musl.patch @@ -0,0 +1,33 @@ +From b9565dc2fe0c4f7daaec91b7e83bc7313dee2f4a Mon Sep 17 00:00:00 2001 +From: Khem Raj <raj.khem@gmail.com> +Date: Mon, 13 Apr 2015 17:02:13 -0700 +Subject: [PATCH] Unset need_charset_alias when building for musl + +localcharset uses ac_cv_gnu_library_2_1 from glibc21.m4 +which actually shoudl be fixed in gnulib and then all downstream +projects will get it eventually. For now we apply the fix to +coreutils + +Upstream-Status: Pending + +Signed-off-by: Khem Raj <raj.khem@gmail.com> +--- + lib/gnulib.mk | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/lib/gnulib.mk b/lib/gnulib.mk +index e1d74db..c0e92dd 100644 +--- a/lib/gnulib.mk ++++ b/lib/gnulib.mk +@@ -1882,7 +1882,7 @@ install-exec-localcharset: all-local + case '$(host_os)' in \ + darwin[56]*) \ + need_charset_alias=true ;; \ +- darwin* | cygwin* | mingw* | pw32* | cegcc*) \ ++ darwin* | cygwin* | mingw* | pw32* | cegcc* | linux-musl*) \ + need_charset_alias=false ;; \ + *) \ + need_charset_alias=true ;; \ +-- +2.1.4 + diff --git a/yocto-poky/meta/recipes-extended/grep/grep/grep-fix-CVE-2015-1345.patch b/yocto-poky/meta/recipes-extended/grep/grep/grep-fix-CVE-2015-1345.patch new file mode 100644 index 000000000..e88a9880f --- /dev/null +++ b/yocto-poky/meta/recipes-extended/grep/grep/grep-fix-CVE-2015-1345.patch @@ -0,0 +1,154 @@ +Upstream-Status: Backport + +Backport patch to fix CVE-2015-1345. +http://git.savannah.gnu.org/cgit/grep.git/commit/?id=83a95bd + +Signed-off-by: Kai Kang <kai.kang@windriver.com> +--- +From 83a95bd8c8561875b948cadd417c653dbe7ef2e2 Mon Sep 17 00:00:00 2001 +From: Yuliy Pisetsky <ypisetsky@fb.com> +Date: Thu, 1 Jan 2015 15:36:55 -0800 +Subject: [PATCH] grep -F: fix a heap buffer (read) overrun + +grep's read buffer is often filled to its full size, except when +reading the final buffer of a file. In that case, the number of +bytes read may be far less than the size of the buffer. However, for +certain unusual pattern/text combinations, grep -F would mistakenly +examine bytes in that uninitialized region of memory when searching +for a match. With carefully chosen inputs, one can cause grep -F to +read beyond the end of that buffer altogether. This problem arose via +commit v2.18-90-g73893ff with the introduction of a more efficient +heuristic using what is now the memchr_kwset function. The use of +that function in bmexec_trans could leave TP much larger than EP, +and the subsequent call to bm_delta2_search would mistakenly access +beyond end of the main input read buffer. + +* src/kwset.c (bmexec_trans): When TP reaches or exceeds EP, +do not call bm_delta2_search. +* tests/kwset-abuse: New file. +* tests/Makefile.am (TESTS): Add it. +* THANKS.in: Update. +* NEWS (Bug fixes): Mention it. + +Prior to this patch, this command would trigger a UMR: + + printf %0360db 0 | valgrind src/grep -F $(printf %019dXb 0) + + Use of uninitialised value of size 8 + at 0x4142BE: bmexec_trans (kwset.c:657) + by 0x4143CA: bmexec (kwset.c:678) + by 0x414973: kwsexec (kwset.c:848) + by 0x414DC4: Fexecute (kwsearch.c:128) + by 0x404E2E: grepbuf (grep.c:1238) + by 0x4054BF: grep (grep.c:1417) + by 0x405CEB: grepdesc (grep.c:1645) + by 0x405EC1: grep_command_line_arg (grep.c:1692) + by 0x4077D4: main (grep.c:2570) + +See the accompanying test for how to trigger the heap buffer overrun. + +Thanks to Nima Aghdaii for testing and finding numerous +ways to break early iterations of this patch. +--- + NEWS | 5 +++++ + THANKS.in | 1 + + src/kwset.c | 2 ++ + tests/Makefile.am | 1 + + tests/kwset-abuse | 32 ++++++++++++++++++++++++++++++++ + 5 files changed, 41 insertions(+) + create mode 100755 tests/kwset-abuse + +diff --git a/NEWS b/NEWS +index 975440d..3835d8d 100644 +--- a/NEWS ++++ b/NEWS +@@ -2,6 +2,11 @@ GNU grep NEWS -*- outline -*- + + * Noteworthy changes in release ?.? (????-??-??) [?] + ++** Bug fixes ++ ++ grep no longer reads from uninitialized memory or from beyond the end ++ of the heap-allocated input buffer. ++ + + * Noteworthy changes in release 2.21 (2014-11-23) [stable] + +diff --git a/THANKS.in b/THANKS.in +index aeaf516..624478d 100644 +--- a/THANKS.in ++++ b/THANKS.in +@@ -62,6 +62,7 @@ Michael Aichlmayr mikla@nx.com + Miles Bader miles@ccs.mt.nec.co.jp + Mirraz Mirraz mirraz1@rambler.ru + Nelson H. F. Beebe beebe@math.utah.edu ++Nima Aghdaii naghdaii@fb.com + Olaf Kirch okir@ns.lst.de + Paul Kimoto kimoto@spacenet.tn.cornell.edu + Péter Radics mitchnull@gmail.com +diff --git a/src/kwset.c b/src/kwset.c +index 4003c8d..376f7c3 100644 +--- a/src/kwset.c ++++ b/src/kwset.c +@@ -643,6 +643,8 @@ bmexec_trans (kwset_t kwset, char const *text, size_t size) + if (! tp) + return -1; + tp++; ++ if (ep <= tp) ++ break; + } + } + } +diff --git a/tests/Makefile.am b/tests/Makefile.am +index 2cba2cd..0508cd2 100644 +--- a/tests/Makefile.am ++++ b/tests/Makefile.am +@@ -75,6 +75,7 @@ TESTS = \ + inconsistent-range \ + invalid-multibyte-infloop \ + khadafy \ ++ kwset-abuse \ + long-line-vs-2GiB-read \ + match-lines \ + max-count-overread \ +diff --git a/tests/kwset-abuse b/tests/kwset-abuse +new file mode 100755 +index 0000000..6d8ec0c +--- /dev/null ++++ b/tests/kwset-abuse +@@ -0,0 +1,32 @@ ++#! /bin/sh ++# Evoke a segfault in a hard-to-reach code path of kwset.c. ++# This bug affected grep versions 2.19 through 2.21. ++# ++# Copyright (C) 2015 Free Software Foundation, Inc. ++# ++# This program is free software: you can redistribute it and/or modify ++# it under the terms of the GNU General Public License as published by ++# the Free Software Foundation, either version 3 of the License, or ++# (at your option) any later version. ++ ++# This program is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++# GNU General Public License for more details. ++ ++# You should have received a copy of the GNU General Public License ++# along with this program. If not, see <http://www.gnu.org/licenses/>. ++ ++. "${srcdir=.}/init.sh"; path_prepend_ ../src ++ ++fail=0 ++ ++# This test case chooses a haystack of size 260,000, since prodding ++# with gdb showed a reallocation slightly larger than that in fillbuf. ++# To reach the buggy code, the needle must have length < 1/11 that of ++# the haystack, and 10,000 is a nice round number that fits the bill. ++printf '%0260000dXy\n' 0 | grep -F $(printf %010000dy 0) ++ ++test $? = 1 || fail=1 ++ ++Exit $fail +-- +2.4.1 + diff --git a/yocto-poky/meta/recipes-extended/grep/grep_2.21.bb b/yocto-poky/meta/recipes-extended/grep/grep_2.21.bb new file mode 100644 index 000000000..3661098c5 --- /dev/null +++ b/yocto-poky/meta/recipes-extended/grep/grep_2.21.bb @@ -0,0 +1,42 @@ +SUMMARY = "GNU grep utility" +HOMEPAGE = "http://savannah.gnu.org/projects/grep/" +BUGTRACKER = "http://savannah.gnu.org/bugs/?group=grep" +SECTION = "console/utils" +LICENSE = "GPLv3" +LIC_FILES_CHKSUM = "file://COPYING;md5=8006d9c814277c1bfc4ca22af94b59ee" + +SRC_URI = "${GNU_MIRROR}/grep/grep-${PV}.tar.xz \ + file://0001-Unset-need_charset_alias-when-building-for-musl.patch \ + file://grep-fix-CVE-2015-1345.patch \ + " + +SRC_URI[md5sum] = "43c48064d6409862b8a850db83c8038a" +SRC_URI[sha256sum] = "5244a11c00dee8e7e5e714b9aaa053ac6cbfa27e104abee20d3c778e4bb0e5de" + +inherit autotools gettext texinfo + +EXTRA_OECONF = "--disable-perl-regexp" + +do_configure_prepend () { + rm -f ${S}/m4/init.m4 +} + +do_install () { + autotools_do_install + install -d ${D}${base_bindir} + mv ${D}${bindir}/grep ${D}${base_bindir}/grep + mv ${D}${bindir}/egrep ${D}${base_bindir}/egrep + mv ${D}${bindir}/fgrep ${D}${base_bindir}/fgrep + rmdir ${D}${bindir}/ +} + +inherit update-alternatives + +ALTERNATIVE_PRIORITY = "100" + +ALTERNATIVE_${PN} = "grep egrep fgrep" +ALTERNATIVE_LINK_NAME[grep] = "${base_bindir}/grep" +ALTERNATIVE_LINK_NAME[egrep] = "${base_bindir}/egrep" +ALTERNATIVE_LINK_NAME[fgrep] = "${base_bindir}/fgrep" + +export CONFIG_SHELL="/bin/sh" diff --git a/yocto-poky/meta/recipes-extended/grep/grep_2.5.1a.bb b/yocto-poky/meta/recipes-extended/grep/grep_2.5.1a.bb new file mode 100644 index 000000000..5a2da2832 --- /dev/null +++ b/yocto-poky/meta/recipes-extended/grep/grep_2.5.1a.bb @@ -0,0 +1,56 @@ +SUMMARY = "Pattern matching utilities" +DESCRIPTION = "The GNU versions of commonly used grep utilities. The grep command searches one or more input \ +files for lines containing a match to a specified pattern." +SECTION = "console/utils" +LICENSE = "GPLv2" +LIC_FILES_CHKSUM = "file://COPYING;md5=0636e73ff0215e8d672dc4c32c317bb3" + +PR = "r2" + +SRC_URI = "${GNU_MIRROR}/grep/grep-${PV}.tar.bz2 \ + file://uclibc-fix.patch \ + file://grep_fix_for_automake-1.12.patch \ + file://gettext.patch \ + file://fix64-int-to-pointer.patch \ + file://Makevars \ + file://grep-CVE-2012-5667.patch \ + file://fix-for-texinfo-5.1.patch \ + file://grep-egrep-fgrep-Fix-LSB-NG-cases.patch \ + " + +SRC_URI[md5sum] = "52202fe462770fa6be1bb667bd6cf30c" +SRC_URI[sha256sum] = "38c8a2bb9223d1fb1b10bdd607cf44830afc92fd451ac4cd07619bf92bdd3132" + +inherit autotools gettext texinfo + +EXTRA_OECONF_INCLUDED_REGEX = "--without-included-regex" +EXTRA_OECONF_INCLUDED_REGEX_libc-musl = "--with-included-regex" + +EXTRA_OECONF = "--disable-perl-regexp \ + ${EXTRA_OECONF_INCLUDED_REGEX}" + +CFLAGS += "-D PROTOTYPES" +do_configure_prepend () { + rm -f ${S}/m4/init.m4 + cp -f ${WORKDIR}/Makevars ${S}/po/ +} + +do_install () { + autotools_do_install + install -d ${D}${base_bindir} + mv ${D}${bindir}/grep ${D}${base_bindir}/grep + mv ${D}${bindir}/egrep ${D}${base_bindir}/egrep + mv ${D}${bindir}/fgrep ${D}${base_bindir}/fgrep + rmdir ${D}${bindir}/ +} + +inherit update-alternatives + +ALTERNATIVE_PRIORITY = "100" + +ALTERNATIVE_${PN} = "grep egrep fgrep" +ALTERNATIVE_LINK_NAME[grep] = "${base_bindir}/grep" +ALTERNATIVE_LINK_NAME[egrep] = "${base_bindir}/egrep" +ALTERNATIVE_LINK_NAME[fgrep] = "${base_bindir}/fgrep" + +export CONFIG_SHELL="/bin/sh" |