Package: grep;
Reported by: Zev Weiss <zev <at> bewilderbeest.net>
Date: Thu, 18 Aug 2016 10:51:02 UTC
Severity: normal
Tags: patch
Done: Jim Meyering <jim <at> meyering.net>
Bug is archived. No further changes may be made.
To add a comment to this bug, you must first unarchive it, by sending
a message to control AT debbugs.gnu.org, with unarchive 24260 in the body.
You can then email your comments to 24260 AT debbugs.gnu.org in the normal way.
Toggle the display of automated, internal messages from the tracker.
View this report as an mbox folder, status mbox, maintainer mbox
bug-grep <at> gnu.org
:bug#24260
; Package grep
.
(Thu, 18 Aug 2016 10:51:02 GMT) Full text and rfc822 format available.Zev Weiss <zev <at> bewilderbeest.net>
:bug-grep <at> gnu.org
.
(Thu, 18 Aug 2016 10:51:02 GMT) Full text and rfc822 format available.Message #5 received at submit <at> debbugs.gnu.org (full text, mbox):
From: Zev Weiss <zev <at> bewilderbeest.net> To: bug-grep <at> gnu.org Cc: Zev Weiss <zev <at> bewilderbeest.net> Subject: [PATCH 1/6] dfa: thread-safety: remove 'dfa' global in dfa.c Date: Thu, 18 Aug 2016 05:50:14 -0500
* src/dfa.c: remove global dfa struct. A pointer to a struct dfa is instead added as a parameter to the functions that had been using the global. --- src/dfa.c | 207 +++++++++++++++++++++++++++++--------------------------------- 1 file changed, 98 insertions(+), 109 deletions(-) diff --git a/src/dfa.c b/src/dfa.c index d337bb6..5bd2a92 100644 --- a/src/dfa.c +++ b/src/dfa.c @@ -451,7 +451,7 @@ struct dfa #define ACCEPTS_IN_CONTEXT(prev, curr, state, dfa) \ SUCCEEDS_IN_CONTEXT ((dfa).states[state].constraint, prev, curr) -static void regexp (void); +static void regexp (struct dfa *dfa); /* A table indexed by byte values that contains the corresponding wide character (if any) for that byte. WEOF means the byte is not a @@ -670,16 +670,6 @@ dfa_charclass_index (struct dfa *d, charclass const s) return i; } -/* A pointer to the current dfa is kept here during parsing. */ -static struct dfa *dfa; - -/* Find the index of charclass S in the current DFA, or allocate a new one. */ -static size_t -charclass_index (charclass const s) -{ - return dfa_charclass_index (dfa, s); -} - /* Syntax bits controlling the behavior of the lexical analyzer. */ static reg_syntax_t syntax_bits; static bool syntax_bits_set; @@ -807,7 +797,7 @@ using_utf8 (void) processed more efficiently. */ static bool -using_simple_locale (void) +using_simple_locale (struct dfa *dfa) { /* The native character set is known to be compatible with the C locale. The following test isn't perfect, but it's good @@ -870,7 +860,7 @@ static wint_t wctok; /* Wide character representation of the current of length 1); otherwise set WC to WEOF. If there is no more input, report EOFERR if EOFERR is not null, and return lasttok = END otherwise. */ -# define FETCH_WC(c, wc, eoferr) \ +# define FETCH_WC(dfa, c, wc, eoferr) \ do { \ if (! lexleft) \ { \ @@ -984,7 +974,7 @@ find_pred (const char *str) /* Multibyte character handling sub-routine for lex. Parse a bracket expression and build a struct mb_char_classes. */ static token -parse_bracket_exp (void) +parse_bracket_exp (struct dfa *dfa) { bool invert; int c, c1, c2; @@ -1028,12 +1018,12 @@ parse_bracket_exp (void) work_mbc = NULL; memset (ccl, 0, sizeof ccl); - FETCH_WC (c, wc, _("unbalanced [")); + FETCH_WC (dfa, c, wc, _("unbalanced [")); if (c == '^') { - FETCH_WC (c, wc, _("unbalanced [")); + FETCH_WC (dfa, c, wc, _("unbalanced [")); invert = true; - known_bracket_exp = using_simple_locale (); + known_bracket_exp = using_simple_locale (dfa); } else invert = false; @@ -1050,7 +1040,7 @@ parse_bracket_exp (void) dfa is ever called. */ if (c == '[') { - FETCH_WC (c1, wc1, _("unbalanced [")); + FETCH_WC (dfa, c1, wc1, _("unbalanced [")); if ((c1 == ':' && (syntax_bits & RE_CHAR_CLASSES)) || c1 == '.' || c1 == '=') @@ -1060,7 +1050,7 @@ parse_bracket_exp (void) size_t len = 0; for (;;) { - FETCH_WC (c, wc, _("unbalanced [")); + FETCH_WC (dfa, c, wc, _("unbalanced [")); if ((c == c1 && *lexptr == ']') || lexleft == 0) break; if (len < MAX_BRACKET_STRING_LEN) @@ -1072,7 +1062,7 @@ parse_bracket_exp (void) str[len] = '\0'; /* Fetch bracket. */ - FETCH_WC (c, wc, _("unbalanced [")); + FETCH_WC (dfa, c, wc, _("unbalanced [")); if (c1 == ':') /* Build character class. POSIX allows character classes to match multicharacter collating elements, @@ -1099,7 +1089,7 @@ parse_bracket_exp (void) colon_warning_state |= 8; /* Fetch new lookahead character. */ - FETCH_WC (c1, wc1, _("unbalanced [")); + FETCH_WC (dfa, c1, wc1, _("unbalanced [")); continue; } @@ -1108,15 +1098,15 @@ parse_bracket_exp (void) } if (c == '\\' && (syntax_bits & RE_BACKSLASH_ESCAPE_IN_LISTS)) - FETCH_WC (c, wc, _("unbalanced [")); + FETCH_WC (dfa, c, wc, _("unbalanced [")); if (c1 == NOTCHAR) - FETCH_WC (c1, wc1, _("unbalanced [")); + FETCH_WC (dfa, c1, wc1, _("unbalanced [")); if (c1 == '-') /* build range characters. */ { - FETCH_WC (c2, wc2, _("unbalanced [")); + FETCH_WC (dfa, c2, wc2, _("unbalanced [")); /* A bracket expression like [a-[.aa.]] matches an unknown set. Treat it like [-a[.aa.]] while parsing it, and @@ -1137,17 +1127,17 @@ parse_bracket_exp (void) else { if (c2 == '\\' && (syntax_bits & RE_BACKSLASH_ESCAPE_IN_LISTS)) - FETCH_WC (c2, wc2, _("unbalanced [")); + FETCH_WC (dfa, c2, wc2, _("unbalanced [")); colon_warning_state |= 8; - FETCH_WC (c1, wc1, _("unbalanced [")); + FETCH_WC (dfa, c1, wc1, _("unbalanced [")); /* Treat [x-y] as a range if x != y. */ if (wc != wc2 || wc == WEOF) { if (dfa->multibyte) known_bracket_exp = false; - else if (using_simple_locale ()) + else if (using_simple_locale (dfa)) { int ci; for (ci = c; ci <= c2; ci++) @@ -1214,7 +1204,7 @@ parse_bracket_exp (void) if (dfa->multibyte) { work_mbc->invert = invert; - work_mbc->cset = emptyset (ccl) ? -1 : charclass_index (ccl); + work_mbc->cset = emptyset (ccl) ? -1 : dfa_charclass_index (dfa, ccl); return MBCSET; } @@ -1226,7 +1216,7 @@ parse_bracket_exp (void) clrbit ('\n', ccl); } - return CSET + charclass_index (ccl); + return CSET + dfa_charclass_index (dfa, ccl); } #define PUSH_LEX_STATE(s) \ @@ -1244,7 +1234,7 @@ parse_bracket_exp (void) while (false) static token -lex (void) +lex (struct dfa *dfa) { int c, c2; bool backslash = false; @@ -1259,7 +1249,7 @@ lex (void) "if (backslash) ...". */ for (i = 0; i < 2; ++i) { - FETCH_WC (c, wctok, NULL); + FETCH_WC (dfa, c, wctok, NULL); switch (c) { @@ -1472,7 +1462,7 @@ lex (void) if (syntax_bits & RE_DOT_NOT_NULL) clrbit ('\0', ccl); laststart = false; - return lasttok = CSET + charclass_index (ccl); + return lasttok = CSET + dfa_charclass_index (dfa, ccl); case 's': case 'S': @@ -1487,7 +1477,7 @@ lex (void) if (c == 'S') notset (ccl); laststart = false; - return lasttok = CSET + charclass_index (ccl); + return lasttok = CSET + dfa_charclass_index (dfa, ccl); } /* FIXME: see if optimizing this, as is done with ANYCHAR and @@ -1498,7 +1488,7 @@ lex (void) strings, each minus its "already processed" '['. */ PUSH_LEX_STATE (c == 's' ? "[:space:]]" : "^[:space:]]"); - lasttok = parse_bracket_exp (); + lasttok = parse_bracket_exp (dfa); POP_LEX_STATE (); @@ -1519,7 +1509,7 @@ lex (void) if (c == 'W') notset (ccl); laststart = false; - return lasttok = CSET + charclass_index (ccl); + return lasttok = CSET + dfa_charclass_index (dfa, ccl); } /* FIXME: see if optimizing this, as is done with ANYCHAR and @@ -1530,7 +1520,7 @@ lex (void) strings, each minus its "already processed" '['. */ PUSH_LEX_STATE (c == 'w' ? "_[:alnum:]]" : "^_[:alnum:]]"); - lasttok = parse_bracket_exp (); + lasttok = parse_bracket_exp (dfa); POP_LEX_STATE (); @@ -1541,7 +1531,7 @@ lex (void) if (backslash) goto normal_char; laststart = false; - return lasttok = parse_bracket_exp (); + return lasttok = parse_bracket_exp (dfa); default: normal_char: @@ -1555,7 +1545,7 @@ lex (void) { zeroset (ccl); setbit_case_fold_c (c, ccl); - return lasttok = CSET + charclass_index (ccl); + return lasttok = CSET + dfa_charclass_index (dfa, ccl); } return lasttok = c; @@ -1578,7 +1568,7 @@ static size_t depth; /* Current depth of a hypothetical stack dfaanalyze. */ static void -addtok_mb (token t, int mbprop) +addtok_mb (struct dfa *dfa, token t, int mbprop) { if (dfa->talloc == dfa->tindex) { @@ -1618,12 +1608,12 @@ addtok_mb (token t, int mbprop) dfa->depth = depth; } -static void addtok_wc (wint_t wc); +static void addtok_wc (struct dfa *dfa, wint_t wc); /* Add the given token to the parse tree, maintaining the depth count and updating the maximum depth if necessary. */ static void -addtok (token t) +addtok (struct dfa *dfa, token t) { if (dfa->multibyte && t == MBCSET) { @@ -1635,9 +1625,9 @@ addtok (token t) This does not require UTF-8. */ for (i = 0; i < work_mbc->nchars; i++) { - addtok_wc (work_mbc->chars[i]); + addtok_wc (dfa, work_mbc->chars[i]); if (need_or) - addtok (OR); + addtok (dfa, OR); need_or = true; } work_mbc->nchars = 0; @@ -1646,14 +1636,14 @@ addtok (token t) that the mbcset is empty now. Do nothing in that case. */ if (work_mbc->cset != -1) { - addtok (CSET + work_mbc->cset); + addtok (dfa, CSET + work_mbc->cset); if (need_or) - addtok (OR); + addtok (dfa, OR); } } else { - addtok_mb (t, 3); + addtok_mb (dfa, t, 3); } } @@ -1664,7 +1654,7 @@ addtok (token t) <mb1(1st-byte)><mb1(2nd-byte)><CAT><mb1(3rd-byte)><CAT> <mb2(1st-byte)><mb2(2nd-byte)><CAT><mb2(3rd-byte)><CAT><CAT> */ static void -addtok_wc (wint_t wc) +addtok_wc (struct dfa *dfa, wint_t wc) { unsigned char buf[MB_LEN_MAX]; mbstate_t s = { 0 }; @@ -1681,16 +1671,16 @@ addtok_wc (wint_t wc) buf[0] = 0; } - addtok_mb (buf[0], cur_mb_len == 1 ? 3 : 1); + addtok_mb (dfa, buf[0], cur_mb_len == 1 ? 3 : 1); for (i = 1; i < cur_mb_len; i++) { - addtok_mb (buf[i], i == cur_mb_len - 1 ? 2 : 0); - addtok (CAT); + addtok_mb (dfa, buf[i], i == cur_mb_len - 1 ? 2 : 0); + addtok (dfa, CAT); } } static void -add_utf8_anychar (void) +add_utf8_anychar (struct dfa *dfa) { static charclass const utf8_classes[5] = { /* 80-bf: non-leading bytes. */ @@ -1724,7 +1714,7 @@ add_utf8_anychar (void) if (syntax_bits & RE_DOT_NOT_NULL) clrbit ('\0', c); } - dfa->utf8_anychar_classes[i] = CSET + charclass_index (c); + dfa->utf8_anychar_classes[i] = CSET + dfa_charclass_index (dfa, c); } /* A valid UTF-8 character is @@ -1738,12 +1728,12 @@ add_utf8_anychar (void) and you get "B|(C|(D|EA)A)A". And since the token buffer is in reverse Polish notation, you get "B C D E A CAT OR A CAT OR A CAT OR". */ for (i = 1; i < n; i++) - addtok (dfa->utf8_anychar_classes[i]); + addtok (dfa, dfa->utf8_anychar_classes[i]); while (--i > 1) { - addtok (dfa->utf8_anychar_classes[0]); - addtok (CAT); - addtok (OR); + addtok (dfa, dfa->utf8_anychar_classes[0]); + addtok (dfa, CAT); + addtok (dfa, OR); } } @@ -1783,15 +1773,15 @@ add_utf8_anychar (void) The parser builds a parse tree in postfix form in an array of tokens. */ static void -atom (void) +atom (struct dfa *dfa) { if (tok == WCHAR) { if (wctok == WEOF) - addtok (BACKREF); + addtok (dfa, BACKREF); else { - addtok_wc (wctok); + addtok_wc (dfa, wctok); if (case_fold) { @@ -1799,13 +1789,13 @@ atom (void) unsigned int i, n = case_folded_counterparts (wctok, folded); for (i = 0; i < n; i++) { - addtok_wc (folded[i]); - addtok (OR); + addtok_wc (dfa, folded[i]); + addtok (dfa, OR); } } } - tok = lex (); + tok = lex (dfa); } else if (tok == ANYCHAR && using_utf8 ()) { @@ -1816,32 +1806,32 @@ atom (void) it is done above in add_utf8_anychar. So, let's start with UTF-8: it is the most used, and the structure of the encoding makes the correctness more obvious. */ - add_utf8_anychar (); - tok = lex (); + add_utf8_anychar (dfa); + tok = lex (dfa); } else if ((tok >= 0 && tok < NOTCHAR) || tok >= CSET || tok == BACKREF || tok == BEGLINE || tok == ENDLINE || tok == BEGWORD || tok == ANYCHAR || tok == MBCSET || tok == ENDWORD || tok == LIMWORD || tok == NOTLIMWORD) { - addtok (tok); - tok = lex (); + addtok (dfa, tok); + tok = lex (dfa); } else if (tok == LPAREN) { - tok = lex (); - regexp (); + tok = lex (dfa); + regexp (dfa); if (tok != RPAREN) dfaerror (_("unbalanced (")); - tok = lex (); + tok = lex (dfa); } else - addtok (EMPTY); + addtok (dfa, EMPTY); } /* Return the number of tokens in the given subexpression. */ static size_t _GL_ATTRIBUTE_PURE -nsubtoks (size_t tindex) +nsubtoks (struct dfa *dfa, size_t tindex) { size_t ntoks1; @@ -1852,90 +1842,90 @@ nsubtoks (size_t tindex) case QMARK: case STAR: case PLUS: - return 1 + nsubtoks (tindex - 1); + return 1 + nsubtoks (dfa, tindex - 1); case CAT: case OR: - ntoks1 = nsubtoks (tindex - 1); - return 1 + ntoks1 + nsubtoks (tindex - 1 - ntoks1); + ntoks1 = nsubtoks (dfa, tindex - 1); + return 1 + ntoks1 + nsubtoks (dfa, tindex - 1 - ntoks1); } } /* Copy the given subexpression to the top of the tree. */ static void -copytoks (size_t tindex, size_t ntokens) +copytoks (struct dfa *dfa, size_t tindex, size_t ntokens) { size_t i; if (dfa->multibyte) for (i = 0; i < ntokens; ++i) - addtok_mb (dfa->tokens[tindex + i], dfa->multibyte_prop[tindex + i]); + addtok_mb (dfa, dfa->tokens[tindex + i], dfa->multibyte_prop[tindex + i]); else for (i = 0; i < ntokens; ++i) - addtok_mb (dfa->tokens[tindex + i], 3); + addtok_mb (dfa, dfa->tokens[tindex + i], 3); } static void -closure (void) +closure (struct dfa *dfa) { int i; size_t tindex, ntokens; - atom (); + atom (dfa); while (tok == QMARK || tok == STAR || tok == PLUS || tok == REPMN) if (tok == REPMN && (minrep || maxrep)) { - ntokens = nsubtoks (dfa->tindex); + ntokens = nsubtoks (dfa, dfa->tindex); tindex = dfa->tindex - ntokens; if (maxrep < 0) - addtok (PLUS); + addtok (dfa, PLUS); if (minrep == 0) - addtok (QMARK); + addtok (dfa, QMARK); for (i = 1; i < minrep; ++i) { - copytoks (tindex, ntokens); - addtok (CAT); + copytoks (dfa, tindex, ntokens); + addtok (dfa, CAT); } for (; i < maxrep; ++i) { - copytoks (tindex, ntokens); - addtok (QMARK); - addtok (CAT); + copytoks (dfa, tindex, ntokens); + addtok (dfa, QMARK); + addtok (dfa, CAT); } - tok = lex (); + tok = lex (dfa); } else if (tok == REPMN) { - dfa->tindex -= nsubtoks (dfa->tindex); - tok = lex (); - closure (); + dfa->tindex -= nsubtoks (dfa, dfa->tindex); + tok = lex (dfa); + closure (dfa); } else { - addtok (tok); - tok = lex (); + addtok (dfa, tok); + tok = lex (dfa); } } static void -branch (void) +branch (struct dfa* dfa) { - closure (); + closure (dfa); while (tok != RPAREN && tok != OR && tok >= 0) { - closure (); - addtok (CAT); + closure (dfa); + addtok (dfa, CAT); } } static void -regexp (void) +regexp (struct dfa *dfa) { - branch (); + branch (dfa); while (tok == OR) { - tok = lex (); - branch (); - addtok (OR); + tok = lex (dfa); + branch (dfa); + addtok (dfa, OR); } } @@ -1945,13 +1935,12 @@ regexp (void) static void dfaparse (char const *s, size_t len, struct dfa *d) { - dfa = d; lexptr = s; lexleft = len; lasttok = END; laststart = true; parens = 0; - if (dfa->multibyte) + if (d->multibyte) { cur_mb_len = 0; memset (&d->mbs, 0, sizeof d->mbs); @@ -1960,19 +1949,19 @@ dfaparse (char const *s, size_t len, struct dfa *d) if (!syntax_bits_set) dfaerror (_("no syntax specified")); - tok = lex (); + tok = lex (d); depth = d->depth; - regexp (); + regexp (d); if (tok != END) dfaerror (_("unbalanced )")); - addtok (END - d->nregexps); - addtok (CAT); + addtok (d, END - d->nregexps); + addtok (d, CAT); if (d->nregexps) - addtok (OR); + addtok (d, OR); ++d->nregexps; } -- 2.8.1
bug-grep <at> gnu.org
:bug#24260
; Package grep
.
(Thu, 18 Aug 2016 14:48:01 GMT) Full text and rfc822 format available.Message #8 received at 24260 <at> debbugs.gnu.org (full text, mbox):
From: Jim Meyering <jim <at> meyering.net> To: Zev Weiss <zev <at> bewilderbeest.net> Cc: 24260 <at> debbugs.gnu.org Subject: Re: bug#24260: [PATCH 1/6] dfa: thread-safety: remove 'dfa' global in dfa.c Date: Thu, 18 Aug 2016 07:46:53 -0700
On Thu, Aug 18, 2016 at 3:50 AM, Zev Weiss <zev <at> bewilderbeest.net> wrote: > * src/dfa.c: remove global dfa struct. A pointer to a struct dfa is > instead added as a parameter to the functions that had been using the > global. Thank you. At first glance, I like it, but saw no "const" attribute on any added parameter declaration. Surely some of those can be "const". Would you make all new dfa parameters const that can be?
bug-grep <at> gnu.org
:bug#24260
; Package grep
.
(Fri, 19 Aug 2016 16:10:01 GMT) Full text and rfc822 format available.Message #11 received at 24260 <at> debbugs.gnu.org (full text, mbox):
From: Norihiro Tanaka <noritnk <at> kcn.ne.jp> To: Zev Weiss <zev <at> bewilderbeest.net> Cc: 24260 <at> debbugs.gnu.org Subject: Re: bug#24260: [PATCH 1/6] dfa: thread-safety: remove 'dfa' global in dfa.c Date: Sat, 20 Aug 2016 01:09:20 +0900
On Thu, 18 Aug 2016 05:50:14 -0500 Zev Weiss <zev <at> bewilderbeest.net> wrote: > * src/dfa.c: remove global dfa struct. A pointer to a struct dfa is > instead added as a parameter to the functions that had been using the > global. Hi, Why we move global variable DFA into struct dfa, Although only used in dfacomp() which I seem that does not support multithread even after applying your patches? Variables for syntax (lexptr etc.), like. Thanks, Norihiro
bug-grep <at> gnu.org
:bug#24260
; Package grep
.
(Fri, 19 Aug 2016 16:39:01 GMT) Full text and rfc822 format available.Message #14 received at 24260 <at> debbugs.gnu.org (full text, mbox):
From: Jim Meyering <jim <at> meyering.net> To: Zev Weiss <zev <at> bewilderbeest.net> Cc: 24260 <at> debbugs.gnu.org Subject: Re: bug#24260: [PATCH 1/6] dfa: thread-safety: remove 'dfa' global in dfa.c Date: Fri, 19 Aug 2016 09:37:54 -0700
[Message part 1 (text/plain, inline)]
On Thu, Aug 18, 2016 at 7:46 AM, Jim Meyering <jim <at> meyering.net> wrote: > On Thu, Aug 18, 2016 at 3:50 AM, Zev Weiss <zev <at> bewilderbeest.net> wrote: >> * src/dfa.c: remove global dfa struct. A pointer to a struct dfa is >> instead added as a parameter to the functions that had been using the >> global. > > Thank you. > At first glance, I like it, but saw no "const" attribute on any added > parameter declaration. > Surely some of those can be "const". Would you make all new dfa > parameters const that can be? Hi Zev, I went ahead and did that for you. There were only two: using_simple_locale and nsubtoks. I amended your patch to make those "dfa" parameters "const" and adjusted the commit message to conform to our standards: - use active voice, not passive - enumerate each affected variable/function Yes, it is tedious, but for a project as mature as grep, it is essential. You would not believe the number of times that having the ability to cross-check the prose of a ChangeLog entry with the actual change has led to pre-commit improvements. Now, since I've changed a commit in your name that I expect to push, it is your job to proof-read it to make sure you're still happy to have your name on this modified commit. We treat our git repository as write-once. No non-fast-forward pushes on master, so once pushed, it's there forever. Let me know.
[dfa-remove-file-scoped-global.diff (text/plain, attachment)]
bug-grep <at> gnu.org
:bug#24260
; Package grep
.
(Fri, 19 Aug 2016 21:47:02 GMT) Full text and rfc822 format available.Message #17 received at 24260 <at> debbugs.gnu.org (full text, mbox):
From: Zev Weiss <zev <at> bewilderbeest.net> To: Norihiro Tanaka <noritnk <at> kcn.ne.jp> Cc: 24260 <at> debbugs.gnu.org Subject: Re: bug#24260: [PATCH 1/6] dfa: thread-safety: remove 'dfa' global in dfa.c Date: Fri, 19 Aug 2016 16:46:16 -0500
On Sat, Aug 20, 2016 at 01:09:20AM +0900, Norihiro Tanaka wrote: > >On Thu, 18 Aug 2016 05:50:14 -0500 >Zev Weiss <zev <at> bewilderbeest.net> wrote: > >> * src/dfa.c: remove global dfa struct. A pointer to a struct dfa is >> instead added as a parameter to the functions that had been using the >> global. > >Hi, > >Why we move global variable DFA into struct dfa, Although only used in >dfacomp() which I seem that does not support multithread even after >applying your patches? Variables for syntax (lexptr etc.), like. > >Thanks, >Norihiro > I'm not sure I understand -- the first patch in my series just removes the global dfa variable and instead passes it as a parameter. This alone doesn't make the whole thing thread-safe, it's just a first step along the way. With the whole series applied though I believe everything should be thread-safe (for example, lexptr is moved into a sub-struct of struct dfa in the second patch in the series). Is there some non-thread-safe code that would remain after applying all six patches in the series? Thanks, Zev
bug-grep <at> gnu.org
:bug#24260
; Package grep
.
(Fri, 19 Aug 2016 21:55:01 GMT) Full text and rfc822 format available.Message #20 received at 24260 <at> debbugs.gnu.org (full text, mbox):
From: Zev Weiss <zev <at> bewilderbeest.net> To: Jim Meyering <jim <at> meyering.net> Cc: 24260 <at> debbugs.gnu.org Subject: Re: bug#24260: [PATCH 1/6] dfa: thread-safety: remove 'dfa' global in dfa.c Date: Fri, 19 Aug 2016 16:54:37 -0500
On Fri, Aug 19, 2016 at 09:37:54AM -0700, Jim Meyering wrote: >On Thu, Aug 18, 2016 at 7:46 AM, Jim Meyering <jim <at> meyering.net> wrote: >> On Thu, Aug 18, 2016 at 3:50 AM, Zev Weiss <zev <at> bewilderbeest.net> wrote: >>> * src/dfa.c: remove global dfa struct. A pointer to a struct dfa is >>> instead added as a parameter to the functions that had been using the >>> global. >> >> Thank you. >> At first glance, I like it, but saw no "const" attribute on any added >> parameter declaration. >> Surely some of those can be "const". Would you make all new dfa >> parameters const that can be? > >Hi Zev, > >I went ahead and did that for you. >There were only two: using_simple_locale and nsubtoks. >I amended your patch to make those "dfa" parameters "const" >and adjusted the commit message to conform to our standards: > > - use active voice, not passive > - enumerate each affected variable/function > >Yes, it is tedious, but for a project as mature as grep, it is >essential. You would not believe the number of times that having the >ability to cross-check the prose of a ChangeLog entry with the actual >change has led to pre-commit improvements. > >Now, since I've changed a commit in your name that I expect to push, >it is your job to proof-read it to make sure you're still happy to >have your name on this modified commit. We treat our git repository as >write-once. No non-fast-forward pushes on master, so once pushed, it's >there forever. Let me know. Hi Jim, The amended version of the patch looks fine to me, thanks for checking. Zev
bug-grep <at> gnu.org
:bug#24260
; Package grep
.
(Fri, 19 Aug 2016 22:26:02 GMT) Full text and rfc822 format available.Message #23 received at 24260 <at> debbugs.gnu.org (full text, mbox):
From: Norihiro Tanaka <noritnk <at> kcn.ne.jp> To: Zev Weiss <zev <at> bewilderbeest.net> Cc: 24260 <at> debbugs.gnu.org Subject: Re: bug#24260: [PATCH 1/6] dfa: thread-safety: remove 'dfa' global in dfa.c Date: Sat, 20 Aug 2016 07:25:06 +0900
On Fri, 19 Aug 2016 16:46:16 -0500 Zev Weiss <zev <at> bewilderbeest.net> wrote: > I'm not sure I understand -- the first patch in my series just removes the global dfa variable and instead passes it as a parameter. This alone doesn't make the whole thing thread-safe, it's just a first step along the way. With the whole series applied though I believe everything should be thread-safe (for example, lexptr is moved into a sub-struct of struct dfa in the second patch in the series). > > Is there some non-thread-safe code that would remain after applying all six patches in the series? Hi Zev, Thanks for replying. I say a reverse thing. I believe that there is no problem if only dfaexec() is thread safe. In other words, I think that variables that we must really move to support multipthread are eolbyte, sbit and letters, newline only which depend on eolbyte, and mbrtowc_cache[] and never_trail should be changed as the value does not changed by other thread at running dfaexec() by marking already set, and we can leave other static and global as syntax etc. It will bring us to the saving of memory albeit only slightly. Thanks, Norihiro
bug-grep <at> gnu.org
:bug#24260
; Package grep
.
(Fri, 19 Aug 2016 22:35:02 GMT) Full text and rfc822 format available.Message #26 received at 24260 <at> debbugs.gnu.org (full text, mbox):
From: Norihiro Tanaka <noritnk <at> kcn.ne.jp> To: Zev Weiss <zev <at> bewilderbeest.net> Cc: 24260 <at> debbugs.gnu.org Subject: Re: bug#24260: [PATCH 1/6] dfa: thread-safety: remove 'dfa' global in dfa.c Date: Sat, 20 Aug 2016 07:34:20 +0900
On Sat, 20 Aug 2016 07:25:06 +0900 Norihiro Tanaka <noritnk <at> kcn.ne.jp> wrote: > Hi Zev, > > Thanks for replying. I say a reverse thing. > > I believe that there is no problem if only dfaexec() is thread safe. In > other words, I think that variables that we must really move to support > multipthread are eolbyte, sbit and letters, newline only which depend > on eolbyte, and mbrtowc_cache[] and never_trail should be changed as the > value does not changed by other thread at running dfaexec() by marking > already set, and we can leave other static and global as syntax etc. > It will bring us to the saving of memory albeit only slightly. Ah, I replied for first patch, but my all comments are for second or later patches. I agree for first patch entirely regardless of whether we support multithread or not.
Jim Meyering <jim <at> meyering.net>
:Zev Weiss <zev <at> bewilderbeest.net>
:Message #31 received at 24260-done <at> debbugs.gnu.org (full text, mbox):
From: Jim Meyering <jim <at> meyering.net> To: Zev Weiss <zev <at> bewilderbeest.net> Cc: 24260-done <at> debbugs.gnu.org Subject: Re: bug#24260: [PATCH 1/6] dfa: thread-safety: remove 'dfa' global in dfa.c Date: Fri, 19 Aug 2016 15:53:12 -0700
On Fri, Aug 19, 2016 at 2:54 PM, Zev Weiss <zev <at> bewilderbeest.net> wrote: ... > The amended version of the patch looks fine to me, thanks for checking. Pushed.
bug-grep <at> gnu.org
:bug#24260
; Package grep
.
(Fri, 19 Aug 2016 23:04:01 GMT) Full text and rfc822 format available.Message #34 received at 24260 <at> debbugs.gnu.org (full text, mbox):
From: Zev Weiss <zev <at> bewilderbeest.net> To: Norihiro Tanaka <noritnk <at> kcn.ne.jp> Cc: 24260 <at> debbugs.gnu.org Subject: Re: bug#24260: [PATCH 1/6] dfa: thread-safety: remove 'dfa' global in dfa.c Date: Fri, 19 Aug 2016 18:03:19 -0500
On Sat, Aug 20, 2016 at 07:34:20AM +0900, Norihiro Tanaka wrote: > >On Sat, 20 Aug 2016 07:25:06 +0900 >Norihiro Tanaka <noritnk <at> kcn.ne.jp> wrote: >> Hi Zev, >> >> Thanks for replying. I say a reverse thing. >> >> I believe that there is no problem if only dfaexec() is thread safe. In >> other words, I think that variables that we must really move to support >> multipthread are eolbyte, sbit and letters, newline only which depend >> on eolbyte, and mbrtowc_cache[] and never_trail should be changed as the >> value does not changed by other thread at running dfaexec() by marking >> already set, and we can leave other static and global as syntax etc. >> It will bring us to the saving of memory albeit only slightly. > >Ah, I replied for first patch, but my all comments are for second or >later patches. I agree for first patch entirely regardless of whether >we support multithread or not. > Okay -- so your question is about the necessity of making operations other than dfaexec() thread-safe? That's reasonable, though (obviously) I went ahead made the other operations thread-safe anyway. 1) It was, in some ways, simpler for me. Rather than tracing through exactly when and how each variable was used to determine which could be safely left as globals, it was somewhat easier to just migrate all mutable global state into a context struct of some sort. 2) While it's not a terribly elegant approach (and certainly doesn't have to be this way), my current multithreading patch set actually does compile the regex separately (and concurrently) in each thread (for reasons similar to #1 above -- it was just simpler to do it that way). 3) Finally, and perhaps most compellingly in my opinion, especially since dfa.c is soon going to become a library component, it seems desirable for compilation etc. to be made thread safe anyway. While grep doesn't really *need* to have it be thread-safe (and I'm guessing gawk and sed currently don't either), it doesn't seem all that unreasonable for an application to *want* to concurrently compile independent regexes, so it would seem a bit unfortunate to me for a library (gnulib) to not support that at all. Zev
bug-grep <at> gnu.org
:bug#24260
; Package grep
.
(Sat, 20 Aug 2016 23:42:02 GMT) Full text and rfc822 format available.Message #37 received at 24260 <at> debbugs.gnu.org (full text, mbox):
From: Norihiro Tanaka <noritnk <at> kcn.ne.jp> To: Zev Weiss <zev <at> bewilderbeest.net> Cc: 24260 <at> debbugs.gnu.org Subject: Re: bug#24260: [PATCH 1/6] dfa: thread-safety: remove 'dfa' global in dfa.c Date: Sun, 21 Aug 2016 08:40:59 +0900
On Fri, 19 Aug 2016 18:03:19 -0500 Zev Weiss <zev <at> bewilderbeest.net> wrote: > Okay -- so your question is about the necessity of making operations other than dfaexec() thread-safe? That's reasonable, though (obviously) I went ahead made the other operations thread-safe anyway. > > 1) It was, in some ways, simpler for me. Rather than tracing through exactly when and how each variable was used to determine which could be safely left as globals, it was somewhat easier to just migrate all mutable global state into a context struct of some sort. > > 2) While it's not a terribly elegant approach (and certainly doesn't have to be this way), my current multithreading patch set actually does compile the regex separately (and concurrently) in each thread (for reasons similar to #1 above -- it was just simpler to do it that way). > > 3) Finally, and perhaps most compellingly in my opinion, especially since dfa.c is soon going to become a library component, it seems desirable for compilation etc. to be made thread safe anyway. While grep doesn't really *need* to have it be thread-safe (and I'm guessing gawk and sed currently don't either), it doesn't seem all that unreasonable for an application to *want* to concurrently compile independent regexes, so it would seem a bit unfortunate to me for a library (gnulib) to not support that at all. > > > Zev Thanks. dfa exists only to improve performance of regex and regex uses static variable for syntax which is re_syntax_options. So we can not still change compile phase thread-safe. In addition, IMHO, I think that compile should run only one time even if grep runs multithread as you say in 2, and I do not expect that members used in only compile not used in execute is generated each more than one. BTW, mbcsets, nmbcsets and mbcsets_alloc use in only compile, so I think that they may be also put out of struct dfa in future. (That isn't always to move them to static or global) Norihiro
bug-grep <at> gnu.org
:bug#24260
; Package grep
.
(Sun, 21 Aug 2016 01:22:02 GMT) Full text and rfc822 format available.Message #40 received at 24260 <at> debbugs.gnu.org (full text, mbox):
From: Zev Weiss <zev <at> bewilderbeest.net> To: Norihiro Tanaka <noritnk <at> kcn.ne.jp> Cc: 24260 <at> debbugs.gnu.org Subject: Re: bug#24260: [PATCH 1/6] dfa: thread-safety: remove 'dfa' global in dfa.c Date: Sat, 20 Aug 2016 20:21:33 -0500
On Sun, Aug 21, 2016 at 08:40:59AM +0900, Norihiro Tanaka wrote: > >On Fri, 19 Aug 2016 18:03:19 -0500 >Zev Weiss <zev <at> bewilderbeest.net> wrote: > >> Okay -- so your question is about the necessity of making operations other than dfaexec() thread-safe? That's reasonable, though (obviously) I went ahead made the other operations thread-safe anyway. >> >> 1) It was, in some ways, simpler for me. Rather than tracing through exactly when and how each variable was used to determine which could be safely left as globals, it was somewhat easier to just migrate all mutable global state into a context struct of some sort. >> >> 2) While it's not a terribly elegant approach (and certainly doesn't have to be this way), my current multithreading patch set actually does compile the regex separately (and concurrently) in each thread (for reasons similar to #1 above -- it was just simpler to do it that way). >> >> 3) Finally, and perhaps most compellingly in my opinion, especially since dfa.c is soon going to become a library component, it seems desirable for compilation etc. to be made thread safe anyway. While grep doesn't really *need* to have it be thread-safe (and I'm guessing gawk and sed currently don't either), it doesn't seem all that unreasonable for an application to *want* to concurrently compile independent regexes, so it would seem a bit unfortunate to me for a library (gnulib) to not support that at all. >> >> >> Zev > >Thanks. dfa exists only to improve performance of regex and regex uses >static variable for syntax which is re_syntax_options. So we can not >still change compile phase thread-safe. Hmm, I hadn't noticed that previously -- thanks. > >In addition, IMHO, I think that compile should run only one time even >if grep runs multithread as you say in 2, and I do not expect that >members used in only compile not used in execute is generated each more >than one. > Yes, certainly it would be preferable for a multithreaded grep to compile the regex once regardless of the thread count. I've been meaning to implement it that way in my patch set, but haven't quite gotten around to it yet...hopefully soon. Zev
Debbugs Internal Request <help-debbugs <at> gnu.org>
to internal_control <at> debbugs.gnu.org
.
(Sun, 18 Sep 2016 11:24:03 GMT) Full text and rfc822 format available.
GNU bug tracking system
Copyright (C) 1999 Darren O. Benham,
1997,2003 nCipher Corporation Ltd,
1994-97 Ian Jackson.