GNU bug report logs - #23335
[PATCH 1/2] tests: Added two new tests for unexpand from TODO

Previous Next

Package: coreutils;

Reported by: Jonathan Buchanan <jonathan.russ.buchanan <at> gmail.com>

Date: Fri, 22 Apr 2016 01:15:01 UTC

Severity: normal

Tags: patch

To reply to this bug, email your comments to 23335 AT debbugs.gnu.org.

Toggle the display of automated, internal messages from the tracker.

View this report as an mbox folder, status mbox, maintainer mbox


Report forwarded to bug-coreutils <at> gnu.org:
bug#23335; Package coreutils. (Fri, 22 Apr 2016 01:15:01 GMT) Full text and rfc822 format available.

Acknowledgement sent to Jonathan Buchanan <jonathan.russ.buchanan <at> gmail.com>:
New bug report received and forwarded. Copy sent to bug-coreutils <at> gnu.org. (Fri, 22 Apr 2016 01:15:01 GMT) Full text and rfc822 format available.

Message #5 received at submit <at> debbugs.gnu.org (full text, mbox):

From: Jonathan Buchanan <jonathan.russ.buchanan <at> gmail.com>
To: bug-coreutils <at> gnu.org
Cc: Jonathan Buchanan <jonathan.russ.buchanan <at> gmail.com>
Subject: [PATCH 1/2] tests: Added two new tests for unexpand from TODO
Date: Thu, 21 Apr 2016 20:33:35 -0400
* tests/misc/unexpand.pl: Added two tests from TODO that should pass
according to the specification but currently do not pass.
---
 tests/misc/unexpand.pl | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tests/misc/unexpand.pl b/tests/misc/unexpand.pl
index c592c5a..2cd84a1 100755
--- a/tests/misc/unexpand.pl
+++ b/tests/misc/unexpand.pl
@@ -48,6 +48,8 @@ my @Tests =
      ['aa-8', '-a', {IN=> 'w'.' 'x 8 ."y\n"}, {OUT=> "w\t y\n"}],
 
      ['b-1', '-t', '2,4', {IN=> "      ."}, {OUT=>"\t\t  ."}],
+     ['b-2', '-t', '8,9', {IN=> "x\t \t y\n"}, {OUT=>"x\t \t y\n"}],
+     ['b-3', '-t', '5,8', {IN=> "x\t \t y\n"}, {OUT=>"x\ty\n"}],
      # These would infloop prior to textutils-2.0d.
 
      ['infloop-1', '-t', '1,2', {IN=> " \t\t .\n"}, {OUT=>"\t\t\t .\n"}],
-- 
2.8.0





Information forwarded to bug-coreutils <at> gnu.org:
bug#23335; Package coreutils. (Fri, 22 Apr 2016 03:48:02 GMT) Full text and rfc822 format available.

Message #8 received at submit <at> debbugs.gnu.org (full text, mbox):

From: Jonathan Buchanan <jonathan.russ.buchanan <at> gmail.com>
To: bug-coreutils <at> gnu.org
Cc: Jonathan Buchanan <jonathan.russ.buchanan <at> gmail.com>
Subject: [PATCH 2/2] unexpand: Reimplemented the unexpand algorithm to satisfy
 the standard
Date: Thu, 21 Apr 2016 20:33:36 -0400
* TODO: Removed the section detailing how unexpand did
not satisfy the standard.
* src/unexpand.c: Reimplemented the unexpand algorithm. The program
now satisfies the conditions specified in the old TODO.
---
 TODO           |   4 --
 src/unexpand.c | 176 ++++++++++++++++++++++-----------------------------------
 2 files changed, 69 insertions(+), 111 deletions(-)

diff --git a/TODO b/TODO
index de95e5a..dc1a9e2 100644
--- a/TODO
+++ b/TODO
@@ -67,10 +67,6 @@ lib/strftime.c: Since %N is the only format that we need but that
   would expand /%(-_)?\d*N/ to the desired string and then pass the
   resulting string to glibc's strftime.
 
-unexpand: [http://www.opengroup.org/onlinepubs/007908799/xcu/unexpand.html]
-  printf 'x\t \t y\n'|unexpand -t 8,9 should print its input, unmodified.
-  printf 'x\t \t y\n'|unexpand -t 5,8 should print "x\ty\n"
-
 sort: Investigate better sorting algorithms; see Knuth vol. 3.
 
   We tried list merge sort, but it was about 50% slower than the
diff --git a/src/unexpand.c b/src/unexpand.c
index a758756..dcd40de 100644
--- a/src/unexpand.c
+++ b/src/unexpand.c
@@ -303,13 +303,6 @@ unexpand (void)
       /* Input character, or EOF.  */
       int c;
 
-      /* If true, perform translations.  */
-      bool convert = true;
-
-
-      /* The following variables have valid values only when CONVERT
-         is true:  */
-
       /* Column of next input character.  */
       uintmax_t column = 0;
 
@@ -319,127 +312,96 @@ unexpand (void)
       /* Index in TAB_LIST of next tab stop to examine.  */
       size_t tab_index = 0;
 
-      /* If true, the first pending blank came just before a tab stop.  */
-      bool one_blank_before_tab_stop = false;
-
-      /* If true, the previous input character was a blank.  This is
-         initially true, since initial strings of blanks are treated
-         as if the line was preceded by a blank.  */
-      bool prev_blank = true;
-
       /* Number of pending columns of blanks.  */
       size_t pending = 0;
 
-
-      /* Convert a line of text.  */
+      /* If true, the previous input character was not a blank.  */
+      bool previous_non_blank = false;
 
       do
         {
           while ((c = getc (fp)) < 0 && (fp = next_file (fp)))
             continue;
 
-          if (convert)
+          if (c < 0)
+            {
+              free (pending_blank);
+              return;
+            }
+
+          /* Update the next tab column.  */
+          if (next_tab_column <= column)
             {
-              bool blank = !! isblank (c);
+              if (tab_size)
+                next_tab_column = (column + (tab_size - column % tab_size));
+              else
+                if (tab_index < first_free_tab)
+                  next_tab_column = tab_list[tab_index++];
+                else
+                  next_tab_column = -1;
+            }
 
-              if (blank)
+          bool blank = !! isblank (c);
+          if (!blank)
+            {
+              /* If no -a, stop converting once a non-blank is reached.  */
+              if (!convert_entire_line)
+                next_tab_column = -1;
+              if (fwrite (pending_blank, sizeof (char), pending, stdout)
+                  != pending)
+                error (EXIT_FAILURE, errno, _("write error"));
+              pending = 0;
+              if (putchar (c) < 0)
+                error (EXIT_FAILURE, errno, _("write error"));
+              previous_non_blank = true;
+            }
+          else
+            {
+              pending_blank[pending] = c;
+              pending++;
+              /* POSIX says spaces should not precede tabs, so remove spaces
+                 if a tab is found after spaces.  */
+              if (pending_blank[0] != '\t' && c == '\t')
                 {
-                  if (next_tab_column <= column)
+                  pending = 1;
+                  pending_blank[0] = '\t';
+                }
+              if (column + 1 == next_tab_column)
+                {
+                  /* POSIX says single trailing spaces should not be converted
+                     to tabs if they are followed by a non-blank.  */
+                  if (c == ' ' && pending == 1 && previous_non_blank)
                     {
-                      if (tab_size)
-                        next_tab_column =
-                          column + (tab_size - column % tab_size);
+                      previous_non_blank = false;
+                      if ((c = getc (fp)) >= 0)
+                        blank = !! isblank (c);
                       else
-                        while (true)
-                          if (tab_index == first_free_tab)
-                            {
-                              convert = false;
-                              break;
-                            }
-                          else
-                            {
-                              uintmax_t tab = tab_list[tab_index++];
-                              if (column < tab)
-                                {
-                                  next_tab_column = tab;
-                                  break;
-                                }
-                            }
-                    }
-
-                  if (convert)
-                    {
-                      if (next_tab_column < column)
-                        error (EXIT_FAILURE, 0, _("input line is too long"));
-
-                      if (c == '\t')
                         {
-                          column = next_tab_column;
-
-                          if (pending)
-                            pending_blank[0] = '\t';
+                          /* End of file, do not convert to tab.  */
+                          if (putchar (' ') < 0)
+                            error (EXIT_FAILURE, errno, _("write error"));
+                          continue;
                         }
+                      if (!blank)
+                        c = ' ';
                       else
-                        {
-                          column++;
-
-                          if (! (prev_blank && column == next_tab_column))
-                            {
-                              /* It is not yet known whether the pending blanks
-                                 will be replaced by tabs.  */
-                              if (column == next_tab_column)
-                                one_blank_before_tab_stop = true;
-                              pending_blank[pending++] = c;
-                              prev_blank = true;
-                              continue;
-                            }
-
-                          /* Replace the pending blanks by a tab or two.  */
-                          pending_blank[0] = c = '\t';
-                        }
-
-                      /* Discard pending blanks, unless it was a single
-                         blank just before the previous tab stop.  */
-                      pending = one_blank_before_tab_stop;
+                        c = '\t';
+                      if (putchar (c) < 0)
+                        error (EXIT_FAILURE, errno, _("write error"));
+                      column += 1;
+                      pending = 0;
+                      /* Move the position in the file back and continue.  */
+                      fseek (fp, -1, SEEK_CUR);
+                      continue;
                     }
-                }
-              else if (c == '\b')
-                {
-                  /* Go back one column, and force recalculation of the
-                     next tab stop.  */
-                  column -= !!column;
-                  next_tab_column = column;
-                  tab_index -= !!tab_index;
-                }
-              else
-                {
-                  column++;
-                  if (!column)
-                    error (EXIT_FAILURE, 0, _("input line is too long"));
-                }
-
-              if (pending)
-                {
-                  if (pending > 1 && one_blank_before_tab_stop)
-                    pending_blank[0] = '\t';
-                  if (fwrite (pending_blank, 1, pending, stdout) != pending)
-                    error (EXIT_FAILURE, errno, _("write error"));
+                  previous_non_blank = false;
                   pending = 0;
-                  one_blank_before_tab_stop = false;
+                  putchar ('\t');
                 }
-
-              prev_blank = blank;
-              convert &= convert_entire_line || blank;
-            }
-
-          if (c < 0)
-            {
-              free (pending_blank);
-              return;
             }
-
-          if (putchar (c) < 0)
-            error (EXIT_FAILURE, errno, _("write error"));
+          column++;
+          if (!column)
+            error (EXIT_FAILURE, 0, _("input line is too long"));
         }
       while (c != '\n');
     }
-- 
2.8.0





This bug report was last modified 8 years and 14 days ago.

Previous Next


GNU bug tracking system
Copyright (C) 1999 Darren O. Benham, 1997,2003 nCipher Corporation Ltd, 1994-97 Ian Jackson.