GNU bug report logs - #14097
date: add parsing support for ISO 8601 basic format

Previous Next

Package: coreutils;

Reported by: Mihai Capotă <mihai <at> mihaic.ro>

Date: Sat, 30 Mar 2013 19:22:02 UTC

Severity: wishlist

Tags: patch

To reply to this bug, email your comments to 14097 AT debbugs.gnu.org.

Toggle the display of automated, internal messages from the tracker.

View this report as an mbox folder, status mbox, maintainer mbox


Report forwarded to bug-coreutils <at> gnu.org:
bug#14097; Package coreutils. (Sat, 30 Mar 2013 19:22:02 GMT) Full text and rfc822 format available.

Acknowledgement sent to Mihai Capotă <mihai <at> mihaic.ro>:
New bug report received and forwarded. Copy sent to bug-coreutils <at> gnu.org. (Sat, 30 Mar 2013 19:22:02 GMT) Full text and rfc822 format available.

Message #5 received at submit <at> debbugs.gnu.org (full text, mbox):

From: Mihai Capotă <mihai <at> mihaic.ro>
To: bug-gnulib <at> gnu.org
Cc: bug-coreutils <at> gnu.org
Subject: [PATCH] Add support for ISO 8601 basic format
Date: Sat, 30 Mar 2013 20:18:13 +0100
The parser now accepts the basic format for combined date and time
representations, which ommits the date and time separators, "-" and ":".

See bug 23767 for GNU coreutils, <https://savannah.gnu.org/bugs/?23767>.

* lib/parse-datetime.y: Parse combined date and time representations in
ISO 8601 basic format.
(set_hhmmss_iso_8601_basic_time) New function.
* tests/test-parse-datetime.c: Add new tests for combined date and time
representations in ISO 8601 basic format.
---
 lib/parse-datetime.y        |   78 +++++++++++++++++++++++++++++++++++++++++--
 tests/test-parse-datetime.c |   61 +++++++++++++++++++++++++++++++++
 2 files changed, 137 insertions(+), 2 deletions(-)

diff --git a/lib/parse-datetime.y b/lib/parse-datetime.y
index 77d95b7..20bf1ac 100644
--- a/lib/parse-datetime.y
+++ b/lib/parse-datetime.y
@@ -282,6 +282,60 @@ set_hhmmss (parser_control *pc, long int hour, long int minutes,
   pc->seconds.tv_nsec = nsec;
 }
 
+/* Set PC-> hour, minutes, seconds and nanoseconds members from ISO 8601 basic
+   time.  */
+static void
+set_hhmmss_iso_8601_basic_time (parser_control *pc, long int integer_part,
+                                long int fractional_part)
+{
+  if (integer_part / 1000000 > 0)
+    {
+      /* not ISO 8601 time, forcing mktime error */
+      pc->hour = 90;
+      pc->minutes = 0;
+      pc->seconds.tv_sec = 0;
+      pc->seconds.tv_nsec = 0;
+    }
+  else
+    {
+      pc->hour = integer_part / 10000;
+      if (pc->hour > 0) /* HHMMSS */
+        {
+          pc->minutes = (integer_part % 10000) / 100;
+          pc->seconds.tv_sec = integer_part % 100;
+          pc->seconds.tv_nsec = fractional_part;
+        }
+      else
+        {
+          if (fractional_part != 0)
+            {
+              /* FIXME support fractional part for minutes and hours */
+              pc->hour = 90;
+              pc->minutes = 0;
+              pc->seconds.tv_sec = 0;
+              pc->seconds.tv_nsec = 0;
+            }
+          else
+            {
+              pc->hour = integer_part / 100;
+              if (pc->hour > 0) /* HHMM */
+                {
+                  pc->minutes = integer_part % 100;
+                  pc->seconds.tv_sec = 0;
+                  pc->seconds.tv_nsec = 0;
+                }
+              else /* HH */
+                {
+                  pc->hour = integer_part;
+                  pc->minutes = 0;
+                  pc->seconds.tv_sec = 0;
+                  pc->seconds.tv_nsec = 0;
+                }
+            }
+        }
+    }
+}
+
 %}
 
 /* We want a reentrant parser, even if the TZ manipulation and the calls to
@@ -290,8 +344,8 @@ set_hhmmss (parser_control *pc, long int hour, long int minutes,
 %parse-param { parser_control *pc }
 %lex-param { parser_control *pc }
 
-/* This grammar has 31 shift/reduce conflicts. */
-%expect 31
+/* This grammar has 34 shift/reduce conflicts. */
+%expect 34
 
 %union
 {
@@ -358,12 +412,18 @@ item:
 
 datetime:
     iso_8601_datetime
+  | iso_8601_basic_datetime
   ;
 
 iso_8601_datetime:
     iso_8601_date 'T' iso_8601_time
   ;
 
+iso_8601_basic_datetime:
+    number 'T' iso_8601_basic_time
+      { pc->dates_seen--; } /* already incremented in digits_to_date_time */
+  ;
+
 time:
     tUNUMBER tMERIDIAN
       {
@@ -401,6 +461,20 @@ iso_8601_time:
       }
   ;
 
+iso_8601_basic_time:
+    tUNUMBER o_zone_offset
+      {
+        set_hhmmss_iso_8601_basic_time (pc, $1.value, 0);
+        pc->meridian = MER24;
+      }
+  | tUDECIMAL_NUMBER o_zone_offset
+      {
+        /* FIXME avoid time_t to long int cast */
+        set_hhmmss_iso_8601_basic_time (pc, (long int)$1.tv_sec, $1.tv_nsec);
+        pc->meridian = MER24;
+      }
+  ;
+
 o_zone_offset:
   /* empty */
   | zone_offset
diff --git a/tests/test-parse-datetime.c b/tests/test-parse-datetime.c
index 7eba9ad..c620009 100644
--- a/tests/test-parse-datetime.c
+++ b/tests/test-parse-datetime.c
@@ -216,6 +216,67 @@ main (int argc _GL_UNUSED, char **argv)
           && expected.tv_nsec == result.tv_nsec);
 
 
+  /* ISO 8601 basic date and time of day representation,
+     'T' separator, local time zone */
+  p = "20110501T115518";
+  expected.tv_sec = ref_time - gmtoff;
+  expected.tv_nsec = 0;
+  ASSERT (parse_datetime (&result, p, 0));
+  LOG (p, expected, result);
+  ASSERT (expected.tv_sec == result.tv_sec
+          && expected.tv_nsec == result.tv_nsec);
+
+
+  /* ISO 8601 basic date and time of day representation,
+     'T' separator, UTC */
+  p = "20110501T115518Z";
+  expected.tv_sec = ref_time;
+  expected.tv_nsec = 0;
+  ASSERT (parse_datetime (&result, p, 0));
+  LOG (p, expected, result);
+  ASSERT (expected.tv_sec == result.tv_sec
+          && expected.tv_nsec == result.tv_nsec);
+
+
+  /* ISO 8601 basic date and time of day representation,
+     'T' separator, w/UTC offset */
+  p = "20110501T115518-0700";
+  expected.tv_sec = 1304276118;
+  expected.tv_nsec = 0;
+  ASSERT (parse_datetime (&result, p, 0));
+  LOG (p, expected, result);
+  ASSERT (expected.tv_sec == result.tv_sec
+          && expected.tv_nsec == result.tv_nsec);
+
+
+  /* ISO 8601 basic date and time of day representation,
+     'T' separator, w/hour only UTC offset */
+  p = "20110501T115518-07";
+  expected.tv_sec = 1304276118;
+  expected.tv_nsec = 0;
+  ASSERT (parse_datetime (&result, p, 0));
+  LOG (p, expected, result);
+  ASSERT (expected.tv_sec == result.tv_sec
+          && expected.tv_nsec == result.tv_nsec);
+
+
+  /* ISO 8601 basic date and time of day representation,
+     'T' separator, w/hour only UTC offset, with ns */
+  p = "20110501T115518,123456789-07";
+  expected.tv_sec = 1304276118;
+  expected.tv_nsec = 123456789;
+  ASSERT (parse_datetime (&result, p, 0));
+  LOG (p, expected, result);
+  ASSERT (expected.tv_sec == result.tv_sec
+          && expected.tv_nsec == result.tv_nsec);
+
+
+  /* Invalid ISO 8601 basic date and time of day representation,
+     too many digits for time */
+  p = "20110501T11551800";
+  ASSERT (!parse_datetime (&result, p, 0));
+
+
   now.tv_sec = 4711;
   now.tv_nsec = 1267;
   p = "now";
-- 
1.7.9.5





Information forwarded to bug-coreutils <at> gnu.org:
bug#14097; Package coreutils. (Mon, 01 Apr 2013 12:55:02 GMT) Full text and rfc822 format available.

Message #8 received at 14097 <at> debbugs.gnu.org (full text, mbox):

From: Eric Blake <eblake <at> redhat.com>
To: Mihai Capotă <mihai <at> mihaic.ro>
Cc: 14097 <at> debbugs.gnu.org, bug-gnulib <at> gnu.org
Subject: Re: bug#14097: [PATCH] Add support for ISO 8601 basic format
Date: Mon, 01 Apr 2013 06:51:15 -0600
[Message part 1 (text/plain, inline)]
On 03/30/2013 01:18 PM, Mihai Capotă wrote:
> The parser now accepts the basic format for combined date and time
> representations, which ommits the date and time separators, "-" and ":".

s/ommits/omits/

> 
> See bug 23767 for GNU coreutils, <https://savannah.gnu.org/bugs/?23767>.
> 
> * lib/parse-datetime.y: Parse combined date and time representations in
> ISO 8601 basic format.
> (set_hhmmss_iso_8601_basic_time) New function.
> * tests/test-parse-datetime.c: Add new tests for combined date and time
> representations in ISO 8601 basic format.
> ---
>  lib/parse-datetime.y        |   78 +++++++++++++++++++++++++++++++++++++++++--
>  tests/test-parse-datetime.c |   61 +++++++++++++++++++++++++++++++++

This patch is non-trivial in size.  I stopped reviewing here; we would
need to have copyright assignment on file to take this patch from you.
Is this still something you are interested in pursuing?

-- 
Eric Blake   eblake redhat com    +1-919-301-3266
Libvirt virtualization library http://libvirt.org

[signature.asc (application/pgp-signature, attachment)]

Information forwarded to bug-coreutils <at> gnu.org:
bug#14097; Package coreutils. (Mon, 01 Apr 2013 13:20:01 GMT) Full text and rfc822 format available.

Message #11 received at 14097 <at> debbugs.gnu.org (full text, mbox):

From: Mihai Capotă <mihai <at> mihaic.ro>
To: Eric Blake <eblake <at> redhat.com>
Cc: 14097 <at> debbugs.gnu.org, bug-gnulib <at> gnu.org
Subject: Re: bug#14097: [PATCH] Add support for ISO 8601 basic format
Date: Mon, 1 Apr 2013 15:16:33 +0200
On Mon, Apr 1, 2013 at 2:51 PM, Eric Blake <eblake <at> redhat.com> wrote:
> This patch is non-trivial in size.  I stopped reviewing here; we would
> need to have copyright assignment on file to take this patch from you.
> Is this still something you are interested in pursuing?

Yes, it is. I will take care of the copyright assignment ASAP.

Mihai




Information forwarded to bug-coreutils <at> gnu.org:
bug#14097; Package coreutils. (Wed, 24 Apr 2013 12:11:01 GMT) Full text and rfc822 format available.

Message #14 received at 14097 <at> debbugs.gnu.org (full text, mbox):

From: Mihai Capotă <mihai <at> mihaic.ro>
To: Eric Blake <eblake <at> redhat.com>
Cc: 14097 <at> debbugs.gnu.org, bug-gnulib <at> gnu.org
Subject: Re: bug#14097: [PATCH] Add support for ISO 8601 basic format
Date: Wed, 24 Apr 2013 14:05:49 +0200
On Mon, Apr 1, 2013 at 2:51 PM, Eric Blake <eblake <at> redhat.com> wrote:
> This patch is non-trivial in size.  I stopped reviewing here; we would
> need to have copyright assignment on file to take this patch from you.
> Is this still something you are interested in pursuing?

I completed the assignment process. Please continue the review.

Mihai




Information forwarded to bug-coreutils <at> gnu.org:
bug#14097; Package coreutils. (Wed, 24 Apr 2013 22:08:02 GMT) Full text and rfc822 format available.

Message #17 received at submit <at> debbugs.gnu.org (full text, mbox):

From: Paul Eggert <eggert <at> cs.ucla.edu>
To: Mihai Capotă <mihai <at> mihaic.ro>
Cc: bug-coreutils <at> gnu.org, bug-gnulib <at> gnu.org
Subject: Re: [PATCH] Add support for ISO 8601 basic format
Date: Wed, 24 Apr 2013 15:02:25 -0700
Thanks for taking this on.  Here is a brief review.
The most important thing is that the patch also needs
to update doc/parse-datetime.texi.  Also, some comments
about the code changes:

On 03/30/13 12:18, Mihai Capotă wrote:
> +      /* not ISO 8601 time, forcing mktime error */
> +      pc->hour = 90;

How does this force a mktime error?  mktime allows tm_hour == 90.

>  datetime:
>      iso_8601_datetime
> +  | iso_8601_basic_datetime
>    ;
>  
>  iso_8601_datetime:
>      iso_8601_date 'T' iso_8601_time
>    ;
>  
> +iso_8601_basic_datetime:
> +    number 'T' iso_8601_basic_time
> +      { pc->dates_seen--; } /* already incremented in digits_to_date_time */

This doesn't look right.  'number' accepts all sort of things that we
would rather not accept here.  Conversely, why require ":" in times to
correlate with "-" in dates?  Shouldn't we accept a "-"less date along
with a ":"ful time, and vice versa?  And that "dates_seen--" business
is a hack; can't we arrange things so that dates_seen is incremented
just once?

> +iso_8601_basic_time:
> +    tUNUMBER o_zone_offset
> +      {
> +        set_hhmmss_iso_8601_basic_time (pc, $1.value, 0);
> +        pc->meridian = MER24;
> +      }
> +  | tUDECIMAL_NUMBER o_zone_offset
> +      {
> +        /* FIXME avoid time_t to long int cast */

Why is the cast needed?  Also, can't the grammar be simplified
here, by using unsigned_seconds instead of using both
tUDECIMAL_NUMBER and tUNUMBER?






Information forwarded to bug-coreutils <at> gnu.org:
bug#14097; Package coreutils. (Mon, 05 Aug 2013 16:52:02 GMT) Full text and rfc822 format available.

Message #20 received at 14097 <at> debbugs.gnu.org (full text, mbox):

From: Mihai Capotă <mihai <at> mihaic.ro>
To: eggert <at> cs.ucla.edu
Cc: 14097 <at> debbugs.gnu.org, bug-gnulib <at> gnu.org
Subject: [PATCH v2] Add support for ISO 8601 basic format
Date: Mon,  5 Aug 2013 18:51:13 +0200
The parser now accepts the basic format for combined date and time
representations, which omits the date and time separators, "-" and ":".

See bug 23767 for GNU coreutils, <https://savannah.gnu.org/bugs/?23767>.

* lib/parse-datetime.y: Parse combined date and time representations in
ISO 8601 basic format.
(set_hhmmss_iso_8601_basic_time) New function.
(digits_iso_8601_basic_to_date) New function.
* tests/test-parse-datetime.c: Add tests for combined date and time
representations in ISO 8601 basic format.
* doc/parse-datetime.texi: Document support for combined date and time
representations in ISO 8601 basic format.

Signed-off-by: Mihai Capotă <mihai <at> mihaic.ro>
---
On Thu, Apr 25, 2013 at 12:02 AM, Paul Eggert <eggert <at> cs.ucla.edu> wrote:
> The most important thing is that the patch also needs
> to update doc/parse-datetime.texi.

Done.

> On 03/30/13 12:18, Mihai Capotă wrote:
>> +      /* not ISO 8601 time, forcing mktime error */
>> +      pc->hour = 90;
>
> How does this force a mktime error?  mktime allows tm_hour == 90.

I meant to say mktime_ok. I changed the code to reject input by incrementing times_seen, like time_zone_hhmm.

>>  datetime:
>>      iso_8601_datetime
>> +  | iso_8601_basic_datetime
>>    ;
>>
>>  iso_8601_datetime:
>>      iso_8601_date 'T' iso_8601_time
>>    ;
>>
>> +iso_8601_basic_datetime:
>> +    number 'T' iso_8601_basic_time
>> +      { pc->dates_seen--; } /* already incremented in digits_to_date_time */
>
> This doesn't look right.  'number' accepts all sort of things that we
> would rather not accept here.

I was trying to make use of the existing digits_to_date_time function. I replaced it with tUNUMBER and a new function.

> Conversely, why require ":" in times to
> correlate with "-" in dates?  Shouldn't we accept a "-"less date along
> with a ":"ful time, and vice versa?

No, that is not allowed by the standard.

> And that "dates_seen--" business
> is a hack; can't we arrange things so that dates_seen is incremented
> just once?

The hack is gone.

>> +iso_8601_basic_time:
>> +    tUNUMBER o_zone_offset
>> +      {
>> +        set_hhmmss_iso_8601_basic_time (pc, $1.value, 0);
>> +        pc->meridian = MER24;
>> +      }
>> +  | tUDECIMAL_NUMBER o_zone_offset
>> +      {
>> +        /* FIXME avoid time_t to long int cast */
>
> Why is the cast needed?  Also, can't the grammar be simplified
> here, by using unsigned_seconds instead of using both
> tUDECIMAL_NUMBER and tUNUMBER?

I switched to using unsigned_seconds.

 doc/parse-datetime.texi     |    9 +++++-
 lib/parse-datetime.y        |   68 +++++++++++++++++++++++++++++++++++++++++--
 tests/test-parse-datetime.c |   61 ++++++++++++++++++++++++++++++++++++++
 3 files changed, 135 insertions(+), 3 deletions(-)

diff --git a/doc/parse-datetime.texi b/doc/parse-datetime.texi
index 6b3e973..9aa87ed 100644
--- a/doc/parse-datetime.texi
+++ b/doc/parse-datetime.texi
@@ -327,7 +327,12 @@ The ISO 8601 date and time of day extended format consists of an ISO
 day.  This format is also recognized if the @samp{T} is replaced by a
 space.
 
-In this format, the time of day should use 24-hour notation.
+The ISO 8601 basic format is also recognized.  It is identical to the ISO 8601
+extended format, except for omitting the @samp{-} separator in the date and the
+@samp{:} separator in the time.  Only the HHMMSS format is supported for the
+time of day; the reduced-accuracy HHMM and HH formats are not supported.
+
+In these formats, the time of day should use 24-hour notation.
 Fractional seconds are allowed, with either comma or period preceding
 the fraction.  ISO 8601 fractional minutes and hours are not
 supported.  Typically, hosts support nanosecond timestamp resolution;
@@ -339,6 +344,8 @@ Here are some examples:
 2012-09-24T20:02:00.052-0500
 2012-12-31T23:59:59,999999999+1100
 1970-01-01 00:00Z
+20120924T200200.052-0500
+20121231T235959,999999999+1100
 @end example
 
 @node Day of week items
diff --git a/lib/parse-datetime.y b/lib/parse-datetime.y
index 4dce7fa..fa9719d 100644
--- a/lib/parse-datetime.y
+++ b/lib/parse-datetime.y
@@ -257,6 +257,38 @@ digits_to_date_time (parser_control *pc, textint text_int)
     }
 }
 
+/* Extract into *PC the date info from a string of digits in ISO 8601 basic
+   format, i.e., YYYYMMDD, YYYY, or YY meaning century.  Note that YYYYMM is
+   not allowed, to avoid confusion with YYMMDD.  */
+static void
+digits_iso_8601_basic_to_date (parser_control *pc, textint text_int)
+{
+  switch (text_int.digits)
+    {
+    case 8:
+      pc->day = text_int.value % 100;
+      pc->month = (text_int.value / 100) % 100;
+      pc->year.value = text_int.value / 10000;
+      pc->year.digits = 4;
+      return;
+    case 4:
+      pc->day = 1;
+      pc->month = 1;
+      pc->year.value = text_int.value;
+      pc->year.digits = 4;
+      return;
+    case 2:
+      pc->day = 1;
+      pc->month = 1;
+      pc->year.value = text_int.value * 100;
+      pc->year.digits = 4;
+      return;
+    default:
+      pc->dates_seen++;
+      return;
+    }
+}
+
 /* Increment PC->rel by FACTOR * REL (FACTOR is 1 or -1).  */
 static void
 apply_relative_time (parser_control *pc, relative_time rel, int factor)
@@ -282,6 +314,28 @@ set_hhmmss (parser_control *pc, long int hour, long int minutes,
   pc->seconds.tv_nsec = nsec;
 }
 
+/* Set PC-> hour, minutes, seconds and nanoseconds members from ISO 8601 basic
+   time.  */
+static void
+set_hhmmss_iso_8601_basic_time (parser_control *pc, time_t integer_part,
+                                long int fractional_part)
+{
+  if (integer_part / 1000000 > 0)
+    {
+      /* Not an ISO 8601 time; arrange to reject it by incrementing
+         pc->times_seen.  */
+      pc->times_seen++;
+    }
+  else
+    {
+      /* FIXME support reduced accuracy times, i.e. HHMM and HH */
+      pc->hour = integer_part / 10000;
+      pc->minutes = (integer_part % 10000) / 100;
+      pc->seconds.tv_sec = integer_part % 100;
+      pc->seconds.tv_nsec = fractional_part;
+    }
+}
+
 %}
 
 /* We want a reentrant parser, even if the TZ manipulation and the calls to
@@ -290,8 +344,8 @@ set_hhmmss (parser_control *pc, long int hour, long int minutes,
 %parse-param { parser_control *pc }
 %lex-param { parser_control *pc }
 
-/* This grammar has 31 shift/reduce conflicts. */
-%expect 31
+/* This grammar has 33 shift/reduce conflicts. */
+%expect 33
 
 %union
 {
@@ -358,12 +412,22 @@ item:
 
 datetime:
     iso_8601_datetime
+  | iso_8601_basic_datetime
   ;
 
 iso_8601_datetime:
     iso_8601_date 'T' iso_8601_time
   ;
 
+iso_8601_basic_datetime:
+    tUNUMBER 'T' unsigned_seconds o_zone_offset
+      {
+        digits_iso_8601_basic_to_date (pc, $1);
+        set_hhmmss_iso_8601_basic_time (pc, $3.tv_sec, $3.tv_nsec);
+        pc->meridian = MER24;
+      }
+  ;
+
 time:
     tUNUMBER tMERIDIAN
       {
diff --git a/tests/test-parse-datetime.c b/tests/test-parse-datetime.c
index 7eba9ad..c620009 100644
--- a/tests/test-parse-datetime.c
+++ b/tests/test-parse-datetime.c
@@ -216,6 +216,67 @@ main (int argc _GL_UNUSED, char **argv)
           && expected.tv_nsec == result.tv_nsec);
 
 
+  /* ISO 8601 basic date and time of day representation,
+     'T' separator, local time zone */
+  p = "20110501T115518";
+  expected.tv_sec = ref_time - gmtoff;
+  expected.tv_nsec = 0;
+  ASSERT (parse_datetime (&result, p, 0));
+  LOG (p, expected, result);
+  ASSERT (expected.tv_sec == result.tv_sec
+          && expected.tv_nsec == result.tv_nsec);
+
+
+  /* ISO 8601 basic date and time of day representation,
+     'T' separator, UTC */
+  p = "20110501T115518Z";
+  expected.tv_sec = ref_time;
+  expected.tv_nsec = 0;
+  ASSERT (parse_datetime (&result, p, 0));
+  LOG (p, expected, result);
+  ASSERT (expected.tv_sec == result.tv_sec
+          && expected.tv_nsec == result.tv_nsec);
+
+
+  /* ISO 8601 basic date and time of day representation,
+     'T' separator, w/UTC offset */
+  p = "20110501T115518-0700";
+  expected.tv_sec = 1304276118;
+  expected.tv_nsec = 0;
+  ASSERT (parse_datetime (&result, p, 0));
+  LOG (p, expected, result);
+  ASSERT (expected.tv_sec == result.tv_sec
+          && expected.tv_nsec == result.tv_nsec);
+
+
+  /* ISO 8601 basic date and time of day representation,
+     'T' separator, w/hour only UTC offset */
+  p = "20110501T115518-07";
+  expected.tv_sec = 1304276118;
+  expected.tv_nsec = 0;
+  ASSERT (parse_datetime (&result, p, 0));
+  LOG (p, expected, result);
+  ASSERT (expected.tv_sec == result.tv_sec
+          && expected.tv_nsec == result.tv_nsec);
+
+
+  /* ISO 8601 basic date and time of day representation,
+     'T' separator, w/hour only UTC offset, with ns */
+  p = "20110501T115518,123456789-07";
+  expected.tv_sec = 1304276118;
+  expected.tv_nsec = 123456789;
+  ASSERT (parse_datetime (&result, p, 0));
+  LOG (p, expected, result);
+  ASSERT (expected.tv_sec == result.tv_sec
+          && expected.tv_nsec == result.tv_nsec);
+
+
+  /* Invalid ISO 8601 basic date and time of day representation,
+     too many digits for time */
+  p = "20110501T11551800";
+  ASSERT (!parse_datetime (&result, p, 0));
+
+
   now.tv_sec = 4711;
   now.tv_nsec = 1267;
   p = "now";
-- 
1.7.9.5





Information forwarded to bug-coreutils <at> gnu.org:
bug#14097; Package coreutils. (Sat, 31 Aug 2013 15:32:02 GMT) Full text and rfc822 format available.

Message #23 received at 14097 <at> debbugs.gnu.org (full text, mbox):

From: Mihai Capotă <mihai <at> mihaic.ro>
To: eggert <at> cs.ucla.edu
Cc: 14097 <at> debbugs.gnu.org
Subject: Re: [PATCH v2] Add support for ISO 8601 basic format
Date: Sat, 31 Aug 2013 17:31:08 +0200
Could someone please review the new patch?

On Mon, Aug 5, 2013 at 6:51 PM, Mihai Capotă <mihai <at> mihaic.ro> wrote:
> The parser now accepts the basic format for combined date and time
> representations, which ommits the date and time separators, "-" and ":".
>
> See bug 23767 for GNU coreutils, <https://savannah.gnu.org/bugs/?23767>.
>
> * lib/parse-datetime.y: Parse combined date and time representations in
> ISO 8601 basic format.
> (set_hhmmss_iso_8601_basic_time) New function.
> (digits_iso_8601_basic_to_date) New function.
> * tests/test-parse-datetime.c: Add tests for combined date and time
> representations in ISO 8601 basic format.
> * doc/parse-datetime.texi Document support for combined date and time
> representations in ISO 8601 basic format.
>
> Signed-off-by: Mihai Capotă <mihai <at> mihaic.ro>
> ---
> On Thu, Apr 25, 2013 at 12:02 AM, Paul Eggert <eggert <at> cs.ucla.edu> wrote:
>> The most important thing is that the patch also needs
>> to update doc/parse-datetime.texi.
>
> Done.
>
>> On 03/30/13 12:18, Mihai Capotă wrote:
>>> +      /* not ISO 8601 time, forcing mktime error */
>>> +      pc->hour = 90;
>>
>> How does this force a mktime error?  mktime allows tm_hour == 90.
>
> I meant to say mktime_ok. I changed the code to reject input by incrementing times_seen, like time_zone_hhmm.
>
>>>  datetime:
>>>      iso_8601_datetime
>>> +  | iso_8601_basic_datetime
>>>    ;
>>>
>>>  iso_8601_datetime:
>>>      iso_8601_date 'T' iso_8601_time
>>>    ;
>>>
>>> +iso_8601_basic_datetime:
>>> +    number 'T' iso_8601_basic_time
>>> +      { pc->dates_seen--; } /* already incremented in digits_to_date_time */
>>
>> This doesn't look right.  'number' accepts all sort of things that we
>> would rather not accept here.
>
> I was trying to make use of the existing digits_to_date_time function. I replaced it with tUNUMBER and a new function.
>
>> Conversely, why require ":" in times to
>> correlate with "-" in dates?  Shouldn't we accept a "-"less date along
>> with a ":"ful time, and vice versa?
>
> No, that is not allowed by the standard.
>
>> And that "dates_seen--" business
>> is a hack; can't we arrange things so that dates_seen is incremented
>> just once?
>
> The hack is gone.
>
>>> +iso_8601_basic_time:
>>> +    tUNUMBER o_zone_offset
>>> +      {
>>> +        set_hhmmss_iso_8601_basic_time (pc, $1.value, 0);
>>> +        pc->meridian = MER24;
>>> +      }
>>> +  | tUDECIMAL_NUMBER o_zone_offset
>>> +      {
>>> +        /* FIXME avoid time_t to long int cast */
>>
>> Why is the cast needed?  Also, can't the grammar be simplified
>> here, by using unsigned_seconds instead of using both
>> tUDECIMAL_NUMBER and tUNUMBER?
>
> I switched to using unsigned_seconds.
>
>  doc/parse-datetime.texi     |    9 +++++-
>  lib/parse-datetime.y        |   68 +++++++++++++++++++++++++++++++++++++++++--
>  tests/test-parse-datetime.c |   61 ++++++++++++++++++++++++++++++++++++++
>  3 files changed, 135 insertions(+), 3 deletions(-)
>
> diff --git a/doc/parse-datetime.texi b/doc/parse-datetime.texi
> index 6b3e973..9aa87ed 100644
> --- a/doc/parse-datetime.texi
> +++ b/doc/parse-datetime.texi
> @@ -327,7 +327,12 @@ The ISO 8601 date and time of day extended format consists of an ISO
>  day.  This format is also recognized if the @samp{T} is replaced by a
>  space.
>
> -In this format, the time of day should use 24-hour notation.
> +The ISO 8601 basic format is also recognized. It is identical to the ISO 8601
> +extended format, except for omitting the @samp{-} separator in the date and the
> +@samp{:} separator in the time. Only the HHMMSS format is supported for the
> +time of day, the reduced accuracy HHMM and HH formats are not supported.
> +
> +In these formats, the time of day should use 24-hour notation.
>  Fractional seconds are allowed, with either comma or period preceding
>  the fraction.  ISO 8601 fractional minutes and hours are not
>  supported.  Typically, hosts support nanosecond timestamp resolution;
> @@ -339,6 +344,8 @@ Here are some examples:
>  2012-09-24T20:02:00.052-0500
>  2012-12-31T23:59:59,999999999+1100
>  1970-01-01 00:00Z
> +20120924T200200.052-0500
> +20121231T235959,999999999+1100
>  @end example
>
>  @node Day of week items
> diff --git a/lib/parse-datetime.y b/lib/parse-datetime.y
> index 4dce7fa..fa9719d 100644
> --- a/lib/parse-datetime.y
> +++ b/lib/parse-datetime.y
> @@ -257,6 +257,38 @@ digits_to_date_time (parser_control *pc, textint text_int)
>      }
>  }
>
> +/* Extract into *PC the date info from a string of digits in ISO 8601 basic
> +   format, i.e., YYYYMMHH, YYYY, or YY meaning century. Note that YYYYMM is not
> +   allowed to avoid confusion with YYMMHH  */
> +static void
> +digits_iso_8601_basic_to_date (parser_control *pc, textint text_int)
> +{
> +  switch (text_int.digits)
> +    {
> +    case 8:
> +      pc->day = text_int.value % 100;
> +      pc->month = (text_int.value / 100) % 100;
> +      pc->year.value = text_int.value / 10000;
> +      pc->year.digits = 4;
> +      return;
> +    case 4:
> +      pc->day = 1;
> +      pc->month = 1;
> +      pc->year.value = text_int.value;
> +      pc->year.digits = 4;
> +      return;
> +    case 2:
> +      pc->day = 1;
> +      pc->month = 1;
> +      pc->year.value = text_int.value * 100;
> +      pc->year.digits = 4;
> +      return;
> +    default:
> +      pc->dates_seen++;
> +      return;
> +    }
> +}
> +
>  /* Increment PC->rel by FACTOR * REL (FACTOR is 1 or -1).  */
>  static void
>  apply_relative_time (parser_control *pc, relative_time rel, int factor)
> @@ -282,6 +314,28 @@ set_hhmmss (parser_control *pc, long int hour, long int minutes,
>    pc->seconds.tv_nsec = nsec;
>  }
>
> +/* Set PC-> hour, minutes, seconds and nanoseconds members from ISO 8601 basic
> +   time.  */
> +static void
> +set_hhmmss_iso_8601_basic_time (parser_control *pc, time_t integer_part,
> +                                long int fractional_part)
> +{
> +  if (integer_part / 1000000 > 0)
> +    {
> +      /* Not ISO 8601 time, arrange to reject it by incrementing
> +         pc->times_seen.*/
> +      pc->times_seen++;
> +    }
> +  else
> +    {
> +      /* FIXME support reduced accuracy times, i.e. HHMM and HH */
> +      pc->hour = integer_part / 10000;
> +      pc->minutes = (integer_part % 10000) / 100;
> +      pc->seconds.tv_sec = integer_part % 100;
> +      pc->seconds.tv_nsec = fractional_part;
> +    }
> +}
> +
>  %}
>
>  /* We want a reentrant parser, even if the TZ manipulation and the calls to
> @@ -290,8 +344,8 @@ set_hhmmss (parser_control *pc, long int hour, long int minutes,
>  %parse-param { parser_control *pc }
>  %lex-param { parser_control *pc }
>
> -/* This grammar has 31 shift/reduce conflicts. */
> -%expect 31
> +/* This grammar has 33 shift/reduce conflicts. */
> +%expect 33
>
>  %union
>  {
> @@ -358,12 +412,22 @@ item:
>
>  datetime:
>      iso_8601_datetime
> +  | iso_8601_basic_datetime
>    ;
>
>  iso_8601_datetime:
>      iso_8601_date 'T' iso_8601_time
>    ;
>
> +iso_8601_basic_datetime:
> +    tUNUMBER 'T' unsigned_seconds o_zone_offset
> +      {
> +        digits_iso_8601_basic_to_date (pc, $1);
> +        set_hhmmss_iso_8601_basic_time (pc, $3.tv_sec, $3.tv_nsec);
> +        pc->meridian = MER24;
> +      }
> +  ;
> +
>  time:
>      tUNUMBER tMERIDIAN
>        {
> diff --git a/tests/test-parse-datetime.c b/tests/test-parse-datetime.c
> index 7eba9ad..c620009 100644
> --- a/tests/test-parse-datetime.c
> +++ b/tests/test-parse-datetime.c
> @@ -216,6 +216,67 @@ main (int argc _GL_UNUSED, char **argv)
>            && expected.tv_nsec == result.tv_nsec);
>
>
> +  /* ISO 8601 basic date and time of day representation,
> +     'T' separator, local time zone */
> +  p = "20110501T115518";
> +  expected.tv_sec = ref_time - gmtoff;
> +  expected.tv_nsec = 0;
> +  ASSERT (parse_datetime (&result, p, 0));
> +  LOG (p, expected, result);
> +  ASSERT (expected.tv_sec == result.tv_sec
> +          && expected.tv_nsec == result.tv_nsec);
> +
> +
> +  /* ISO 8601 basic date and time of day representation,
> +     'T' separator, UTC */
> +  p = "20110501T115518Z";
> +  expected.tv_sec = ref_time;
> +  expected.tv_nsec = 0;
> +  ASSERT (parse_datetime (&result, p, 0));
> +  LOG (p, expected, result);
> +  ASSERT (expected.tv_sec == result.tv_sec
> +          && expected.tv_nsec == result.tv_nsec);
> +
> +
> +  /* ISO 8601 basic date and time of day representation,
> +     'T' separator, w/UTC offset */
> +  p = "20110501T115518-0700";
> +  expected.tv_sec = 1304276118;
> +  expected.tv_nsec = 0;
> +  ASSERT (parse_datetime (&result, p, 0));
> +  LOG (p, expected, result);
> +  ASSERT (expected.tv_sec == result.tv_sec
> +          && expected.tv_nsec == result.tv_nsec);
> +
> +
> +  /* ISO 8601 basic date and time of day representation,
> +     'T' separator, w/hour only UTC offset */
> +  p = "20110501T115518-07";
> +  expected.tv_sec = 1304276118;
> +  expected.tv_nsec = 0;
> +  ASSERT (parse_datetime (&result, p, 0));
> +  LOG (p, expected, result);
> +  ASSERT (expected.tv_sec == result.tv_sec
> +          && expected.tv_nsec == result.tv_nsec);
> +
> +
> +  /* ISO 8601 basic date and time of day representation,
> +     'T' separator, w/hour only UTC offset, with ns */
> +  p = "20110501T115518,123456789-07";
> +  expected.tv_sec = 1304276118;
> +  expected.tv_nsec = 123456789;
> +  ASSERT (parse_datetime (&result, p, 0));
> +  LOG (p, expected, result);
> +  ASSERT (expected.tv_sec == result.tv_sec
> +          && expected.tv_nsec == result.tv_nsec);
> +
> +
> +  /* Invalid ISO 8601 basic date and time of day representation,
> +     too many digits for time */
> +  p = "20110501T11551800";
> +  ASSERT (!parse_datetime (&result, p, 0));
> +
> +
>    now.tv_sec = 4711;
>    now.tv_nsec = 1267;
>    p = "now";
> --
> 1.7.9.5
>




Information forwarded to bug-coreutils <at> gnu.org:
bug#14097; Package coreutils. (Fri, 22 Nov 2013 08:51:02 GMT) Full text and rfc822 format available.

Message #26 received at 14097 <at> debbugs.gnu.org (full text, mbox):

From: Mihai Capotă <mihai <at> mihaic.ro>
To: eggert <at> cs.ucla.edu
Cc: 14097 <at> debbugs.gnu.org
Subject: Re: [PATCH v2] Add support for ISO 8601 basic format
Date: Fri, 22 Nov 2013 09:50:08 +0100
On Sat, Aug 31, 2013 at 5:31 PM, Mihai Capotă <mihai <at> mihaic.ro> wrote:
> Could someone please review the new patch?

Anybody?




Severity set to 'wishlist' from 'normal' Request was from Assaf Gordon <assafgordon <at> gmail.com> to control <at> debbugs.gnu.org. (Fri, 19 Oct 2018 01:29:02 GMT) Full text and rfc822 format available.

Changed bug title to 'date: add parsing support for ISO 8601 basic format' from '[PATCH] Add support for ISO 8601 basic format' Request was from Assaf Gordon <assafgordon <at> gmail.com> to control <at> debbugs.gnu.org. (Fri, 19 Oct 2018 01:29:02 GMT) Full text and rfc822 format available.

This bug report was last modified 5 years and 163 days ago.

Previous Next


GNU bug tracking system
Copyright (C) 1999 Darren O. Benham, 1997,2003 nCipher Corporation Ltd, 1994-97 Ian Jackson.