Changeset 281688 in webkit


Ignore:
Timestamp:
Aug 27, 2021, 12:15:06 AM (3 years ago)
Author:
[email protected]
Message:

Intl.DateTimeFormat incorrectly parses patterns with 'h' literal
https://bugs.webkit.org/show_bug.cgi?id=229313
rdar://82414310

Reviewed by Ross Kirsling.

JSTests:

  • stress/intl-date-pattern-includes-literal-text.js: Added.

(shouldBe):

Source/JavaScriptCore:

While DateTimeFormat pattern and skeleton can include single-quoted literal texts,
we are not respecting that when parsing them to extract information. As a result,
we are incorrectly extracting hour-cycle information for "fr" locale since it can
include "HH 'h'" pattern text. This patch fixes that by skipping literal text
correctly.

  • runtime/IntlDateTimeFormat.cpp:

(JSC::skipLiteralText):
(JSC::IntlDateTimeFormat::setFormatsFromPattern):
(JSC::IntlDateTimeFormat::hourCycleFromPattern):
(JSC::IntlDateTimeFormat::replaceHourCycleInSkeleton):
(JSC::IntlDateTimeFormat::replaceHourCycleInPattern):

  • runtime/IntlDateTimeFormat.h:
  • runtime/IntlLocale.cpp:

(JSC::IntlLocale::hourCycles):

Location:
trunk
Files:
1 added
5 edited

Legend:

Unmodified
Added
Removed
  • trunk/JSTests/ChangeLog

    r281686 r281688  
     12021-08-26  Yusuke Suzuki  <[email protected]>
     2
     3        Intl.DateTimeFormat incorrectly parses patterns with 'h' literal
     4        https://bugs.webkit.org/show_bug.cgi?id=229313
     5        rdar://82414310
     6
     7        Reviewed by Ross Kirsling.
     8
     9        * stress/intl-date-pattern-includes-literal-text.js: Added.
     10        (shouldBe):
     11
    1122021-08-26  Yusuke Suzuki  <[email protected]>
    213
  • trunk/Source/JavaScriptCore/ChangeLog

    r281686 r281688  
     12021-08-26  Yusuke Suzuki  <[email protected]>
     2
     3        Intl.DateTimeFormat incorrectly parses patterns with 'h' literal
     4        https://bugs.webkit.org/show_bug.cgi?id=229313
     5        rdar://82414310
     6
     7        Reviewed by Ross Kirsling.
     8
     9        While DateTimeFormat pattern and skeleton can include single-quoted literal texts,
     10        we are not respecting that when parsing them to extract information. As a result,
     11        we are incorrectly extracting hour-cycle information for "fr" locale since it can
     12        include "HH 'h'" pattern text. This patch fixes that by skipping literal text
     13        correctly.
     14
     15        * runtime/IntlDateTimeFormat.cpp:
     16        (JSC::skipLiteralText):
     17        (JSC::IntlDateTimeFormat::setFormatsFromPattern):
     18        (JSC::IntlDateTimeFormat::hourCycleFromPattern):
     19        (JSC::IntlDateTimeFormat::replaceHourCycleInSkeleton):
     20        (JSC::IntlDateTimeFormat::replaceHourCycleInPattern):
     21        * runtime/IntlDateTimeFormat.h:
     22        * runtime/IntlLocale.cpp:
     23        (JSC::IntlLocale::hourCycles):
     24
    1252021-08-26  Yusuke Suzuki  <[email protected]>
    226
  • trunk/Source/JavaScriptCore/runtime/IntlDateTimeFormat.cpp

    r281513 r281688  
    307307}
    308308
     309template<typename Container>
     310static inline unsigned skipLiteralText(const Container& container, unsigned start, unsigned length)
     311{
     312    // Skip literal text. We do not recognize '' single quote specially.
     313    // `'ICU''s change'` is `ICU's change` literal text, but even if we split this text into two literal texts,
     314    // we can anyway skip the same thing.
     315    // This function returns the last character index which can be considered as a literal text.
     316    ASSERT(length);
     317    ASSERT(start < length);
     318    ASSERT(container[start] == '\'');
     319    unsigned index = start;
     320    ++index;
     321    if (!(index < length))
     322        return length - 1;
     323    for (; index < length; ++index) {
     324        if (container[index] == '\'')
     325            return index;
     326    }
     327    return length - 1;
     328}
     329
    309330void IntlDateTimeFormat::setFormatsFromPattern(const StringView& pattern)
    310331{
    311332    // Get all symbols from the pattern, and set format fields accordingly.
    312333    // http://unicode.org/reports/tr35/tr35-dates.html#Date_Field_Symbol_Table
     334    //
     335    // A date pattern is a character string consisting of two types of elements:
     336    // 1. Pattern fields, which repeat a specific pattern character one or more times.
     337    //    These fields are replaced with date and time data from a calendar when formatting,
     338    //    or used to generate data for a calendar when parsing. Currently, A..Z and a..z are
     339    //    reserved for use as pattern characters (unless they are quoted, see next item).
     340    //    The pattern characters currently defined, and the meaning of different fields
     341    //    lengths for then, are listed in the Date Field Symbol Table below.
     342    // 2. Literal text, which is output as-is when formatting, and must closely match when
     343    //    parsing. Literal text can include:
     344    //      1. Any characters other than A..Z and a..z, including spaces and punctuation.
     345    //      2. Any text between single vertical quotes ('xxxx'), which may include A..Z and
     346    //         a..z as literal text.
     347    //      3. Two adjacent single vertical quotes (''), which represent a literal single quote,
     348    //         either inside or outside quoted text.
    313349    unsigned length = pattern.length();
    314350    for (unsigned i = 0; i < length; ++i) {
    315         UChar currentCharacter = pattern[i];
     351        auto currentCharacter = pattern[i];
     352
     353        if (currentCharacter == '\'') {
     354            i = skipLiteralText(pattern, i, length);
     355            continue;
     356        }
     357
    316358        if (!isASCIIAlpha(currentCharacter))
    317359            continue;
     
    461503}
    462504
    463 inline IntlDateTimeFormat::HourCycle IntlDateTimeFormat::hourCycleFromPattern(const Vector<UChar, 32>& pattern)
    464 {
    465     for (auto character : pattern) {
     505IntlDateTimeFormat::HourCycle IntlDateTimeFormat::hourCycleFromPattern(const Vector<UChar, 32>& pattern)
     506{
     507    for (unsigned i = 0, length = pattern.size(); i < length; ++i) {
     508        auto character = pattern[i];
     509
     510        if (character == '\'') {
     511            i = skipLiteralText(pattern, i, length);
     512            continue;
     513        }
     514
    466515        switch (character) {
    467516        case 'K':
     
    480529    if (isHour12)
    481530        skeletonCharacter = 'h';
    482     for (auto& character : skeleton) {
     531    for (unsigned i = 0, length = skeleton.size(); i < length; ++i) {
     532        auto& character = skeleton[i];
     533
     534        // ICU DateTimeFormat skeleton also has single-quoted literal text.
     535        // https://github.com/unicode-org/icu/blob/main/icu4c/source/i18n/dtptngen.cpp
     536        if (character == '\'') {
     537            i = skipLiteralText(skeleton, i, length);
     538            continue;
     539        }
     540
    483541        switch (character) {
    484542        case 'h':
     
    511569    }
    512570
    513     for (auto& character : pattern) {
     571    for (unsigned i = 0, length = pattern.size(); i < length; ++i) {
     572        auto& character = pattern[i];
     573
     574        if (character == '\'') {
     575            i = skipLiteralText(pattern, i, length);
     576            continue;
     577        }
     578
    514579        switch (character) {
    515580        case 'K':
  • trunk/Source/JavaScriptCore/runtime/IntlDateTimeFormat.h

    r281371 r281688  
    8484    static IntlDateTimeFormat* unwrapForOldFunctions(JSGlobalObject*, JSValue);
    8585
     86    enum class HourCycle : uint8_t { None, H11, H12, H23, H24 };
     87    static HourCycle hourCycleFromPattern(const Vector<UChar, 32>&);
     88
    8689private:
    8790    IntlDateTimeFormat(VM&, Structure*);
     
    9396    UDateIntervalFormat* createDateIntervalFormatIfNecessary(JSGlobalObject*);
    9497
    95     enum class HourCycle : uint8_t { None, H11, H12, H23, H24 };
    9698    enum class Weekday : uint8_t { None, Narrow, Short, Long };
    9799    enum class Era : uint8_t { None, Narrow, Short, Long };
     
    122124    static HourCycle hourCycleFromSymbol(UChar);
    123125    static HourCycle parseHourCycle(const String&);
    124     static HourCycle hourCycleFromPattern(const Vector<UChar, 32>&);
    125126    static void replaceHourCycleInSkeleton(Vector<UChar, 32>&, bool hour12);
    126127    static void replaceHourCycleInPattern(Vector<UChar, 32>&, HourCycle);
  • trunk/Source/JavaScriptCore/runtime/IntlLocale.cpp

    r281513 r281688  
    2828#include "IntlLocale.h"
    2929
     30#include "IntlDateTimeFormat.h"
    3031#include "IntlObjectInlines.h"
    3132#include "JSCInlines.h"
     
    4142
    4243const ClassInfo IntlLocale::s_info = { "Object", &Base::s_info, nullptr, nullptr, CREATE_METHOD_TABLE(IntlLocale) };
     44
     45namespace IntlLocaleInternal {
     46static constexpr bool verbose = false;
     47}
    4348
    4449IntlLocale* IntlLocale::create(VM& vm, Structure* structure)
     
    644649    }
    645650
    646     for (unsigned i = 0; i < pattern.size(); ++i) {
    647         UChar currentCharacter = pattern[i];
    648         if (!isASCIIAlpha(currentCharacter))
    649             continue;
    650 
    651         while (i + 1 < pattern.size() && pattern[i + 1] == currentCharacter)
    652             ++i;
    653 
    654         switch (currentCharacter) {
    655         case 'h': {
    656             elements.append("h12"_s);
    657             RELEASE_AND_RETURN(scope, createArrayFromStringVector(globalObject, WTFMove(elements)));
    658         }
    659         case 'H': {
    660             elements.append("h23"_s);
    661             RELEASE_AND_RETURN(scope, createArrayFromStringVector(globalObject, WTFMove(elements)));
    662         }
    663         case 'k': {
    664             elements.append("h24"_s);
    665             RELEASE_AND_RETURN(scope, createArrayFromStringVector(globalObject, WTFMove(elements)));
    666         }
    667         case 'K': {
    668             elements.append("h11"_s);
    669             RELEASE_AND_RETURN(scope, createArrayFromStringVector(globalObject, WTFMove(elements)));
    670         }
    671         default:
    672             break;
    673         }
     651    dataLogLnIf(IntlLocaleInternal::verbose, "pattern:(", StringView(pattern.data(), pattern.size()), ")");
     652
     653    switch (IntlDateTimeFormat::hourCycleFromPattern(pattern)) {
     654    case IntlDateTimeFormat::HourCycle::None:
     655        break;
     656    case IntlDateTimeFormat::HourCycle::H11: {
     657        elements.append("h11"_s);
     658        RELEASE_AND_RETURN(scope, createArrayFromStringVector(globalObject, WTFMove(elements)));
     659    }
     660    case IntlDateTimeFormat::HourCycle::H12: {
     661        elements.append("h12"_s);
     662        RELEASE_AND_RETURN(scope, createArrayFromStringVector(globalObject, WTFMove(elements)));
     663    }
     664    case IntlDateTimeFormat::HourCycle::H23: {
     665        elements.append("h23"_s);
     666        RELEASE_AND_RETURN(scope, createArrayFromStringVector(globalObject, WTFMove(elements)));
     667    }
     668    case IntlDateTimeFormat::HourCycle::H24: {
     669        elements.append("h24"_s);
     670        RELEASE_AND_RETURN(scope, createArrayFromStringVector(globalObject, WTFMove(elements)));
     671    }
    674672    }
    675673
Note: See TracChangeset for help on using the changeset viewer.