Skip to content

Commit

Permalink
[External] [stdlib] Allow !r conversion flag in String.format (#4…
Browse files Browse the repository at this point in the history
…3914)

[External] [stdlib] Allow `!r` conversion flag in `String.format`

This addresses #3267.

- Implemented parsing and handling of `!s` and `!r` conversion flags in
format strings.
- Updated the `_FormatCurlyEntry` struct to include a `conversion_flag`
field.
- Modified the `create_entries` method to correctly parse and store
conversion flags.
- Updated the string formatting logic to apply the appropriate
conversion (`str` or `repr`) based on the flag.

Additionally, I was conscious of potential future support for the `!a` flag
and `format_spec`, neither of which is currently supported. The additions in
this PR should be straightforward to extend for that support, which is why
`supported_conversion_flags` was added and a TODO was left in the code
for the latter.

For reference, see [Python's Format String
Syntax](https://docs.python.org/3/library/string.html#format-string-syntax):
```
replacement_field ::=  "{" [field_name] ["!" conversion] [":" format_spec] "}"
field_name        ::=  arg_name ("." attribute_name | "[" element_index "]")*
arg_name          ::=  [identifier | digit ]
attribute_name    ::=  identifier
element_index     ::=  digit  | index_string
index_string      ::=  <any source character except "]">  
conversion        ::=  "r" | "s" | "a"
format_spec       ::=  <see Python docs for details>
```

ORIGINAL_AUTHOR=Joshua James Venter
<67124214 [email protected]>
PUBLIC_PR_LINK=#3279

Co-authored-by: Joshua James Venter <67124214 [email protected]>
Closes #3279
MODULAR_ORIG_COMMIT_REV_ID: 9ee92a55ac683be22f7b93012f37c6980f7110b6
  • Loading branch information
modularbot and jjvraw committed Jul 24, 2024
1 parent 29916e1 commit f13ddcf
Show file tree
Hide file tree
Showing 3 changed files with 218 additions and 24 deletions.
14 changes: 14 additions & 0 deletions docs/changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -336,6 336,20 @@ future and `StringSlice.__len__` now does return the Unicode codepoints length.
`Int` type.
([PR #3150](https://github.com/modularml/mojo/pull/3150) by [@LJ-9801](https://github.com/LJ-9801))
- `String.format()` now supports conversion flags `!s` and `!r`, allowing for
`str()` and `repr()` conversions within format strings.
([PR #3279](https://github.com/modularml/mojo/pull/3279) by [@jjvraw](https://github.com/jjvraw))
Example:
```mojo
String("{} {!r}").format("Mojo", "Mojo")
# "Mojo 'Mojo'"
String("{0!s} {0!r}").format("Mojo")
# "Mojo 'Mojo'"
```
### 🦋 Changed
- The pointer aliasing semantics of Mojo have changed. Initially, Mojo adopted a
Expand Down
126 changes: 102 additions & 24 deletions stdlib/src/builtin/string.mojo
Original file line number Diff line number Diff line change
Expand Up @@ -1967,7 1967,7 @@ struct String(
)
return String(buf^)

fn format[*Ts: Stringable](self, *args: *Ts) raises -> String:
fn format[*Ts: StringRepresentable](self, *args: *Ts) raises -> String:
"""Format a template with *args.
Example of manual indexing:
Expand Down Expand Up @@ -2022,14 2022,21 @@ struct String(
@parameter
for i in range(num_pos_args):
if i == e[].field[Int]:
res = str(args[i])
if e[].conversion_flag == "r":
res = repr(args[i])
else:
res = str(args[i])

if e[].is_automatic_indexing():

@parameter
for i in range(num_pos_args):
if i == current_automatic_arg_index:
res = str(args[i])
if e[].conversion_flag == "r":
res = repr(args[i])
else:
res = str(args[i])

current_automatic_arg_index += 1

pos_in_self = e[].last_curly + 1
Expand Down Expand Up @@ -2285,6 2292,22 @@ fn _calc_format_buffer_size[type: DType]() -> Int:
# ===----------------------------------------------------------------------===#


trait StringRepresentable(Stringable, Representable):
    """The `StringRepresentable` trait denotes a trait composition of the
    `Stringable` and `Representable` traits.

    This trait is used by the `format()` method to support both the `{!s}`
    (or `{}`) and `{!r}` conversion flags. It allows the method to handle
    types that can be formatted using both their string representation and
    their more detailed representation.

    Types implementing this trait must provide both `__str__()` and
    `__repr__()` methods as defined in the `Stringable` and `Representable`
    traits respectively.
    """

    pass


@value
struct _FormatCurlyEntry(CollectionElement, CollectionElementNew):
"""
Expand All @@ -2302,6 2325,9 @@ struct _FormatCurlyEntry(CollectionElement, CollectionElementNew):
var last_curly: Int
"""The index of a closing brace around a substitution field."""

var conversion_flag: String
"""The conversion flag of the substitution field (e.g., 'r' for repr)."""

alias _FieldVariantType = Variant[
String, # kwargs indexing (`{field_name}`)
Int, # args manual indexing (`{3}`)
Expand All @@ -2314,6 2340,7 @@ struct _FormatCurlyEntry(CollectionElement, CollectionElementNew):
fn __init__(inout self, *, other: Self):
self.first_curly = other.first_curly
self.last_curly = other.last_curly
self.conversion_flag = other.conversion_flag
self.field = Self._FieldVariantType(other=other.field)

fn is_escaped_brace(ref [_]self) -> Bool:
Expand Down Expand Up @@ -2360,6 2387,10 @@ struct _FormatCurlyEntry(CollectionElement, CollectionElementNew):
var raised_manual_index = Optional[Int](None)
var raised_automatic_index = Optional[Int](None)
var raised_kwarg_field = Optional[String](None)
alias supported_conversion_flags = (
String("s"), # __str__
String("r"), # __repr__
)

var entries = List[Self]()
var start = Optional[Int](None)
Expand All @@ -2373,10 2404,13 @@ struct _FormatCurlyEntry(CollectionElement, CollectionElementNew):
# already one there.
if i - start.value() == 1:
# python escapes double curlies
var curren_entry = Self(
first_curly=start.value(), last_curly=i, field=False
var current_entry = Self(
first_curly=start.value(),
last_curly=i,
field=False,
conversion_flag="",
)
entries.append(curren_entry^)
entries.append(current_entry^)
start = None
continue
raise (
Expand All @@ -2389,27 2423,68 @@ struct _FormatCurlyEntry(CollectionElement, CollectionElementNew):
if start:
var start_value = start.value()
var current_entry = Self(
first_curly=start_value, last_curly=i, field=NoneType()
first_curly=start_value,
last_curly=i,
field=NoneType(),
conversion_flag="",
)

if i - start_value != 1:
var field = format_src[start_value + 1 : i]
try:
# field is a number for manual indexing:
var number = int(field)
current_entry.field = number
if number >= len_pos_args or number < 0:
raised_manual_index = number
var exclamation_index = field.find("!")

# TODO: Future implementation of format specifiers
# When implementing format specifiers, modify this section to handle:
# replacement_field ::= "{" [field_name] ["!" conversion] [":" format_spec] "}"
# this will involve:
# 1. finding a colon ':' after the conversion flag (if present)
# 2. extracting the format_spec if a colon is found
# 3. adjusting the field and conversion_flag parsing accordingly

if exclamation_index != -1:
if exclamation_index + 1 < len(field):
var conversion_flag: String = field[
exclamation_index + 1 :
]
if (
conversion_flag
not in supported_conversion_flags
):
raise 'Conversion flag "' + conversion_flag + '" not recognised.'
current_entry.conversion_flag = conversion_flag
else:
raise "Empty conversion flag."

field = field[:exclamation_index]

if (
field == ""
): # an empty field, so it's automatic indexing
if automatic_indexing_count >= len_pos_args:
raised_automatic_index = (
automatic_indexing_count
)
break
manual_indexing_count += 1
except e:
debug_assert(
"not convertible to integer" in str(e),
"Not the expected error from atol",
)
# field is a keyword for **kwargs:
current_entry.field = field
raised_kwarg_field = field
break
automatic_indexing_count += 1
else:
try:
# field is a number for manual indexing:
var number = int(field)
current_entry.field = number
if number >= len_pos_args or number < 0:
raised_manual_index = number
break
manual_indexing_count += 1
except e:
debug_assert(
"not convertible to integer" in str(e),
"Not the expected error from atol",
)
# field is a keyword for **kwargs:
current_entry.field = field
raised_kwarg_field = field
break

else:
# automatic indexing
# current_entry.field is already None
Expand All @@ -2424,7 2499,10 @@ struct _FormatCurlyEntry(CollectionElement, CollectionElementNew):
if (i + 1) < format_src.byte_length():
if format_src[i + 1] == "}":
var curren_entry = Self(
first_curly=i, last_curly=i + 1, field=True
first_curly=i,
last_curly=i + 1,
field=True,
conversion_flag="",
)
entries.append(curren_entry^)
skip_next = True
Expand Down
102 changes: 102 additions & 0 deletions stdlib/test/builtin/test_string.mojo
Original file line number Diff line number Diff line change
Expand Up @@ -1450,6 1450,107 @@ def test_format_args():
)


def test_format_conversion_flags():
    """Checks `String.format` handling of the `!s` and `!r` conversion flags.

    Covers automatic (`{!r}`) and manual (`{0!r}`) indexing with string,
    float, integer and boolean arguments, and the errors raised for empty or
    unrecognised conversion flags.
    """
    # `!r` on an empty string yields just the surrounding quotes.
    assert_equal(String("{!r}").format(""), "''")

    # `{}` and `{!s}` keep control characters as-is; `!r` escapes them.
    var special_str = "a\nb\tc"
    assert_equal(
        String("{} {!r}").format(special_str, special_str),
        "a\nb\tc 'a\\nb\\tc'",
    )
    assert_equal(
        String("{!s} {!r}").format(special_str, special_str),
        "a\nb\tc 'a\\nb\\tc'",
    )

    var a = "Mojo"
    assert_equal(String("{} {!r}").format(a, a), "Mojo 'Mojo'")
    assert_equal(String("{!s} {!r}").format(a, a), "Mojo 'Mojo'")
    assert_equal(String("{0!s} {0!r}").format(a), "Mojo 'Mojo'")

    # For 21.1 only a prefix of the repr is checked (substring match), so the
    # trailing digits of the float repr are not pinned by this test.
    var b = 21.1
    assert_true(
        "21.100000000000001 SIMD[DType.float64, 1](2"
        in String("{} {!r}").format(b, b),
    )
    assert_true(
        "21.100000000000001 SIMD[DType.float64, 1](2"
        in String("{!s} {!r}").format(b, b),
    )

    var c = 1e100
    assert_equal(
        String("{} {!r}").format(c, c),
        "1e+100 SIMD[DType.float64, 1](1.0000000000000000e+100)",
    )
    assert_equal(
        String("{!s} {!r}").format(c, c),
        "1e+100 SIMD[DType.float64, 1](1.0000000000000000e+100)",
    )

    # Integers and booleans produce the same text for `str` and `repr`.
    var d = 42
    assert_equal(String("{} {!r}").format(d, d), "42 42")
    assert_equal(String("{!s} {!r}").format(d, d), "42 42")

    # Mixed argument types with automatic indexing.
    assert_true(
        "Mojo SIMD[DType.float64, 1](2"
        in String("{} {!r} {} {!r}").format(a, b, c, d)
    )
    assert_true(
        "Mojo SIMD[DType.float64, 1](2"
        in String("{!s} {!r} {!s} {!r}").format(a, b, c, d)
    )

    var e = True
    assert_equal(String("{} {!r}").format(e, e), "True True")

    # Mixed argument types with manual indexing.
    assert_true(
        "Mojo SIMD[DType.float64, 1](2"
        in String("{0} {1!r} {2} {3}").format(a, b, c, d)
    )
    assert_true(
        "Mojo SIMD[DType.float64, 1](2"
        in String("{0!s} {1!r} {2} {3!s}").format(a, b, c, d)
    )

    # Manual indices may appear in any order.
    assert_equal(
        String("{3} {2} {1} {0}").format(a, d, c, b),
        "21.100000000000001 1e+100 42 Mojo",
    )

    assert_true(
        "'Mojo' 42 SIMD[DType.float64, 1](2"
        in String("{0!r} {3} {1!r}").format(a, b, c, d)
    )

    # The same argument can be referenced with different conversions.
    assert_equal(String("{0!s} {0!r}").format(a), "Mojo 'Mojo'")

    assert_true(
        "True 'Mojo' 42 SIMD[DType.float64, 1](2"
        in String("{4} {0!r} {3} {1!r}").format(a, b, c, d, True)
    )

    # Error cases: the expected messages show that everything between `!` and
    # the closing brace is reported as the flag, so trailing characters
    # (including a `:format_spec`) make the flag unrecognised.
    with assert_raises(contains='Conversion flag "x" not recognised.'):
        _ = String("{!x}").format(1)

    with assert_raises(contains="Empty conversion flag."):
        _ = String("{!}").format(1)

    with assert_raises(contains='Conversion flag "rs" not recognised.'):
        _ = String("{!rs}").format(1)

    with assert_raises(contains='Conversion flag "r123" not recognised.'):
        _ = String("{!r123}").format(1)

    with assert_raises(contains='Conversion flag "r!" not recognised.'):
        _ = String("{!r!}").format(1)

    with assert_raises(contains='Conversion flag "x" not recognised.'):
        _ = String("{0!x}").format(1)

    with assert_raises(contains='Conversion flag "r:d" not recognised.'):
        _ = String("{!r:d}").format(1)


def test_isdigit():
assert_true(isdigit(ord("1")))
assert_false(isdigit(ord("g")))
Expand Down Expand Up @@ -1534,6 1635,7 @@ def main():
test_indexing()
test_string_iter()
test_format_args()
test_format_conversion_flags()
test_isdigit()
test_isprintable()
test_rjust()
Expand Down

0 comments on commit f13ddcf

Please sign in to comment.