Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add a C23 inspired checked integer multiplication helper #198

Merged
merged 1 commit into from
Feb 3, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 23,7 @@ cc_library(
name = "pcre2",
srcs = [
"src/pcre2_auto_possess.c",
"src/pcre2_chkdint.c",
"src/pcre2_compile.c",
"src/pcre2_config.c",
"src/pcre2_context.c",
Expand Down
8 changes: 8 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 162,13 @@ CHECK_C_SOURCE_COMPILES(

set(ORIG_CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS})
set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} -Werror")

# Probe for the __builtin_mul_overflow() intrinsic. The probe is compiled with
# -Werror appended to CMAKE_REQUIRED_FLAGS, so it must be warning-clean:
# size_t comes from the ISO C header <stddef.h> (not the POSIX-only
# <sys/types.h>), main() has a proper prototype, and the operands are
# initialised before use.
CHECK_C_SOURCE_COMPILES(
  "#include <stddef.h>
  int main(void) { int a = 0, b = 0; size_t m; __builtin_mul_overflow(a,b,&m); return 0; }"
  HAVE_BUILTIN_MUL_OVERFLOW
)

CHECK_C_SOURCE_COMPILES(
"int main() { char buf[128] __attribute__((uninitialized)); (void)buf; return 0; }"
HAVE_ATTRIBUTE_UNINITIALIZED
Expand Down Expand Up @@ -610,6 617,7 @@ SET(PCRE2_HEADERS ${PROJECT_BINARY_DIR}/pcre2.h)
SET(PCRE2_SOURCES
src/pcre2_auto_possess.c
${PROJECT_BINARY_DIR}/pcre2_chartables.c
src/pcre2_chkdint.c
src/pcre2_compile.c
src/pcre2_config.c
src/pcre2_context.c
Expand Down
1 change: 1 addition & 0 deletions Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -353,6 353,7 @@ NODIST_SOURCES = src/pcre2_chartables.c

COMMON_SOURCES = \
src/pcre2_auto_possess.c \
src/pcre2_chkdint.c \
src/pcre2_compile.c \
src/pcre2_config.c \
src/pcre2_context.c \
Expand Down
1 change: 1 addition & 0 deletions config-cmake.h.in
Original file line number Diff line number Diff line change
@@ -1,5 1,6 @@
/* config.h for CMake builds */

#cmakedefine HAVE_BUILTIN_MUL_OVERFLOW 1
#cmakedefine HAVE_ATTRIBUTE_UNINITIALIZED 1
#cmakedefine HAVE_DIRENT_H 1
#cmakedefine HAVE_STRERROR 1
Expand Down
26 changes: 26 additions & 0 deletions configure.ac
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 73,32 @@ AC_SYS_LARGEFILE

PCRE2_VISIBILITY

# Check for the mul_overflow() builtin
#
# The probe is compiled with -Werror so that an implicit declaration of the
# builtin is reported as a failure. size_t is taken from the ISO C header
# stddef.h unconditionally, so that the result does not depend on whether
# the HAVE_SYS_TYPES_H or HAVE_STDLIB_H header checks have already been run.
# The caller's CFLAGS are restored once the probe has finished.

AC_MSG_CHECKING([for __builtin_mul_overflow()])
AC_LANG_PUSH([C])
tmp_CFLAGS=$CFLAGS
CFLAGS="$CFLAGS -Werror"
AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[
#include <stddef.h>

int a, b;
size_t m;
]], [[__builtin_mul_overflow(a, b, &m)]])],
[pcre2_cc_cv_builtin_mul_overflow=yes],
[pcre2_cc_cv_builtin_mul_overflow=no])
AC_MSG_RESULT([$pcre2_cc_cv_builtin_mul_overflow])
if test "$pcre2_cc_cv_builtin_mul_overflow" = yes; then
AC_DEFINE([HAVE_BUILTIN_MUL_OVERFLOW], 1,
[Define this if your compiler provides __builtin_mul_overflow()])
fi
CFLAGS=$tmp_CFLAGS
AC_LANG_POP([C])

# Check for Clang __attribute__((uninitialized)) feature

AC_MSG_CHECKING([for __attribute__((uninitialized))])
Expand Down
3 changes: 3 additions & 0 deletions src/config.h.generic
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 58,9 @@ sure both macros are undefined; an emulation function will then be used. */
/* Define to 1 if you have the `bcopy' function. */
/* #undef HAVE_BCOPY */

/* Define this if your compiler provides __builtin_mul_overflow() */
/* #undef HAVE_BUILTIN_MUL_OVERFLOW */

/* Define to 1 if you have the <bzlib.h> header file. */
/* #undef HAVE_BZLIB_H */

Expand Down
3 changes: 3 additions & 0 deletions src/config.h.in
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 58,9 @@ sure both macros are undefined; an emulation function will then be used. */
/* Define to 1 if you have the `bcopy' function. */
#undef HAVE_BCOPY

/* Define this if your compiler provides __builtin_mul_overflow() */
#undef HAVE_BUILTIN_MUL_OVERFLOW

/* Define to 1 if you have the <bzlib.h> header file. */
#undef HAVE_BZLIB_H

Expand Down
96 changes: 96 additions & 0 deletions src/pcre2_chkdint.c
Original file line number Diff line number Diff line change
@@ -0,0 1,96 @@
/*************************************************
* Perl-Compatible Regular Expressions *
*************************************************/

/* PCRE is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language.

Written by Philip Hazel
Copyright (c) 2023 University of Cambridge

-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.

* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.

* Neither the name of the University of Cambridge nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/

/* This file contains functions that implement checked integer operations. */

#ifndef PCRE2_PCRE2TEST
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

#include "pcre2_internal.h"
#endif

/*************************************************
*        Checked Integer Multiplication          *
*************************************************/

/* Multiply two ints and store the product as a PCRE2_SIZE, but only when the
product can be represented exactly in that type. The interface is modeled
after C23's <stdckdint.h>.

When the compiler provides __builtin_mul_overflow() it is used directly.
Otherwise the product is computed in INT64_OR_DOUBLE, which is a 64-bit
integer type when available, otherwise double, and range-checked by hand.

Arguments:
  r         A pointer to PCRE2_SIZE to store the answer
  a, b      Two integers; callers are expected to pass non-negative values
              (the fallback implementation aborts on a negative operand when
              PCRE2_DEBUG is defined)

Returns:    TRUE if the product cannot be represented in a PCRE2_SIZE, in
              which case *r is left unchanged; FALSE if *r has been set
*/

BOOL
PRIV(ckd_smul)(PCRE2_SIZE *r, int a, int b)
{
#ifdef HAVE_BUILTIN_MUL_OVERFLOW
PCRE2_SIZE m;

/* The builtin reports overflow whenever the mathematically exact product
does not fit in the (unsigned) destination type, which includes every
negative product. */

if (__builtin_mul_overflow(a, b, &m)) return TRUE;

*r = m;
#else
INT64_OR_DOUBLE m;

#ifdef PCRE2_DEBUG
if (a < 0 || b < 0) abort();
#endif

m = (INT64_OR_DOUBLE)a * (INT64_OR_DOUBLE)b;

/* A negative product is not representable in PCRE2_SIZE. Reject it here so
that this path agrees with the builtin path above instead of storing a
wrapped-around value and reporting success. */

if (m < 0) return TRUE;

#if defined INT64_MAX || defined int64_t
/* The product of two ints always fits in 64 bits, so the upper bound needs
checking only when PCRE2_SIZE is narrower than the 64-bit type. */

if (sizeof(m) > sizeof(*r) && m > (INT64_OR_DOUBLE)PCRE2_SIZE_MAX) return TRUE;
*r = (PCRE2_SIZE)m;
#else
if (m > PCRE2_SIZE_MAX) return TRUE;
*r = (PCRE2_SIZE)m;
#endif

#endif

return FALSE;
}

/* End of pcre2_chkdint.c */
32 changes: 12 additions & 20 deletions src/pcre2_compile.c
Original file line number Diff line number Diff line change
Expand Up @@ -7112,15 7112,12 @@ for (;; pptr )

/* In the pre-compile phase, we don't actually do the replication. We
just adjust the length as if we had. Do some paranoid checks for
potential integer overflow. The INT64_OR_DOUBLE type is a 64-bit
integer type when available, otherwise double. */
potential integer overflow. */

if (lengthptr != NULL)
{
PCRE2_SIZE delta = replicate*(1 LINK_SIZE);
if ((INT64_OR_DOUBLE)replicate*
(INT64_OR_DOUBLE)(1 LINK_SIZE) >
(INT64_OR_DOUBLE)INT_MAX ||
PCRE2_SIZE delta;
if (PRIV(ckd_smul)(&delta, replicate, 1 LINK_SIZE) ||
OFLOW_MAX - *lengthptr < delta)
{
*errorcodeptr = ERR20;
Expand Down Expand Up @@ -7282,15 7279,12 @@ for (;; pptr )
{
/* In the pre-compile phase, we don't actually do the replication.
We just adjust the length as if we had. Do some paranoid checks for
potential integer overflow. The INT64_OR_DOUBLE type is a 64-bit
integer type when available, otherwise double. */
potential integer overflow. */

if (lengthptr != NULL)
{
PCRE2_SIZE delta = (repeat_min - 1)*length_prevgroup;
if ((INT64_OR_DOUBLE)(repeat_min - 1)*
(INT64_OR_DOUBLE)length_prevgroup >
(INT64_OR_DOUBLE)INT_MAX ||
PCRE2_SIZE delta;
if (PRIV(ckd_smul)(&delta, repeat_min - 1, length_prevgroup) ||
OFLOW_MAX - *lengthptr < delta)
{
*errorcodeptr = ERR20;
Expand Down Expand Up @@ -7334,21 7328,19 @@ for (;; pptr )
just adjust the length as if we had. For each repetition we must add
1 to the length for BRAZERO and for all but the last repetition we
must add 2 2*LINKSIZE to allow for the nesting that occurs. Do some
paranoid checks to avoid integer overflow. The INT64_OR_DOUBLE type
is a 64-bit integer type when available, otherwise double. */
paranoid checks to avoid integer overflow. */

if (lengthptr != NULL && repeat_max > 0)
{
PCRE2_SIZE delta = repeat_max*(length_prevgroup 1 2 2*LINK_SIZE) -
2 - 2*LINK_SIZE; /* Last one doesn't nest */
if ((INT64_OR_DOUBLE)repeat_max *
(INT64_OR_DOUBLE)(length_prevgroup 1 2 2*LINK_SIZE)
> (INT64_OR_DOUBLE)INT_MAX ||
OFLOW_MAX - *lengthptr < delta)
PCRE2_SIZE delta;
if (PRIV(ckd_smul)(&delta, repeat_max,
length_prevgroup 1 2 2*LINK_SIZE) ||
OFLOW_MAX (2 2*LINK_SIZE) - *lengthptr < delta)
{
*errorcodeptr = ERR20;
return 0;
}
delta -= (2 2*LINK_SIZE); /* Last one doesn't nest */
*lengthptr = delta;
}

Expand Down
7 changes: 5 additions & 2 deletions src/pcre2_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -156,8 156,8 @@ pcre2_match() because of the way it backtracks. */
#define PCRE2_SPTR CUSTOM_SUBJECT_PTR
#endif

/* When checking for integer overflow in pcre2_compile(), we need to handle
large integers. If a 64-bit integer type is available, we can use that.
/* When checking for integer overflow, we need to handle large integers.
If a 64-bit integer type is available, we can use that.
Otherwise we have to cast to double, which of course requires floating point
arithmetic. Handle this by defining a macro for the appropriate type. */

Expand Down Expand Up @@ -2042,6 2042,9 @@ extern void * _pcre2_memmove(void *, const void *, size_t);
#endif

#endif /* PCRE2_CODE_UNIT_WIDTH */

extern BOOL PRIV(ckd_smul)(PCRE2_SIZE *, int, int);

#endif /* PCRE2_INTERNAL_H_IDEMPOTENT_GUARD */

/* End of pcre2_internal.h */
14 changes: 11 additions & 3 deletions src/pcre2test.c
Original file line number Diff line number Diff line change
Expand Up @@ -272,7 272,7 @@ for building the library. */
#include "pcre2_internal.h"

/* We need access to some of the data tables that PCRE2 uses. Defining
PCRE2_PCRETEST makes some minor changes in the files. The previous definition
PCRE2_PCRE2TEST makes some minor changes in the files. The previous definition
of PRIV avoids name clashes. */

#define PCRE2_PCRE2TEST
Expand Down Expand Up @@ -336,6 336,8 @@ these inclusions should not be changed. */

#define PCRE2_SUFFIX(a) a

#include "pcre2_chkdint.c"

/* We need to be able to check input text for UTF-8 validity, whatever code
widths are actually available, because the input to pcre2test is always in
8-bit code units. So we include the UTF validity checking function for 8-bit
Expand Down Expand Up @@ -6836,7 6838,7 @@ if (dbuffer != NULL)
the number of code units that will be needed (though the buffer may have to be
extended if replication is involved). */

needlen = (size_t)((len 1) * code_unit_size);
needlen = (len 1) * code_unit_size;
if (dbuffer == NULL || needlen >= dbuffer_size)
{
while (needlen >= dbuffer_size)
Expand Down Expand Up @@ -6867,6 6869,7 @@ while ((c = *p ) != 0)

if (c == ']' && start_rep != NULL)
{
PCRE2_SIZE d;
long li;
char *endptr;

Expand Down Expand Up @@ -6898,7 6901,12 @@ while ((c = *p ) != 0)
}

replen = CAST8VAR(q) - start_rep;
needlen = replen * i;
if (PRIV(ckd_smul)(&d, replen, i))
{
fprintf(outfile, "** Expanded content too large\n");
return PR_OK;
}
needlen = d;

if (needlen >= dbuffer_size)
{
Expand Down