Fix typos and spacing in spec and samples. (#406)

This commit is contained in:
Yahui Sun 2020-06-30 14:46:22 -05:00 коммит произвёл GitHub
Родитель 9df9af5a5f
Коммит caff1bb5ad
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23
4 изменённых файлов: 205 добавлений и 205 удалений

Просмотреть файл

@ -7,11 +7,11 @@
// clang -o string-helpers string-helper.c // clang -o string-helpers string-helper.c
// On Windows use: // On Windows use:
// clang -o string-helpers.exe string-helpers.c // clang -o string-helpers.exe string-helpers.c
// //
// Then run the program with 6 string arguments, the 3rd of which should // Then run the program with 6 string arguments, the 3rd of which should
// be an integer For example: // be an integer For example:
// string-helpers hello meet -125 fed brown red // string-helpers hello meet -125 fed brown red
// //
// Overview of using null-terminated arrays in Checked C: // Overview of using null-terminated arrays in Checked C:
// //
// In Checked C, pointers to null-terminated data are represented using the // In Checked C, pointers to null-terminated data are represented using the
@ -52,7 +52,7 @@
// int f(nt_array_ptr<char> p : count(n), int n) { // int f(nt_array_ptr<char> p : count(n), int n) {
// if (p[n]) { // read element at upper bound. // if (p[n]) { // read element at upper bound.
// ... // ...
// //
// If v != 0, the following code fails at runtime for an nt_array_ptr. It // If v != 0, the following code fails at runtime for an nt_array_ptr. It
// always fails for an array_ptr. // always fails for an array_ptr.
// //
@ -67,13 +67,13 @@
// int set_zero(nt_array_ptr<char> p : count(n), int n) { // int set_zero(nt_array_ptr<char> p : count(n), int n) {
// p[n] = 0; // allowed for nt_array_ptr. Runtime error for array_ptr! // p[n] = 0; // allowed for nt_array_ptr. Runtime error for array_ptr!
// } // }
// //
// The following example illustrates how bounds differ from // The following example illustrates how bounds differ from
// the count: // the count:
// //
// nt_array_ptr<char> : count(5) = "hello"; // nt_array_ptr<char> : count(5) = "hello";
// //
// The string "hello" contains 6 characters, the last of which is // The string "hello" contains 6 characters, the last of which is
// a null terminator. The valid bounds is count(5). // a null terminator. The valid bounds is count(5).
// //
// //
@ -84,7 +84,7 @@
// 1. Nt_array_ptrs with no bounds declared have a default bounds of count(0). // 1. Nt_array_ptrs with no bounds declared have a default bounds of count(0).
// 2. If you are using array subscripting to access the nt_array_ptr, // 2. If you are using array subscripting to access the nt_array_ptr,
// you'll need to widen the bounds as you determine the last character // you'll need to widen the bounds as you determine the last character
// is not a null terminator. This is typically done by // is not a null terminator. This is typically done by
// declaring bounds involving a separate counter: // declaring bounds involving a separate counter:
// array_ptr<char> p : count(0); // array_ptr<char> p : count(0);
// int i = 0; // int i = 0;
@ -108,7 +108,7 @@
// for ( ; arr[i] !=0; i++) { // for ( ; arr[i] !=0; i++) {
// nt_array_ptr<char> : count(i+1) tmp = p; // nt_array_ptr<char> : count(i+1) tmp = p;
// tmp[i] = ... // tmp[i] = ...
// //
// The function my_strlen show points 1 and 2. It is less // The function my_strlen show points 1 and 2. It is less
// frequent for string functions to modify strings. The function squeeze, // frequent for string functions to modify strings. The function squeeze,
// which does do that, shows point 3. // which does do that, shows point 3.
@ -136,7 +136,7 @@ checked int my_strlen(nt_array_ptr<char> p) {
} }
// Delete all c from p (adapted from p. 47, K&R 2nd Edition) // Delete all c from p (adapted from p. 47, K&R 2nd Edition)
// p implicltly has count(0). // p implicitly has count(0).
checked void squeeze(nt_array_ptr<char> p, char c) { checked void squeeze(nt_array_ptr<char> p, char c) {
int i = 0, j = 0; int i = 0, j = 0;
// Create a temporary whose count of elements can // Create a temporary whose count of elements can

Просмотреть файл

@ -3,13 +3,13 @@
\chapter{Extensions to existing C concepts} \chapter{Extensions to existing C concepts}
\label{chapter:core-extensions} \label{chapter:core-extensions}
This chapter describes extensions to existing C concepts. This chapter describes extensions to existing C concepts.
It covers new kinds of pointer types and array types, their meaning, It covers new kinds of pointer types and array types, their meaning,
and the operations on them. It introduces two and the operations on them. It introduces two
new program scopes: \lstinline+checked+ and \lstinline+unchecked+ new program scopes: \lstinline+checked+ and \lstinline+unchecked+
blocks. It covers a generalization of assertions to blocks. It covers a generalization of assertions to
dynamic checks that are never removed (unless a compiler dynamic checks that are never removed (unless a compiler
proves them redundant) because bounds safety depends upon them. proves them redundant) because bounds safety depends upon them.
Finally, it describes changes to Finally, it describes changes to
undefined behavior needed for bounds safety. undefined behavior needed for bounds safety.
@ -22,7 +22,7 @@ capital letter are reserved for system use, as are identifiers that
begin with two underscores \cite[Section 7.1.3]{ISO2011}. The following begin with two underscores \cite[Section 7.1.3]{ISO2011}. The following
new keywords are introduced: new keywords are introduced:
\begin{lstlisting} \begin{lstlisting}
_Array_ptr _Checked _Dynamic_check _For_any _Itype_for_any _Nt_array_ptr _Nt_checked _Array_ptr _Checked _Dynamic_check _For_any _Itype_for_any _Nt_array_ptr _Nt_checked
_Opaque _Ptr _Reveal _Where _Unchecked _Opaque _Ptr _Reveal _Where _Unchecked
\end{lstlisting} \end{lstlisting}
@ -38,7 +38,7 @@ the implementations of standard header files need to be modified to
not use identifiers that conflict with these keywords. not use identifiers that conflict with these keywords.
The all-lowercase versions of the keywords are: The all-lowercase versions of the keywords are:
\begin{lstlisting} \begin{lstlisting}
array_ptr checked dynamic_check for_any nt_array_ptr nt_checked array_ptr checked dynamic_check for_any nt_array_ptr nt_checked
opaque ptr reveal where unchecked opaque ptr reveal where unchecked
\end{lstlisting} \end{lstlisting}
@ -87,8 +87,8 @@ have an error or warning mode that flags unexpected uses of `\code{*}'.
The same syntax as C++ template instantiations is used for building The same syntax as C++ template instantiations is used for building
instances of these types because this syntax is well-known and instances of these types because this syntax is well-known and
understood. The new pointer types are added to the syntax for {\it type specifiers} understood. The new pointer types are added to the syntax for {\it type specifiers}
\cite[Section 6.7.2]{ISO2011}. The parameters to these type constructors \cite[Section 6.7.2]{ISO2011}. The parameters to these type constructors
must be types, which are described syntactically by {\it type names} must be types, which are described syntactically by {\it type names}
\cite[Section 6.7.7]{ISO2011}. If Checked C is extended to C++, \cite[Section 6.7.7]{ISO2011}. If Checked C is extended to C++,
in the C++ extension these new types will be template types that have special meaning. in the C++ extension these new types will be template types that have special meaning.
@ -125,10 +125,10 @@ int *const p = &x;
const ptr<int> q = &x; const ptr<int> q = &x;
const array_ptr<int> r = &x; const array_ptr<int> r = &x;
\end{lstlisting} \end{lstlisting}
In this case, the pointers cannot be modified after they are defined, but In this case, the pointers cannot be modified after they are defined, but
the integer that they point to can be modified. The checked pointer declarations the integer that they point to can be modified. The checked pointer declarations
have simpler syntax than the unchecked pointer declaration, where \keyword{const} have simpler syntax than the unchecked pointer declaration, where \keyword{const}
must be placed after the \lstinline+*+. The checked pointer declarations are all analogous must be placed after the \lstinline+*+. The checked pointer declarations are all analogous
to \lstinline+const double y = 5.0;+ declaring a variable \lstinline+y+ that is \keyword{const}. to \lstinline+const double y = 5.0;+ declaring a variable \lstinline+y+ that is \keyword{const}.
In the declaration of \keyword{y}, \keyword{double} specifies a type. In the declaration of \keyword{y}, \keyword{double} specifies a type.
The \keyword{const} keyword is placed before it to declare that The \keyword{const} keyword is placed before it to declare that
@ -137,7 +137,7 @@ and \arrayptrint\ all specify types too.
The checked pointer types follow the same rules that apply to modifiers for The checked pointer types follow the same rules that apply to modifiers for
unchecked pointer types. For example, a variable that is a pointer to a unchecked pointer types. For example, a variable that is a pointer to a
\keyword{const} value can be assigned a pointer to a non-const value. \keyword{const} value can be assigned a pointer to a non-const value.
The reverse is not allowed. The reverse is not allowed.
Array pointers (\arrayptr) of function types are not allowed. Functions have Array pointers (\arrayptr) of function types are not allowed. Functions have
@ -166,7 +166,7 @@ int a checked[10];
All array references to checked array types are bounds checked. C has All array references to checked array types are bounds checked. C has
the rule that an ``array of \var{T}'' is converted implicitly to a the rule that an ``array of \var{T}'' is converted implicitly to a
``pointer to \var{T}'' in a number of situations. This rule is extended ``pointer to \var{T}'' in a number of situations. This rule is extended
to convert a ``checked array of \var{T}'' to an ``\arrayptr\ to convert a ``checked array of \var{T}'' to an ``\arrayptr\
to \var{T}''. to \var{T}''.
In C, array types may be complete or incomplete. A complete array type In C, array types may be complete or incomplete. A complete array type
@ -186,7 +186,7 @@ that \lstinline{i >= 0} and \lstinline{i < 10}.
Array references to multi-dimensional arrays must be uniformly bounds Array references to multi-dimensional arrays must be uniformly bounds
checked or not bounds checked. If any dimension is bounds checked, all checked or not bounds checked. If any dimension is bounds checked, all
dimensions must be checked. A programmer can simply declare that dimensions must be checked. A programmer can simply declare that
the outer dimension of an array is checked. The checked property will be the outer dimension of an array is checked. The checked property will be
propagated to the inner dimensions: propagated to the inner dimensions:
\begin{lstlisting} \begin{lstlisting}
@ -258,9 +258,9 @@ can be used to declare multi-dimensional arrays:
typedef int arr_ty[10]; typedef int arr_ty[10];
arr_ty x[10]; arr_ty x[10];
\end{lstlisting} \end{lstlisting}
The checked property is propagated only to array declarators that are The checked property is propagated only to array declarators that are
nested directly within other array declarators. It is not propagated to nested directly within other array declarators. It is not propagated to
the bodies of type definitions. It is an error if an array type and a nested the bodies of type definitions. It is an error if an array type and a nested
array type from the use of a type name have different checked properties. array type from the use of a type name have different checked properties.
Here are examples of correct and incorrect declarations: Here are examples of correct and incorrect declarations:
@ -282,9 +282,9 @@ in memory that ends with a null terminator value. We add a new type
to represent pointers to these kinds of arrays. We divide to represent pointers to these kinds of arrays. We divide
these arrays into two parts: a prefix with bounds and these arrays into two parts: a prefix with bounds and
a sequence of additional elements that ends a sequence of additional elements that ends
with a null terminator. The initial elements of the with a null terminator. The initial elements of the
sequence can be read, provided that preceding elements sequence can be read, provided that preceding elements
are not the null terminator. The bounds can be are not the null terminator. The bounds can be
{\em widened} based on the number of elements read. {\em widened} based on the number of elements read.
A programmer may declare bounds for an \ntarrayptr\ variable. If bounds A programmer may declare bounds for an \ntarrayptr\ variable. If bounds
@ -293,12 +293,12 @@ at runtime.
If no bounds are declared, the bounds are inferred. At variable declarations, If no bounds are declared, the bounds are inferred. At variable declarations,
the declared bounds are for arrays with a prefix of 0 elements. the declared bounds are for arrays with a prefix of 0 elements.
At variable uses, At variable uses,
the bounds are determined using a program analysis that widens the bounds are determined using a program analysis that widens
bounds based on control-flow. bounds based on control-flow.
The bounds are then used to check memory accesses. The bounds are then used to check memory accesses.
A problem with using an analysis to determine bounds A problem with using an analysis to determine bounds
is that the bounds are not written down in the program. is that the bounds are not written down in the program.
This may lead to unexpected failures if the bounds are narrower This may lead to unexpected failures if the bounds are narrower
than expected and it may make it difficult to understand than expected and it may make it difficult to understand
failures. failures.
@ -314,10 +314,10 @@ if (*p == 'a')
if (*(p + 1) == 'b') if (*(p + 1) == 'b')
if (*(p + 2) == 'c') if (*(p + 2) == 'c')
\end{lstlisting} \end{lstlisting}
The element type \var{T} of an \ntarrayptrT\ must be The element type \var{T} of an \ntarrayptrT\ must be
an integral or pointer type. \ntarrayptrT\ extends \arrayptrT: the an integral or pointer type. \ntarrayptrT\ extends \arrayptrT: the
prefix with bounds is treated the same as if an \arrayptrT\ pointed prefix with bounds is treated the same as if an \arrayptrT\ pointed
to it. An \ntarrayptrT\ value can be converted to an \arrayptrT\ value to it. An \ntarrayptrT\ value can be converted to an \arrayptrT\ value
with the same bounds. with the same bounds.
@ -400,11 +400,11 @@ The following operations involving pointer-typed values are allowed:
inequality. The pointers do not have to be the same kind of pointer. inequality. The pointers do not have to be the same kind of pointer.
To support reasoning about program behavior, the result of comparing To support reasoning about program behavior, the result of comparing
pointers to different objects must be defined. Checked pointers can also pointers to different objects must be defined. Checked pointers can also
be compared for equality or inequality with \code{0} and \void\ pointers, be compared for equality or inequality with \code{0} and \void\ pointers,
just like unchecked pointers. just like unchecked pointers.
\item \item
Pointers to objects of the same type can be compared relationally. Relational comparisons are the Pointers to objects of the same type can be compared relationally. Relational comparisons are the
\lstinline|<|, \lstinline|<=|, \lstinline|>|, \lstinline|>=| operators. The pointers do not have \lstinline|<|, \lstinline|<=|, \lstinline|>|, \lstinline|>=| operators. The pointers do not have
to be to be
the same kind of pointer. For example, an \uncheckedptrT\ can be compared with an the same kind of pointer. For example, an \uncheckedptrT\ can be compared with an
\arrayptrT . To support bounds checking and reasoning about program behavior, the \arrayptrT . To support bounds checking and reasoning about program behavior, the
@ -480,7 +480,7 @@ Programmers must initialize these variables explicitly using C's initializer syn
when they are declared. when they are declared.
Initializing an automatic variable with a checked pointer type is simple: a programmer adds Initializing an automatic variable with a checked pointer type is simple: a programmer adds
\lstinline+= 0+ or \lstinline+x = NULL+ to the variable declarator. C's initializer syntax \lstinline+= 0+ or \lstinline+x = NULL+ to the variable declarator. C's initializer syntax
allows compact initializers for structure variables and array variables. allows compact initializers for structure variables and array variables.
A list of initialization values is specified using a brace-enclosed list of expressions. A list of initialization values is specified using a brace-enclosed list of expressions.
The initializer list can be partial list that describes The initializer list can be partial list that describes
@ -526,13 +526,13 @@ scopes, the declared types of variables are allowed to be or use
checked pointer types or checked array types; unchecked pointer types checked pointer types or checked array types; unchecked pointer types
and unchecked array types are not allowed. Similarly, for functions, and unchecked array types are not allowed. Similarly, for functions,
return types and parameter types are allowed to be or use checked pointer return types and parameter types are allowed to be or use checked pointer
types or checked array types. types or checked array types.
On the other hand, declarations in checked scopes can use unchecked pointer On the other hand, declarations in checked scopes can use unchecked pointer
types and unchecked array types, provided that the declarations provide a types and unchecked array types, provided that the declarations provide a
bounds-safe interface. These are described in bounds-safe interface. These are described in
Section~\ref{section:function-bounds-safe-interfaces}. Section~\ref{section:function-bounds-safe-interfaces}.
Variables and functions used in checked scopes are Variables and functions used in checked scopes are
allowed to have or use checked pointer types, checked array types, or allowed to have or use checked pointer types, checked array types, or
unchecked pointer types and unchecked array types with bounds declarations. unchecked pointer types and unchecked array types with bounds declarations.
@ -551,7 +551,7 @@ existing C code.
A checked block is introduced by placing \keyword{checked} before a A checked block is introduced by placing \keyword{checked} before a
compound block: compound block:
\begin{lstlisting} \begin{lstlisting}
checked checked
{ {
int a = 5; int a = 5;
ptr<int> pa = &a; ptr<int> pa = &a;
@ -573,25 +573,25 @@ checked keyword can be used. This example shows the use of an unchecked
block: block:
\begin{lstlisting} \begin{lstlisting}
checked checked
{ {
int a = 5; int a = 5;
unchecked unchecked
{ {
int *upa = &a; int *upa = &a;
int b[5][5]; int b[5][5];
for (int i = 0; i < 5; i++) { for (int i = 0; i < 5; i++) {
for (int j = 0; j < 5; j++) { for (int j = 0; j < 5; j++) {
// not bounds checked // not bounds checked
b[i][j] = -1; b[i][j] = -1;
} }
} }
} }
... ...
} }
\end{lstlisting} \end{lstlisting}
In a checked function definition, the body of the function is a In a checked function definition, the body of the function is a
checked block. A checked function definition is declared by placing the checked block. A checked function definition is declared by placing the
\keyword{checked} keyword before the definition. Here are examples of checked and \keyword{checked} keyword before the definition. Here are examples of checked and
unchecked function definitions: unchecked function definitions:
@ -599,7 +599,7 @@ unchecked function definitions:
\begin{lstlisting} \begin{lstlisting}
// checked at the function level: no unchecked pointers can appear in // checked at the function level: no unchecked pointers can appear in
// argument types, the return type, or the body of the function. // argument types, the return type, or the body of the function.
checked int f(ptr<int> p) checked int f(ptr<int> p)
{ {
int a = 5; int a = 5;
ptr<int> pa = &a; ptr<int> pa = &a;
@ -635,10 +635,10 @@ referred to as K\&R-style function declarations because this is the only way
functions were declared in the first edition of the C Programming Language book functions were declared in the first edition of the C Programming Language book
by Brian Kernigan and Dennis Ritchie. The second edition incorporated changes from the by Brian Kernigan and Dennis Ritchie. The second edition incorporated changes from the
C ANSI Standard and introduced function prototypes.}, where the types C ANSI Standard and introduced function prototypes.}, where the types
of the arguments to functions are not specified . These of the arguments to functions are not specified. These
functions are dangerous to use because there can be mismatches functions are dangerous to use because there can be mismatches
between argument types and parameter types at function between argument types and parameter types at function
calls. This can corrupt data or the call stack. In checked scopes, calls. This can corrupt data or the call stack. In checked scopes,
the use or declaration of functions without prototypes is not allowed. the use or declaration of functions without prototypes is not allowed.
As we add different notions of checking to Checked C, we will use the As we add different notions of checking to Checked C, we will use the
@ -656,7 +656,7 @@ It consists of a parenthesized type name followed by list of initializers in bra
A compound literal has static storage if it is at the top level of a file and automatic A compound literal has static storage if it is at the top level of a file and automatic
storage if it occurs within a block. Here are examples of compound literals: storage if it occurs within a block. Here are examples of compound literals:
\begin{lstlisting} \begin{lstlisting}
int *arr = (int []) { 0, 1, 2}; int *arr = (int []) { 0, 1, 2 };
struct Point { struct Point {
int x; int x;
int y; int y;
@ -714,7 +714,7 @@ checked void f(void) {
// checking to nt_array_ptr<char>. // checking to nt_array_ptr<char>.
nt_array_ptr<char> p = "hello"; nt_array_ptr<char> p = "hello";
// "goodbye" has type char nt_checked[8]. The type is altered during // "goodbye" has type char nt_checked[8]. The type is altered during
// type checking to the pointer type array_ptr. // type checking to the pointer type array_ptr.
array_ptr<char> r = "goodbye"; array_ptr<char> r = "goodbye";
@ -803,11 +803,11 @@ struct inner_array {
typedef struct inner_array ragged_2d_arr[]; typedef struct inner_array ragged_2d_arr[];
ragged_2d_arr data = ragged_2d_arr data =
{ { 3, (double checked[]) { 1.0, 0.0, 0.0 } }, { { 3, (double checked[]) { 1.0, 0.0, 0.0 } },
{ 1, (double checked[]) { 5.0 } }, { 1, (double checked[]) { 5.0 } },
{ 0, 0 }, { 0, 0 },
{ 2, (double checked[]) { 6.0, 7.0 } } { 2, (double checked[]) { 6.0, 7.0 } }
}; };
struct BufferPair { struct BufferPair {
@ -841,7 +841,7 @@ nt_array_ptr<char> more_colors nt_checked[] = { "white", "black",
// A null-terminated array of pointers to null-terminated arrays // A null-terminated array of pointers to null-terminated arrays
// of strings. // of strings.
nt_array_ptr<nt_array_ptr<char>> sentences nt_checked[4] = nt_array_ptr<nt_array_ptr<char>> sentences nt_checked[4] =
{ (nt_array_ptr<char> nt_checked[]) { "this", "is", "the", "first", { (nt_array_ptr<char> nt_checked[]) { "this", "is", "the", "first",
"sentence", 0 }, "sentence", 0 },
(nt_array_ptr<char> nt_checked[]) { "here", "is", "another", 0 }, (nt_array_ptr<char> nt_checked[]) { "here", "is", "another", 0 },
@ -869,16 +869,16 @@ The type to which a pointer and its bounds are relatively aligned is
called the relative alignment type. By default, the relative alignment called the relative alignment type. By default, the relative alignment
type for a pointer and its bounds is the referent type. However, the type for a pointer and its bounds is the referent type. However, the
relative alignment type can be overridden by specifying it explicitly as relative alignment type can be overridden by specifying it explicitly as
part of the bounds. This is described in part of the bounds. This is described in
Section~\ref{section:representing-relative-alignment}. Section~\ref{section:representing-relative-alignment}.
This can be used for bounds for the results of pointers casts and This can be used for bounds for the results of pointers casts and
for \arrayptrvoid\ pointers. The type for \arrayptrvoid\ pointers. The type
\void\ has no defined size, so the default relative alignment is \void\ has no defined size, so the default relative alignment is
undefined for \void. undefined for \void.
We considered removing the entire concept of relative alignment from the We considered removing the entire concept of relative alignment from the
design to simplify the design. We decided against that because it would design to simplify the design. We decided against that because it would
increase the cost of bounds checking throughout the program. increase the cost of bounds checking throughout the program.
Section~\ref{section:design-alternatives:always-unaligned} explains Section~\ref{section:design-alternatives:always-unaligned} explains
this choice in more detail. this choice in more detail.
@ -1036,7 +1036,7 @@ pointer arithmetic.
To support this expansion, integer arithmetic operators are extended To support this expansion, integer arithmetic operators are extended
with the operators \plusovf, \minusovf, and \mulovf. The with the operators \plusovf, \minusovf, and \mulovf. The
operators interpet pointers as unsigned integers in some range 0 to operators interpret pointers as unsigned integers in some range 0 to
\code{UINTPTR_MAX}. An operator produces a runtime error if the value \code{UINTPTR_MAX}. An operator produces a runtime error if the value
of its result cannot be represented by the result type: of its result cannot be represented by the result type:
@ -1044,7 +1044,7 @@ of its result cannot be represented by the result type:
\item \item
\plusovf\ takes an unsigned integer \var{i} and an \plusovf\ takes an unsigned integer \var{i} and an
integer \var{j} and produces an unsigned integer in the range 0 to integer \var{j} and produces an unsigned integer in the range 0 to
\code{UINTPTR_MAX}. Its result is the mathemetical value \var{i} + \var{j}. \code{UINTPTR_MAX}. Its result is the mathematical value \var{i} + \var{j}.
\item \item
For subtraction, there are two forms: For subtraction, there are two forms:
@ -1057,7 +1057,7 @@ of its result cannot be represented by the result type:
\item \item
\lstinline|-|\textsubscript{ovf\_diff } takes two unsigned integers \var{i} \lstinline|-|\textsubscript{ovf\_diff } takes two unsigned integers \var{i}
and \var{j} and computes \var{i} - \var{j}, producing a signed integer of type and \var{j} and computes \var{i} - \var{j}, producing a signed integer of type
\code{ptrdiff_t}. Its result is the mathemetical value \var{i} - \var{j}. \code{ptrdiff_t}. Its result is the mathematical value \var{i} - \var{j}.
\end{itemize} \end{itemize}
\item \item
\mulovf\ takes two integers \var{i} and \var{j} (both either \mulovf\ takes two integers \var{i} and \var{j} (both either
@ -1080,7 +1080,7 @@ signed or unsigned integer as \code{e2}.
If \code{e2} is a signed integer, the expansion of \code{e1 + e2} If \code{e2} is a signed integer, the expansion of \code{e1 + e2}
must cast \sizeof{\var{T}} to a signed integer. We introduce a must cast \sizeof{\var{T}} to a signed integer. We introduce a
signed integer type \code{signed_size_t} that is large enough for signed integer type \code{signed_size_t} that is large enough for
this cast. \code{e1 + e2} expands to \code{e1} \plusovf\ this cast. \code{e1 + e2} expands to \code{e1} \plusovf\
\code{((signed_size_t)} \sizeof{\var{T}}\code{)} \mulovf\ \code{e2}. This cast is \code{((signed_size_t)} \sizeof{\var{T}}\code{)} \mulovf\ \code{e2}. This cast is
necessary because in C, multiplying a signed integer by an unsigned necessary because in C, multiplying a signed integer by an unsigned
integer implicitly converts the signed integer to be an unsigned integer implicitly converts the signed integer to be an unsigned
@ -1110,7 +1110,7 @@ Otherwise, in a typical C environment, the program would fault at
\lstinline+*p+. In the code \lstinline|if (p < hi) { p += 1; }| \lstinline+*p+. In the code \lstinline|if (p < hi) { p += 1; }|
the check \lstinline+p < hi+ implies that the increment cannot the check \lstinline+p < hi+ implies that the increment cannot
overflow. The checks are also inexpensive on superscalar processors: overflow. The checks are also inexpensive on superscalar processors:
they can be placed so that they are statically predicted by they can be placed so that they are statically predicted by
processors to never fail. processors to never fail.
\section{Programmer-inserted dynamic checks} \section{Programmer-inserted dynamic checks}
@ -1215,7 +1215,7 @@ void decode(array_ptr<char> output_buffer, array_ptr<char> input_buffer,
|\hl{\textit{// need check that dst and src have at least len bytes of space}}| |\hl{\textit{// need check that dst and src have at least len bytes of space}}|
memcpy(dst, src, len); memcpy(dst, src, len);
src += len; src += len;
dst += len; dst += len;
break; break;
} }
case COMPRESSED_INT64: { case COMPRESSED_INT64: {
@ -1273,7 +1273,7 @@ so there is no way to test the path.
\end{lstlisting} \end{lstlisting}
The problems with requiring functions that validate buffer lengths to The problems with requiring functions that validate buffer lengths to
return status codes for errors are analyzed by O'Donell and Sebor\cite{ODonell2015}. return status codes for errors are analyzed by O'Donell and Sebor\cite{ODonell2015}.
Annex K of the C Standard \cite{ISO2011} introduced a new set of standard library functions to replace Annex K of the C Standard \cite{ISO2011} introduced a new set of standard library functions to replace
functions that provide no way to validate their arguments. These functions that provide no way to validate their arguments. These
functions return status codes to indicate success or failure. A classic functions return status codes to indicate success or failure. A classic
@ -1281,7 +1281,7 @@ example of a function prone to misuse is
\lstinline+strcpy(char *dst, const char *src)+. \lstinline+strcpy(char *dst, const char *src)+.
It copies all bytes in \lstinline+src+ to \lstinline+dst+ until It copies all bytes in \lstinline+src+ to \lstinline+dst+ until
it hits a null byte. If \lstinline+src+ is missing the null byte or it hits a null byte. If \lstinline+src+ is missing the null byte or
\lstinline+dst+ is too small, this causes a buffer overrunn. The new \lstinline+dst+ is too small, this causes a buffer overrun. The new
function \lstinline+strcpy_s+ takes an additional size parameter for function \lstinline+strcpy_s+ takes an additional size parameter for
\lstinline+dst+ and has the signature \lstinline+dst+ and has the signature
\lstinline+errno_t strcpy_s(char *dst, size_t dest_len, const char *src)+. \lstinline+errno_t strcpy_s(char *dst, size_t dest_len, const char *src)+.
@ -1305,7 +1305,7 @@ void decode(array_ptr<char> output_buffer, array_ptr<char> input_buffer,
|\hl{\textit{dynamic\_check(dst + len < dst\_bound \&\& src + len < src\_bound);}}| |\hl{\textit{dynamic\_check(dst + len < dst\_bound \&\& src + len < src\_bound);}}|
memcpy(dst, src, len); memcpy(dst, src, len);
src += len; src += len;
dst += len; dst += len;
break; break;
} }
... ...
@ -1321,15 +1321,15 @@ behavior is worse.
The following example uses \lstinline+dynamic_check+ to eliminate bounds The following example uses \lstinline+dynamic_check+ to eliminate bounds
checks in a loop. It is based on experience hand-optimizing C\# and Java checks in a loop. It is based on experience hand-optimizing C\# and Java
programs. This kind of example is typically found during a performance programs. This kind of example is typically found during a performance
tuning phase of program development. In the example, tuning phase of program development. In the example,
the \lstinline|: count(|\var{exp}\lstinline|)| notation indicates that \var{exp} is the the \lstinline|: count(|\var{exp}\lstinline|)| notation indicates that \var{exp} is the
length of the buffer. length of the buffer.
\begin{lstlisting} \begin{lstlisting}
void append(array_ptr<char> dst : count(dst_count), void append(array_ptr<char> dst : count(dst_count),
array_ptr<char> src : count(src_count), array_ptr<char> src : count(src_count),
size_t dst_count, size_t src_count) size_t dst_count, size_t src_count)
{ {
for (size_t i = 0; i < src_count; i++) { for (size_t i = 0; i < src_count; i++) {
if (src[i] == marker) { if (src[i] == marker) {
break; break;
@ -1382,7 +1382,7 @@ A compiler would not introduce this dynamic\_check because it would
alter the behavior of \lstinline+append+. The bounds check on \lstinline+dst+ alter the behavior of \lstinline+append+. The bounds check on \lstinline+dst+
in the original code is done only if a marker is not found in in the original code is done only if a marker is not found in
\lstinline+src+ and \lstinline+src_count > 0+. A compiler could \lstinline+src+ and \lstinline+src_count > 0+. A compiler could
try to deduce a precondition for the loop try to deduce a precondition for the loop
that prevents the bounds check from failing, but this is that prevents the bounds check from failing, but this is
not possible because the precondition depends on the contents of not possible because the precondition depends on the contents of
\lstinline+src+. A compiler would have to clone code to maintain the same \lstinline+src+. A compiler would have to clone code to maintain the same
@ -1391,7 +1391,7 @@ sort of transformation or do it sparingly. Programmer control produces
better results. Here is the code a compiler might generate with cloning. better results. Here is the code a compiler might generate with cloning.
\begin{lstlisting}[escapechar=\|] \begin{lstlisting}[escapechar=\|]
void append(array_ptr<char> dst: count(dest_count), void append(array_ptr<char> dst: count(dest_count),
array_ptr<char> src : count(src_count), array_ptr<char> src : count(src_count),
size_t dst_count, size_t src_count) size_t dst_count, size_t src_count)
{ {
@ -1493,7 +1493,7 @@ signed integer overflow:
\begin{itemize} \begin{itemize}
\item \item
Unchecked pointers shall be treated as addresses of locations in memory, Unchecked pointers shall be treated as addresses of locations in memory,
just as checked pointers are treated as addressses. The addresses shall just as checked pointers are treated as addresses. The addresses shall
be unsigned integers with a defined range of 0 to be unsigned integers with a defined range of 0 to
\code{UINTPTR_MAX}: \code{UINTPTR_MAX}:
@ -1527,7 +1527,7 @@ In the case of a runtime error, program execution cannot continue in a
way that uses the value of the expression that produced the error. way that uses the value of the expression that produced the error.
For programs with expressions and initializers with undefined meanings, For programs with expressions and initializers with undefined meanings,
those programs must be rejected at translation-time. those programs must be rejected at translation-time.
Section~\ref{section:avoiding-undefinedness} Section~\ref{section:avoiding-undefinedness}
describes this in detail. describes this in detail.

Просмотреть файл

@ -3,7 +3,7 @@
\chapter{Introduction} \chapter{Introduction}
\label{chapter:introduction} \label{chapter:introduction}
The C programming language \cite{Ritchie1988, ISO2011} allows programmers to use The C programming language \cite{Ritchie1988, ISO2011} allows programmers to use
pointers directly. A pointer is an address of a location in memory. Programs may do pointers directly. A pointer is an address of a location in memory. Programs may do
arithmetic on pointers, dereference them to read memory, or assign arithmetic on pointers, dereference them to read memory, or assign
through them to modify memory. The ability to use pointers directly through them to modify memory. The ability to use pointers directly
@ -69,7 +69,7 @@ Checked C addresses the bounds checking problem for C by:
extension to \arrayptr\ for null-terminated arrays (\ntarrayptr). extension to \arrayptr\ for null-terminated arrays (\ntarrayptr).
\item \item
For array pointer types (\arrayptr), in keeping with the low-level For array pointer types (\arrayptr), in keeping with the low-level
nature of C, bounds checking is placed under programmer control. nature of C, bounds checking is placed under programmer control.
This differs from languages like This differs from languages like
Java, where bounds checking is completely automatic. A programmer Java, where bounds checking is completely automatic. A programmer
declares \emph{bounds}, where the bounds for an \arrayptr\ declares \emph{bounds}, where the bounds for an \arrayptr\
@ -174,11 +174,11 @@ This problem is addressed in two ways. First, the language rules that are
used to check the validity of bounds are limited intentionally. The rules can used to check the validity of bounds are limited intentionally. The rules can
check the validity of bounds that are needed in practice. However, there are check the validity of bounds that are needed in practice. However, there are
bounds that are true that cannot be proved using the rules. In the terminology bounds that are true that cannot be proved using the rules. In the terminology
of program logics, the rules are incomplete. As an example, the of program logics, the rules are incomplete. As an example, the
rules about distributivity limit the size of rules about distributivity limit the size of
expressions that they produce. In practice, this means that expressions that they produce. In practice, this means that
simple disjunctive bounds can be checked, but complex disjunctive bounds cannot be checked. simple disjunctive bounds can be checked, but complex disjunctive bounds cannot be checked.
Second, the inference that compilers do for checking program invariants is Second, the inference that compilers do for checking program invariants is
limited. Compilers act as \emph{checkers} for invariants. They check that limited. Compilers act as \emph{checkers} for invariants. They check that
declared invariants follow from other declared invariants, the declared invariants follow from other declared invariants, the
@ -202,25 +202,25 @@ scope of this technical report. For now, it is assumed that programs are
correct in these other aspects. correct in these other aspects.
This design is being done in an iterative fashion. To validate the This design is being done in an iterative fashion. To validate the
design, we mocked up modifying a subset of the OpenSSL design, we mocked up modifying a subset of the OpenSSL
code base \cite{OpenSSL2015} to be bounds-safe . code base \cite{OpenSSL2015} to be bounds-safe .
We created C++ templates for the new pointer types and modified OpenSSL to We created C++ templates for the new pointer types and modified OpenSSL to
compile as valid C++ code. compile as valid C++ code.
We hand-edited about 11,000 lines of the code to use checked pointer We hand-edited about 11,000 lines of the code to use checked pointer
types with full bounds annotations. We used macros to encapsulate types with full bounds annotations. We used macros to encapsulate
the bounds annotations so that they could be elided from the code the bounds annotations so that they could be elided from the code
and OpenSSL compiled and tested using the new types. We also modified the and OpenSSL compiled and tested using the new types. We also modified the
generic stack type in OpenSSL to use the \code{ptr} type, which required generic stack type in OpenSSL to use the \code{ptr} type, which required
cross-cutting changes across the code base (in all, about 160 files were changed) cross-cutting changes across the code base (in all, about 160 files were changed)
as well dealing with complicated macros. as well dealing with complicated macros.
We learned the following from this We learned the following from this
experience. First, it was important to have a compact, succinct syntax experience. First, it was important to have a compact, succinct syntax
for declaring bounds. for declaring bounds.
Second, in most cases, declaring bounds at declarations was sufficient for Second, in most cases, declaring bounds at declarations was sufficient for
tracking the bounds of variables. Large blocks of code tracking the bounds of variables. Large blocks of code
remained unchanged, which matches the observations of the remained unchanged, which matches the observations of the
Cyclone project \cite{Jim2002}, an earlier research Cyclone project \cite{Jim2002}, an earlier research
effort to create a type-safe version of C. effort to create a type-safe version of C.
Third, the expressions allowed in declaring Third, the expressions allowed in declaring
bounds needed to be rich. Fourth, pointer casts were used bounds needed to be rich. Fourth, pointer casts were used
@ -230,19 +230,19 @@ easily to be appropriate for the new referent type of the pointer.
Fifth, it was clear that there needed Fifth, it was clear that there needed
to be a graceful way of interoperating with existing libraries that could to be a graceful way of interoperating with existing libraries that could
not be changed. Finally, signed integer overflow was a pervasive not be changed. Finally, signed integer overflow was a pervasive
possibility, which raised questions about the meaning of bounds possibility, which raised questions about the meaning of bounds
declarations that used signed integers. declarations that used signed integers.
We revised the design to We revised the design to
address these issues. In particular, we paid close attention to address these issues. In particular, we paid close attention to
tracking bounds through pointer casts. We also made sure that the tracking bounds through pointer casts. We also made sure that the
contraints on signed integer expressions used in bounds expressions constraints on signed integer expressions used in bounds expressions
were understood and could be written down in the language of were understood and could be written down in the language of
simple invariants. simple invariants.
The design is a work-in-progress. Some material will be missing The design is a work-in-progress. Some material will be missing
or incomplete because the language design has not yet been done. or incomplete because the language design has not yet been done.
Readers should be aware that all parts of the document are subject to change. Readers should be aware that all parts of the document are subject to change.
We are interested in feedback and suggestions about ways to improve We are interested in feedback and suggestions about ways to improve
the design. the design.
@ -304,7 +304,7 @@ Two specific design principles are adopted based on these principles:
inventing new notations. Many systems are hybrid C/C++ systems, so inventing new notations. Many systems are hybrid C/C++ systems, so
this approach fits with the principle of clarity. It also enables this approach fits with the principle of clarity. It also enables
incremental adoption. One of the design goals of C++ has been that C incremental adoption. One of the design goals of C++ has been that C
is a subset of C++. It is a design goal to allow Checked C to be a is a subset of C++. It is a design goal to allow Checked C to be a
subset of C++ too. subset of C++ too.
\end{enumerate} \end{enumerate}
@ -314,13 +314,13 @@ Operators and symbolic characters will be shown in black (like \lstinline|->|).
variables will be shown in light blue (like \lstinline|for (int i = 0; i<10; i++)|). At times, we will variables will be shown in light blue (like \lstinline|for (int i = 0; i<10; i++)|). At times, we will
define properties or make mathematical statements that range over sets of values or program define properties or make mathematical statements that range over sets of values or program
elements, such as mathematical integers, variables, or expressions. We will use meta-variables in {\it italics} that elements, such as mathematical integers, variables, or expressions. We will use meta-variables in {\it italics} that
range over these values or program elements. We might say something like range over these values or program elements. We might say something like
``for all expressions \var{e\textsubscript{1}} and \var{e\textsubscript{2}}.'' These meta-variables ``for all expressions \var{e\textsubscript{1}} and \var{e\textsubscript{2}}.'' These meta-variables
differ from program variables, which are elements of specific C programs. differ from program variables, which are elements of specific C programs.
\section{Organization of this document} \section{Organization of this document}
The target audience for this document includes programmers interested in The target audience for this document includes programmers interested in
learning about Checked C, compiler writers, and language designers. learning about Checked C, compiler writers, and language designers.
These groups have different and conflicting needs. Programmers will find that These groups have different and conflicting needs. Programmers will find that
some sections have too much detail, while language designers will likely some sections have too much detail, while language designers will likely
@ -339,7 +339,7 @@ bounds safety even in the face of integer overflow, which is a
widespread possibility in C programs. This requires a more nuanced widespread possibility in C programs. This requires a more nuanced
requirement on the result of overflow than ``the result is undefined.'' requirement on the result of overflow than ``the result is undefined.''
Chapters~\ref{chapter:core-extensions} through Chapters~\ref{chapter:core-extensions} through
\ref{chapter:pointers-to-data-with-arrayptrs} \ref{chapter:pointers-to-data-with-arrayptrs}
present the extensions to C for declaring bounds present the extensions to C for declaring bounds
for \arrayptr\ values stored in variables and structures and for \arrayptr\ values stored in variables and structures and
@ -377,13 +377,13 @@ covers inferring effects of an expression on the bounds of variables.
This inferred information is then used to validate that declarations and This inferred information is then used to validate that declarations and
statements correctly declare bounds and maintain the bounds information. statements correctly declare bounds and maintain the bounds information.
Chapter~\ref{chapter:interoperation} covers interoperation between Chapter~\ref{chapter:interoperation} covers interoperation between
checked and unchecked code. It covers conversions between checked and checked and unchecked code. It covers conversions between checked and
unchecked pointers, as well as conversions between the new kinds of checked pointers. unchecked pointers, as well as conversions between the new kinds of checked pointers.
It pins down the notion of checked code and unchecked code. Finally, it covers It pins down the notion of checked code and unchecked code. Finally, it covers
bounds-safe interfaces in depth. bounds-safe interfaces in depth.
Chapter~\ref{chapter:structure-bounds} extends these ideas from variables to data Chapter~\ref{chapter:structure-bounds} extends these ideas from variables to data
structures by structures by
introducing member bounds, which are type-level invariants about members introducing member bounds, which are type-level invariants about members
of structure types. For now, it assumes that the programmer has done of structure types. For now, it assumes that the programmer has done
@ -395,13 +395,13 @@ uses to check programs with member bounds.
Chapter~\ref{chapter:pointers-to-data-with-arrayptrs} describes how Chapter~\ref{chapter:pointers-to-data-with-arrayptrs} describes how
to extend reasoning about bounds to pointers to data that has \arrayptr-typed to extend reasoning about bounds to pointers to data that has \arrayptr-typed
values within it. This includes pointers to structures with members that values within it. This includes pointers to structures with members that
have \arrayptr\ types and pointers directly to \arrayptr s. Pointers to have \arrayptr\ types and pointers directly to \arrayptr s. Pointers to
structures can be used easily by ensuring that modifications to members structures can be used easily by ensuring that modifications to members
preserve type-level bounds invariants. preserve type-level bounds invariants.
This follows the lead of the Deputy system \cite{Condit2007}. This follows the lead of the Deputy system \cite{Condit2007}.
Complexity arises for pointers to \arrayptr s where Complexity arises for pointers to \arrayptr s where
the \arrayptr s have bounds that depend on other variables or members. The the \arrayptr s have bounds that depend on other variables or members. The
other variables or members need to be abstracted by pointers as well to ensure other variables or members need to be abstracted by pointers as well to ensure
that invariants for bounds are not violated. Assignments via pointer that invariants for bounds are not violated. Assignments via pointer
indirections need to be coordinated to maintain the invariant. This indirections need to be coordinated to maintain the invariant. This
gives rise to constraints on the pointers and the variables or gives rise to constraints on the pointers and the variables or
@ -418,12 +418,12 @@ about whether one fact is true given a set of other facts (for example,
given x == y and the statement \code{z = y;} z == x is true after that given x == y and the statement \code{z = y;} z == x is true after that
statement). These rules and facts can be used to deduce the correctness statement). These rules and facts can be used to deduce the correctness
of bounds declarations that differ from those inferred directly by the of bounds declarations that differ from those inferred directly by the
checking described in Chapters~\ref{chapter:checking-bounds} checking described in Chapters~\ref{chapter:checking-bounds}
and~\ref{chapter:pointers-to-data-with-arrayptrs}. For example, and~\ref{chapter:pointers-to-data-with-arrayptrs}. For example,
a programmer may wish to narrow the memory that is accessible via an a programmer may wish to narrow the memory that is accessible via an
\arrayptr\ variable by declaring bounds that are a subrange of \arrayptr\ variable by declaring bounds that are a subrange of
the bounds inferred by the checking. A programmer may wish to update the the bounds inferred by the checking. A programmer may wish to update the
bounds for an \arrayptr\ variable after an assignment \code{x = y}, substituting bounds for an \arrayptr\ variable after an assignment \code{x = y}, substituting
\code{x} for \code{y}. \code{x} for \code{y}.
The same static checking that is used for bounds can be used to reason The same static checking that is used for bounds can be used to reason
@ -448,7 +448,7 @@ of the modifications or understand the practical benefits of checking.
Still, this gives some idea about the usefulness and applicability of Still, this gives some idea about the usefulness and applicability of
the design. the design.
Chapter~\ref{chapter:open-issues} summarizes the open problems uncovered by Chapter~\ref{chapter:open-issues} summarizes the open problems uncovered by
Chapter~\ref{chapter:eval} or Chapter~\ref{chapter:eval} or
unaddressed by the design. It also describes next steps for implementing unaddressed by the design. It also describes next steps for implementing
this in a C compiler. this in a C compiler.
@ -460,21 +460,21 @@ that were considered and not chosen. It explains why those alternatives were
not chosen. not chosen.
Here are the suggested roadmaps for the different groups of readers. Here are the suggested roadmaps for the different groups of readers.
For programmers interested in learning about Checked C, For programmers interested in learning about Checked C,
we suggest reading Chapter~\ref{chapter:core-extensions} except we suggest reading Chapter~\ref{chapter:core-extensions} except
Sections~\ref{section:pointer-arithmetic-errors} and \ref{section:changes-to-undefined-behavior}, Sections~\ref{section:pointer-arithmetic-errors} and \ref{section:changes-to-undefined-behavior},
Chapter~\ref{chapter:tracking-bounds} Chapter~\ref{chapter:tracking-bounds}
except Section~\ref{section:extent-of-declarations}, except Section~\ref{section:extent-of-declarations},
Section~\ref{section:inferring-expression-bounds} of Chapter~\ref{chapter:checking-bounds}, Section~\ref{section:inferring-expression-bounds} of Chapter~\ref{chapter:checking-bounds},
Chapter~\ref{chapter:interoperation} except for Section~\ref{section:checking-bounds-interfaces}, Chapter~\ref{chapter:interoperation} except for Section~\ref{section:checking-bounds-interfaces},
Chapter~\ref{chapter:structure-bounds} except for Sections~\ref{section:member-bounds-state-extent} Chapter~\ref{chapter:structure-bounds} except for Sections~\ref{section:member-bounds-state-extent}
and \ref{section:checking-bounds-with-structures}, and \ref{section:checking-bounds-with-structures},
and Chapter~\ref{chapter:simple-invariants} through the first section. and Chapter~\ref{chapter:simple-invariants} through the first section.
Sections~\ref{section:pointer-cast-results}, \ref{section:integer-overflow-informal}, Sections~\ref{section:pointer-cast-results}, \ref{section:integer-overflow-informal},
and \ref{section:pointer-integer-conversions} are advanced topics that can be read and \ref{section:pointer-integer-conversions} are advanced topics that can be read
after the other recommended chapters and sections. after the other recommended chapters and sections.
We suggest language designers and compiler writers We suggest language designers and compiler writers
read most of the document. Language designers can skip Sections~\ref{section:computing-extent}, read most of the document. Language designers can skip Sections~\ref{section:computing-extent},
\ref{section:canonicalization}, and \ref{section:canonicalization-example}. \ref{section:canonicalization}, and \ref{section:canonicalization-example}.
Compiler writers can skip the discussions in Section~\ref{section:programmer-dynamic-checks} Compiler writers can skip the discussions in Section~\ref{section:programmer-dynamic-checks}
@ -484,7 +484,7 @@ and Chapters~\ref{chapter:related-work} and \ref{chapter:design-alternatives}.
\section{Acknowledgements} \section{Acknowledgements}
This design has benefited from many discussions with Weidong Cui, Gabriel Dos Reis, This design has benefited from many discussions with Weidong Cui, Gabriel Dos Reis,
Sam Elliott, Chris Hawblitzel, Galen Hunt, Shuvendu Lahiri, and Reuben Olinsky. The design has Sam Elliott, Chris Hawblitzel, Galen Hunt, Shuvendu Lahiri, and Reuben Olinsky. The design has
benefited also from feedback from Michael Hicks, Wonsub Kim, Greg benefited also from feedback from Michael Hicks, Wonsub Kim, Greg
Morrisett, Jonghyun Park, and Andrew Ruef. We thank them for their contributions to Morrisett, Jonghyun Park, and Andrew Ruef. We thank them for their contributions to

Просмотреть файл

@ -32,7 +32,7 @@ the form:
\end{tabbing} \end{tabbing}
where \var{x} is a variable. Additional forms for handling where \var{x} is a variable. Additional forms for handling
\void\ pointers and casts are described in \void\ pointers and casts are described in
Sections~\ref{section:pointers-to-void} and \ref{section:pointer-cast-results}. Sections~\ref{section:pointers-to-void} and \ref{section:pointer-cast-results}.
A bounds expression describes the memory that can be accessed using the A bounds expression describes the memory that can be accessed using the
@ -67,7 +67,7 @@ include counts, pairs that specify an upper bound and lower bound,
Bounds expression use ``non-modifying'' expressions. These are a subset Bounds expression use ``non-modifying'' expressions. These are a subset
of C expressions that do not modify variables or memory. They include of C expressions that do not modify variables or memory. They include
local variables, parameters, constant expressions, casts, and operators local variables, parameters, constant expressions, casts, and operators
such as addition, subtraction, and address-of. such as addition, subtraction, and address-of.
Section~\ref{section:non-modifying-expressions} describes Section~\ref{section:non-modifying-expressions} describes
non-modifying expressions in detail. non-modifying expressions in detail.
@ -80,8 +80,8 @@ type of \var{e1} is a character, a short integer, a bit field, or an enumeration
the expression is promoted to an \keyword{int} type if that is large the expression is promoted to an \keyword{int} type if that is large
enough to hold all values of the type or an \keyword{unsigned int} type otherwise.} enough to hold all values of the type or an \keyword{unsigned int} type otherwise.}
For \boundsdecl{\var{x}}{\bounds{\var{e1}}{\var{e2}}}, \var{e1} and For \boundsdecl{\var{x}}{\bounds{\var{e1}}{\var{e2}}}, \var{e1} and
\var{e2} must be pointers to the same type. Typically \var{x} is also \var{e2} must be pointers to the same type. Typically \var{x} is also
a pointer to that type or an array of that type. a pointer to that type or an array of that type.
However, \var{x} can be a pointer to or an array of a different type. However, \var{x} can be a pointer to or an array of a different type.
This is useful for describing the results of casts and This is useful for describing the results of casts and
bounds for \arrayptrvoid\ pointers. bounds for \arrayptrvoid\ pointers.
@ -140,7 +140,7 @@ have been derived via a sequence of operations from a pointer to some
object \var{obj} with \bounds{\var{low}}{\var{high}}. object \var{obj} with \bounds{\var{low}}{\var{high}}.
The following statement will be true at runtime: The following statement will be true at runtime:
\var{ev} \lstinline@== 0 || (@\var{low} \lstinline|<=| \var{lbv} \lstinline|&&| \var{ev} \lstinline@== 0 || (@\var{low} \lstinline|<=| \var{lbv} \lstinline|&&|
\var{ubv} \lstinline|<=| \var{high}\lstinline|)|. In other words, \var{ubv} \lstinline|<=| \var{high}\lstinline|)|. In other words,
if \var{ev} is null, the bounds if \var{ev} is null, the bounds
may or may not be valid. If \var{ev} is non-null, the bounds must be may or may not be valid. If \var{ev} is non-null, the bounds must be
valid. This implies that any access to memory where \var{ev} valid. This implies that any access to memory where \var{ev}
@ -155,10 +155,10 @@ acceptable to use the address of an address-taken variable in a bounds
declaration. This is provided that the resulting address is not used to declaration. This is provided that the resulting address is not used to
access memory in the bounds declaration (that is, indirectly access the access memory in the bounds declaration (that is, indirectly access the
value of a variable). For example, given the declaration \code{int x}, value of a variable). For example, given the declaration \code{int x},
the expression \code{&x} may appear in a bounds expression. the expression \code{&x} may appear in a bounds expression.
Chapter~\ref{chapter:pointers-to-data-with-arrayptrs} extends the Chapter~\ref{chapter:pointers-to-data-with-arrayptrs} extends the
design to avoid these restrictions. It covers pointers to data with design to avoid these restrictions. It covers pointers to data with
\arrayptr\ values, pointers to variables used in bounds, and bounds \arrayptr\ values, pointers to variables used in bounds, and bounds
that use pointers. that use pointers.
\section{Using bounds declarations} \section{Using bounds declarations}
@ -169,7 +169,7 @@ by following the declarator with \code{:} \var{bounds-exp}. In that
case, the bounds expression applies to the variable that is the subject case, the bounds expression applies to the variable that is the subject
of the declarator. By making the bounds be part of the program, this of the declarator. By making the bounds be part of the program, this
preserves the control and efficiency of C. The bounds declarations will preserves the control and efficiency of C. The bounds declarations will
be checked statically for correctness using rules described in be checked statically for correctness using rules described in
Chapter~\ref{chapter:checking-bounds}. Chapter~\ref{chapter:checking-bounds}.
\subsection{Bounds declarations at variable declarations} \subsection{Bounds declarations at variable declarations}
@ -182,13 +182,13 @@ between \code{start} and \code{end}, where the integer stored at
\begin{lstlisting} \begin{lstlisting}
int sum(array_ptr<int> start : bounds(start, end), array_ptr<int> end) int sum(array_ptr<int> start : bounds(start, end), array_ptr<int> end)
{ {
int result = 0; int result = 0;
array_ptr<int> current : bounds(start, end) = start; array_ptr<int> current : bounds(start, end) = start;
while (current < end) { while (current < end) {
result += *current++; // *current is bounds-checked. The checking ensures result += *current++; // *current is bounds-checked. The checking ensures
// that current is within the bounds of (start, end) // that current is within the bounds of (start, end)
// at the memory access. // at the memory access.
} }
return result; return result;
} }
@ -198,13 +198,13 @@ This can be written using \keyword{where} clauses as well, at the
inconvenience of typing variable names twice in declarations: inconvenience of typing variable names twice in declarations:
\begin{lstlisting} \begin{lstlisting}
int sum(array_ptr<int> start where start : bounds(start, end), int sum(array_ptr<int> start where start : bounds(start, end),
array_ptr<int> end) array_ptr<int> end)
{ {
int result = 0; int result = 0;
array_ptr<int> current where current : bounds(start, end) = start; array_ptr<int> current where current : bounds(start, end) = start;
while (current < end) { while (current < end) {
result += *current++; result += *current++;
} }
return result; return result;
} }
@ -249,12 +249,12 @@ int bad_find(int key, array_ptr<int> a : count(len), int len)
This function adds two arrays of 2x2 arrays. This function adds two arrays of 2x2 arrays.
\begin{lstlisting} \begin{lstlisting}
int add(int a checked[][2][2] : count(len), int b checked[][2][2] : count(len), int add(int a checked[][2][2] : count(len), int b checked[][2][2] : count(len),
int len) int len)
{ {
for (int i = 0; i < len; i++) { for (int i = 0; i < len; i++) {
// All array accesses are bounds checked // All array accesses are bounds checked
a[i][0][0] += b[i][0][0]; a[i][0][0] += b[i][0][0];
a[i][0][1] += b[i][0][1]; a[i][0][1] += b[i][0][1];
a[i][1][0] += b[i][1][0]; a[i][1][0] += b[i][1][0];
a[i][1][1] += b[i][1][1]; a[i][1][1] += b[i][1][1];
@ -282,9 +282,9 @@ int sum(void)
This is a declaration of a function that copies bytes provided that the This is a declaration of a function that copies bytes provided that the
pointers and lengths are aligned: pointers and lengths are aligned:
\begin{lstlisting} \begin{lstlisting}
int aligned_memcpy(array<char> dst where dst : count(len) && int aligned_memcpy(array<char> dst where dst : count(len) &&
aligned(dst, 4), aligned(dst, 4),
array<char> src where src : count(len) && array<char> src where src : count(len) &&
aligned(src, 4), aligned(src, 4),
int len where len % 4 == 0); int len where len % 4 == 0);
\end{lstlisting} \end{lstlisting}
@ -374,7 +374,7 @@ statements. It does not make sense to have dataflow-sensitive bounds for
externally-scoped variables and variables with static or thread storage. externally-scoped variables and variables with static or thread storage.
The bounds extend to the next assignment to any variable in the bounds The bounds extend to the next assignment to any variable in the bounds
declaration, with some exceptions for pointer incrementing or declaration, with some exceptions for pointer incrementing or
decrementing. Section~\ref{section:extent-of-declarations} decrementing. Section~\ref{section:extent-of-declarations}
describes the extent of flow-sensitive bounds describes the extent of flow-sensitive bounds
in more detail. in more detail.
@ -384,7 +384,7 @@ elements and then points to an array with 10 elements; the bounds are
adjusted accordingly. adjusted accordingly.
\begin{lstlisting} \begin{lstlisting}
void f(void) void f(void)
{ {
int x[5]; int x[5];
int y[10]; int y[10];
@ -401,7 +401,7 @@ lazily to either \code{a} or \code{b}, depending on another parameter \code{val}
\begin{lstlisting} \begin{lstlisting}
/* use either a or b depending on val */ /* use either a or b depending on val */
int choose(int val, array_ptr<int> a : count(alen), int alen, int choose(int val, array_ptr<int> a : count(alen), int alen,
array_ptr<int> b : count(blen), int blen) array_ptr<int> b : count(blen), int blen)
{ {
array_ptr<int> c; array_ptr<int> c;
int clen; int clen;
@ -412,7 +412,7 @@ int choose(int val, array_ptr<int> a : count(alen), int alen,
else { else {
clen = blen; clen = blen;
c = b where c : count(clen); c = b where c : count(clen);
} }
... ...
} }
\end{lstlisting} \end{lstlisting}
@ -425,7 +425,7 @@ the update. Consider the following example:
\begin{lstlisting} \begin{lstlisting}
/* sum integers stored between start and end, where end is not included */ /* sum integers stored between start and end, where end is not included */
int sum(array_ptr<int> start : bounds(start, end), array_ptr<int> end) int sum(array_ptr<int> start : bounds(start, end), array_ptr<int> end)
{ {
... ...
// Adjusting end. Can declare new bounds for start at the assignment // Adjusting end. Can declare new bounds for start at the assignment
end = end - 1 where start : bounds(start, end + 1); end = end - 1 where start : bounds(start, end + 1);
@ -466,11 +466,11 @@ added:
\code{byte_count} is the identifier \code{byte_count}. \code{byte_count} is the identifier \code{byte_count}.
The bounds declaration \boundsdecl{\var{x}}{\boundsbytecount{\var{e1}}} The bounds declaration \boundsdecl{\var{x}}{\boundsbytecount{\var{e1}}}
describes the number of bytes that are accessible beginning at \var{x}. describes the number of bytes that are accessible beginning at \var{x}.
Only memory that is at or above \var{x} and below Only memory that is at or above \var{x} and below
\lstinline|(|\arrayptrchar\lstinline|)| \var{x} \code{+} \var{e1} can be accessed through \lstinline|(|\arrayptrchar\lstinline|)| \var{x} \code{+} \var{e1} can be accessed through
\var{x}. The type of \var{e1} must be an integral type. The usual C integer conversions are \var{x}. The type of \var{e1} must be an integral type. The usual C integer conversions are
applied to \var{e1}. This bounds declaration is a synonym for applied to \var{e1}. This bounds declaration is a synonym for
\boundsdecl{\var{x}} \boundsdecl{\var{x}}
{\boundsrel{\lstinline|(|\arrayptrchar)\lstinline|)| \var{x}} {\boundsrel{\lstinline|(|\arrayptrchar)\lstinline|)| \var{x}}
{\lstinline|(|\arrayptrchar)\lstinline|)| \var{x} \code{+} \var{e1}} {\lstinline|(|\arrayptrchar)\lstinline|)| \var{x} \code{+} \var{e1}}
@ -479,7 +479,7 @@ applied to \var{e1}. This bounds declaration is a synonym for
The standard C library functions for \code{malloc}, \code{memcmp}, and The standard C library functions for \code{malloc}, \code{memcmp}, and
\code{memcpy} will be \code{memcpy} will be
given bounds-safe interfaces to avoid breaking existing code as given bounds-safe interfaces to avoid breaking existing code as
described in Section~\ref{section:function-bounds-safe-interfaces}. described in Section~\ref{section:function-bounds-safe-interfaces}.
However, if they were to return checked pointer However, if they were to return checked pointer
types, their bounds declarations would be: types, their bounds declarations would be:
@ -509,7 +509,7 @@ in-line bounds declarations and \keyword{where} clauses for declarations:
\begin{tabbing} \begin{tabbing}
\var{init-}\=\var{declarator:} \\ \var{init-}\=\var{declarator:} \\
\>\var{declarator inline-bounds-specifier\textsubscript{opt} where-clause\textsubscript{opt}} \\ \>\var{declarator inline-bounds-specifier\textsubscript{opt} where-clause\textsubscript{opt}} \\
\>\var{declarator inline-bounds-specifier\textsubscript{opt} \>\var{declarator inline-bounds-specifier\textsubscript{opt}
where-clause\textsubscript{opt}} \lstinline|=| \var{initializer where-clause\textsubscript{opt}} \lstinline|=| \var{initializer
where-clause\textsubscript{opt}} \\ where-clause\textsubscript{opt}} \\
\>\ldots{} \\ \>\ldots{} \\
@ -518,7 +518,7 @@ where-clause\textsubscript{opt}} \\
\>\var{declaration-specifiers declarator} \var{inline-bounds-specifier\textsubscript{opt} where-clause\textsubscript{opt}} \\ \>\var{declaration-specifiers declarator} \var{inline-bounds-specifier\textsubscript{opt} where-clause\textsubscript{opt}} \\
\\ \\
\var{inline-bounds-specifier:}\\ \var{inline-bounds-specifier:}\\
\>\lstinline|:| \var{bounds-exp} \\ \>\lstinline|:| \var{bounds-exp} \\
\\ \\
\var{where-clause}: \\ \var{where-clause}: \\
\>\keyword{where} \var{facts} \\ \>\keyword{where} \var{facts} \\
@ -573,7 +573,7 @@ must be the identifier \code{any} or the identifier \code{unknown}.
The semantics of C expressions is subtle. The expression The semantics of C expressions is subtle. The expression
\lstinline|*|\var{e1} does not dereference memory. It \lstinline|*|\var{e1} does not dereference memory. It
produces an lvalue that can be used to access memory. If the expression produces an lvalue that can be used to access memory. If the expression
occurs on the left-hand side of an assigment, the memory pointed to occurs on the left-hand side of an assignment, the memory pointed to
by the lvalue is updated with the value of the right-hand side of by the lvalue is updated with the value of the right-hand side of
the assignment. the assignment.
For example, given \lstinline|*|\var{e1} \lstinline|=| \var{e2}, For example, given \lstinline|*|\var{e1} \lstinline|=| \var{e2},
@ -645,7 +645,7 @@ is used to access memory. The compiler computes the bounds of
runtime checks that \var{e1} \lstinline|+| \var{e2} is within this bounds as runtime checks that \var{e1} \lstinline|+| \var{e2} is within this bounds as
part of the evaluation of \var{e1}\lstinline|[|\var{e2}\lstinline|]|. For example, part of the evaluation of \var{e1}\lstinline|[|\var{e2}\lstinline|]|. For example,
given \lstinline|x[5]| where \lstinline|x| has \lstinline|bounds(x, x + c)|, the given \lstinline|x[5]| where \lstinline|x| has \lstinline|bounds(x, x + c)|, the
compiler inserts runtime checks that \lstinline|x <= x + 5 < x + c|. compiler inserts runtime checks that \lstinline|x <= x + 5 < x + c|.
The runtime checks simplify to \lstinline|5 < c|. The runtime checks simplify to \lstinline|5 < c|.
For a multi-dimensional array access, only one bounds check is done. For a multi-dimensional array access, only one bounds check is done.
@ -685,11 +685,11 @@ has {\bounds{\var{e2}}{\var{e3}}}:
\begin{itemize} \begin{itemize}
\item Given an assignment of the form \lstinline|*|\var{e1} \lstinline|=| \var{e4}, the \item Given an assignment of the form \lstinline|*|\var{e1} \lstinline|=| \var{e4}, the
value of \var{e4} is computed to some temporary \var{v}. The value of \var{e4} is computed to some temporary \var{v}. The
check is \lstinline|(|\var{e2} \lstinline|<=| \var{t} \lstinline|&&| \var{t} \lstinline|<| check is \lstinline|(|\var{e2} \lstinline|<=| \var{t} \lstinline|&&| \var{t} \lstinline|<|
\var{e3}\lstinline@) || (@ \var{e2} \lstinline|<=| \var{t} \lstinline|&&| \var{t} \var{e3}\lstinline@) || (@ \var{e2} \lstinline|<=| \var{t} \lstinline|&&| \var{t}
\lstinline|==| \var{e3} \lstinline|&&| \var{v} \lstinline|== 0)|. \lstinline|==| \var{e3} \lstinline|&&| \var{v} \lstinline|== 0)|.
When checking for a write of \code{0} exactly at the upper bound, we When checking for a write of \code{0} exactly at the upper bound, we
include the first element of the sequence in the allowed memory range. include the first element of the sequence in the allowed memory range.
The second lower bound comparison \var{e2} \lstinline|<=| \var{t} The second lower bound comparison \var{e2} \lstinline|<=| \var{t}
prevents an assignment prevents an assignment
@ -740,7 +740,7 @@ Additional storage would be particularly
problematic when bounds declarations are extend to structure members. problematic when bounds declarations are extend to structure members.
It would cause the size and layout of data structures to change. It would cause the size and layout of data structures to change.
Deferring evaluation also avoids complications when \code{x} is Deferring evaluation also avoids complications when \code{x} is
\code{null}. Section~\ref{section:bounds-declarations-alternate-semantics} \code{null}. Section~\ref{section:bounds-declarations-alternate-semantics}
discusses eager evaluation of bounds expressions at discusses eager evaluation of bounds expressions at
bounds declarations in more detail and explains why this was not chosen. bounds declarations in more detail and explains why this was not chosen.
@ -750,7 +750,7 @@ It is common in C code to use multiple statements to update program state.
This can cause problems when variables in a bounds declaration are updated. This can cause problems when variables in a bounds declaration are updated.
Invariant bounds declarations must be valid at the end of every statement. Invariant bounds declarations must be valid at the end of every statement.
When updates involve multiple statements, a bounds declaration may be valid only When updates involve multiple statements, a bounds declaration may be valid only
after all the updates are done. In Checked C, statements and declarations can be after all the updates are done. In Checked C, statements and declarations can be
grouped into bundled blocks. Bounds declarations are checked only at the end of bundled blocks. grouped into bundled blocks. Bounds declarations are checked only at the end of bundled blocks.
Consider the following example where a function is added to the earlier \code{sum} Consider the following example where a function is added to the earlier \code{sum}
@ -770,7 +770,7 @@ int sum(void)
} }
/* buggy resize function */ /* buggy resize function */
void resize(int len) void resize(int len)
{ {
array_ptr<int> tmp : count(len) = malloc(sizeof(int) * len); array_ptr<int> tmp : count(len) = malloc(sizeof(int) * len);
copy(tmp, buf, buflen); copy(tmp, buf, buflen);
@ -787,7 +787,7 @@ The updates to \code{buflen} and \code{buf} can be grouped together,
so the checker considers them to be one action: so the checker considers them to be one action:
\begin{lstlisting} \begin{lstlisting}
/* correct resize function */ /* correct resize function */
void resize(int len) void resize(int len)
{ {
array_ptr<int> tmp : count(len) = malloc(sizeof(int) * len); array_ptr<int> tmp : count(len) = malloc(sizeof(int) * len);
copy(tmp, buf, buflen); copy(tmp, buf, buflen);
@ -812,7 +812,7 @@ The C syntax for is extended with:
\\ \\
\var{bundled-item:}\\ \var{bundled-item:}\\
\> \var{declaration}\\ \> \var{declaration}\\
\> \var{expression-statement} \> \var{expression-statement}
\end{tabbing} \end{tabbing}
There is some subtlety with bundled blocks and function calls. The There is some subtlety with bundled blocks and function calls. The
@ -825,7 +825,7 @@ results in temporary variables and updating static variables \textit{en
masse} after the required function calls have been made. masse} after the required function calls have been made.
Bundled blocks expose an interesting difference between regular C Bundled blocks expose an interesting difference between regular C
and Checked C programs. In regular C, and Checked C programs. In regular C,
\begin{lstlisting} \begin{lstlisting}
expr1, expr2; expr1, expr2;
\end{lstlisting} \end{lstlisting}
@ -843,7 +843,7 @@ statements. Thus bounds could be valid after
In the \code{resize} example, the following suceeds with a bundle In the \code{resize} example, the following suceeds with a bundle
block: block:
\begin{lstlisting} \begin{lstlisting}
void resize(int len) void resize(int len)
{ {
array_ptr<int> tmp = malloc(sizeof(int) * len); array_ptr<int> tmp = malloc(sizeof(int) * len);
copy(tmp, buf, buflen); copy(tmp, buf, buflen);
@ -1043,7 +1043,7 @@ the declaration.
\label{section:integer-overflow-informal} \label{section:integer-overflow-informal}
When objects are allocated dynamically in C, programmers have to compute When objects are allocated dynamically in C, programmers have to compute
the amount of memory to allocate for the objects. It is well-known the amount of memory to allocate for the objects. It is well-known
that integer overflow or wraparound in these computations can lead to buffer that integer overflow or wraparound in these computations can lead to buffer
overruns \cite{Howard2003,Mitre2015-128,Mitre2015-190,Mitre2015-680,Dietz2015}. overruns \cite{Howard2003,Mitre2015-128,Mitre2015-190,Mitre2015-680,Dietz2015}.
In Checked C, the explicit size computations are not enough In Checked C, the explicit size computations are not enough
@ -1073,10 +1073,10 @@ a mismatch when overflow or wraparound can happen, which causes static
checking to fail. checking to fail.
We expand the count expression; to integer arithmetic to make its size We expand the count expression; to integer arithmetic to make its size
computation clear. \lstinline|count(|\var{e1}\lstinline|)| expands to computation clear. \lstinline|count(|\var{e1}\lstinline|)| expands to
\bounds{\code{p}}{\code{p +} \var{e1}}. \bounds{\code{p}}{\code{p +} \var{e1}}.
Following the rules in Section~\ref{section:pointers-as-integers}, Following the rules in Section~\ref{section:pointers-as-integers},
the expansion of \lstinline|p +| \var{e1} from pointer arithmetic to integer the expansion of \lstinline|p +| \var{e1} from pointer arithmetic to integer
arithmetic depends on the type of \var{e1}: arithmetic depends on the type of \var{e1}:
\begin{itemize} \begin{itemize}
@ -1086,7 +1086,7 @@ arithmetic depends on the type of \var{e1}:
\var{e1}. \var{e1}.
\item \item
If \var{e1} is a signed integer, \lstinline|p +| \var{e1} expands to If \var{e1} is a signed integer, \lstinline|p +| \var{e1} expands to
\lstinline|p| \lstinline{+}\textsubscript{ovf} \lstinline|p| \lstinline{+}\textsubscript{ovf}
\lstinline|((signed_size_t)| \sizeof{\var{T}}\lstinline|)| \lstinline{*}\textsubscript{ovf} \lstinline|((signed_size_t)| \sizeof{\var{T}}\lstinline|)| \lstinline{*}\textsubscript{ovf}
\var{e1}. \var{e1}.
\end{itemize} \end{itemize}
@ -1103,11 +1103,11 @@ Values differ?\tabularnewline
\midrule \midrule
\endhead \endhead
Unsigned integer & \sizeof{\var{T}} \lstinline|*|\textsubscript{ovf} \var{e1} & Unsigned integer & \sizeof{\var{T}} \lstinline|*|\textsubscript{ovf} \var{e1} &
\sizeof{\var{T}} \lstinline|* |\var{e1} & \sizeof{\var{T}} \lstinline|* |\var{e1} &
On overflow\tabularnewline On overflow\tabularnewline
Signed integer & \lstinline|(signed_size_t)| \sizeof{\var{T}} Signed integer & \lstinline|(signed_size_t)| \sizeof{\var{T}}
\lstinline|*|\textsubscript{ovf} \var{e1} & \lstinline|*|\textsubscript{ovf} \var{e1} &
\sizeof{\var{T}} \lstinline|* (size_t)| \var{e1} & \sizeof{\var{T}} \lstinline|* (size_t)| \var{e1} &
On overflow or when \code{e1 < 0}.\tabularnewline On overflow or when \code{e1 < 0}.\tabularnewline
\bottomrule \bottomrule
\end{longtable} \end{longtable}
@ -1166,7 +1166,7 @@ Function types can be declared to have parameters and return values that have bo
declarations, just like functions. The bounds declarations become part of the function types. declarations, just like functions. The bounds declarations become part of the function types.
Here are simple examples of function pointer types with bounds declarations: Here are simple examples of function pointer types with bounds declarations:
\begin{lstlisting} \begin{lstlisting}
// Function that takes a pointer to a 5 element array and returns an // Function that takes a pointer to a 5 element array and returns an
// integer. // integer.
int (array_ptr<int> arr : count(5)) int (array_ptr<int> arr : count(5))
// checked function pointer // checked function pointer
@ -1233,7 +1233,7 @@ the pointer has type
\arrayptrT. The \arrayptrT. The
bounds specify a range of memory that is exactly the size of some array bounds specify a range of memory that is exactly the size of some array
of \var{T} and the pointer points exactly at an element of that array. For of \var{T} and the pointer points exactly at an element of that array. For
example, suppose short ints are 2 bytes in size and example, suppose short ints are 2 bytes in size and
{\boundsdecl{\code{x}}{\bounds{\code{y}}{\code{z}}}}, where the types of \code{x}, {\boundsdecl{\code{x}}{\bounds{\code{y}}{\code{z}}}}, where the types of \code{x},
\code{y}, \code{z} are \arrayptrinst{\code{short int}} . \code{y}, \code{z} are \arrayptrinst{\code{short int}} .
@ -1264,7 +1264,7 @@ becomes more expensive. With relative alignment, the bounds check is
alignment, the check for the upper bound needs to compute the highest alignment, the check for the upper bound needs to compute the highest
address that will be accessed using x. For this example, the highest address that will be accessed using x. For this example, the highest
address accessed will be address accessed will be
\lstinline|(|\arrayptrchar\lstinline|) x + sizeof(short int) - 1|, so the check becomes \lstinline|(|\arrayptrchar\lstinline|) x + sizeof(short int) - 1|, so the check becomes
\code{y <= x} and \code{x + 1 < z}. \code{y <= x} and \code{x + 1 < z}.
A pointer cast can create a pointer that is not relatively aligned to A pointer cast can create a pointer that is not relatively aligned to
@ -1309,8 +1309,8 @@ instead of \code{char}) usually already are doing these checks.
\subsection{Representing relative alignment in bounds declarations} \subsection{Representing relative alignment in bounds declarations}
\label{section:representing-relative-alignment} \label{section:representing-relative-alignment}
Bounds expressions are extended with an optional relative alignment Bounds expressions are extended with an optional relative alignment
clause to represent situations where an \arrayptrT\ is not relatively clause to represent situations where an \arrayptrT\ is not relatively
aligned to its bounds for type \var{T}: aligned to its bounds for type \var{T}:
\begin{tabbing} \begin{tabbing}
@ -1321,7 +1321,7 @@ aligned to its bounds for type \var{T}:
\\ \\
\var{relative-alignment-clause:}\\ \var{relative-alignment-clause:}\\
\> \relalign{\var{type}} \\ \> \relalign{\var{type}} \\
\> \relalignval{\var{constant-exp}} \> \relalignval{\var{constant-exp}}
\end{tabbing} \end{tabbing}
This clause is only added to bounds pairs because (by definition) count This clause is only added to bounds pairs because (by definition) count
@ -1331,9 +1331,9 @@ identifiers. The optional clause specifies a
relative alignment type \var{T} or the required relative alignment in relative alignment type \var{T} or the required relative alignment in
bytes. Given bytes. Given
\boundsdecl{\var{x}}{\boundsrel{\var{e1}}{\var{e2}}{\var{T}}}, \boundsdecl{\var{x}}{\boundsrel{\var{e1}}{\var{e2}}{\var{T}}},
\lstinline|((|\arrayptrchar\lstinline|)| \var{x} \lstinline|- (|\arrayptrchar\lstinline|)| \var{e1}) \lstinline|((|\arrayptrchar\lstinline|)| \var{x} \lstinline|- (|\arrayptrchar\lstinline|)| \var{e1})
\lstinline|%| \sizeof{\var{T}}\lstinline|== 0| and \lstinline|%| \sizeof{\var{T}}\lstinline|== 0| and
\lstinline|((|\arrayptrchar\lstinline|)| \var{e2} \lstinline|- (|\arrayptrchar\lstinline|)| \var{x}) \lstinline|((|\arrayptrchar\lstinline|)| \var{e2} \lstinline|- (|\arrayptrchar\lstinline|)| \var{x})
\lstinline|% sizeof(|\var{T}\lstinline|) == 0| must be true. If the number of bytes is \lstinline|% sizeof(|\var{T}\lstinline|) == 0| must be true. If the number of bytes is
specified, \sizeof{\var{T}} is replaced by the specified, \sizeof{\var{T}} is replaced by the
constant expression. The relative alignment clause \relalign{\var{type}} constant expression. The relative alignment clause \relalign{\var{type}}
@ -1346,7 +1346,7 @@ If the default relative alignment has been overridden and
\sizeof{referent-type(\var{e1})} is a common factor of \sizeof{\var{T}} \sizeof{referent-type(\var{e1})} is a common factor of \sizeof{\var{T}}
If it is, it inserts the same If it is, it inserts the same
runtime check as before. Otherwise, it inserts a runtime check that runtime check as before. Otherwise, it inserts a runtime check that
\var{e2} \lstinline|<=| \var{e1} \lstinline|&&| \var{e2} \lstinline|<=| \var{e1} \lstinline|&&|
\var{e1} \lstinline|+ sizeof(|\var{T}) \lstinline|- 1 <| \var{e3}. \var{e1} \lstinline|+ sizeof(|\var{T}) \lstinline|- 1 <| \var{e3}.
\subsection{Examples of uses of bounds declarations that specify relative alignment} \subsection{Examples of uses of bounds declarations that specify relative alignment}
@ -1403,7 +1403,7 @@ possible. For simplicity, it is assumed that the memory pointed to by
\code{dst} and \code{src} does not overlap and that \code{sizeof(int)} is 4: \code{dst} and \code{src} does not overlap and that \code{sizeof(int)} is 4:
\begin{lstlisting} \begin{lstlisting}
void copy(array_ptr<char> dst : bounds(dst, dst + num), void copy(array_ptr<char> dst : bounds(dst, dst + num),
array_ptr<char> src : bounds(src, src + num), array_ptr<char> src : bounds(src, src + num),
size_t num) size_t num)
{ {
if (num % 4 == 0) { if (num % 4 == 0) {
@ -1429,7 +1429,7 @@ relative alignment can be changed:
\begin{lstlisting} \begin{lstlisting}
void copy(array_ptr<char> dst : bounds(dst, dst + num), void copy(array_ptr<char> dst : bounds(dst, dst + num),
array_ptr<char> src : bounds(src, src + num), array_ptr<char> src : bounds(src, src + num),
size_t num) size_t num)
{ {
if (num % 4 == 0) { if (num % 4 == 0) {
@ -1444,7 +1444,7 @@ void copy(array_ptr<char> dst : bounds(dst, dst + num),
aligned_dst[i] = aligned_src[i]; aligned_dst[i] = aligned_src[i];
} }
} }
else else
... ...
} }
\end{lstlisting} \end{lstlisting}
@ -1472,12 +1472,12 @@ array_ptr<int> r : count(2) = (array_ptr<int>) p;
Variables that have bounds declared for them at expression statements Variables that have bounds declared for them at expression statements
have dataflow-sensitive bounds declarations. There are two aspects to have dataflow-sensitive bounds declarations. There are two aspects to
dataflow-sensitive bounds: extent and consistency. The extent of a dataflow-sensitive bounds: extent and consistency. The extent of a
flow-sensitive bounds declaration is the part of the program to which flow-sensitive bounds declaration is the part of the program to which
a flow-sensitive bounds a flow-sensitive bounds
declaration applies. Within the extent of a flow-sensitive declaration applies. Within the extent of a flow-sensitive
bounds declaration, the bounds declaration, the
bounds for the variable for memory dereferences are given by the bounds bounds for the variable for memory dereferences are given by the bounds
declaration. Consistency is that all the flow-sensitive bounds declaration. Consistency is that all the flow-sensitive bounds
declarations flowing to a statement agree with each other. declarations flowing to a statement agree with each other.
As mentioned in Section~\ref{section:statement-declarations}, As mentioned in Section~\ref{section:statement-declarations},
@ -1511,19 +1511,19 @@ lead to unexpected bounds checks failures.
\subsection{Definition of extent} \subsection{Definition of extent}
\label{section:extent-definition} \label{section:extent-definition}
We first define the set of flow-sensitive bounds declarations that apply to a We first define the set of flow-sensitive bounds declarations that apply to a
component of a function. A component is an expression statement, component of a function. A component is an expression statement,
variable declaration, or a compound statement. For any flow-sensitive bounds variable declaration, or a compound statement. For any flow-sensitive bounds
declaration for a variable \var{v}, if declaration for a variable \var{v}, if
\begin{compactenum} \begin{compactenum}
\item \item
There is some path from the bounds declaration to the There is some path from the bounds declaration to the
component, and component, and
\item \item
\var{v} occurs in the component, and \var{v} occurs in the component, and
\item \item
there is no other flow-sensitive bounds declaration for there is no other flow-sensitive bounds declaration for
\var{v} along the path \var{v} along the path
\end{compactenum} \end{compactenum}
@ -1531,26 +1531,26 @@ then
\begin{compactenum} \begin{compactenum}
\item \item
if all the variables occurring in the bounds declaration are if all the variables occurring in the bounds declaration are
in-scope at the component, and in-scope at the component, and
\begin{compactenum} \begin{compactenum}
\item No expressions or statements on the path modify a variable \item No expressions or statements on the path modify a variable
occurring in the bounds declaration occurring in the bounds declaration
\item or \item or
\begin{compactenum} \begin{compactenum}
\item The only expressions or statements on the path that \item The only expressions or statements on the path that
modify a variable occurring in the bounds declaration modify a variable occurring in the bounds declaration
are pointer increments or decrements of \var{v}, and are pointer increments or decrements of \var{v}, and
\item \var{v} does not occur in the bounds expression, and \item \var{v} does not occur in the bounds expression, and
\item the bounds expression is not a count expression \item the bounds expression is not a count expression
\end{compactenum} \end{compactenum}
\end{compactenum} \end{compactenum}
then the bounds declaration applies to the component then the bounds declaration applies to the component
\item \item
Otherwise, the bounds declaration \boundsdecl{\var{v}}{\boundsunknown} Otherwise, the bounds declaration \boundsdecl{\var{v}}{\boundsunknown}
applies to the component. applies to the component.
\end{compactenum} \end{compactenum}
\subsection{Consistency} \subsection{Consistency}
These conditions ensure the consistency of bounds declarations: These conditions ensure the consistency of bounds declarations:
@ -1582,7 +1582,7 @@ rejected by the compiler.
/* buggy function */ /* buggy function */
/* sum integers stored between start and end, where end is not included */ /* sum integers stored between start and end, where end is not included */
int sum(array_ptr<int> start : bounds(start, end), array_ptr<int> end) int sum(array_ptr<int> start : bounds(start, end), array_ptr<int> end)
{ {
end = end + 1; // bounds(start, end) does not hold after this, so program is end = end + 1; // bounds(start, end) does not hold after this, so program is
// rejected // rejected
start[5] = 0; start[5] = 0;
@ -1595,22 +1595,22 @@ A correct function declares new bounds for \code{start}:
\begin{lstlisting} \begin{lstlisting}
/* sum integers stored between start and end, where end is not included */ /* sum integers stored between start and end, where end is not included */
int sum(array_ptr<int> start : bounds(start, end), array_ptr<int> end) int sum(array_ptr<int> start : bounds(start, end), array_ptr<int> end)
{ {
end = end + 1 where start : bounds(start, end - 1); end = end + 1 where start : bounds(start, end - 1);
start[5] = 0; // program accepted by compiler; may fail bounds check start[5] = 0; // program accepted by compiler; may fail bounds check
// at run time // at run time
... ...
} }
\end{lstlisting} \end{lstlisting}
The following example shows the extents of the two bounds declarations for The following example shows the extents of the two bounds declarations for
\code{start}. The bounds declaration at the declaration of \code{start} and \code{start}. The bounds declaration at the declaration of \code{start} and
its extent is highlighted in blue. The bounds declaration at the assignment its extent is highlighted in blue. The bounds declaration at the assignment
to \code{end} and its extent is highlighted in yellow: to \code{end} and its extent is highlighted in yellow:
\sethlcolor{lightblue} \sethlcolor{lightblue}
\begin{lstlisting}[escapechar=\|] \begin{lstlisting}[escapechar=\|]
/* sum integers stored between start and end, where end is not included */ /* sum integers stored between start and end, where end is not included */
int sum(array_ptr<int> start : |\hl{bounds(start, end)}|, array_ptr<int> end) int sum(array_ptr<int> start : |\hl{bounds(start, end)}|, array_ptr<int> end)
|\hl{\{\\ |\hl{\{\\
start[5] = 0; // bounds checked using (start, end)}\\ start[5] = 0; // bounds checked using (start, end)}\\
@ -1620,14 +1620,14 @@ start[5] = 0; // bounds checked using (start, end - 1)\\
} }
\end{lstlisting} \end{lstlisting}
Figure~\ref{fig:bounds-extent:choose} expands on the \code{choose} function earlier. Figure~\ref{fig:bounds-extent:choose} expands on the \code{choose} function earlier.
There are 4 explicit bounds declarations for \code{c}. The bounds declarations There are 4 explicit bounds declarations for \code{c}. The bounds declarations
and their extents are also color-coded. Three bounds declarations for \code{c} and their extents are also color-coded. Three bounds declarations for \code{c}
have the form \boundsdecl{\code{c}}{\boundscount{\code{clen}}}. They must be syntactically have the form \boundsdecl{\code{c}}{\boundscount{\code{clen}}}. They must be syntactically
identical because there are statements that they cover in common. Their identical because there are statements that they cover in common. Their
extents are highlighted in blue. Another bounds declaration has extents are highlighted in blue. Another bounds declaration has
the form \boundsdecl{\code{c}}{\boundscount{\code{clen - 1}}}. Its extent is highlighted the form \boundsdecl{\code{c}}{\boundscount{\code{clen - 1}}}. Its extent is highlighted
in yellow. It covers code where \code{c} is incremented and the in yellow. It covers code where \code{c} is incremented and the
length is decremented. The increment invalidates the bounds declaration length is decremented. The increment invalidates the bounds declaration
\boundsdecl{\code{c}}{\boundscount{\code{clen}}}. A different bounds declaration for \code{c} \boundsdecl{\code{c}}{\boundscount{\code{clen}}}. A different bounds declaration for \code{c}
is needed after the increment. is needed after the increment.
@ -1650,16 +1650,16 @@ int choose(int val, array_ptr<int> a: count(alen), int alen,
clen = blen; clen = blen;
c = b where |\hl{c : count(clen);}| c = b where |\hl{c : count(clen);}|
|\hl{\}}| |\hl{\}}|
|\hl{if (clen > 1) \{}| |\hl{if (clen > 1) \{}|
|\hl{result = c[0];}| |\hl{result = c[0];}|
|\hl{\}}| |\hl{\}}|
|\hl{if (cond \&\& clen > 1) \{}| |\hl{if (cond \&\& clen > 1) \{}|
c = |\hl{c + 1}| where |\sethlcolor{lightyellow}\hl{c : count(clen - 1);}| c = |\hl{c + 1}| where |\sethlcolor{lightyellow}\hl{c : count(clen - 1);}|
clen = |\sethlcolor{lightyellow}\hl{clen - 1}| where |\sethlcolor{lightblue}\hl{c : count(clen);}| clen = |\sethlcolor{lightyellow}\hl{clen - 1}| where |\sethlcolor{lightblue}\hl{c : count(clen);}|
|\hl{result += c[0];}| |\hl{result += c[0];}|
|\hl{\}}| |\hl{\}}|
|\hl{...}| |\hl{...}|
|\hl{return result;}| |\hl{return result;}|
} }
@ -1687,8 +1687,8 @@ function. In functions that do not have loops in their control-flow, the
dataflow analysis can be done in O(N*M) time. dataflow analysis can be done in O(N*M) time.
The dataflow analysis uses a lattice of values, assigning one lattice The dataflow analysis uses a lattice of values, assigning one lattice
value to each variable at each program point in the function. value to each variable at each program point in the function.
Figure~\ref{fig:extent-dataflow-lattice} shows the lattice of values. Figure~\ref{fig:extent-dataflow-lattice} shows the lattice of values.
It includes singleton values consisting of the bounds It includes singleton values consisting of the bounds
expressions in the flow-sensitive bounds declaration for variables in expressions in the flow-sensitive bounds declaration for variables in
the function, \boundsunknown\ (indicating absence of bounds the function, \boundsunknown\ (indicating absence of bounds
@ -1706,11 +1706,11 @@ information), and \code{Top} (indicating contradiction or error):
\caption{The lattice of values used in dataflow computation of extent} \caption{The lattice of values used in dataflow computation of extent}
\label{fig:extent-dataflow-lattice} \label{fig:extent-dataflow-lattice}
\end{figure} \end{figure}
For an assignment to an \arrayptr\ variable, the existing For an assignment to an \arrayptr\ variable, the existing
lattice value for the \arrayptr\ variable is killed, unless lattice value for the \arrayptr\ variable is killed, unless
the special conditions described in Section~\ref{section:extent-of-declarations} the special conditions described in Section~\ref{section:extent-of-declarations}
are met. are met.
A new lattice value is generated for the \arrayptr\ variable. If the A new lattice value is generated for the \arrayptr\ variable. If the
assignment declares bounds for the \arrayptr\ variable, the new assignment declares bounds for the \arrayptr\ variable, the new
lattice value is the bounds expression in the bounds declaration. lattice value is the bounds expression in the bounds declaration.
@ -1749,7 +1749,7 @@ loop. The loop modifies \code{current}, but the bounds for
\begin{lstlisting} \begin{lstlisting}
/* sum integers stored between start and end, where end is not included */ /* sum integers stored between start and end, where end is not included */
int sum(array_ptr<int> start where start : bounds(start, end), array_ptr<int> end) int sum(array_ptr<int> start where start : bounds(start, end), array_ptr<int> end)
{ {
int sum = 0; int sum = 0;
array_ptr<int> current : (start, end) = start; array_ptr<int> current : (start, end) = start;
@ -1776,10 +1776,10 @@ eliminate the runtime bounds checks easily.
\begin{lstlisting} \begin{lstlisting}
/* lexicographic comparison of two arrays of integers */ /* lexicographic comparison of two arrays of integers */
int compare(array_ptr<int> x : bounds(x, x_end), int compare(array_ptr<int> x : bounds(x, x_end),
array_ptr<int> y : bounds(y, y_end) array_ptr<int> y : bounds(y, y_end)
array_ptr<int> x_end, array_ptr<int> y_end) array_ptr<int> x_end, array_ptr<int> y_end)
{ {
while (x < x_end && y < y_end) { while (x < x_end && y < y_end) {
if (*x == *y) { // bounds check: x >= x && x < x_end; easily if (*x == *y) { // bounds check: x >= x && x < x_end; easily
// optimizable. // optimizable.
@ -1802,7 +1802,7 @@ int compare(array_ptr<int> x : bounds(x, x_end),
return 1; return 1;
} }
else { else {
return -1; return -1;
} }
} }
\end{lstlisting} \end{lstlisting}