Fix typos and spacing in spec and samples. (#406)
This commit is contained in:
Родитель
9df9af5a5f
Коммит
caff1bb5ad
|
@ -7,11 +7,11 @@
|
||||||
// clang -o string-helpers string-helper.c
|
// clang -o string-helpers string-helper.c
|
||||||
// On Windows use:
|
// On Windows use:
|
||||||
// clang -o string-helpers.exe string-helpers.c
|
// clang -o string-helpers.exe string-helpers.c
|
||||||
//
|
//
|
||||||
// Then run the program with 6 string arguments, the 3rd of which should
|
// Then run the program with 6 string arguments, the 3rd of which should
|
||||||
// be an integer For example:
|
// be an integer For example:
|
||||||
// string-helpers hello meet -125 fed brown red
|
// string-helpers hello meet -125 fed brown red
|
||||||
//
|
//
|
||||||
// Overview of using null-terminated arrays in Checked C:
|
// Overview of using null-terminated arrays in Checked C:
|
||||||
//
|
//
|
||||||
// In Checked C, pointers to null-terminated data are represented using the
|
// In Checked C, pointers to null-terminated data are represented using the
|
||||||
|
@ -52,7 +52,7 @@
|
||||||
// int f(nt_array_ptr<char> p : count(n), int n) {
|
// int f(nt_array_ptr<char> p : count(n), int n) {
|
||||||
// if (p[n]) { // read element at upper bound.
|
// if (p[n]) { // read element at upper bound.
|
||||||
// ...
|
// ...
|
||||||
//
|
//
|
||||||
// If v != 0, the following code fails at runtime for an nt_array_ptr. It
|
// If v != 0, the following code fails at runtime for an nt_array_ptr. It
|
||||||
// always fails for an array_ptr.
|
// always fails for an array_ptr.
|
||||||
//
|
//
|
||||||
|
@ -67,13 +67,13 @@
|
||||||
// int set_zero(nt_array_ptr<char> p : count(n), int n) {
|
// int set_zero(nt_array_ptr<char> p : count(n), int n) {
|
||||||
// p[n] = 0; // allowed for nt_array_ptr. Runtime error for array_ptr!
|
// p[n] = 0; // allowed for nt_array_ptr. Runtime error for array_ptr!
|
||||||
// }
|
// }
|
||||||
//
|
//
|
||||||
// The following example illustrates how bounds differ from
|
// The following example illustrates how bounds differ from
|
||||||
// the count:
|
// the count:
|
||||||
//
|
//
|
||||||
// nt_array_ptr<char> : count(5) = "hello";
|
// nt_array_ptr<char> : count(5) = "hello";
|
||||||
//
|
//
|
||||||
// The string "hello" contains 6 characters, the last of which is
|
// The string "hello" contains 6 characters, the last of which is
|
||||||
// a null terminator. The valid bounds is count(5).
|
// a null terminator. The valid bounds is count(5).
|
||||||
//
|
//
|
||||||
//
|
//
|
||||||
|
@ -84,7 +84,7 @@
|
||||||
// 1. Nt_array_ptrs with no bounds declared have a default bounds of count(0).
|
// 1. Nt_array_ptrs with no bounds declared have a default bounds of count(0).
|
||||||
// 2. If you are using array subscripting to access the nt_array_ptr,
|
// 2. If you are using array subscripting to access the nt_array_ptr,
|
||||||
// you'll need to widen the bounds as you determine the last character
|
// you'll need to widen the bounds as you determine the last character
|
||||||
// is not a null terminator. This is typically done by
|
// is not a null terminator. This is typically done by
|
||||||
// declaring bounds involving a separate counter:
|
// declaring bounds involving a separate counter:
|
||||||
// array_ptr<char> p : count(0);
|
// array_ptr<char> p : count(0);
|
||||||
// int i = 0;
|
// int i = 0;
|
||||||
|
@ -108,7 +108,7 @@
|
||||||
// for ( ; arr[i] !=0; i++) {
|
// for ( ; arr[i] !=0; i++) {
|
||||||
// nt_array_ptr<char> : count(i+1) tmp = p;
|
// nt_array_ptr<char> : count(i+1) tmp = p;
|
||||||
// tmp[i] = ...
|
// tmp[i] = ...
|
||||||
//
|
//
|
||||||
// The function my_strlen show points 1 and 2. It is less
|
// The function my_strlen show points 1 and 2. It is less
|
||||||
// frequent for string functions to modify strings. The function squeeze,
|
// frequent for string functions to modify strings. The function squeeze,
|
||||||
// which does do that, shows point 3.
|
// which does do that, shows point 3.
|
||||||
|
@ -136,7 +136,7 @@ checked int my_strlen(nt_array_ptr<char> p) {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Delete all c from p (adapted from p. 47, K&R 2nd Edition)
|
// Delete all c from p (adapted from p. 47, K&R 2nd Edition)
|
||||||
// p implicltly has count(0).
|
// p implicitly has count(0).
|
||||||
checked void squeeze(nt_array_ptr<char> p, char c) {
|
checked void squeeze(nt_array_ptr<char> p, char c) {
|
||||||
int i = 0, j = 0;
|
int i = 0, j = 0;
|
||||||
// Create a temporary whose count of elements can
|
// Create a temporary whose count of elements can
|
||||||
|
|
|
@ -3,13 +3,13 @@
|
||||||
\chapter{Extensions to existing C concepts}
|
\chapter{Extensions to existing C concepts}
|
||||||
\label{chapter:core-extensions}
|
\label{chapter:core-extensions}
|
||||||
|
|
||||||
This chapter describes extensions to existing C concepts.
|
This chapter describes extensions to existing C concepts.
|
||||||
It covers new kinds of pointer types and array types, their meaning,
|
It covers new kinds of pointer types and array types, their meaning,
|
||||||
and the operations on them. It introduces two
|
and the operations on them. It introduces two
|
||||||
new program scopes: \lstinline+checked+ and \lstinline+unchecked+
|
new program scopes: \lstinline+checked+ and \lstinline+unchecked+
|
||||||
blocks. It covers a generalization of assertions to
|
blocks. It covers a generalization of assertions to
|
||||||
dynamic checks that are never removed (unless a compiler
|
dynamic checks that are never removed (unless a compiler
|
||||||
proves them redundant) because bounds safety depends upon them.
|
proves them redundant) because bounds safety depends upon them.
|
||||||
Finally, it describes changes to
|
Finally, it describes changes to
|
||||||
undefined behavior needed for bounds safety.
|
undefined behavior needed for bounds safety.
|
||||||
|
|
||||||
|
@ -22,7 +22,7 @@ capital letter are reserved for system use, as are identifiers that
|
||||||
begin with two underscores \cite[Section 7.1.3]{ISO2011}. The following
|
begin with two underscores \cite[Section 7.1.3]{ISO2011}. The following
|
||||||
new keywords are introduced:
|
new keywords are introduced:
|
||||||
\begin{lstlisting}
|
\begin{lstlisting}
|
||||||
_Array_ptr _Checked _Dynamic_check _For_any _Itype_for_any _Nt_array_ptr _Nt_checked
|
_Array_ptr _Checked _Dynamic_check _For_any _Itype_for_any _Nt_array_ptr _Nt_checked
|
||||||
_Opaque _Ptr _Reveal _Where _Unchecked
|
_Opaque _Ptr _Reveal _Where _Unchecked
|
||||||
\end{lstlisting}
|
\end{lstlisting}
|
||||||
|
|
||||||
|
@ -38,7 +38,7 @@ the implementations of standard header files need to be modified to
|
||||||
not use identifiers that conflict with these keywords.
|
not use identifiers that conflict with these keywords.
|
||||||
The all-lowercase versions of the keywords are:
|
The all-lowercase versions of the keywords are:
|
||||||
\begin{lstlisting}
|
\begin{lstlisting}
|
||||||
array_ptr checked dynamic_check for_any nt_array_ptr nt_checked
|
array_ptr checked dynamic_check for_any nt_array_ptr nt_checked
|
||||||
opaque ptr reveal where unchecked
|
opaque ptr reveal where unchecked
|
||||||
\end{lstlisting}
|
\end{lstlisting}
|
||||||
|
|
||||||
|
@ -87,8 +87,8 @@ have an error or warning mode that flags unexpected uses of `\code{*}'.
|
||||||
|
|
||||||
The same syntax as C++ template instantiations is used for building
|
The same syntax as C++ template instantiations is used for building
|
||||||
instances of these types because this syntax is well-known and
|
instances of these types because this syntax is well-known and
|
||||||
understood. The new pointer types are added to the syntax for {\it type specifiers}
|
understood. The new pointer types are added to the syntax for {\it type specifiers}
|
||||||
\cite[Section 6.7.2]{ISO2011}. The parameters to these type constructors
|
\cite[Section 6.7.2]{ISO2011}. The parameters to these type constructors
|
||||||
must be types, which are described syntactically by {\it type names}
|
must be types, which are described syntactically by {\it type names}
|
||||||
\cite[Section 6.7.7]{ISO2011}. If Checked C is extended to C++,
|
\cite[Section 6.7.7]{ISO2011}. If Checked C is extended to C++,
|
||||||
in the C++ extension these new types will be template types that have special meaning.
|
in the C++ extension these new types will be template types that have special meaning.
|
||||||
|
@ -125,10 +125,10 @@ int *const p = &x;
|
||||||
const ptr<int> q = &x;
|
const ptr<int> q = &x;
|
||||||
const array_ptr<int> r = &x;
|
const array_ptr<int> r = &x;
|
||||||
\end{lstlisting}
|
\end{lstlisting}
|
||||||
In this case, the pointers cannot be modified after they are defined, but
|
In this case, the pointers cannot be modified after they are defined, but
|
||||||
the integer that they point to can be modified. The checked pointer declarations
|
the integer that they point to can be modified. The checked pointer declarations
|
||||||
have simpler syntax than the unchecked pointer declaration, where \keyword{const}
|
have simpler syntax than the unchecked pointer declaration, where \keyword{const}
|
||||||
must be placed after the \lstinline+*+. The checked pointer declarations are all analogous
|
must be placed after the \lstinline+*+. The checked pointer declarations are all analogous
|
||||||
to \lstinline+const double y = 5.0;+ declaring a variable \lstinline+y+ that is \keyword{const}.
|
to \lstinline+const double y = 5.0;+ declaring a variable \lstinline+y+ that is \keyword{const}.
|
||||||
In the declaration of \keyword{y}, \keyword{double} specifies a type.
|
In the declaration of \keyword{y}, \keyword{double} specifies a type.
|
||||||
The \keyword{const} keyword is placed before it to declare that
|
The \keyword{const} keyword is placed before it to declare that
|
||||||
|
@ -137,7 +137,7 @@ and \arrayptrint\ all specify types too.
|
||||||
|
|
||||||
The checked pointer types follow the same rules that apply to modifiers for
|
The checked pointer types follow the same rules that apply to modifiers for
|
||||||
unchecked pointer types. For example, a variable that is a pointer to a
|
unchecked pointer types. For example, a variable that is a pointer to a
|
||||||
\keyword{const} value can be assigned a pointer to a non-const value.
|
\keyword{const} value can be assigned a pointer to a non-const value.
|
||||||
The reverse is not allowed.
|
The reverse is not allowed.
|
||||||
|
|
||||||
Array pointers (\arrayptr) of function types are not allowed. Functions have
|
Array pointers (\arrayptr) of function types are not allowed. Functions have
|
||||||
|
@ -166,7 +166,7 @@ int a checked[10];
|
||||||
All array references to checked array types are bounds checked. C has
|
All array references to checked array types are bounds checked. C has
|
||||||
the rule that an ``array of \var{T}'' is converted implicitly to a
|
the rule that an ``array of \var{T}'' is converted implicitly to a
|
||||||
``pointer to \var{T}'' in a number of situations. This rule is extended
|
``pointer to \var{T}'' in a number of situations. This rule is extended
|
||||||
to convert a ``checked array of \var{T}'' to an ``\arrayptr\
|
to convert a ``checked array of \var{T}'' to an ``\arrayptr\
|
||||||
to \var{T}''.
|
to \var{T}''.
|
||||||
|
|
||||||
In C, array types may be complete or incomplete. A complete array type
|
In C, array types may be complete or incomplete. A complete array type
|
||||||
|
@ -186,7 +186,7 @@ that \lstinline{i >= 0} and \lstinline{i < 10}.
|
||||||
Array references to multi-dimensional arrays must be uniformly bounds
|
Array references to multi-dimensional arrays must be uniformly bounds
|
||||||
checked or not bounds checked. If any dimension is bounds checked, all
|
checked or not bounds checked. If any dimension is bounds checked, all
|
||||||
dimensions must be checked. A programmer can simply declare that
|
dimensions must be checked. A programmer can simply declare that
|
||||||
the outer dimension of an array is checked. The checked property will be
|
the outer dimension of an array is checked. The checked property will be
|
||||||
propagated to the inner dimensions:
|
propagated to the inner dimensions:
|
||||||
|
|
||||||
\begin{lstlisting}
|
\begin{lstlisting}
|
||||||
|
@ -258,9 +258,9 @@ can be used to declare multi-dimensional arrays:
|
||||||
typedef int arr_ty[10];
|
typedef int arr_ty[10];
|
||||||
arr_ty x[10];
|
arr_ty x[10];
|
||||||
\end{lstlisting}
|
\end{lstlisting}
|
||||||
The checked property is propagated only to array declarators that are
|
The checked property is propagated only to array declarators that are
|
||||||
nested directly within other array declarators. It is not propagated to
|
nested directly within other array declarators. It is not propagated to
|
||||||
the bodies of type definitions. It is an error if an array type and a nested
|
the bodies of type definitions. It is an error if an array type and a nested
|
||||||
array type from the use of a type name have different checked properties.
|
array type from the use of a type name have different checked properties.
|
||||||
|
|
||||||
Here are examples of correct and incorrect declarations:
|
Here are examples of correct and incorrect declarations:
|
||||||
|
@ -282,9 +282,9 @@ in memory that ends with a null terminator value. We add a new type
|
||||||
to represent pointers to these kinds of arrays. We divide
|
to represent pointers to these kinds of arrays. We divide
|
||||||
these arrays into two parts: a prefix with bounds and
|
these arrays into two parts: a prefix with bounds and
|
||||||
a sequence of additional elements that ends
|
a sequence of additional elements that ends
|
||||||
with a null terminator. The initial elements of the
|
with a null terminator. The initial elements of the
|
||||||
sequence can be read, provided that preceding elements
|
sequence can be read, provided that preceding elements
|
||||||
are not the null terminator. The bounds can be
|
are not the null terminator. The bounds can be
|
||||||
{\em widened} based on the number of elements read.
|
{\em widened} based on the number of elements read.
|
||||||
|
|
||||||
A programmer may declare bounds for an \ntarrayptr\ variable. If bounds
|
A programmer may declare bounds for an \ntarrayptr\ variable. If bounds
|
||||||
|
@ -293,12 +293,12 @@ at runtime.
|
||||||
|
|
||||||
If no bounds are declared, the bounds are inferred. At variable declarations,
|
If no bounds are declared, the bounds are inferred. At variable declarations,
|
||||||
the declared bounds are for arrays with a prefix of 0 elements.
|
the declared bounds are for arrays with a prefix of 0 elements.
|
||||||
At variable uses,
|
At variable uses,
|
||||||
the bounds are determined using a program analysis that widens
|
the bounds are determined using a program analysis that widens
|
||||||
bounds based on control-flow.
|
bounds based on control-flow.
|
||||||
The bounds are then used to check memory accesses.
|
The bounds are then used to check memory accesses.
|
||||||
A problem with using an analysis to determine bounds
|
A problem with using an analysis to determine bounds
|
||||||
is that the bounds are not written down in the program.
|
is that the bounds are not written down in the program.
|
||||||
This may lead to unexpected failures if the bounds are narrower
|
This may lead to unexpected failures if the bounds are narrower
|
||||||
than expected and it may make it difficult to understand
|
than expected and it may make it difficult to understand
|
||||||
failures.
|
failures.
|
||||||
|
@ -314,10 +314,10 @@ if (*p == 'a')
|
||||||
if (*(p + 1) == 'b')
|
if (*(p + 1) == 'b')
|
||||||
if (*(p + 2) == 'c')
|
if (*(p + 2) == 'c')
|
||||||
\end{lstlisting}
|
\end{lstlisting}
|
||||||
|
|
||||||
The element type \var{T} of an \ntarrayptrT\ must be
|
The element type \var{T} of an \ntarrayptrT\ must be
|
||||||
an integral or pointer type. \ntarrayptrT\ extends \arrayptrT: the
|
an integral or pointer type. \ntarrayptrT\ extends \arrayptrT: the
|
||||||
prefix with bounds is treated the same as if an \arrayptrT\ pointed
|
prefix with bounds is treated the same as if an \arrayptrT\ pointed
|
||||||
to it. An \ntarrayptrT\ value can be converted to an \arrayptrT\ value
|
to it. An \ntarrayptrT\ value can be converted to an \arrayptrT\ value
|
||||||
with the same bounds.
|
with the same bounds.
|
||||||
|
|
||||||
|
@ -400,11 +400,11 @@ The following operations involving pointer-typed values are allowed:
|
||||||
inequality. The pointers do not have to be the same kind of pointer.
|
inequality. The pointers do not have to be the same kind of pointer.
|
||||||
To support reasoning about program behavior, the result of comparing
|
To support reasoning about program behavior, the result of comparing
|
||||||
pointers to different objects must be defined. Checked pointers can also
|
pointers to different objects must be defined. Checked pointers can also
|
||||||
be compared for equality or inequality with \code{0} and \void\ pointers,
|
be compared for equality or inequality with \code{0} and \void\ pointers,
|
||||||
just like unchecked pointers.
|
just like unchecked pointers.
|
||||||
\item
|
\item
|
||||||
Pointers to objects of the same type can be compared relationally. Relational comparisons are the
|
Pointers to objects of the same type can be compared relationally. Relational comparisons are the
|
||||||
\lstinline|<|, \lstinline|<=|, \lstinline|>|, \lstinline|>=| operators. The pointers do not have
|
\lstinline|<|, \lstinline|<=|, \lstinline|>|, \lstinline|>=| operators. The pointers do not have
|
||||||
to be
|
to be
|
||||||
the same kind of pointer. For example, an \uncheckedptrT\ can be compared with an
|
the same kind of pointer. For example, an \uncheckedptrT\ can be compared with an
|
||||||
\arrayptrT . To support bounds checking and reasoning about program behavior, the
|
\arrayptrT . To support bounds checking and reasoning about program behavior, the
|
||||||
|
@ -480,7 +480,7 @@ Programmers must initialize these variables explicitly using C's initializer syn
|
||||||
when they are declared.
|
when they are declared.
|
||||||
|
|
||||||
Initializing an automatic variable with a checked pointer type is simple: a programmer adds
|
Initializing an automatic variable with a checked pointer type is simple: a programmer adds
|
||||||
\lstinline+= 0+ or \lstinline+x = NULL+ to the variable declarator. C's initializer syntax
|
\lstinline+= 0+ or \lstinline+x = NULL+ to the variable declarator. C's initializer syntax
|
||||||
allows compact initializers for structure variables and array variables.
|
allows compact initializers for structure variables and array variables.
|
||||||
A list of initialization values is specified using a brace-enclosed list of expressions.
|
A list of initialization values is specified using a brace-enclosed list of expressions.
|
||||||
The initializer list can be partial list that describes
|
The initializer list can be partial list that describes
|
||||||
|
@ -526,13 +526,13 @@ scopes, the declared types of variables are allowed to be or use
|
||||||
checked pointer types or checked array types; unchecked pointer types
|
checked pointer types or checked array types; unchecked pointer types
|
||||||
and unchecked array types are not allowed. Similarly, for functions,
|
and unchecked array types are not allowed. Similarly, for functions,
|
||||||
return types and parameter types are allowed to be or use checked pointer
|
return types and parameter types are allowed to be or use checked pointer
|
||||||
types or checked array types.
|
types or checked array types.
|
||||||
|
|
||||||
On the other hand, declarations in checked scopes can use unchecked pointer
|
On the other hand, declarations in checked scopes can use unchecked pointer
|
||||||
types and unchecked array types, provided that the declarations provide a
|
types and unchecked array types, provided that the declarations provide a
|
||||||
bounds-safe interface. These are described in
|
bounds-safe interface. These are described in
|
||||||
Section~\ref{section:function-bounds-safe-interfaces}.
|
Section~\ref{section:function-bounds-safe-interfaces}.
|
||||||
Variables and functions used in checked scopes are
|
Variables and functions used in checked scopes are
|
||||||
allowed to have or use checked pointer types, checked array types, or
|
allowed to have or use checked pointer types, checked array types, or
|
||||||
unchecked pointer types and unchecked array types with bounds declarations.
|
unchecked pointer types and unchecked array types with bounds declarations.
|
||||||
|
|
||||||
|
@ -551,7 +551,7 @@ existing C code.
|
||||||
A checked block is introduced by placing \keyword{checked} before a
|
A checked block is introduced by placing \keyword{checked} before a
|
||||||
compound block:
|
compound block:
|
||||||
\begin{lstlisting}
|
\begin{lstlisting}
|
||||||
checked
|
checked
|
||||||
{
|
{
|
||||||
int a = 5;
|
int a = 5;
|
||||||
ptr<int> pa = &a;
|
ptr<int> pa = &a;
|
||||||
|
@ -573,25 +573,25 @@ checked keyword can be used. This example shows the use of an unchecked
|
||||||
block:
|
block:
|
||||||
|
|
||||||
\begin{lstlisting}
|
\begin{lstlisting}
|
||||||
checked
|
checked
|
||||||
{
|
{
|
||||||
int a = 5;
|
int a = 5;
|
||||||
unchecked
|
unchecked
|
||||||
{
|
{
|
||||||
int *upa = &a;
|
int *upa = &a;
|
||||||
int b[5][5];
|
int b[5][5];
|
||||||
for (int i = 0; i < 5; i++) {
|
for (int i = 0; i < 5; i++) {
|
||||||
for (int j = 0; j < 5; j++) {
|
for (int j = 0; j < 5; j++) {
|
||||||
// not bounds checked
|
// not bounds checked
|
||||||
b[i][j] = -1;
|
b[i][j] = -1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
...
|
...
|
||||||
}
|
}
|
||||||
\end{lstlisting}
|
\end{lstlisting}
|
||||||
|
|
||||||
In a checked function definition, the body of the function is a
|
In a checked function definition, the body of the function is a
|
||||||
checked block. A checked function definition is declared by placing the
|
checked block. A checked function definition is declared by placing the
|
||||||
\keyword{checked} keyword before the definition. Here are examples of checked and
|
\keyword{checked} keyword before the definition. Here are examples of checked and
|
||||||
unchecked function definitions:
|
unchecked function definitions:
|
||||||
|
@ -599,7 +599,7 @@ unchecked function definitions:
|
||||||
\begin{lstlisting}
|
\begin{lstlisting}
|
||||||
// checked at the function level: no unchecked pointers can appear in
|
// checked at the function level: no unchecked pointers can appear in
|
||||||
// argument types, the return type, or the body of the function.
|
// argument types, the return type, or the body of the function.
|
||||||
checked int f(ptr<int> p)
|
checked int f(ptr<int> p)
|
||||||
{
|
{
|
||||||
int a = 5;
|
int a = 5;
|
||||||
ptr<int> pa = &a;
|
ptr<int> pa = &a;
|
||||||
|
@ -635,10 +635,10 @@ referred to as K\&R-style function declarations because this is the only way
|
||||||
functions were declared in the first edition of the C Programming Language book
|
functions were declared in the first edition of the C Programming Language book
|
||||||
by Brian Kernigan and Dennis Ritchie. The second edition incorporated changes from the
|
by Brian Kernigan and Dennis Ritchie. The second edition incorporated changes from the
|
||||||
C ANSI Standard and introduced function prototypes.}, where the types
|
C ANSI Standard and introduced function prototypes.}, where the types
|
||||||
of the arguments to functions are not specified . These
|
of the arguments to functions are not specified. These
|
||||||
functions are dangerous to use because there can be mismatches
|
functions are dangerous to use because there can be mismatches
|
||||||
between argument types and parameter types at function
|
between argument types and parameter types at function
|
||||||
calls. This can corrupt data or the call stack. In checked scopes,
|
calls. This can corrupt data or the call stack. In checked scopes,
|
||||||
the use or declaration of functions without prototypes is not allowed.
|
the use or declaration of functions without prototypes is not allowed.
|
||||||
|
|
||||||
As we add different notions of checking to Checked C, we will use the
|
As we add different notions of checking to Checked C, we will use the
|
||||||
|
@ -656,7 +656,7 @@ It consists of a parenthesized type name followed by list of initializers in bra
|
||||||
A compound literal has static storage if it is at the top level of a file and automatic
|
A compound literal has static storage if it is at the top level of a file and automatic
|
||||||
storage if it occurs within a block. Here are examples of compound literals:
|
storage if it occurs within a block. Here are examples of compound literals:
|
||||||
\begin{lstlisting}
|
\begin{lstlisting}
|
||||||
int *arr = (int []) { 0, 1, 2};
|
int *arr = (int []) { 0, 1, 2 };
|
||||||
struct Point {
|
struct Point {
|
||||||
int x;
|
int x;
|
||||||
int y;
|
int y;
|
||||||
|
@ -714,7 +714,7 @@ checked void f(void) {
|
||||||
// checking to nt_array_ptr<char>.
|
// checking to nt_array_ptr<char>.
|
||||||
nt_array_ptr<char> p = "hello";
|
nt_array_ptr<char> p = "hello";
|
||||||
|
|
||||||
// "goodbye" has type char nt_checked[8]. The type is altered during
|
// "goodbye" has type char nt_checked[8]. The type is altered during
|
||||||
// type checking to the pointer type array_ptr.
|
// type checking to the pointer type array_ptr.
|
||||||
array_ptr<char> r = "goodbye";
|
array_ptr<char> r = "goodbye";
|
||||||
|
|
||||||
|
@ -803,11 +803,11 @@ struct inner_array {
|
||||||
|
|
||||||
typedef struct inner_array ragged_2d_arr[];
|
typedef struct inner_array ragged_2d_arr[];
|
||||||
|
|
||||||
ragged_2d_arr data =
|
ragged_2d_arr data =
|
||||||
{ { 3, (double checked[]) { 1.0, 0.0, 0.0 } },
|
{ { 3, (double checked[]) { 1.0, 0.0, 0.0 } },
|
||||||
{ 1, (double checked[]) { 5.0 } },
|
{ 1, (double checked[]) { 5.0 } },
|
||||||
{ 0, 0 },
|
{ 0, 0 },
|
||||||
{ 2, (double checked[]) { 6.0, 7.0 } }
|
{ 2, (double checked[]) { 6.0, 7.0 } }
|
||||||
};
|
};
|
||||||
|
|
||||||
struct BufferPair {
|
struct BufferPair {
|
||||||
|
@ -841,7 +841,7 @@ nt_array_ptr<char> more_colors nt_checked[] = { "white", "black",
|
||||||
|
|
||||||
// A null-terminated array of pointers to null-terminated arrays
|
// A null-terminated array of pointers to null-terminated arrays
|
||||||
// of strings.
|
// of strings.
|
||||||
nt_array_ptr<nt_array_ptr<char>> sentences nt_checked[4] =
|
nt_array_ptr<nt_array_ptr<char>> sentences nt_checked[4] =
|
||||||
{ (nt_array_ptr<char> nt_checked[]) { "this", "is", "the", "first",
|
{ (nt_array_ptr<char> nt_checked[]) { "this", "is", "the", "first",
|
||||||
"sentence", 0 },
|
"sentence", 0 },
|
||||||
(nt_array_ptr<char> nt_checked[]) { "here", "is", "another", 0 },
|
(nt_array_ptr<char> nt_checked[]) { "here", "is", "another", 0 },
|
||||||
|
@ -869,16 +869,16 @@ The type to which a pointer and its bounds are relatively aligned is
|
||||||
called the relative alignment type. By default, the relative alignment
|
called the relative alignment type. By default, the relative alignment
|
||||||
type for a pointer and its bounds is the referent type. However, the
|
type for a pointer and its bounds is the referent type. However, the
|
||||||
relative alignment type can be overridden by specifying it explicitly as
|
relative alignment type can be overridden by specifying it explicitly as
|
||||||
part of the bounds. This is described in
|
part of the bounds. This is described in
|
||||||
Section~\ref{section:representing-relative-alignment}.
|
Section~\ref{section:representing-relative-alignment}.
|
||||||
This can be used for bounds for the results of pointers casts and
|
This can be used for bounds for the results of pointers casts and
|
||||||
for \arrayptrvoid\ pointers. The type
|
for \arrayptrvoid\ pointers. The type
|
||||||
\void\ has no defined size, so the default relative alignment is
|
\void\ has no defined size, so the default relative alignment is
|
||||||
undefined for \void.
|
undefined for \void.
|
||||||
|
|
||||||
We considered removing the entire concept of relative alignment from the
|
We considered removing the entire concept of relative alignment from the
|
||||||
design to simplify the design. We decided against that because it would
|
design to simplify the design. We decided against that because it would
|
||||||
increase the cost of bounds checking throughout the program.
|
increase the cost of bounds checking throughout the program.
|
||||||
Section~\ref{section:design-alternatives:always-unaligned} explains
|
Section~\ref{section:design-alternatives:always-unaligned} explains
|
||||||
this choice in more detail.
|
this choice in more detail.
|
||||||
|
|
||||||
|
@ -1036,7 +1036,7 @@ pointer arithmetic.
|
||||||
|
|
||||||
To support this expansion, integer arithmetic operators are extended
|
To support this expansion, integer arithmetic operators are extended
|
||||||
with the operators \plusovf, \minusovf, and \mulovf. The
|
with the operators \plusovf, \minusovf, and \mulovf. The
|
||||||
operators interpet pointers as unsigned integers in some range 0 to
|
operators interpret pointers as unsigned integers in some range 0 to
|
||||||
\code{UINTPTR_MAX}. An operator produces a runtime error if the value
|
\code{UINTPTR_MAX}. An operator produces a runtime error if the value
|
||||||
of its result cannot be represented by the result type:
|
of its result cannot be represented by the result type:
|
||||||
|
|
||||||
|
@ -1044,7 +1044,7 @@ of its result cannot be represented by the result type:
|
||||||
\item
|
\item
|
||||||
\plusovf\ takes an unsigned integer \var{i} and an
|
\plusovf\ takes an unsigned integer \var{i} and an
|
||||||
integer \var{j} and produces an unsigned integer in the range 0 to
|
integer \var{j} and produces an unsigned integer in the range 0 to
|
||||||
\code{UINTPTR_MAX}. Its result is the mathemetical value \var{i} + \var{j}.
|
\code{UINTPTR_MAX}. Its result is the mathematical value \var{i} + \var{j}.
|
||||||
\item
|
\item
|
||||||
For subtraction, there are two forms:
|
For subtraction, there are two forms:
|
||||||
|
|
||||||
|
@ -1057,7 +1057,7 @@ of its result cannot be represented by the result type:
|
||||||
\item
|
\item
|
||||||
\lstinline|-|\textsubscript{ovf\_diff } takes two unsigned integers \var{i}
|
\lstinline|-|\textsubscript{ovf\_diff } takes two unsigned integers \var{i}
|
||||||
and \var{j} and computes \var{i} - \var{j}, producing a signed integer of type
|
and \var{j} and computes \var{i} - \var{j}, producing a signed integer of type
|
||||||
\code{ptrdiff_t}. Its result is the mathemetical value \var{i} - \var{j}.
|
\code{ptrdiff_t}. Its result is the mathematical value \var{i} - \var{j}.
|
||||||
\end{itemize}
|
\end{itemize}
|
||||||
\item
|
\item
|
||||||
\mulovf\ takes two integers \var{i} and \var{j} (both either
|
\mulovf\ takes two integers \var{i} and \var{j} (both either
|
||||||
|
@ -1080,7 +1080,7 @@ signed or unsigned integer as \code{e2}.
|
||||||
If \code{e2} is a signed integer, the expansion of \code{e1 + e2}
|
If \code{e2} is a signed integer, the expansion of \code{e1 + e2}
|
||||||
must cast \sizeof{\var{T}} to a signed integer. We introduce a
|
must cast \sizeof{\var{T}} to a signed integer. We introduce a
|
||||||
signed integer type \code{signed_size_t} that is large enough for
|
signed integer type \code{signed_size_t} that is large enough for
|
||||||
this cast. \code{e1 + e2} expands to \code{e1} \plusovf\
|
this cast. \code{e1 + e2} expands to \code{e1} \plusovf\
|
||||||
\code{((signed_size_t)} \sizeof{\var{T}}\code{)} \mulovf\ \code{e2}. This cast is
|
\code{((signed_size_t)} \sizeof{\var{T}}\code{)} \mulovf\ \code{e2}. This cast is
|
||||||
necessary because in C, multiplying a signed integer by an unsigned
|
necessary because in C, multiplying a signed integer by an unsigned
|
||||||
integer implicitly converts the signed integer to be an unsigned
|
integer implicitly converts the signed integer to be an unsigned
|
||||||
|
@ -1110,7 +1110,7 @@ Otherwise, in a typical C environment, the program would fault at
|
||||||
\lstinline+*p+. In the code \lstinline|if (p < hi) { p += 1; }|
|
\lstinline+*p+. In the code \lstinline|if (p < hi) { p += 1; }|
|
||||||
the check \lstinline+p < hi+ implies that the increment cannot
|
the check \lstinline+p < hi+ implies that the increment cannot
|
||||||
overflow. The checks are also inexpensive on superscalar processors:
|
overflow. The checks are also inexpensive on superscalar processors:
|
||||||
they can be placed so that they are statically predicted by
|
they can be placed so that they are statically predicted by
|
||||||
processors to never fail.
|
processors to never fail.
|
||||||
|
|
||||||
\section{Programmer-inserted dynamic checks}
|
\section{Programmer-inserted dynamic checks}
|
||||||
|
@ -1215,7 +1215,7 @@ void decode(array_ptr<char> output_buffer, array_ptr<char> input_buffer,
|
||||||
|\hl{\textit{// need check that dst and src have at least len bytes of space}}|
|
|\hl{\textit{// need check that dst and src have at least len bytes of space}}|
|
||||||
memcpy(dst, src, len);
|
memcpy(dst, src, len);
|
||||||
src += len;
|
src += len;
|
||||||
dst += len;
|
dst += len;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case COMPRESSED_INT64: {
|
case COMPRESSED_INT64: {
|
||||||
|
@ -1273,7 +1273,7 @@ so there is no way to test the path.
|
||||||
\end{lstlisting}
|
\end{lstlisting}
|
||||||
|
|
||||||
The problems with requiring functions that validate buffer lengths to
|
The problems with requiring functions that validate buffer lengths to
|
||||||
return status codes for errors are analyzed by O'Donell and Sebor\cite{ODonell2015}.
|
return status codes for errors are analyzed by O'Donell and Sebor\cite{ODonell2015}.
|
||||||
Annex K of the C Standard \cite{ISO2011} introduced a new set of standard library functions to replace
|
Annex K of the C Standard \cite{ISO2011} introduced a new set of standard library functions to replace
|
||||||
functions that provide no way to validate their arguments. These
|
functions that provide no way to validate their arguments. These
|
||||||
functions return status codes to indicate success or failure. A classic
|
functions return status codes to indicate success or failure. A classic
|
||||||
|
@ -1281,7 +1281,7 @@ example of a function prone to misuse is
|
||||||
\lstinline+strcpy(char *dst, const char *src)+.
|
\lstinline+strcpy(char *dst, const char *src)+.
|
||||||
It copies all bytes in \lstinline+src+ to \lstinline+dst+ until
|
It copies all bytes in \lstinline+src+ to \lstinline+dst+ until
|
||||||
it hits a null byte. If \lstinline+src+ is missing the null byte or
|
it hits a null byte. If \lstinline+src+ is missing the null byte or
|
||||||
\lstinline+dst+ is too small, this causes a buffer overrunn. The new
|
\lstinline+dst+ is too small, this causes a buffer overrun. The new
|
||||||
function \lstinline+strcpy_s+ takes an additional size parameter for
|
function \lstinline+strcpy_s+ takes an additional size parameter for
|
||||||
\lstinline+dst+ and has the signature
|
\lstinline+dst+ and has the signature
|
||||||
\lstinline+errno_t strcpy_s(char *dst, size_t dest_len, const char *src)+.
|
\lstinline+errno_t strcpy_s(char *dst, size_t dest_len, const char *src)+.
|
||||||
|
@ -1305,7 +1305,7 @@ void decode(array_ptr<char> output_buffer, array_ptr<char> input_buffer,
|
||||||
|\hl{\textit{dynamic\_check(dst + len < dst\_bound \&\& src + len < src\_bound);}}|
|
|\hl{\textit{dynamic\_check(dst + len < dst\_bound \&\& src + len < src\_bound);}}|
|
||||||
memcpy(dst, src, len);
|
memcpy(dst, src, len);
|
||||||
src += len;
|
src += len;
|
||||||
dst += len;
|
dst += len;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
...
|
...
|
||||||
|
@ -1321,15 +1321,15 @@ behavior is worse.
|
||||||
The following example uses \lstinline+dynamic_check+ to eliminate bounds
|
The following example uses \lstinline+dynamic_check+ to eliminate bounds
|
||||||
checks in a loop. It is based on experience hand-optimizing C\# and Java
|
checks in a loop. It is based on experience hand-optimizing C\# and Java
|
||||||
programs. This kind of example is typically found during a performance
|
programs. This kind of example is typically found during a performance
|
||||||
tuning phase of program development. In the example,
|
tuning phase of program development. In the example,
|
||||||
the \lstinline|: count(|\var{exp}\lstinline|)| notation indicates that \var{exp} is the
|
the \lstinline|: count(|\var{exp}\lstinline|)| notation indicates that \var{exp} is the
|
||||||
length of the buffer.
|
length of the buffer.
|
||||||
|
|
||||||
\begin{lstlisting}
|
\begin{lstlisting}
|
||||||
void append(array_ptr<char> dst : count(dst_count),
|
void append(array_ptr<char> dst : count(dst_count),
|
||||||
array_ptr<char> src : count(src_count),
|
array_ptr<char> src : count(src_count),
|
||||||
size_t dst_count, size_t src_count)
|
size_t dst_count, size_t src_count)
|
||||||
{
|
{
|
||||||
for (size_t i = 0; i < src_count; i++) {
|
for (size_t i = 0; i < src_count; i++) {
|
||||||
if (src[i] == marker) {
|
if (src[i] == marker) {
|
||||||
break;
|
break;
|
||||||
|
@ -1382,7 +1382,7 @@ A compiler would not introduce this dynamic\_check because it would
|
||||||
alter the behavior of \lstinline+append+. The bounds check on \lstinline+dst+
|
alter the behavior of \lstinline+append+. The bounds check on \lstinline+dst+
|
||||||
in the original code is done only if a marker is not found in
|
in the original code is done only if a marker is not found in
|
||||||
\lstinline+src+ and \lstinline+src_count > 0+. A compiler could
|
\lstinline+src+ and \lstinline+src_count > 0+. A compiler could
|
||||||
try to deduce a precondition for the loop
|
try to deduce a precondition for the loop
|
||||||
that prevents the bounds check from failing, but this is
|
that prevents the bounds check from failing, but this is
|
||||||
not possible because the precondition depends on the contents of
|
not possible because the precondition depends on the contents of
|
||||||
\lstinline+src+. A compiler would have to clone code to maintain the same
|
\lstinline+src+. A compiler would have to clone code to maintain the same
|
||||||
|
@ -1391,7 +1391,7 @@ sort of transformation or do it sparingly. Programmer control produces
|
||||||
better results. Here is the code a compiler might generate with cloning.
|
better results. Here is the code a compiler might generate with cloning.
|
||||||
|
|
||||||
\begin{lstlisting}[escapechar=\|]
|
\begin{lstlisting}[escapechar=\|]
|
||||||
void append(array_ptr<char> dst: count(dest_count),
|
void append(array_ptr<char> dst: count(dest_count),
|
||||||
array_ptr<char> src : count(src_count),
|
array_ptr<char> src : count(src_count),
|
||||||
size_t dst_count, size_t src_count)
|
size_t dst_count, size_t src_count)
|
||||||
{
|
{
|
||||||
|
@ -1493,7 +1493,7 @@ signed integer overflow:
|
||||||
\begin{itemize}
|
\begin{itemize}
|
||||||
\item
|
\item
|
||||||
Unchecked pointers shall be treated as addresses of locations in memory,
|
Unchecked pointers shall be treated as addresses of locations in memory,
|
||||||
just as checked pointers are treated as addressses. The addresses shall
|
just as checked pointers are treated as addresses. The addresses shall
|
||||||
be unsigned integers with a defined range of 0 to
|
be unsigned integers with a defined range of 0 to
|
||||||
\code{UINTPTR_MAX}:
|
\code{UINTPTR_MAX}:
|
||||||
|
|
||||||
|
@ -1527,7 +1527,7 @@ In the case of a runtime error, program execution cannot continue in a
|
||||||
way that uses the value of the expression that produced the error.
|
way that uses the value of the expression that produced the error.
|
||||||
|
|
||||||
For programs with expressions and initializers with undefined meanings,
|
For programs with expressions and initializers with undefined meanings,
|
||||||
those programs must be rejected at translation-time.
|
those programs must be rejected at translation-time.
|
||||||
Section~\ref{section:avoiding-undefinedness}
|
Section~\ref{section:avoiding-undefinedness}
|
||||||
describes this in detail.
|
describes this in detail.
|
||||||
|
|
||||||
|
|
|
@ -3,7 +3,7 @@
|
||||||
\chapter{Introduction}
|
\chapter{Introduction}
|
||||||
\label{chapter:introduction}
|
\label{chapter:introduction}
|
||||||
|
|
||||||
The C programming language \cite{Ritchie1988, ISO2011} allows programmers to use
|
The C programming language \cite{Ritchie1988, ISO2011} allows programmers to use
|
||||||
pointers directly. A pointer is an address of a location in memory. Programs may do
|
pointers directly. A pointer is an address of a location in memory. Programs may do
|
||||||
arithmetic on pointers, dereference them to read memory, or assign
|
arithmetic on pointers, dereference them to read memory, or assign
|
||||||
through them to modify memory. The ability to use pointers directly
|
through them to modify memory. The ability to use pointers directly
|
||||||
|
@ -69,7 +69,7 @@ Checked C addresses the bounds checking problem for C by:
|
||||||
extension to \arrayptr\ for null-terminated arrays (\ntarrayptr).
|
extension to \arrayptr\ for null-terminated arrays (\ntarrayptr).
|
||||||
\item
|
\item
|
||||||
For array pointer types (\arrayptr), in keeping with the low-level
|
For array pointer types (\arrayptr), in keeping with the low-level
|
||||||
nature of C, bounds checking is placed under programmer control.
|
nature of C, bounds checking is placed under programmer control.
|
||||||
This differs from languages like
|
This differs from languages like
|
||||||
Java, where bounds checking is completely automatic. A programmer
|
Java, where bounds checking is completely automatic. A programmer
|
||||||
declares \emph{bounds}, where the bounds for an \arrayptr\
|
declares \emph{bounds}, where the bounds for an \arrayptr\
|
||||||
|
@ -174,11 +174,11 @@ This problem is addressed in two ways. First, the language rules that are
|
||||||
used to check the validity of bounds are limited intentionally. The rules can
|
used to check the validity of bounds are limited intentionally. The rules can
|
||||||
check the validity of bounds that are needed in practice. However, there are
|
check the validity of bounds that are needed in practice. However, there are
|
||||||
bounds that are true that cannot be proved using the rules. In the terminology
|
bounds that are true that cannot be proved using the rules. In the terminology
|
||||||
of program logics, the rules are incomplete. As an example, the
|
of program logics, the rules are incomplete. As an example, the
|
||||||
rules about distributivity limit the size of
|
rules about distributivity limit the size of
|
||||||
expressions that they produce. In practice, this means that
|
expressions that they produce. In practice, this means that
|
||||||
simple disjunctive bounds can be checked, but complex disjunctive bounds cannot be checked.
|
simple disjunctive bounds can be checked, but complex disjunctive bounds cannot be checked.
|
||||||
|
|
||||||
Second, the inference that compilers do for checking program invariants is
|
Second, the inference that compilers do for checking program invariants is
|
||||||
limited. Compilers act as \emph{checkers} for invariants. They check that
|
limited. Compilers act as \emph{checkers} for invariants. They check that
|
||||||
declared invariants follow from other declared invariants, the
|
declared invariants follow from other declared invariants, the
|
||||||
|
@ -202,25 +202,25 @@ scope of this technical report. For now, it is assumed that programs are
|
||||||
correct in these other aspects.
|
correct in these other aspects.
|
||||||
|
|
||||||
This design is being done in an iterative fashion. To validate the
|
This design is being done in an iterative fashion. To validate the
|
||||||
design, we mocked up modifying a subset of the OpenSSL
|
design, we mocked up modifying a subset of the OpenSSL
|
||||||
code base \cite{OpenSSL2015} to be bounds-safe .
|
code base \cite{OpenSSL2015} to be bounds-safe .
|
||||||
We created C++ templates for the new pointer types and modified OpenSSL to
|
We created C++ templates for the new pointer types and modified OpenSSL to
|
||||||
compile as valid C++ code.
|
compile as valid C++ code.
|
||||||
We hand-edited about 11,000 lines of the code to use checked pointer
|
We hand-edited about 11,000 lines of the code to use checked pointer
|
||||||
types with full bounds annotations. We used macros to encapsulate
|
types with full bounds annotations. We used macros to encapsulate
|
||||||
the bounds annotations so that they could be elided from the code
|
the bounds annotations so that they could be elided from the code
|
||||||
and OpenSSL compiled and tested using the new types. We also modified the
|
and OpenSSL compiled and tested using the new types. We also modified the
|
||||||
generic stack type in OpenSSL to use the \code{ptr} type, which required
|
generic stack type in OpenSSL to use the \code{ptr} type, which required
|
||||||
cross-cutting changes across the code base (in all, about 160 files were changed)
|
cross-cutting changes across the code base (in all, about 160 files were changed)
|
||||||
as well dealing with complicated macros.
|
as well dealing with complicated macros.
|
||||||
|
|
||||||
We learned the following from this
|
We learned the following from this
|
||||||
experience. First, it was important to have a compact, succinct syntax
|
experience. First, it was important to have a compact, succinct syntax
|
||||||
for declaring bounds.
|
for declaring bounds.
|
||||||
Second, in most cases, declaring bounds at declarations was sufficient for
|
Second, in most cases, declaring bounds at declarations was sufficient for
|
||||||
tracking the bounds of variables. Large blocks of code
|
tracking the bounds of variables. Large blocks of code
|
||||||
remained unchanged, which matches the observations of the
|
remained unchanged, which matches the observations of the
|
||||||
Cyclone project \cite{Jim2002}, an earlier research
|
Cyclone project \cite{Jim2002}, an earlier research
|
||||||
effort to create a type-safe version of C.
|
effort to create a type-safe version of C.
|
||||||
Third, the expressions allowed in declaring
|
Third, the expressions allowed in declaring
|
||||||
bounds needed to be rich. Fourth, pointer casts were used
|
bounds needed to be rich. Fourth, pointer casts were used
|
||||||
|
@ -230,19 +230,19 @@ easily to be appropriate for the new referent type of the pointer.
|
||||||
Fifth, it was clear that there needed
|
Fifth, it was clear that there needed
|
||||||
to be a graceful way of interoperating with existing libraries that could
|
to be a graceful way of interoperating with existing libraries that could
|
||||||
not be changed. Finally, signed integer overflow was a pervasive
|
not be changed. Finally, signed integer overflow was a pervasive
|
||||||
possibility, which raised questions about the meaning of bounds
|
possibility, which raised questions about the meaning of bounds
|
||||||
declarations that used signed integers.
|
declarations that used signed integers.
|
||||||
|
|
||||||
We revised the design to
|
We revised the design to
|
||||||
address these issues. In particular, we paid close attention to
|
address these issues. In particular, we paid close attention to
|
||||||
tracking bounds through pointer casts. We also made sure that the
|
tracking bounds through pointer casts. We also made sure that the
|
||||||
contraints on signed integer expressions used in bounds expressions
|
constraints on signed integer expressions used in bounds expressions
|
||||||
were understood and could be written down in the language of
|
were understood and could be written down in the language of
|
||||||
simple invariants.
|
simple invariants.
|
||||||
|
|
||||||
The design is a work-in-progress. Some material will be missing
|
The design is a work-in-progress. Some material will be missing
|
||||||
or incomplete because the language design has not yet been done.
|
or incomplete because the language design has not yet been done.
|
||||||
Readers should be aware that all parts of the document are subject to change.
|
Readers should be aware that all parts of the document are subject to change.
|
||||||
We are interested in feedback and suggestions about ways to improve
|
We are interested in feedback and suggestions about ways to improve
|
||||||
the design.
|
the design.
|
||||||
|
|
||||||
|
@ -304,7 +304,7 @@ Two specific design principles are adopted based on these principles:
|
||||||
inventing new notations. Many systems are hybrid C/C++ systems, so
|
inventing new notations. Many systems are hybrid C/C++ systems, so
|
||||||
this approach fits with the principle of clarity. It also enables
|
this approach fits with the principle of clarity. It also enables
|
||||||
incremental adoption. One of the design goals of C++ has been that C
|
incremental adoption. One of the design goals of C++ has been that C
|
||||||
is a subset of C++. It is a design goal to allow Checked C to be a
|
is a subset of C++. It is a design goal to allow Checked C to be a
|
||||||
subset of C++ too.
|
subset of C++ too.
|
||||||
\end{enumerate}
|
\end{enumerate}
|
||||||
|
|
||||||
|
@ -314,13 +314,13 @@ Operators and symbolic characters will be shown in black (like \lstinline|->|).
|
||||||
variables will be shown in light blue (like \lstinline|for (int i = 0; i<10; i++)|). At times, we will
|
variables will be shown in light blue (like \lstinline|for (int i = 0; i<10; i++)|). At times, we will
|
||||||
define properties or make mathematical statements that range over sets of values or program
|
define properties or make mathematical statements that range over sets of values or program
|
||||||
elements, such as mathematical integers, variables, or expressions. We will use meta-variables in {\it italics} that
|
elements, such as mathematical integers, variables, or expressions. We will use meta-variables in {\it italics} that
|
||||||
range over these values or program elements. We might say something like
|
range over these values or program elements. We might say something like
|
||||||
``for all expressions \var{e\textsubscript{1}} and \var{e\textsubscript{2}}.'' These meta-variables
|
``for all expressions \var{e\textsubscript{1}} and \var{e\textsubscript{2}}.'' These meta-variables
|
||||||
differ from program variables, which are elements of specific C programs.
|
differ from program variables, which are elements of specific C programs.
|
||||||
|
|
||||||
\section{Organization of this document}
|
\section{Organization of this document}
|
||||||
|
|
||||||
The target audience for this document includes programmers interested in
|
The target audience for this document includes programmers interested in
|
||||||
learning about Checked C, compiler writers, and language designers.
|
learning about Checked C, compiler writers, and language designers.
|
||||||
These groups have different and conflicting needs. Programmers will find that
|
These groups have different and conflicting needs. Programmers will find that
|
||||||
some sections have too much detail, while language designers will likely
|
some sections have too much detail, while language designers will likely
|
||||||
|
@ -339,7 +339,7 @@ bounds safety even in the face of integer overflow, which is a
|
||||||
widespread possibility in C programs. This requires a more nuanced
|
widespread possibility in C programs. This requires a more nuanced
|
||||||
requirement on the result of overflow than ``the result is undefined.''
|
requirement on the result of overflow than ``the result is undefined.''
|
||||||
|
|
||||||
Chapters~\ref{chapter:core-extensions} through
|
Chapters~\ref{chapter:core-extensions} through
|
||||||
\ref{chapter:pointers-to-data-with-arrayptrs}
|
\ref{chapter:pointers-to-data-with-arrayptrs}
|
||||||
present the extensions to C for declaring bounds
|
present the extensions to C for declaring bounds
|
||||||
for \arrayptr\ values stored in variables and structures and
|
for \arrayptr\ values stored in variables and structures and
|
||||||
|
@ -377,13 +377,13 @@ covers inferring effects of an expression on the bounds of variables.
|
||||||
This inferred information is then used to validate that declarations and
|
This inferred information is then used to validate that declarations and
|
||||||
statements correctly declare bounds and maintain the bounds information.
|
statements correctly declare bounds and maintain the bounds information.
|
||||||
|
|
||||||
Chapter~\ref{chapter:interoperation} covers interoperation between
|
Chapter~\ref{chapter:interoperation} covers interoperation between
|
||||||
checked and unchecked code. It covers conversions between checked and
|
checked and unchecked code. It covers conversions between checked and
|
||||||
unchecked pointers, as well as conversions between the new kinds of checked pointers.
|
unchecked pointers, as well as conversions between the new kinds of checked pointers.
|
||||||
It pins down the notion of checked code and unchecked code. Finally, it covers
|
It pins down the notion of checked code and unchecked code. Finally, it covers
|
||||||
bounds-safe interfaces in depth.
|
bounds-safe interfaces in depth.
|
||||||
|
|
||||||
Chapter~\ref{chapter:structure-bounds} extends these ideas from variables to data
|
Chapter~\ref{chapter:structure-bounds} extends these ideas from variables to data
|
||||||
structures by
|
structures by
|
||||||
introducing member bounds, which are type-level invariants about members
|
introducing member bounds, which are type-level invariants about members
|
||||||
of structure types. For now, it assumes that the programmer has done
|
of structure types. For now, it assumes that the programmer has done
|
||||||
|
@ -395,13 +395,13 @@ uses to check programs with member bounds.
|
||||||
Chapter~\ref{chapter:pointers-to-data-with-arrayptrs} describes how
|
Chapter~\ref{chapter:pointers-to-data-with-arrayptrs} describes how
|
||||||
to extend reasoning about bounds to pointers to data that has \arrayptr-typed
|
to extend reasoning about bounds to pointers to data that has \arrayptr-typed
|
||||||
values within it. This includes pointers to structures with members that
|
values within it. This includes pointers to structures with members that
|
||||||
have \arrayptr\ types and pointers directly to \arrayptr s. Pointers to
|
have \arrayptr\ types and pointers directly to \arrayptr s. Pointers to
|
||||||
structures can be used easily by ensuring that modifications to members
|
structures can be used easily by ensuring that modifications to members
|
||||||
preserve type-level bounds invariants.
|
preserve type-level bounds invariants.
|
||||||
This follows the lead of the Deputy system \cite{Condit2007}.
|
This follows the lead of the Deputy system \cite{Condit2007}.
|
||||||
Complexity arises for pointers to \arrayptr s where
|
Complexity arises for pointers to \arrayptr s where
|
||||||
the \arrayptr s have bounds that depend on other variables or members. The
|
the \arrayptr s have bounds that depend on other variables or members. The
|
||||||
other variables or members need to be abstracted by pointers as well to ensure
|
other variables or members need to be abstracted by pointers as well to ensure
|
||||||
that invariants for bounds are not violated. Assignments via pointer
|
that invariants for bounds are not violated. Assignments via pointer
|
||||||
indirections need to be coordinated to maintain the invariant. This
|
indirections need to be coordinated to maintain the invariant. This
|
||||||
gives rise to constraints on the pointers and the variables or
|
gives rise to constraints on the pointers and the variables or
|
||||||
|
@ -418,12 +418,12 @@ about whether one fact is true given a set of other facts (for example,
|
||||||
given x == y and the statement \code{z = y;} z == x is true after that
|
given x == y and the statement \code{z = y;} z == x is true after that
|
||||||
statement). These rules and facts can be used to deduce the correctness
|
statement). These rules and facts can be used to deduce the correctness
|
||||||
of bounds declarations that differ from those inferred directly by the
|
of bounds declarations that differ from those inferred directly by the
|
||||||
checking described in Chapters~\ref{chapter:checking-bounds}
|
checking described in Chapters~\ref{chapter:checking-bounds}
|
||||||
and~\ref{chapter:pointers-to-data-with-arrayptrs}. For example,
|
and~\ref{chapter:pointers-to-data-with-arrayptrs}. For example,
|
||||||
a programmer may wish to narrow the memory that is accessible via an
|
a programmer may wish to narrow the memory that is accessible via an
|
||||||
\arrayptr\ variable by declaring bounds that are a subrange of
|
\arrayptr\ variable by declaring bounds that are a subrange of
|
||||||
the bounds inferred by the checking. A programmer may wish to update the
|
the bounds inferred by the checking. A programmer may wish to update the
|
||||||
bounds for an \arrayptr\ variable after an assignment \code{x = y}, substituting
|
bounds for an \arrayptr\ variable after an assignment \code{x = y}, substituting
|
||||||
\code{x} for \code{y}.
|
\code{x} for \code{y}.
|
||||||
|
|
||||||
The same static checking that is used for bounds can be used to reason
|
The same static checking that is used for bounds can be used to reason
|
||||||
|
@ -448,7 +448,7 @@ of the modifications or understand the practical benefits of checking.
|
||||||
Still, this gives some idea about the usefulness and applicability of
|
Still, this gives some idea about the usefulness and applicability of
|
||||||
the design.
|
the design.
|
||||||
|
|
||||||
Chapter~\ref{chapter:open-issues} summarizes the open problems uncovered by
|
Chapter~\ref{chapter:open-issues} summarizes the open problems uncovered by
|
||||||
Chapter~\ref{chapter:eval} or
|
Chapter~\ref{chapter:eval} or
|
||||||
unaddressed by the design. It also describes next steps for implementing
|
unaddressed by the design. It also describes next steps for implementing
|
||||||
this in a C compiler.
|
this in a C compiler.
|
||||||
|
@ -460,21 +460,21 @@ that were considered and not chosen. It explains why those alternatives were
|
||||||
not chosen.
|
not chosen.
|
||||||
|
|
||||||
Here are the suggested roadmaps for the different groups of readers.
|
Here are the suggested roadmaps for the different groups of readers.
|
||||||
For programmers interested in learning about Checked C,
|
For programmers interested in learning about Checked C,
|
||||||
we suggest reading Chapter~\ref{chapter:core-extensions} except
|
we suggest reading Chapter~\ref{chapter:core-extensions} except
|
||||||
Sections~\ref{section:pointer-arithmetic-errors} and \ref{section:changes-to-undefined-behavior},
|
Sections~\ref{section:pointer-arithmetic-errors} and \ref{section:changes-to-undefined-behavior},
|
||||||
Chapter~\ref{chapter:tracking-bounds}
|
Chapter~\ref{chapter:tracking-bounds}
|
||||||
except Section~\ref{section:extent-of-declarations},
|
except Section~\ref{section:extent-of-declarations},
|
||||||
Section~\ref{section:inferring-expression-bounds} of Chapter~\ref{chapter:checking-bounds},
|
Section~\ref{section:inferring-expression-bounds} of Chapter~\ref{chapter:checking-bounds},
|
||||||
Chapter~\ref{chapter:interoperation} except for Section~\ref{section:checking-bounds-interfaces},
|
Chapter~\ref{chapter:interoperation} except for Section~\ref{section:checking-bounds-interfaces},
|
||||||
Chapter~\ref{chapter:structure-bounds} except for Sections~\ref{section:member-bounds-state-extent}
|
Chapter~\ref{chapter:structure-bounds} except for Sections~\ref{section:member-bounds-state-extent}
|
||||||
and \ref{section:checking-bounds-with-structures},
|
and \ref{section:checking-bounds-with-structures},
|
||||||
and Chapter~\ref{chapter:simple-invariants} through the first section.
|
and Chapter~\ref{chapter:simple-invariants} through the first section.
|
||||||
Sections~\ref{section:pointer-cast-results}, \ref{section:integer-overflow-informal},
|
Sections~\ref{section:pointer-cast-results}, \ref{section:integer-overflow-informal},
|
||||||
and \ref{section:pointer-integer-conversions} are advanced topics that can be read
|
and \ref{section:pointer-integer-conversions} are advanced topics that can be read
|
||||||
after the other recommended chapters and sections.
|
after the other recommended chapters and sections.
|
||||||
|
|
||||||
We suggest language designers and compiler writers
|
We suggest language designers and compiler writers
|
||||||
read most of the document. Language designers can skip Sections~\ref{section:computing-extent},
|
read most of the document. Language designers can skip Sections~\ref{section:computing-extent},
|
||||||
\ref{section:canonicalization}, and \ref{section:canonicalization-example}.
|
\ref{section:canonicalization}, and \ref{section:canonicalization-example}.
|
||||||
Compiler writers can skip the discussions in Section~\ref{section:programmer-dynamic-checks}
|
Compiler writers can skip the discussions in Section~\ref{section:programmer-dynamic-checks}
|
||||||
|
@ -484,7 +484,7 @@ and Chapters~\ref{chapter:related-work} and \ref{chapter:design-alternatives}.
|
||||||
|
|
||||||
\section{Acknowledgements}
|
\section{Acknowledgements}
|
||||||
|
|
||||||
This design has benefited from many discussions with Weidong Cui, Gabriel Dos Reis,
|
This design has benefited from many discussions with Weidong Cui, Gabriel Dos Reis,
|
||||||
Sam Elliott, Chris Hawblitzel, Galen Hunt, Shuvendu Lahiri, and Reuben Olinsky. The design has
|
Sam Elliott, Chris Hawblitzel, Galen Hunt, Shuvendu Lahiri, and Reuben Olinsky. The design has
|
||||||
benefited also from feedback from Michael Hicks, Wonsub Kim, Greg
|
benefited also from feedback from Michael Hicks, Wonsub Kim, Greg
|
||||||
Morrisett, Jonghyun Park, and Andrew Ruef. We thank them for their contributions to
|
Morrisett, Jonghyun Park, and Andrew Ruef. We thank them for their contributions to
|
||||||
|
|
|
@ -32,7 +32,7 @@ the form:
|
||||||
\end{tabbing}
|
\end{tabbing}
|
||||||
|
|
||||||
where \var{x} is a variable. Additional forms for handling
|
where \var{x} is a variable. Additional forms for handling
|
||||||
\void\ pointers and casts are described in
|
\void\ pointers and casts are described in
|
||||||
Sections~\ref{section:pointers-to-void} and \ref{section:pointer-cast-results}.
|
Sections~\ref{section:pointers-to-void} and \ref{section:pointer-cast-results}.
|
||||||
|
|
||||||
A bounds expression describes the memory that can be accessed using the
|
A bounds expression describes the memory that can be accessed using the
|
||||||
|
@ -67,7 +67,7 @@ include counts, pairs that specify an upper bound and lower bound,
|
||||||
Bounds expression use ``non-modifying'' expressions. These are a subset
|
Bounds expression use ``non-modifying'' expressions. These are a subset
|
||||||
of C expressions that do not modify variables or memory. They include
|
of C expressions that do not modify variables or memory. They include
|
||||||
local variables, parameters, constant expressions, casts, and operators
|
local variables, parameters, constant expressions, casts, and operators
|
||||||
such as addition, subtraction, and address-of.
|
such as addition, subtraction, and address-of.
|
||||||
Section~\ref{section:non-modifying-expressions} describes
|
Section~\ref{section:non-modifying-expressions} describes
|
||||||
non-modifying expressions in detail.
|
non-modifying expressions in detail.
|
||||||
|
|
||||||
|
@ -80,8 +80,8 @@ type of \var{e1} is a character, a short integer, a bit field, or an enumeration
|
||||||
the expression is promoted to an \keyword{int} type if that is large
|
the expression is promoted to an \keyword{int} type if that is large
|
||||||
enough to hold all values of the type or an \keyword{unsigned int} type otherwise.}
|
enough to hold all values of the type or an \keyword{unsigned int} type otherwise.}
|
||||||
For \boundsdecl{\var{x}}{\bounds{\var{e1}}{\var{e2}}}, \var{e1} and
|
For \boundsdecl{\var{x}}{\bounds{\var{e1}}{\var{e2}}}, \var{e1} and
|
||||||
\var{e2} must be pointers to the same type. Typically \var{x} is also
|
\var{e2} must be pointers to the same type. Typically \var{x} is also
|
||||||
a pointer to that type or an array of that type.
|
a pointer to that type or an array of that type.
|
||||||
However, \var{x} can be a pointer to or an array of a different type.
|
However, \var{x} can be a pointer to or an array of a different type.
|
||||||
This is useful for describing the results of casts and
|
This is useful for describing the results of casts and
|
||||||
bounds for \arrayptrvoid\ pointers.
|
bounds for \arrayptrvoid\ pointers.
|
||||||
|
@ -140,7 +140,7 @@ have been derived via a sequence of operations from a pointer to some
|
||||||
object \var{obj} with \bounds{\var{low}}{\var{high}}.
|
object \var{obj} with \bounds{\var{low}}{\var{high}}.
|
||||||
The following statement will be true at runtime:
|
The following statement will be true at runtime:
|
||||||
\var{ev} \lstinline@== 0 || (@\var{low} \lstinline|<=| \var{lbv} \lstinline|&&|
|
\var{ev} \lstinline@== 0 || (@\var{low} \lstinline|<=| \var{lbv} \lstinline|&&|
|
||||||
\var{ubv} \lstinline|<=| \var{high}\lstinline|)|. In other words,
|
\var{ubv} \lstinline|<=| \var{high}\lstinline|)|. In other words,
|
||||||
if \var{ev} is null, the bounds
|
if \var{ev} is null, the bounds
|
||||||
may or may not be valid. If \var{ev} is non-null, the bounds must be
|
may or may not be valid. If \var{ev} is non-null, the bounds must be
|
||||||
valid. This implies that any access to memory where \var{ev}
|
valid. This implies that any access to memory where \var{ev}
|
||||||
|
@ -155,10 +155,10 @@ acceptable to use the address of an address-taken variable in a bounds
|
||||||
declaration. This is provided that the resulting address is not used to
|
declaration. This is provided that the resulting address is not used to
|
||||||
access memory in the bounds declaration (that is, indirectly access the
|
access memory in the bounds declaration (that is, indirectly access the
|
||||||
value of a variable). For example, given the declaration \code{int x},
|
value of a variable). For example, given the declaration \code{int x},
|
||||||
the expression \code{&x} may appear in a bounds expression.
|
the expression \code{&x} may appear in a bounds expression.
|
||||||
Chapter~\ref{chapter:pointers-to-data-with-arrayptrs} extends the
|
Chapter~\ref{chapter:pointers-to-data-with-arrayptrs} extends the
|
||||||
design to avoid these restrictions. It covers pointers to data with
|
design to avoid these restrictions. It covers pointers to data with
|
||||||
\arrayptr\ values, pointers to variables used in bounds, and bounds
|
\arrayptr\ values, pointers to variables used in bounds, and bounds
|
||||||
that use pointers.
|
that use pointers.
|
||||||
|
|
||||||
\section{Using bounds declarations}
|
\section{Using bounds declarations}
|
||||||
|
@ -169,7 +169,7 @@ by following the declarator with \code{:} \var{bounds-exp}. In that
|
||||||
case, the bounds expression applies to the variable that is the subject
|
case, the bounds expression applies to the variable that is the subject
|
||||||
of the declarator. By making the bounds be part of the program, this
|
of the declarator. By making the bounds be part of the program, this
|
||||||
preserves the control and efficiency of C. The bounds declarations will
|
preserves the control and efficiency of C. The bounds declarations will
|
||||||
be checked statically for correctness using rules described in
|
be checked statically for correctness using rules described in
|
||||||
Chapter~\ref{chapter:checking-bounds}.
|
Chapter~\ref{chapter:checking-bounds}.
|
||||||
|
|
||||||
\subsection{Bounds declarations at variable declarations}
|
\subsection{Bounds declarations at variable declarations}
|
||||||
|
@ -182,13 +182,13 @@ between \code{start} and \code{end}, where the integer stored at
|
||||||
|
|
||||||
\begin{lstlisting}
|
\begin{lstlisting}
|
||||||
int sum(array_ptr<int> start : bounds(start, end), array_ptr<int> end)
|
int sum(array_ptr<int> start : bounds(start, end), array_ptr<int> end)
|
||||||
{
|
{
|
||||||
int result = 0;
|
int result = 0;
|
||||||
array_ptr<int> current : bounds(start, end) = start;
|
array_ptr<int> current : bounds(start, end) = start;
|
||||||
while (current < end) {
|
while (current < end) {
|
||||||
result += *current++; // *current is bounds-checked. The checking ensures
|
result += *current++; // *current is bounds-checked. The checking ensures
|
||||||
// that current is within the bounds of (start, end)
|
// that current is within the bounds of (start, end)
|
||||||
// at the memory access.
|
// at the memory access.
|
||||||
}
|
}
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
@ -198,13 +198,13 @@ This can be written using \keyword{where} clauses as well, at the
|
||||||
inconvenience of typing variable names twice in declarations:
|
inconvenience of typing variable names twice in declarations:
|
||||||
|
|
||||||
\begin{lstlisting}
|
\begin{lstlisting}
|
||||||
int sum(array_ptr<int> start where start : bounds(start, end),
|
int sum(array_ptr<int> start where start : bounds(start, end),
|
||||||
array_ptr<int> end)
|
array_ptr<int> end)
|
||||||
{
|
{
|
||||||
int result = 0;
|
int result = 0;
|
||||||
array_ptr<int> current where current : bounds(start, end) = start;
|
array_ptr<int> current where current : bounds(start, end) = start;
|
||||||
while (current < end) {
|
while (current < end) {
|
||||||
result += *current++;
|
result += *current++;
|
||||||
}
|
}
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
@ -249,12 +249,12 @@ int bad_find(int key, array_ptr<int> a : count(len), int len)
|
||||||
This function adds two arrays of 2x2 arrays.
|
This function adds two arrays of 2x2 arrays.
|
||||||
|
|
||||||
\begin{lstlisting}
|
\begin{lstlisting}
|
||||||
int add(int a checked[][2][2] : count(len), int b checked[][2][2] : count(len),
|
int add(int a checked[][2][2] : count(len), int b checked[][2][2] : count(len),
|
||||||
int len)
|
int len)
|
||||||
{
|
{
|
||||||
for (int i = 0; i < len; i++) {
|
for (int i = 0; i < len; i++) {
|
||||||
// All array accesses are bounds checked
|
// All array accesses are bounds checked
|
||||||
a[i][0][0] += b[i][0][0];
|
a[i][0][0] += b[i][0][0];
|
||||||
a[i][0][1] += b[i][0][1];
|
a[i][0][1] += b[i][0][1];
|
||||||
a[i][1][0] += b[i][1][0];
|
a[i][1][0] += b[i][1][0];
|
||||||
a[i][1][1] += b[i][1][1];
|
a[i][1][1] += b[i][1][1];
|
||||||
|
@ -282,9 +282,9 @@ int sum(void)
|
||||||
This is a declaration of a function that copies bytes provided that the
|
This is a declaration of a function that copies bytes provided that the
|
||||||
pointers and lengths are aligned:
|
pointers and lengths are aligned:
|
||||||
\begin{lstlisting}
|
\begin{lstlisting}
|
||||||
int aligned_memcpy(array<char> dst where dst : count(len) &&
|
int aligned_memcpy(array<char> dst where dst : count(len) &&
|
||||||
aligned(dst, 4),
|
aligned(dst, 4),
|
||||||
array<char> src where src : count(len) &&
|
array<char> src where src : count(len) &&
|
||||||
aligned(src, 4),
|
aligned(src, 4),
|
||||||
int len where len % 4 == 0);
|
int len where len % 4 == 0);
|
||||||
\end{lstlisting}
|
\end{lstlisting}
|
||||||
|
@ -374,7 +374,7 @@ statements. It does not make sense to have dataflow-sensitive bounds for
|
||||||
externally-scoped variables and variables with static or thread storage.
|
externally-scoped variables and variables with static or thread storage.
|
||||||
The bounds extend to the next assignment to any variable in the bounds
|
The bounds extend to the next assignment to any variable in the bounds
|
||||||
declaration, with some exceptions for pointer incrementing or
|
declaration, with some exceptions for pointer incrementing or
|
||||||
decrementing. Section~\ref{section:extent-of-declarations}
|
decrementing. Section~\ref{section:extent-of-declarations}
|
||||||
describes the extent of flow-sensitive bounds
|
describes the extent of flow-sensitive bounds
|
||||||
in more detail.
|
in more detail.
|
||||||
|
|
||||||
|
@ -384,7 +384,7 @@ elements and then points to an array with 10 elements; the bounds are
|
||||||
adjusted accordingly.
|
adjusted accordingly.
|
||||||
|
|
||||||
\begin{lstlisting}
|
\begin{lstlisting}
|
||||||
void f(void)
|
void f(void)
|
||||||
{
|
{
|
||||||
int x[5];
|
int x[5];
|
||||||
int y[10];
|
int y[10];
|
||||||
|
@ -401,7 +401,7 @@ lazily to either \code{a} or \code{b}, depending on another parameter \code{val}
|
||||||
\begin{lstlisting}
|
\begin{lstlisting}
|
||||||
/* use either a or b depending on val */
|
/* use either a or b depending on val */
|
||||||
int choose(int val, array_ptr<int> a : count(alen), int alen,
|
int choose(int val, array_ptr<int> a : count(alen), int alen,
|
||||||
array_ptr<int> b : count(blen), int blen)
|
array_ptr<int> b : count(blen), int blen)
|
||||||
{
|
{
|
||||||
array_ptr<int> c;
|
array_ptr<int> c;
|
||||||
int clen;
|
int clen;
|
||||||
|
@ -412,7 +412,7 @@ int choose(int val, array_ptr<int> a : count(alen), int alen,
|
||||||
else {
|
else {
|
||||||
clen = blen;
|
clen = blen;
|
||||||
c = b where c : count(clen);
|
c = b where c : count(clen);
|
||||||
}
|
}
|
||||||
...
|
...
|
||||||
}
|
}
|
||||||
\end{lstlisting}
|
\end{lstlisting}
|
||||||
|
@ -425,7 +425,7 @@ the update. Consider the following example:
|
||||||
\begin{lstlisting}
|
\begin{lstlisting}
|
||||||
/* sum integers stored between start and end, where end is not included */
|
/* sum integers stored between start and end, where end is not included */
|
||||||
int sum(array_ptr<int> start : bounds(start, end), array_ptr<int> end)
|
int sum(array_ptr<int> start : bounds(start, end), array_ptr<int> end)
|
||||||
{
|
{
|
||||||
...
|
...
|
||||||
// Adjusting end. Can declare new bounds for start at the assignment
|
// Adjusting end. Can declare new bounds for start at the assignment
|
||||||
end = end - 1 where start : bounds(start, end + 1);
|
end = end - 1 where start : bounds(start, end + 1);
|
||||||
|
@ -466,11 +466,11 @@ added:
|
||||||
|
|
||||||
\code{byte_count} is the identifier \code{byte_count}.
|
\code{byte_count} is the identifier \code{byte_count}.
|
||||||
The bounds declaration \boundsdecl{\var{x}}{\boundsbytecount{\var{e1}}}
|
The bounds declaration \boundsdecl{\var{x}}{\boundsbytecount{\var{e1}}}
|
||||||
describes the number of bytes that are accessible beginning at \var{x}.
|
describes the number of bytes that are accessible beginning at \var{x}.
|
||||||
Only memory that is at or above \var{x} and below
|
Only memory that is at or above \var{x} and below
|
||||||
\lstinline|(|\arrayptrchar\lstinline|)| \var{x} \code{+} \var{e1} can be accessed through
|
\lstinline|(|\arrayptrchar\lstinline|)| \var{x} \code{+} \var{e1} can be accessed through
|
||||||
\var{x}. The type of \var{e1} must be an integral type. The usual C integer conversions are
|
\var{x}. The type of \var{e1} must be an integral type. The usual C integer conversions are
|
||||||
applied to \var{e1}. This bounds declaration is a synonym for
|
applied to \var{e1}. This bounds declaration is a synonym for
|
||||||
\boundsdecl{\var{x}}
|
\boundsdecl{\var{x}}
|
||||||
{\boundsrel{\lstinline|(|\arrayptrchar)\lstinline|)| \var{x}}
|
{\boundsrel{\lstinline|(|\arrayptrchar)\lstinline|)| \var{x}}
|
||||||
{\lstinline|(|\arrayptrchar)\lstinline|)| \var{x} \code{+} \var{e1}}
|
{\lstinline|(|\arrayptrchar)\lstinline|)| \var{x} \code{+} \var{e1}}
|
||||||
|
@ -479,7 +479,7 @@ applied to \var{e1}. This bounds declaration is a synonym for
|
||||||
The standard C library functions for \code{malloc}, \code{memcmp}, and
|
The standard C library functions for \code{malloc}, \code{memcmp}, and
|
||||||
\code{memcpy} will be
|
\code{memcpy} will be
|
||||||
given bounds-safe interfaces to avoid breaking existing code as
|
given bounds-safe interfaces to avoid breaking existing code as
|
||||||
described in Section~\ref{section:function-bounds-safe-interfaces}.
|
described in Section~\ref{section:function-bounds-safe-interfaces}.
|
||||||
However, if they were to return checked pointer
|
However, if they were to return checked pointer
|
||||||
types, their bounds declarations would be:
|
types, their bounds declarations would be:
|
||||||
|
|
||||||
|
@ -509,7 +509,7 @@ in-line bounds declarations and \keyword{where} clauses for declarations:
|
||||||
\begin{tabbing}
|
\begin{tabbing}
|
||||||
\var{init-}\=\var{declarator:} \\
|
\var{init-}\=\var{declarator:} \\
|
||||||
\>\var{declarator inline-bounds-specifier\textsubscript{opt} where-clause\textsubscript{opt}} \\
|
\>\var{declarator inline-bounds-specifier\textsubscript{opt} where-clause\textsubscript{opt}} \\
|
||||||
\>\var{declarator inline-bounds-specifier\textsubscript{opt}
|
\>\var{declarator inline-bounds-specifier\textsubscript{opt}
|
||||||
where-clause\textsubscript{opt}} \lstinline|=| \var{initializer
|
where-clause\textsubscript{opt}} \lstinline|=| \var{initializer
|
||||||
where-clause\textsubscript{opt}} \\
|
where-clause\textsubscript{opt}} \\
|
||||||
\>\ldots{} \\
|
\>\ldots{} \\
|
||||||
|
@ -518,7 +518,7 @@ where-clause\textsubscript{opt}} \\
|
||||||
\>\var{declaration-specifiers declarator} \var{inline-bounds-specifier\textsubscript{opt} where-clause\textsubscript{opt}} \\
|
\>\var{declaration-specifiers declarator} \var{inline-bounds-specifier\textsubscript{opt} where-clause\textsubscript{opt}} \\
|
||||||
\\
|
\\
|
||||||
\var{inline-bounds-specifier:}\\
|
\var{inline-bounds-specifier:}\\
|
||||||
\>\lstinline|:| \var{bounds-exp} \\
|
\>\lstinline|:| \var{bounds-exp} \\
|
||||||
\\
|
\\
|
||||||
\var{where-clause}: \\
|
\var{where-clause}: \\
|
||||||
\>\keyword{where} \var{facts} \\
|
\>\keyword{where} \var{facts} \\
|
||||||
|
@ -573,7 +573,7 @@ must be the identifier \code{any} or the identifier \code{unknown}.
|
||||||
The semantics of C expressions is subtle. The expression
|
The semantics of C expressions is subtle. The expression
|
||||||
\lstinline|*|\var{e1} does not dereference memory. It
|
\lstinline|*|\var{e1} does not dereference memory. It
|
||||||
produces an lvalue that can be used to access memory. If the expression
|
produces an lvalue that can be used to access memory. If the expression
|
||||||
occurs on the left-hand side of an assigment, the memory pointed to
|
occurs on the left-hand side of an assignment, the memory pointed to
|
||||||
by the lvalue is updated with the value of the right-hand side of
|
by the lvalue is updated with the value of the right-hand side of
|
||||||
the assignment.
|
the assignment.
|
||||||
For example, given \lstinline|*|\var{e1} \lstinline|=| \var{e2},
|
For example, given \lstinline|*|\var{e1} \lstinline|=| \var{e2},
|
||||||
|
@ -645,7 +645,7 @@ is used to access memory. The compiler computes the bounds of
|
||||||
runtime checks that \var{e1} \lstinline|+| \var{e2} is within this bounds as
|
runtime checks that \var{e1} \lstinline|+| \var{e2} is within this bounds as
|
||||||
part of the evaluation of \var{e1}\lstinline|[|\var{e2}\lstinline|]|. For example,
|
part of the evaluation of \var{e1}\lstinline|[|\var{e2}\lstinline|]|. For example,
|
||||||
given \lstinline|x[5]| where \lstinline|x| has \lstinline|bounds(x, x + c)|, the
|
given \lstinline|x[5]| where \lstinline|x| has \lstinline|bounds(x, x + c)|, the
|
||||||
compiler inserts runtime checks that \lstinline|x <= x + 5 < x + c|.
|
compiler inserts runtime checks that \lstinline|x <= x + 5 < x + c|.
|
||||||
The runtime checks simplify to \lstinline|5 < c|.
|
The runtime checks simplify to \lstinline|5 < c|.
|
||||||
|
|
||||||
For a multi-dimensional array access, only one bounds check is done.
|
For a multi-dimensional array access, only one bounds check is done.
|
||||||
|
@ -685,11 +685,11 @@ has {\bounds{\var{e2}}{\var{e3}}}:
|
||||||
\begin{itemize}
|
\begin{itemize}
|
||||||
\item Given an assignment of the form \lstinline|*|\var{e1} \lstinline|=| \var{e4}, the
|
\item Given an assignment of the form \lstinline|*|\var{e1} \lstinline|=| \var{e4}, the
|
||||||
value of \var{e4} is computed to some temporary \var{v}. The
|
value of \var{e4} is computed to some temporary \var{v}. The
|
||||||
check is \lstinline|(|\var{e2} \lstinline|<=| \var{t} \lstinline|&&| \var{t} \lstinline|<|
|
check is \lstinline|(|\var{e2} \lstinline|<=| \var{t} \lstinline|&&| \var{t} \lstinline|<|
|
||||||
\var{e3}\lstinline@) || (@ \var{e2} \lstinline|<=| \var{t} \lstinline|&&| \var{t}
|
\var{e3}\lstinline@) || (@ \var{e2} \lstinline|<=| \var{t} \lstinline|&&| \var{t}
|
||||||
\lstinline|==| \var{e3} \lstinline|&&| \var{v} \lstinline|== 0)|.
|
\lstinline|==| \var{e3} \lstinline|&&| \var{v} \lstinline|== 0)|.
|
||||||
|
|
||||||
When checking for a write of \code{0} exactly at the upper bound, we
|
When checking for a write of \code{0} exactly at the upper bound, we
|
||||||
include the first element of the sequence in the allowed memory range.
|
include the first element of the sequence in the allowed memory range.
|
||||||
The second lower bound comparison \var{e2} \lstinline|<=| \var{t}
|
The second lower bound comparison \var{e2} \lstinline|<=| \var{t}
|
||||||
prevents an assignment
|
prevents an assignment
|
||||||
|
@ -740,7 +740,7 @@ Additional storage would be particularly
|
||||||
problematic when bounds declarations are extend to structure members.
|
problematic when bounds declarations are extend to structure members.
|
||||||
It would cause the size and layout of data structures to change.
|
It would cause the size and layout of data structures to change.
|
||||||
Deferring evaluation also avoids complications when \code{x} is
|
Deferring evaluation also avoids complications when \code{x} is
|
||||||
\code{null}. Section~\ref{section:bounds-declarations-alternate-semantics}
|
\code{null}. Section~\ref{section:bounds-declarations-alternate-semantics}
|
||||||
discusses eager evaluation of bounds expressions at
|
discusses eager evaluation of bounds expressions at
|
||||||
bounds declarations in more detail and explains why this was not chosen.
|
bounds declarations in more detail and explains why this was not chosen.
|
||||||
|
|
||||||
|
@ -750,7 +750,7 @@ It is common in C code to use multiple statements to update program state.
|
||||||
This can cause problems when variables in a bounds declaration are updated.
|
This can cause problems when variables in a bounds declaration are updated.
|
||||||
Invariant bounds declarations must be valid at the end of every statement.
|
Invariant bounds declarations must be valid at the end of every statement.
|
||||||
When updates involve multiple statements, a bounds declaration may be valid only
|
When updates involve multiple statements, a bounds declaration may be valid only
|
||||||
after all the updates are done. In Checked C, statements and declarations can be
|
after all the updates are done. In Checked C, statements and declarations can be
|
||||||
grouped into bundled blocks. Bounds declarations are checked only at the end of bundled blocks.
|
grouped into bundled blocks. Bounds declarations are checked only at the end of bundled blocks.
|
||||||
|
|
||||||
Consider the following example where a function is added to the earlier \code{sum}
|
Consider the following example where a function is added to the earlier \code{sum}
|
||||||
|
@ -770,7 +770,7 @@ int sum(void)
|
||||||
}
|
}
|
||||||
|
|
||||||
/* buggy resize function */
|
/* buggy resize function */
|
||||||
void resize(int len)
|
void resize(int len)
|
||||||
{
|
{
|
||||||
array_ptr<int> tmp : count(len) = malloc(sizeof(int) * len);
|
array_ptr<int> tmp : count(len) = malloc(sizeof(int) * len);
|
||||||
copy(tmp, buf, buflen);
|
copy(tmp, buf, buflen);
|
||||||
|
@ -787,7 +787,7 @@ The updates to \code{buflen} and \code{buf} can be grouped together,
|
||||||
so the checker considers them to be one action:
|
so the checker considers them to be one action:
|
||||||
\begin{lstlisting}
|
\begin{lstlisting}
|
||||||
/* correct resize function */
|
/* correct resize function */
|
||||||
void resize(int len)
|
void resize(int len)
|
||||||
{
|
{
|
||||||
array_ptr<int> tmp : count(len) = malloc(sizeof(int) * len);
|
array_ptr<int> tmp : count(len) = malloc(sizeof(int) * len);
|
||||||
copy(tmp, buf, buflen);
|
copy(tmp, buf, buflen);
|
||||||
|
@ -812,7 +812,7 @@ The C syntax for is extended with:
|
||||||
\\
|
\\
|
||||||
\var{bundled-item:}\\
|
\var{bundled-item:}\\
|
||||||
\> \var{declaration}\\
|
\> \var{declaration}\\
|
||||||
\> \var{expression-statement}
|
\> \var{expression-statement}
|
||||||
\end{tabbing}
|
\end{tabbing}
|
||||||
|
|
||||||
There is some subtlety with bundled blocks and function calls. The
|
There is some subtlety with bundled blocks and function calls. The
|
||||||
|
@ -825,7 +825,7 @@ results in temporary variables and updating static variables \textit{en
|
||||||
masse} after the required function calls have been made.
|
masse} after the required function calls have been made.
|
||||||
|
|
||||||
Bundled blocks expose an interesting difference between regular C
|
Bundled blocks expose an interesting difference between regular C
|
||||||
and Checked C programs. In regular C,
|
and Checked C programs. In regular C,
|
||||||
\begin{lstlisting}
|
\begin{lstlisting}
|
||||||
expr1, expr2;
|
expr1, expr2;
|
||||||
\end{lstlisting}
|
\end{lstlisting}
|
||||||
|
@ -843,7 +843,7 @@ statements. Thus bounds could be valid after
|
||||||
In the \code{resize} example, the following suceeds with a bundle
|
In the \code{resize} example, the following suceeds with a bundle
|
||||||
block:
|
block:
|
||||||
\begin{lstlisting}
|
\begin{lstlisting}
|
||||||
void resize(int len)
|
void resize(int len)
|
||||||
{
|
{
|
||||||
array_ptr<int> tmp = malloc(sizeof(int) * len);
|
array_ptr<int> tmp = malloc(sizeof(int) * len);
|
||||||
copy(tmp, buf, buflen);
|
copy(tmp, buf, buflen);
|
||||||
|
@ -1043,7 +1043,7 @@ the declaration.
|
||||||
\label{section:integer-overflow-informal}
|
\label{section:integer-overflow-informal}
|
||||||
|
|
||||||
When objects are allocated dynamically in C, programmers have to compute
|
When objects are allocated dynamically in C, programmers have to compute
|
||||||
the amount of memory to allocate for the objects. It is well-known
|
the amount of memory to allocate for the objects. It is well-known
|
||||||
that integer overflow or wraparound in these computations can lead to buffer
|
that integer overflow or wraparound in these computations can lead to buffer
|
||||||
overruns \cite{Howard2003,Mitre2015-128,Mitre2015-190,Mitre2015-680,Dietz2015}.
|
overruns \cite{Howard2003,Mitre2015-128,Mitre2015-190,Mitre2015-680,Dietz2015}.
|
||||||
In Checked C, the explicit size computations are not enough
|
In Checked C, the explicit size computations are not enough
|
||||||
|
@ -1073,10 +1073,10 @@ a mismatch when overflow or wraparound can happen, which causes static
|
||||||
checking to fail.
|
checking to fail.
|
||||||
|
|
||||||
We expand the count expression; to integer arithmetic to make its size
|
We expand the count expression; to integer arithmetic to make its size
|
||||||
computation clear. \lstinline|count(|\var{e1}\lstinline|)| expands to
|
computation clear. \lstinline|count(|\var{e1}\lstinline|)| expands to
|
||||||
\bounds{\code{p}}{\code{p +} \var{e1}}.
|
\bounds{\code{p}}{\code{p +} \var{e1}}.
|
||||||
Following the rules in Section~\ref{section:pointers-as-integers},
|
Following the rules in Section~\ref{section:pointers-as-integers},
|
||||||
the expansion of \lstinline|p +| \var{e1} from pointer arithmetic to integer
|
the expansion of \lstinline|p +| \var{e1} from pointer arithmetic to integer
|
||||||
arithmetic depends on the type of \var{e1}:
|
arithmetic depends on the type of \var{e1}:
|
||||||
|
|
||||||
\begin{itemize}
|
\begin{itemize}
|
||||||
|
@ -1086,7 +1086,7 @@ arithmetic depends on the type of \var{e1}:
|
||||||
\var{e1}.
|
\var{e1}.
|
||||||
\item
|
\item
|
||||||
If \var{e1} is a signed integer, \lstinline|p +| \var{e1} expands to
|
If \var{e1} is a signed integer, \lstinline|p +| \var{e1} expands to
|
||||||
\lstinline|p| \lstinline{+}\textsubscript{ovf}
|
\lstinline|p| \lstinline{+}\textsubscript{ovf}
|
||||||
\lstinline|((signed_size_t)| \sizeof{\var{T}}\lstinline|)| \lstinline{*}\textsubscript{ovf}
|
\lstinline|((signed_size_t)| \sizeof{\var{T}}\lstinline|)| \lstinline{*}\textsubscript{ovf}
|
||||||
\var{e1}.
|
\var{e1}.
|
||||||
\end{itemize}
|
\end{itemize}
|
||||||
|
@ -1103,11 +1103,11 @@ Values differ?\tabularnewline
|
||||||
\midrule
|
\midrule
|
||||||
\endhead
|
\endhead
|
||||||
Unsigned integer & \sizeof{\var{T}} \lstinline|*|\textsubscript{ovf} \var{e1} &
|
Unsigned integer & \sizeof{\var{T}} \lstinline|*|\textsubscript{ovf} \var{e1} &
|
||||||
\sizeof{\var{T}} \lstinline|* |\var{e1} &
|
\sizeof{\var{T}} \lstinline|* |\var{e1} &
|
||||||
On overflow\tabularnewline
|
On overflow\tabularnewline
|
||||||
Signed integer & \lstinline|(signed_size_t)| \sizeof{\var{T}}
|
Signed integer & \lstinline|(signed_size_t)| \sizeof{\var{T}}
|
||||||
\lstinline|*|\textsubscript{ovf} \var{e1} &
|
\lstinline|*|\textsubscript{ovf} \var{e1} &
|
||||||
\sizeof{\var{T}} \lstinline|* (size_t)| \var{e1} &
|
\sizeof{\var{T}} \lstinline|* (size_t)| \var{e1} &
|
||||||
On overflow or when \code{e1 < 0}.\tabularnewline
|
On overflow or when \code{e1 < 0}.\tabularnewline
|
||||||
\bottomrule
|
\bottomrule
|
||||||
\end{longtable}
|
\end{longtable}
|
||||||
|
@ -1166,7 +1166,7 @@ Function types can be declared to have parameters and return values that have bo
|
||||||
declarations, just like functions. The bounds declarations become part of the function types.
|
declarations, just like functions. The bounds declarations become part of the function types.
|
||||||
Here are simple examples of function pointer types with bounds declarations:
|
Here are simple examples of function pointer types with bounds declarations:
|
||||||
\begin{lstlisting}
|
\begin{lstlisting}
|
||||||
// Function that takes a pointer to a 5 element array and returns an
|
// Function that takes a pointer to a 5 element array and returns an
|
||||||
// integer.
|
// integer.
|
||||||
int (array_ptr<int> arr : count(5))
|
int (array_ptr<int> arr : count(5))
|
||||||
// checked function pointer
|
// checked function pointer
|
||||||
|
@ -1233,7 +1233,7 @@ the pointer has type
|
||||||
\arrayptrT. The
|
\arrayptrT. The
|
||||||
bounds specify a range of memory that is exactly the size of some array
|
bounds specify a range of memory that is exactly the size of some array
|
||||||
of \var{T} and the pointer points exactly at an element of that array. For
|
of \var{T} and the pointer points exactly at an element of that array. For
|
||||||
example, suppose short ints are 2 bytes in size and
|
example, suppose short ints are 2 bytes in size and
|
||||||
{\boundsdecl{\code{x}}{\bounds{\code{y}}{\code{z}}}}, where the types of \code{x},
|
{\boundsdecl{\code{x}}{\bounds{\code{y}}{\code{z}}}}, where the types of \code{x},
|
||||||
\code{y}, \code{z} are \arrayptrinst{\code{short int}} .
|
\code{y}, \code{z} are \arrayptrinst{\code{short int}} .
|
||||||
|
|
||||||
|
@ -1264,7 +1264,7 @@ becomes more expensive. With relative alignment, the bounds check is
|
||||||
alignment, the check for the upper bound needs to compute the highest
|
alignment, the check for the upper bound needs to compute the highest
|
||||||
address that will be accessed using x. For this example, the highest
|
address that will be accessed using x. For this example, the highest
|
||||||
address accessed will be
|
address accessed will be
|
||||||
\lstinline|(|\arrayptrchar\lstinline|) x + sizeof(short int) - 1|, so the check becomes
|
\lstinline|(|\arrayptrchar\lstinline|) x + sizeof(short int) - 1|, so the check becomes
|
||||||
\code{y <= x} and \code{x + 1 < z}.
|
\code{y <= x} and \code{x + 1 < z}.
|
||||||
|
|
||||||
A pointer cast can create a pointer that is not relatively aligned to
|
A pointer cast can create a pointer that is not relatively aligned to
|
||||||
|
@ -1309,8 +1309,8 @@ instead of \code{char}) usually already are doing these checks.
|
||||||
\subsection{Representing relative alignment in bounds declarations}
|
\subsection{Representing relative alignment in bounds declarations}
|
||||||
\label{section:representing-relative-alignment}
|
\label{section:representing-relative-alignment}
|
||||||
|
|
||||||
Bounds expressions are extended with an optional relative alignment
|
Bounds expressions are extended with an optional relative alignment
|
||||||
clause to represent situations where an \arrayptrT\ is not relatively
|
clause to represent situations where an \arrayptrT\ is not relatively
|
||||||
aligned to its bounds for type \var{T}:
|
aligned to its bounds for type \var{T}:
|
||||||
|
|
||||||
\begin{tabbing}
|
\begin{tabbing}
|
||||||
|
@ -1321,7 +1321,7 @@ aligned to its bounds for type \var{T}:
|
||||||
\\
|
\\
|
||||||
\var{relative-alignment-clause:}\\
|
\var{relative-alignment-clause:}\\
|
||||||
\> \relalign{\var{type}} \\
|
\> \relalign{\var{type}} \\
|
||||||
\> \relalignval{\var{constant-exp}}
|
\> \relalignval{\var{constant-exp}}
|
||||||
\end{tabbing}
|
\end{tabbing}
|
||||||
|
|
||||||
This clause is only added to bounds pairs because (by definition) count
|
This clause is only added to bounds pairs because (by definition) count
|
||||||
|
@ -1331,9 +1331,9 @@ identifiers. The optional clause specifies a
|
||||||
relative alignment type \var{T} or the required relative alignment in
|
relative alignment type \var{T} or the required relative alignment in
|
||||||
bytes. Given
|
bytes. Given
|
||||||
\boundsdecl{\var{x}}{\boundsrel{\var{e1}}{\var{e2}}{\var{T}}},
|
\boundsdecl{\var{x}}{\boundsrel{\var{e1}}{\var{e2}}{\var{T}}},
|
||||||
\lstinline|((|\arrayptrchar\lstinline|)| \var{x} \lstinline|- (|\arrayptrchar\lstinline|)| \var{e1})
|
\lstinline|((|\arrayptrchar\lstinline|)| \var{x} \lstinline|- (|\arrayptrchar\lstinline|)| \var{e1})
|
||||||
\lstinline|%| \sizeof{\var{T}}\lstinline|== 0| and
|
\lstinline|%| \sizeof{\var{T}}\lstinline|== 0| and
|
||||||
\lstinline|((|\arrayptrchar\lstinline|)| \var{e2} \lstinline|- (|\arrayptrchar\lstinline|)| \var{x})
|
\lstinline|((|\arrayptrchar\lstinline|)| \var{e2} \lstinline|- (|\arrayptrchar\lstinline|)| \var{x})
|
||||||
\lstinline|% sizeof(|\var{T}\lstinline|) == 0| must be true. If the number of bytes is
|
\lstinline|% sizeof(|\var{T}\lstinline|) == 0| must be true. If the number of bytes is
|
||||||
specified, \sizeof{\var{T}} is replaced by the
|
specified, \sizeof{\var{T}} is replaced by the
|
||||||
constant expression. The relative alignment clause \relalign{\var{type}}
|
constant expression. The relative alignment clause \relalign{\var{type}}
|
||||||
|
@ -1346,7 +1346,7 @@ If the default relative alignment has been overridden and
|
||||||
\sizeof{referent-type(\var{e1})} is a common factor of \sizeof{\var{T}}
|
\sizeof{referent-type(\var{e1})} is a common factor of \sizeof{\var{T}}
|
||||||
If it is, it inserts the same
|
If it is, it inserts the same
|
||||||
runtime check as before. Otherwise, it inserts a runtime check that
|
runtime check as before. Otherwise, it inserts a runtime check that
|
||||||
\var{e2} \lstinline|<=| \var{e1} \lstinline|&&|
|
\var{e2} \lstinline|<=| \var{e1} \lstinline|&&|
|
||||||
\var{e1} \lstinline|+ sizeof(|\var{T}) \lstinline|- 1 <| \var{e3}.
|
\var{e1} \lstinline|+ sizeof(|\var{T}) \lstinline|- 1 <| \var{e3}.
|
||||||
|
|
||||||
\subsection{Examples of uses of bounds declarations that specify relative alignment}
|
\subsection{Examples of uses of bounds declarations that specify relative alignment}
|
||||||
|
@ -1403,7 +1403,7 @@ possible. For simplicity, it is assumed that the memory pointed to by
|
||||||
\code{dst} and \code{src} does not overlap and that \code{sizeof(int)} is 4:
|
\code{dst} and \code{src} does not overlap and that \code{sizeof(int)} is 4:
|
||||||
\begin{lstlisting}
|
\begin{lstlisting}
|
||||||
void copy(array_ptr<char> dst : bounds(dst, dst + num),
|
void copy(array_ptr<char> dst : bounds(dst, dst + num),
|
||||||
array_ptr<char> src : bounds(src, src + num),
|
array_ptr<char> src : bounds(src, src + num),
|
||||||
size_t num)
|
size_t num)
|
||||||
{
|
{
|
||||||
if (num % 4 == 0) {
|
if (num % 4 == 0) {
|
||||||
|
@ -1429,7 +1429,7 @@ relative alignment can be changed:
|
||||||
|
|
||||||
\begin{lstlisting}
|
\begin{lstlisting}
|
||||||
void copy(array_ptr<char> dst : bounds(dst, dst + num),
|
void copy(array_ptr<char> dst : bounds(dst, dst + num),
|
||||||
array_ptr<char> src : bounds(src, src + num),
|
array_ptr<char> src : bounds(src, src + num),
|
||||||
size_t num)
|
size_t num)
|
||||||
{
|
{
|
||||||
if (num % 4 == 0) {
|
if (num % 4 == 0) {
|
||||||
|
@ -1444,7 +1444,7 @@ void copy(array_ptr<char> dst : bounds(dst, dst + num),
|
||||||
aligned_dst[i] = aligned_src[i];
|
aligned_dst[i] = aligned_src[i];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
...
|
...
|
||||||
}
|
}
|
||||||
\end{lstlisting}
|
\end{lstlisting}
|
||||||
|
@ -1472,12 +1472,12 @@ array_ptr<int> r : count(2) = (array_ptr<int>) p;
|
||||||
Variables that have bounds declared for them at expression statements
|
Variables that have bounds declared for them at expression statements
|
||||||
have dataflow-sensitive bounds declarations. There are two aspects to
|
have dataflow-sensitive bounds declarations. There are two aspects to
|
||||||
dataflow-sensitive bounds: extent and consistency. The extent of a
|
dataflow-sensitive bounds: extent and consistency. The extent of a
|
||||||
flow-sensitive bounds declaration is the part of the program to which
|
flow-sensitive bounds declaration is the part of the program to which
|
||||||
a flow-sensitive bounds
|
a flow-sensitive bounds
|
||||||
declaration applies. Within the extent of a flow-sensitive
|
declaration applies. Within the extent of a flow-sensitive
|
||||||
bounds declaration, the
|
bounds declaration, the
|
||||||
bounds for the variable for memory dereferences are given by the bounds
|
bounds for the variable for memory dereferences are given by the bounds
|
||||||
declaration. Consistency is that all the flow-sensitive bounds
|
declaration. Consistency is that all the flow-sensitive bounds
|
||||||
declarations flowing to a statement agree with each other.
|
declarations flowing to a statement agree with each other.
|
||||||
|
|
||||||
As mentioned in Section~\ref{section:statement-declarations},
|
As mentioned in Section~\ref{section:statement-declarations},
|
||||||
|
@ -1511,19 +1511,19 @@ lead to unexpected bounds checks failures.
|
||||||
\subsection{Definition of extent}
|
\subsection{Definition of extent}
|
||||||
\label{section:extent-definition}
|
\label{section:extent-definition}
|
||||||
|
|
||||||
We first define the set of flow-sensitive bounds declarations that apply to a
|
We first define the set of flow-sensitive bounds declarations that apply to a
|
||||||
component of a function. A component is an expression statement,
|
component of a function. A component is an expression statement,
|
||||||
variable declaration, or a compound statement. For any flow-sensitive bounds
|
variable declaration, or a compound statement. For any flow-sensitive bounds
|
||||||
declaration for a variable \var{v}, if
|
declaration for a variable \var{v}, if
|
||||||
|
|
||||||
\begin{compactenum}
|
\begin{compactenum}
|
||||||
\item
|
\item
|
||||||
There is some path from the bounds declaration to the
|
There is some path from the bounds declaration to the
|
||||||
component, and
|
component, and
|
||||||
\item
|
\item
|
||||||
\var{v} occurs in the component, and
|
\var{v} occurs in the component, and
|
||||||
\item
|
\item
|
||||||
there is no other flow-sensitive bounds declaration for
|
there is no other flow-sensitive bounds declaration for
|
||||||
\var{v} along the path
|
\var{v} along the path
|
||||||
\end{compactenum}
|
\end{compactenum}
|
||||||
|
|
||||||
|
@ -1531,26 +1531,26 @@ then
|
||||||
|
|
||||||
\begin{compactenum}
|
\begin{compactenum}
|
||||||
\item
|
\item
|
||||||
if all the variables occurring in the bounds declaration are
|
if all the variables occurring in the bounds declaration are
|
||||||
in-scope at the component, and
|
in-scope at the component, and
|
||||||
\begin{compactenum}
|
\begin{compactenum}
|
||||||
\item No expressions or statements on the path modify a variable
|
\item No expressions or statements on the path modify a variable
|
||||||
occurring in the bounds declaration
|
occurring in the bounds declaration
|
||||||
\item or
|
\item or
|
||||||
\begin{compactenum}
|
\begin{compactenum}
|
||||||
\item The only expressions or statements on the path that
|
\item The only expressions or statements on the path that
|
||||||
modify a variable occurring in the bounds declaration
|
modify a variable occurring in the bounds declaration
|
||||||
are pointer increments or decrements of \var{v}, and
|
are pointer increments or decrements of \var{v}, and
|
||||||
\item \var{v} does not occur in the bounds expression, and
|
\item \var{v} does not occur in the bounds expression, and
|
||||||
\item the bounds expression is not a count expression
|
\item the bounds expression is not a count expression
|
||||||
\end{compactenum}
|
\end{compactenum}
|
||||||
\end{compactenum}
|
\end{compactenum}
|
||||||
then the bounds declaration applies to the component
|
then the bounds declaration applies to the component
|
||||||
\item
|
\item
|
||||||
Otherwise, the bounds declaration \boundsdecl{\var{v}}{\boundsunknown}
|
Otherwise, the bounds declaration \boundsdecl{\var{v}}{\boundsunknown}
|
||||||
applies to the component.
|
applies to the component.
|
||||||
\end{compactenum}
|
\end{compactenum}
|
||||||
|
|
||||||
\subsection{Consistency}
|
\subsection{Consistency}
|
||||||
|
|
||||||
These conditions ensure the consistency of bounds declarations:
|
These conditions ensure the consistency of bounds declarations:
|
||||||
|
@ -1582,7 +1582,7 @@ rejected by the compiler.
|
||||||
/* buggy function */
|
/* buggy function */
|
||||||
/* sum integers stored between start and end, where end is not included */
|
/* sum integers stored between start and end, where end is not included */
|
||||||
int sum(array_ptr<int> start : bounds(start, end), array_ptr<int> end)
|
int sum(array_ptr<int> start : bounds(start, end), array_ptr<int> end)
|
||||||
{
|
{
|
||||||
end = end + 1; // bounds(start, end) does not hold after this, so program is
|
end = end + 1; // bounds(start, end) does not hold after this, so program is
|
||||||
// rejected
|
// rejected
|
||||||
start[5] = 0;
|
start[5] = 0;
|
||||||
|
@ -1595,22 +1595,22 @@ A correct function declares new bounds for \code{start}:
|
||||||
\begin{lstlisting}
|
\begin{lstlisting}
|
||||||
/* sum integers stored between start and end, where end is not included */
|
/* sum integers stored between start and end, where end is not included */
|
||||||
int sum(array_ptr<int> start : bounds(start, end), array_ptr<int> end)
|
int sum(array_ptr<int> start : bounds(start, end), array_ptr<int> end)
|
||||||
{
|
{
|
||||||
end = end + 1 where start : bounds(start, end - 1);
|
end = end + 1 where start : bounds(start, end - 1);
|
||||||
start[5] = 0; // program accepted by compiler; may fail bounds check
|
start[5] = 0; // program accepted by compiler; may fail bounds check
|
||||||
// at run time
|
// at run time
|
||||||
...
|
...
|
||||||
}
|
}
|
||||||
\end{lstlisting}
|
\end{lstlisting}
|
||||||
|
|
||||||
The following example shows the extents of the two bounds declarations for
|
The following example shows the extents of the two bounds declarations for
|
||||||
\code{start}. The bounds declaration at the declaration of \code{start} and
|
\code{start}. The bounds declaration at the declaration of \code{start} and
|
||||||
its extent is highlighted in blue. The bounds declaration at the assignment
|
its extent is highlighted in blue. The bounds declaration at the assignment
|
||||||
to \code{end} and its extent is highlighted in yellow:
|
to \code{end} and its extent is highlighted in yellow:
|
||||||
|
|
||||||
\sethlcolor{lightblue}
|
\sethlcolor{lightblue}
|
||||||
\begin{lstlisting}[escapechar=\|]
|
\begin{lstlisting}[escapechar=\|]
|
||||||
/* sum integers stored between start and end, where end is not included */
|
/* sum integers stored between start and end, where end is not included */
|
||||||
int sum(array_ptr<int> start : |\hl{bounds(start, end)}|, array_ptr<int> end)
|
int sum(array_ptr<int> start : |\hl{bounds(start, end)}|, array_ptr<int> end)
|
||||||
|\hl{\{\\
|
|\hl{\{\\
|
||||||
start[5] = 0; // bounds checked using (start, end)}\\
|
start[5] = 0; // bounds checked using (start, end)}\\
|
||||||
|
@ -1620,14 +1620,14 @@ start[5] = 0; // bounds checked using (start, end - 1)\\
|
||||||
}
|
}
|
||||||
\end{lstlisting}
|
\end{lstlisting}
|
||||||
|
|
||||||
Figure~\ref{fig:bounds-extent:choose} expands on the \code{choose} function earlier.
|
Figure~\ref{fig:bounds-extent:choose} expands on the \code{choose} function earlier.
|
||||||
There are 4 explicit bounds declarations for \code{c}. The bounds declarations
|
There are 4 explicit bounds declarations for \code{c}. The bounds declarations
|
||||||
and their extents are also color-coded. Three bounds declarations for \code{c}
|
and their extents are also color-coded. Three bounds declarations for \code{c}
|
||||||
have the form \boundsdecl{\code{c}}{\boundscount{\code{clen}}}. They must be syntactically
|
have the form \boundsdecl{\code{c}}{\boundscount{\code{clen}}}. They must be syntactically
|
||||||
identical because there are statements that they cover in common. Their
|
identical because there are statements that they cover in common. Their
|
||||||
extents are highlighted in blue. Another bounds declaration has
|
extents are highlighted in blue. Another bounds declaration has
|
||||||
the form \boundsdecl{\code{c}}{\boundscount{\code{clen - 1}}}. Its extent is highlighted
|
the form \boundsdecl{\code{c}}{\boundscount{\code{clen - 1}}}. Its extent is highlighted
|
||||||
in yellow. It covers code where \code{c} is incremented and the
|
in yellow. It covers code where \code{c} is incremented and the
|
||||||
length is decremented. The increment invalidates the bounds declaration
|
length is decremented. The increment invalidates the bounds declaration
|
||||||
\boundsdecl{\code{c}}{\boundscount{\code{clen}}}. A different bounds declaration for \code{c}
|
\boundsdecl{\code{c}}{\boundscount{\code{clen}}}. A different bounds declaration for \code{c}
|
||||||
is needed after the increment.
|
is needed after the increment.
|
||||||
|
@ -1650,16 +1650,16 @@ int choose(int val, array_ptr<int> a: count(alen), int alen,
|
||||||
clen = blen;
|
clen = blen;
|
||||||
c = b where |\hl{c : count(clen);}|
|
c = b where |\hl{c : count(clen);}|
|
||||||
|\hl{\}}|
|
|\hl{\}}|
|
||||||
|
|
||||||
|\hl{if (clen > 1) \{}|
|
|\hl{if (clen > 1) \{}|
|
||||||
|\hl{result = c[0];}|
|
|\hl{result = c[0];}|
|
||||||
|\hl{\}}|
|
|\hl{\}}|
|
||||||
|
|
||||||
|\hl{if (cond \&\& clen > 1) \{}|
|
|\hl{if (cond \&\& clen > 1) \{}|
|
||||||
c = |\hl{c + 1}| where |\sethlcolor{lightyellow}\hl{c : count(clen - 1);}|
|
c = |\hl{c + 1}| where |\sethlcolor{lightyellow}\hl{c : count(clen - 1);}|
|
||||||
clen = |\sethlcolor{lightyellow}\hl{clen - 1}| where |\sethlcolor{lightblue}\hl{c : count(clen);}|
|
clen = |\sethlcolor{lightyellow}\hl{clen - 1}| where |\sethlcolor{lightblue}\hl{c : count(clen);}|
|
||||||
|\hl{result += c[0];}|
|
|\hl{result += c[0];}|
|
||||||
|\hl{\}}|
|
|\hl{\}}|
|
||||||
|\hl{...}|
|
|\hl{...}|
|
||||||
|\hl{return result;}|
|
|\hl{return result;}|
|
||||||
}
|
}
|
||||||
|
@ -1687,8 +1687,8 @@ function. In functions that do not have loops in their control-flow, the
|
||||||
dataflow analysis can be done in O(N*M) time.
|
dataflow analysis can be done in O(N*M) time.
|
||||||
|
|
||||||
The dataflow analysis uses a lattice of values, assigning one lattice
|
The dataflow analysis uses a lattice of values, assigning one lattice
|
||||||
value to each variable at each program point in the function.
|
value to each variable at each program point in the function.
|
||||||
Figure~\ref{fig:extent-dataflow-lattice} shows the lattice of values.
|
Figure~\ref{fig:extent-dataflow-lattice} shows the lattice of values.
|
||||||
It includes singleton values consisting of the bounds
|
It includes singleton values consisting of the bounds
|
||||||
expressions in the flow-sensitive bounds declaration for variables in
|
expressions in the flow-sensitive bounds declaration for variables in
|
||||||
the function, \boundsunknown\ (indicating absence of bounds
|
the function, \boundsunknown\ (indicating absence of bounds
|
||||||
|
@ -1706,11 +1706,11 @@ information), and \code{Top} (indicating contradiction or error):
|
||||||
\caption{The lattice of values used in dataflow computation of extent}
|
\caption{The lattice of values used in dataflow computation of extent}
|
||||||
\label{fig:extent-dataflow-lattice}
|
\label{fig:extent-dataflow-lattice}
|
||||||
\end{figure}
|
\end{figure}
|
||||||
|
|
||||||
For an assignment to an \arrayptr\ variable, the existing
|
For an assignment to an \arrayptr\ variable, the existing
|
||||||
lattice value for the \arrayptr\ variable is killed, unless
|
lattice value for the \arrayptr\ variable is killed, unless
|
||||||
the special conditions described in Section~\ref{section:extent-of-declarations}
|
the special conditions described in Section~\ref{section:extent-of-declarations}
|
||||||
are met.
|
are met.
|
||||||
A new lattice value is generated for the \arrayptr\ variable. If the
|
A new lattice value is generated for the \arrayptr\ variable. If the
|
||||||
assignment declares bounds for the \arrayptr\ variable, the new
|
assignment declares bounds for the \arrayptr\ variable, the new
|
||||||
lattice value is the bounds expression in the bounds declaration.
|
lattice value is the bounds expression in the bounds declaration.
|
||||||
|
@ -1749,7 +1749,7 @@ loop. The loop modifies \code{current}, but the bounds for
|
||||||
\begin{lstlisting}
|
\begin{lstlisting}
|
||||||
/* sum integers stored between start and end, where end is not included */
|
/* sum integers stored between start and end, where end is not included */
|
||||||
int sum(array_ptr<int> start where start : bounds(start, end), array_ptr<int> end)
|
int sum(array_ptr<int> start where start : bounds(start, end), array_ptr<int> end)
|
||||||
{
|
{
|
||||||
int sum = 0;
|
int sum = 0;
|
||||||
array_ptr<int> current : (start, end) = start;
|
array_ptr<int> current : (start, end) = start;
|
||||||
|
|
||||||
|
@ -1776,10 +1776,10 @@ eliminate the runtime bounds checks easily.
|
||||||
|
|
||||||
\begin{lstlisting}
|
\begin{lstlisting}
|
||||||
/* lexicographic comparison of two arrays of integers */
|
/* lexicographic comparison of two arrays of integers */
|
||||||
int compare(array_ptr<int> x : bounds(x, x_end),
|
int compare(array_ptr<int> x : bounds(x, x_end),
|
||||||
array_ptr<int> y : bounds(y, y_end)
|
array_ptr<int> y : bounds(y, y_end)
|
||||||
array_ptr<int> x_end, array_ptr<int> y_end)
|
array_ptr<int> x_end, array_ptr<int> y_end)
|
||||||
{
|
{
|
||||||
while (x < x_end && y < y_end) {
|
while (x < x_end && y < y_end) {
|
||||||
if (*x == *y) { // bounds check: x >= x && x < x_end; easily
|
if (*x == *y) { // bounds check: x >= x && x < x_end; easily
|
||||||
// optimizable.
|
// optimizable.
|
||||||
|
@ -1802,7 +1802,7 @@ int compare(array_ptr<int> x : bounds(x, x_end),
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
\end{lstlisting}
|
\end{lstlisting}
|
||||||
|
|
Загрузка…
Ссылка в новой задаче