.ds TYPE C
.\" TYPE selects the language variant of this page.  The macros defined
.\" below compare it against "C", "CXX" and "JAVA" to choose their text.
.\"
.\" See the file LICENSE for redistribution information.
.\"
.\" Copyright (c) 1996, 1997, 1998
.\"	Sleepycat Software.  All rights reserved.
.\"
.\" Copyright (c) 1994, 1995
.\"	The President and Fellows of Harvard University.  All rights reserved.
.\"
.\" Redistribution and use in source and binary forms, with or without
.\" modification, are permitted provided that the following conditions
.\" are met:
.\" 1. Redistributions of source code must retain the above copyright
.\"    notice, this list of conditions and the following disclaimer.
.\" 2. Redistributions in binary form must reproduce the above copyright
.\"    notice, this list of conditions and the following disclaimer in the
.\"    documentation and/or other materials provided with the distribution.
.\" 3. All advertising materials mentioning features or use of this software
.\"    must display the following acknowledgement:
.\"	This product includes software developed by the University of
.\"	California, Berkeley and its contributors.
.\" 4. Neither the name of the University nor the names of its contributors
.\"    may be used to endorse or promote products derived from this software
.\"    without specific prior written permission.
.\"
.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
.\" ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
.\" SUCH DAMAGE.
.\"
.\"	@(#)db_txn.so	10.34 (Sleepycat) 5/10/98
.\"
.\"
.\" See the file LICENSE for redistribution information.
.\"
.\" Copyright (c) 1997, 1998
.\"	Sleepycat Software.  All rights reserved.
.\"
.\"	@(#)macros.so	10.45 (Sleepycat) 5/4/98
.\"
.\" We don't want hyphenation for any HTML documents.
.\" When the HTML string is "YES" hyphenation is simply switched off.
.\" Otherwise two helper macros are defined: Hy turns hyphenation on and
.\" Nh turns it off, so that a macro which disables hyphenation (e.g., Sa)
.\" can restore it afterwards by calling .Hy.
.\" They have to be created with .de: a .ds here would only define empty
.\" strings and run the .hy/.nh requests immediately instead of storing them.
.ie '\*[HTML]'YES'\{\
.nh
\}
.el\{\
.de Hy
.hy
..
.de Nh
.nh
..
\}
.\" The alternative text macro
.\" This macro takes two arguments:
.\"	+ the text produced if this is a "C" manpage
.\"	+ the text produced if this is a "CXX" or "JAVA" manpage
.\"
.\" Al emits its first argument when the TYPE string is "C" and its second
.\" argument for every other TYPE value.
.de Al
.ie '\*[TYPE]'C'\{\\$1
\}
.el\{\\$2
\}
..
.\" Scoped name macro.
.\" Produces a_b, a::b, a.b depending on language
.\" This macro takes two arguments:
.\"	+ the class or prefix (without underscore)
.\"	+ the name within the class or following the prefix
.\" Sc is a three-way switch on TYPE: "C" joins the two arguments with "_",
.\" "CXX" joins them with "::", and any other value joins them with ".".
.de Sc
.ie '\*[TYPE]'C'\{\\$1_\\$2
\}
.el\{\
.ie '\*[TYPE]'CXX'\{\\$1::\\$2
\}
.el\{\\$1.\\$2
\}
\}
..
.\" Scoped name for Java.
.\" Produces Db.b, for Java, otherwise just b.  This macro is used for
.\" constants that must be scoped in Java, but are global otherwise.
.\" This macro takes two arguments:
.\"	+ the class
.\"	+ the name within the class or following the prefix
.\" Sj starts a tagged paragraph (.TP 5) whose tag is the first argument,
.\" prefixed with "Db." when TYPE is "JAVA".
.\" NOTE: only the first argument is referenced by the body, and the callers
.\" in this file pass just the constant name (e.g., ".Sj DB_CREATE").
.de Sj
.ie '\*[TYPE]'JAVA'\{\
.TP 5
Db.\\$1\}
.el\{\
.TP 5
\\$1\}
..
.\" The general information text macro.
.\" Gn takes no arguments.  It emits the introductory paragraph about the DB
.\" library: the "C" variant speaks of functions and functional groups, every
.\" other TYPE speaks of classes.  Both variants end with the reference to
.\" db_intro(3).
.de Gn
.ie '\*[TYPE]'C'\{The DB library is a family of groups of functions that provides a modular
programming interface to transactions and record-oriented file access.
The library includes support for transactions, locking, logging and file
page caching, as well as various indexed access methods.
Many of the functional groups (e.g., the file page caching functions)
are useful independent of the other DB functions,
although some functional groups are explicitly based on other functional
groups (e.g., transactions and logging).
\}
.el\{The DB library is a family of classes that provides a modular
programming interface to transactions and record-oriented file access.
The library includes support for transactions, locking, logging and file
page caching, as well as various indexed access methods.
Many of the classes (e.g., the file page caching class)
are useful independent of the other DB classes,
although some classes are explicitly based on other classes
(e.g., transactions and logging).
\}
For a general description of the DB package, see
.IR db_intro (3).
..
.\" The library error macro, the local error macro.
.\" These macros take one argument:
.\"	+ the function name.
.\" Ee (library error macro) introduces the list of DB and library functions
.\" whose errors the function named by the first argument may propagate.
.\" For "C" it says the function may fail and return errno; for other TYPEs
.\" it says the method may throw a DbException(3), and for "CXX" it adds
.\" "or return errno".
.\" The CXX-only block is opened with "\{\" (escaped newline) so that no
.\" empty input line is passed to the formatter in the middle of the sentence.
.de Ee
The
.I \\$1
.ie '\*[TYPE]'C'\{function may fail and return
.I errno
\}
.el\{method may fail and throw a
.IR DbException (3)
.if '\*[TYPE]'CXX'\{\
or return
.I errno
\}
\}
for any of the errors specified for the following DB and library functions:
..
.\" Ec (local error macro) introduces the list of error conditions specific
.\" to the function named by the first argument.
.\" "C": the function may fail and return errno.
.\" "CXX": the method may throw a DbException(3) or return errno.
.\" Any other TYPE: the method may throw a DbException(3) encapsulating an
.\" errno.
.de Ec
In addition, the
.I \\$1
.ie '\*[TYPE]'C'\{function may fail and return
.I errno
\}
.el\{method may fail and throw a
.IR DbException (3)
.ie '\*[TYPE]'CXX'\{or return
.I errno
\}
.el\{encapsulating an
.I errno
\}
\}
for the following conditions:
..
.\" Ea: EAGAIN entry for an unavailable lock.  Takes no arguments and emits
.\" only the tag line and the description line; it does not start a paragraph
.\" itself (presumably the caller issues .TP first -- confirm against callers).
.de Ea
[EAGAIN]
A lock was unavailable.
..
.\" Eb: EBUSY entry for a shared memory region that is in use while the force
.\" flag is not set.  Takes no arguments; emits the tag and description only.
.de Eb
[EBUSY]
The shared memory region was in use and the force flag was not set.
..
.\" Em: EAGAIN entry for a shared memory region that stayed locked.  Takes no
.\" arguments; emits the tag and description only.
.de Em
[EAGAIN]
The shared memory region was locked and (repeatedly) unavailable.
..
.\" Ei: EINVAL entry for an invalid flag value or parameter.  Takes no
.\" arguments; emits the tag and description only.
.de Ei
[EINVAL]
An invalid flag value or parameter was specified.
..
.\" Es: EACCES entry for an attempt to modify a read-only database.  Takes no
.\" arguments; emits the tag and description only.
.de Es
[EACCES]
An attempt was made to modify a read-only database.
..
.\" Et: description text for DB_THREAD being requested where spinlocks are
.\" not implemented.  Takes no arguments.  Unlike the sibling error macros it
.\" emits no bracketed errno tag, only the description.
.de Et
The DB_THREAD flag was specified and spinlocks are not implemented for
this architecture.
..
.\" Ep: EPERM entry for detected database corruption.  Takes no arguments.
.\" The close call that is exempted is named DB->close for "C" and Db::close
.\" for every other TYPE.
.de Ep
[EPERM]
Database corruption was detected.
All subsequent database calls (other than
.ie '\*[TYPE]'C'\{\
.IR DB->close )
\}
.el\{\
.IR Db::close )
\}
will return EPERM.
..
.\" Ek: note about errno-returning methods throwing exceptions by default.
.\" Takes no arguments and emits text only when TYPE is "CXX".
.de Ek
.if '\*[TYPE]'CXX'\{\
Methods marked as returning
.I errno
will, by default, throw an exception that encapsulates the error information.
The default error behavior can be changed, see
.IR DbException (3).
\}
..
.\" The SEE ALSO text macro
.\" Sa takes no arguments and emits the list of related manual pages.
.\" The utilities and db_intro(3) are always listed; the remainder is the
.\" C function pages for "C" and the class pages for other TYPEs, with
.\" DbMpoolFile(3) left out when TYPE is "JAVA".
.\" Hyphenation (.nh) and adjustment (.na) are switched off for the list;
.\" afterwards adjustment is restored with .ad and .Hy is invoked.
.de Sa
.\" make the line long for nroff.
.if n .ll 72
.nh
.na
.IR db_archive (1),
.IR db_checkpoint (1),
.IR db_deadlock (1),
.IR db_dump (1),
.IR db_load (1),
.IR db_recover (1),
.IR db_stat (1),
.IR db_intro (3),
.ie '\*[TYPE]'C'\{\
.IR db_appinit (3),
.IR db_cursor (3),
.IR db_dbm (3),
.IR db_internal (3),
.IR db_lock (3),
.IR db_log (3),
.IR db_mpool (3),
.IR db_open (3),
.IR db_thread (3),
.IR db_txn (3)
\}
.el\{\
.IR db_internal (3),
.IR db_thread (3),
.IR Db (3),
.IR Dbc (3),
.IR DbEnv (3),
.IR DbException (3),
.IR DbInfo (3),
.IR DbLock (3),
.IR DbLockTab (3),
.IR DbLog (3),
.IR DbLsn (3),
.IR DbMpool (3),
.if !'\*[TYPE]'JAVA'\{\
.IR DbMpoolFile (3),
\}
.IR Dbt (3),
.IR DbTxn (3),
.IR DbTxnMgr (3)
\}
.ad
.Hy
..
.\" The function header macro.
.\" This macro takes one argument:
.\"	+ the function name.
.\" Fn sets the indent to 2 with .in, prints the function name (first
.\" argument) in italics, and then restores the previous indent with a bare
.\" .in request.
.de Fn
.in 2
.I \\$1
.in
..
.\" The XXX_open function text macro, for merged create/open calls.
.\" This macro takes two arguments:
.\"	+ the interface, e.g., "transaction region"
.\"	+ the prefix, e.g., "txn" (or the class name for C++, e.g., "DbTxn")
.\" Co emits the description of a merged create/open call.
.\"	+ argument 1 is the interface description (e.g., "transaction region")
.\"	+ argument 2 is the function prefix or class name (e.g., "txn")
.\" The open function is written as <arg2>_open for "C" and <arg2>::open for
.\" other TYPEs.  The first part describes how the region is returned (the
.\" "JAVA" wording differs from "CXX"); the common tail describes how the
.\" directory is chosen and when the region files are created.
.\" The name printed in "This directory must already exist when ... is
.\" called" is built from argument 2 in both variants; using argument 1
.\" there would print the interface description with "_open" appended.
.de Co
.ie '\*[TYPE]'C'\{\
.Fn \\$2_open
The
.I \\$2_open
function copies a pointer, to the \\$1 identified by the
.B directory
.IR dir ,
into the memory location referenced by
.IR regionp .
.PP
If the
.I dbenv
argument to
.I \\$2_open
was initialized using
.IR db_appinit ,
.I dir
is interpreted as described by
.IR db_appinit (3).
\}
.el\{\
.Fn \\$2::open
The
.I \\$2::open
.ie '\*[TYPE]'CXX'\{\
method copies a pointer, to the \\$1 identified by the
.B directory
.IR dir ,
into the memory location referenced by
.IR regionp .
\}
.el\{\
method returns a \\$1 identified by the
.B directory
.IR dir .
\}
.PP
If the
.I dbenv
argument to
.I \\$2::open
was initialized using
.IR DbEnv::appinit ,
.I dir
is interpreted as described by
.IR DbEnv (3).
\}
.PP
Otherwise,
if
.I dir
is not NULL,
it is interpreted relative to the current working directory of the process.
If
.I dir
is NULL,
the following environment variables are checked in order:
``TMPDIR'', ``TEMP'', and ``TMP''.
If one of them is set,
\\$1 files are created relative to the directory it specifies.
If none of them are set, the first possible one of the following
directories is used:
.IR /var/tmp ,
.IR /usr/tmp ,
.IR /temp ,
.IR /tmp ,
.I C:/temp
and
.IR C:/tmp .
.PP
All files associated with the \\$1 are created in this directory.
This directory must already exist when
.ie '\*[TYPE]'C'\{\
\\$2_open
\}
.el\{\
\\$2::open
\}
is called.
If the \\$1 already exists,
the process must have permission to read and write the existing files.
If the \\$1 does not already exist,
it is optionally created and initialized.
..
.\" The common close language macro, for discarding created regions
.\" This macro takes one argument:
.\"	+ the function prefix, e.g., txn (the class name for C++, e.g., DbTxn)
.\" Cc emits the paragraph explaining when closing a region also removes its
.\" files.  The first argument is the function prefix or class name.
.\" It defines the temporary strings Va, Vo and Vu (the appinit, open and
.\" unlink names for the current TYPE), uses them in the text and removes
.\" them again with .rm before returning.
.\" When the first argument is "memp" an extra clause about the
.\" DB_MPOOL_PRIVATE flag is emitted.
.de Cc
In addition, if the
.I dir
argument to
.ie '\*[TYPE]'C'\{\
.ds Va db_appinit
.ds Vo \\$1_open
.ds Vu \\$1_unlink
\}
.el\{\
.ds Va DbEnv::appinit
.ds Vo \\$1::open
.ds Vu \\$1::unlink
\}
.I \\*(Vo
was NULL
and
.I dbenv
was not initialized using
.IR \\*(Va ,
.if '\\$1'memp'\{\
or the DB_MPOOL_PRIVATE flag was set,
\}
all files created for this shared region will be removed,
as if
.I \\*(Vu
were called.
.rm Va
.rm Vo
.rm Vu
..
.\" The DB_ENV information macro.
.\" This macro takes two arguments:
.\"	+ the function called to open, e.g., "txn_open"
.\"	+ the function called to close, e.g., "txn_close"
.\" En completes a sentence begun by the caller (it starts with "based on")
.\" and then describes how the environment argument is used by the open
.\" function named in the first argument.
.\" "C": text about the DB_ENV structure; the wording about a NULL dbenv
.\" differs when the first argument is "db_appinit".
.\" Other TYPEs: text about the DbEnv object; the wording differs when the
.\" first argument is "appinit".
.\" NOTE(review): the paragraph guarded by TYPE being "CXX", which is the only
.\" place the second argument is used, sits inside the branch that is taken
.\" only when TYPE is "C", so it can never be emitted.
.de En
.ie '\*[TYPE]'C'\{\
based on the
.I dbenv
argument to
.IR \\$1 ,
which is a pointer to a structure of type DB_ENV (typedef'd in <db.h>).
Applications will normally use the same DB_ENV structure (initialized
by
.IR db_appinit (3)),
as an argument to all of the subsystems in the DB package.
.PP
References to the DB_ENV structure are maintained by DB,
so it may not be discarded until the last close function,
corresponding to an open function for which it was an argument,
has returned.
In order to ensure compatibility with future releases of DB, all fields of
the DB_ENV structure that are not explicitly set should be initialized to 0
before the first time the structure is used.
Do this by declaring the structure external or static, or by calling the C
library routine
.IR bzero (3)
or
.IR memset (3).
.PP
The fields of the DB_ENV structure used by
.I \\$1
are described below.
.if '\*[TYPE]'CXX'\{\
As references to the DB_ENV structure may be maintained by
.IR \\$1 ,
it is necessary that the DB_ENV structure and memory it references be valid
until the
.I \\$2
function is called.
\}
.ie '\\$1'db_appinit'\{The
.I dbenv
argument may not be NULL.
If any of the fields of the
.I dbenv
are set to 0,
defaults appropriate for the system are used where possible.
\}
.el\{If
.I dbenv
is NULL
or any of its fields are set to 0,
defaults appropriate for the system are used where possible.
\}
.PP
The following fields in the DB_ENV structure may be initialized before calling
.IR \\$1 :
\}
.el\{\
based on which set methods have been used.
It is expected that applications will use a single DbEnv object as the
argument to all of the subsystems in the DB package.
The fields of the DbEnv object used by
.I \\$1
are described below.
As references to the DbEnv object may be maintained by
.IR \\$1 ,
it is necessary that the DbEnv object and memory it references be valid
until the object is destroyed.
.ie '\\$1'appinit'\{\
The
.I dbenv
argument may not be NULL.
If any of the fields of the
.I dbenv
are set to 0,
defaults appropriate for the system are used where possible.
\}
.el\{\
Any of the DbEnv fields that are not explicitly set will default to
appropriate values.
\}
.PP
The following fields in the DbEnv object may be initialized, using the
appropriate set method, before calling
.IR \\$1 :
\}
..
.\" The DB_ENV common fields macros.
.\" Se takes no arguments and lists the error-reporting fields of the
.\" environment, with exactly one variant emitted per TYPE:
.\"	+ "JAVA": db_errcall, db_errpfx, db_verbose
.\"	+ "CXX":  db_errcall, db_errfile, db_errpfx, db_error_stream, db_verbose
.\"	+ other:  db_errcall, db_errfile, db_errpfx, db_verbose (C declarations)
.\" The three variants are chained with .ie/.el so that a "JAVA" page does
.\" not fall through into the C declarations as well.
.\" The C variant does not open a tagged paragraph for its first field; the
.\" caller is expected to issue .TP 5 before invoking .Se, as this page does.
.de Se
.ie '\*[TYPE]'JAVA'\{\
.TP 5
DbErrcall db_errcall;
.ns
.TP 5
String db_errpfx;
.ns
.TP 5
int db_verbose;
The error fields of the DbEnv behave as described for
.IR DbEnv (3).
\}
.el\{\
.ie '\*[TYPE]'CXX'\{\
.TP 5
void *(*db_errcall)(char *db_errpfx, char *buffer);
.ns
.TP 5
FILE *db_errfile;
.ns
.TP 5
const char *db_errpfx;
.ns
.TP 5
class ostream *db_error_stream;
.ns
.TP 5
int db_verbose;
The error fields of the DbEnv behave as described for
.IR DbEnv (3).
\}
.el\{\
void *(*db_errcall)(char *db_errpfx, char *buffer);
.ns
.TP 5
FILE *db_errfile;
.ns
.TP 5
const char *db_errpfx;
.ns
.TP 5
int db_verbose;
The error fields of the DB_ENV behave as described for
.IR db_appinit (3).
.sp
\}
\}
..
.\" The open flags.
.\" Fm takes no arguments.  It introduces the flags and mode arguments and
.\" documents the DB_CREATE flag, using .Sj to start the tagged entry.
.de Fm
The
.I flags
and
.I mode
arguments specify how files will be opened and/or created when they
don't already exist.
The flags value is specified by
.BR or 'ing
together one or more of the following values:
.Sj DB_CREATE
Create any underlying files, as necessary.
If the files do not already exist and the DB_CREATE flag is not specified,
the call will fail.
..
.\" DB_THREAD open flag macro.
.\" This macro takes two arguments:
.\"	+ the open function name
.\"	+ the object it returns.
.\" Ft emits the DB_THREAD flag entry.  The first argument is the open
.\" function name and the second is the name of the handle it returns.
.\" When TYPE is "JAVA" an extra note says that threading is always assumed.
.\" NOTE(review): .Sj already issues its own .TP 5, so the .TP 5 below looks
.\" redundant -- confirm the intended spacing before removing it.
.de Ft
.TP 5
.Sj DB_THREAD
Cause the \\$2 handle returned by the
.I \\$1
.Al function method
to be useable by multiple threads within a single address space,
i.e., to be ``free-threaded''.
.if '\*[TYPE]'JAVA'\{\
Threading is assumed in the Java API,
so no special flags are required,
and DB functions will always behave as if the DB_THREAD flag was specified.
\}
..
.\" The mode macro.
.\" This macro takes one argument:
.\"	+ the subsystem name.
.\" Mo emits the paragraph on the mode and group ownership of created files;
.\" the first argument is inserted as the name of the creating subsystem.
.de Mo
All files created by the \\$1 are created with mode
.I mode
(as described in
.IR chmod (2))
and modified by the process' umask value at the time of creation (see
.IR umask (2)).
The group ownership of created files is based on the system and directory
defaults, and is not further specified by DB.
..
.\" The application exits macro.
.\" This macro takes one argument:
.\"	+ the application name.
.\" Ex emits the exit status sentence, with the utility name (first
.\" argument) set in italics.
.de Ex
The
.I \\$1
utility exits 0 on success, and >0 if an error occurs.
..
.\" The application -h section.
.\" This macro takes one argument:
.\"	+ the application name
.\" Dh emits the DB_HOME environment variable entry for the utilities.
.\" NOTE: the body does not reference any argument; an application name
.\" passed by the caller is ignored.
.de Dh
DB_HOME
If the
.B \-h
option is not specified and the environment variable
.I DB_HOME
is set, it is used as the path of the database home, as described in
.IR db_appinit (3).
..
.\" The function DB_HOME ENVIRONMENT VARIABLES section.
.\" This macro takes one argument:
.\"	+ the open function name
.\" Eh emits the DB_HOME environment variable entry for an open function
.\" (first argument).  When more than one argument is supplied, a closing
.\" sentence naming the configuration string (second argument) is appended.
.de Eh
DB_HOME
If the
.I dbenv
argument to
.I \\$1
was initialized using
.IR db_appinit ,
the environment variable DB_HOME may be used as the path of the database
home for the interpretation of the
.I dir
argument to
.IR \\$1 ,
as described in
.IR db_appinit (3).
.if \\n(.$>1 \{Specifically,
.I \\$1
is affected by the configuration string value of \\$2.\}
..
.\" The function TMPDIR ENVIRONMENT VARIABLES section.
.\" This macro takes two arguments:
.\"	+ the interface, e.g., "transaction region"
.\"	+ the prefix, e.g., "txn" (or the class name for C++, e.g., "DbTxn")
.\" Ev emits the TMPDIR environment variable entry.  The open function name
.\" is built from the second argument (<arg2>_open for "C", <arg2>::open
.\" otherwise) in the temporary string Vo, which is removed before returning.
.\" The first argument is inserted as the name of what gets created.
.\" NOTE: the initialization function is printed as db_appinit for every
.\" TYPE.
.de Ev
TMPDIR
If the
.I dbenv
argument to
.ie '\*[TYPE]'C'\{\
.ds Vo \\$2_open
\}
.el\{\
.ds Vo \\$2::open
\}
.I \\*(Vo
was NULL or not initialized using
.IR db_appinit ,
the environment variable TMPDIR may be used as the directory in which to
create the \\$1,
as described in the
.I \\*(Vo
section above.
.rm Vo
..
.\" The unused flags macro.
.\" Fl takes no arguments and emits the sentence stating that the flags
.\" parameter is unused and must be 0.
.de Fl
The
.I flags
parameter is currently unused, and must be set to 0.
..
.\" The no-space TP macro.
.\" Nt forces a line break, enables no-space mode (.ns) and then starts a
.\" tagged paragraph with .TP 5.  It takes no arguments.
.de Nt
.br
.ns
.TP 5
..
.\" The return values of the functions macros.
.\" Rc is the standard two-value return with a suffix for more values.
.\" Ro is the standard two-value return but there were previous values.
.\" Rt is the standard two-value return, returning errno, 0, or < 0.
.\" These macros take one argument:
.\"	+ the routine name
.\" Rc: every variant ends with a comma, leaving the sentence open so that
.\" the caller can append further return values after the macro call.
.\" "C" and "CXX" mention errno on failure and 0 on success; the remaining
.\" variant mentions only a DbException(3) encapsulating an errno.
.de Rc
The
.I \\$1
.ie '\*[TYPE]'C'\{function returns the value of
.I errno
on failure,
0 on success,
\}
.el\{method throws a
.IR DbException (3)
.ie '\*[TYPE]'CXX'\{or returns the value of
.I errno
on failure,
0 on success,
\}
.el\{that encapsulates an
.I errno
on failure,
\}
\}
..
.\" Ro: complete "Otherwise, ..." sentence for a routine (first argument)
.\" whose other return values have already been described by the caller.
.\" "C" and "CXX" mention errno on failure and 0 on success; the remaining
.\" variant mentions only a DbException(3) encapsulating an errno.
.\" Every variant ends the sentence with a period.
.de Ro
Otherwise, the
.I \\$1
.ie '\*[TYPE]'C'\{function returns the value of
.I errno
on failure and 0 on success.
\}
.el\{method throws a
.IR DbException (3)
.ie '\*[TYPE]'CXX'\{or returns the value of
.I errno
on failure and 0 on success.
\}
.el\{that encapsulates an
.I errno
on failure.
\}
\}
..
.\" Rt: complete return-value sentence for the routine named by the first
.\" argument.  "C" and "CXX" mention errno on failure and 0 on success; the
.\" remaining variant mentions only a DbException(3) encapsulating an errno.
.de Rt
The
.I \\$1
.ie '\*[TYPE]'C'\{function returns the value of
.I errno
on failure and 0 on success.
\}
.el\{method throws a
.IR DbException (3)
.ie '\*[TYPE]'CXX'\{or returns the value of
.I errno
on failure and 0 on success.
\}
.el\{that encapsulates an
.I errno
on failure.
\}
\}
..
.\" The TXN id macro.
.\" Tx takes no arguments and emits an indented paragraph (.IP) describing
.\" the txnid parameter.
.de Tx
.IP
If the file is being accessed under transaction protection,
the
.I txnid
parameter is a transaction ID returned from
.IR txn_begin ,
otherwise, NULL.
..
.\" The XXX_unlink function text macro.
.\" This macro takes two arguments:
.\"	+ the interface, e.g., "transaction region"
.\"	+ the prefix (for C++, this is the class name)
.\" Un emits the complete description of an unlink function, including its
.\" heading (via .Fn) and its return values (via .Rt).
.\" The first argument is the interface description and the second is the
.\" function prefix or class name.
.\" The temporary strings Va, Vc, Vo and Vu hold the appinit, close, open and
.\" unlink names for the current TYPE; all four are removed at the end.
.\" When the second argument is "log", the parenthetical note also says that
.\" the log files themselves are not removed.
.de Un
.ie '\*[TYPE]'C'\{\
.ds Va db_appinit
.ds Vc \\$2_close
.ds Vo \\$2_open
.ds Vu \\$2_unlink
\}
.el\{\
.ds Va DbEnv::appinit
.ds Vc \\$2::close
.ds Vo \\$2::open
.ds Vu \\$2::unlink
\}
.Fn \\*(Vu
The
.I \\*(Vu
.Al function method
destroys the \\$1 identified by the directory
.IR dir ,
removing all files used to implement the \\$1.
.ie '\\$2'log' \{(The log files themselves and the directory
.I dir
are not removed.)\}
.el \{(The directory
.I dir
is not removed.)\}
If there are processes that have called
.I \\*(Vo
without calling
.I \\*(Vc
(i.e., there are processes currently using the \\$1),
.I \\*(Vu
will fail without further action,
unless the force flag is set,
in which case
.I \\*(Vu
will attempt to remove the \\$1 files regardless of any processes
still using the \\$1.
.PP
The result of attempting to forcibly destroy the region when a process
has the region open is unspecified.
Processes using a shared memory region maintain an open file descriptor
for it.
On UNIX systems, the region removal should succeed
and processes that have already joined the region should continue to
run in the region without change,
however processes attempting to join the \\$1 will either fail or
attempt to create a new region.
On other systems, e.g., WNT, where the
.IR unlink (2)
system call will fail if any process has an open file descriptor
for the file,
the region removal will fail.
.PP
In the case of catastrophic or system failure,
database recovery must be performed (see
.IR db_recover (1)
or the DB_RECOVER and DB_RECOVER_FATAL flags to
.IR \\*(Va (3)).
Alternatively, if recovery is not required because no database state is
maintained across failures,
it is possible to clean up a \\$1 by removing all of the
files in the directory specified to the
.I \\*(Vo
.Al function, method,
as \\$1 files are never created in any directory other than the one
specified to
.IR \\*(Vo .
Note, however,
that this has the potential to remove files created by the other DB
subsystems in this database environment.
.PP
.Rt \\*(Vu
.rm Va
.rm Vo
.rm Vu
.rm Vc
..
.\" Signal paragraph for standard utilities.
.\" This macro takes one argument:
.\"	+ the utility name.
.\" Si emits the paragraph advising that the utility (first argument) be
.\" stopped with SIGINT so that it can detach from the shared regions.
.de Si
The
.I \\$1
utility attaches to DB shared memory regions.
In order to avoid region corruption,
it should always be given the chance to detach and exit gracefully.
To cause
.I \\$1
to clean up after itself and exit,
send it an interrupt signal (SIGINT).
..
.\" Logging paragraph for standard utilities.
.\" This macro takes one argument:
.\"	+ the utility name.
.\" Pi emits the description of the -L option: the utility (first argument)
.\" logs its process ID and start time to a file that is removed again when
.\" the utility exits gracefully.
.de Pi
.B \-L
Log the execution of the \\$1 utility to the specified file in the
following format, where ``###'' is the process ID, and the date is
the time the utility started running.
.sp
\\$1: ### Wed Jun 15 01:23:45 EDT 1995
.sp
This file will be removed if the \\$1 utility exits gracefully.
..
.\" Malloc paragraph.
.\" This macro takes one argument:
.\"	+ the allocated object
.\" Ma emits the paragraph about memory allocated through db_malloc or
.\" malloc(3); the first argument names the allocated object and starts the
.\" first sentence.  Nothing is emitted when TYPE is "JAVA".
.de Ma
.if !'\*[TYPE]'JAVA'\{\
\\$1 are created in allocated memory.
If
.I db_malloc
is non-NULL,
it is called to allocate the memory,
otherwise,
the library function
.IR malloc (3)
is used.
The function
.I db_malloc
must match the calling conventions of the
.IR malloc (3)
library routine.
Regardless,
the caller is responsible for deallocating the returned memory.
To deallocate the returned memory,
free each returned memory pointer;
pointers inside the memory do not need to be individually freed.
\}
..
.\" Underlying function paragraph.
.\" This macro takes two arguments:
.\"	+ the function name
.\"	+ the utility name
.\" Uf emits the paragraph pointing from a function (first argument) to the
.\" section 1 utility built on it (second argument).  The word "function" or
.\" "method" is chosen through .Al.
.de Uf
The
.I \\$1
.Al function method
is the underlying function used by the
.IR \\$2 (1)
utility.
See the source code for the
.I \\$2
utility for an example of using
.I \\$1
in a UNIX environment.
..
.\" Underlying function paragraph, for C++.
.\" This macro takes three arguments:
.\"	+ the C++ method name
.\"	+ the function name for C
.\"	+ the utility name
.\" Ux emits the paragraph relating a method (first argument) to the C
.\" function it is based on (second argument) and to the section 1 utility
.\" that uses that function (third argument).  The text does not depend on
.\" TYPE.
.de Ux
The
.I \\$1
method is based on the C
.I \\$2
function, which
is the underlying function used by the
.IR \\$3 (1)
utility.
See the source code for the
.I \\$3
utility for an example of using
.I \\$2
in a UNIX environment.
..
.TH DB_TXN 3 "May 10, 1998"
.UC 7
.SH NAME
db_txn \- DB transaction management
.SH SYNOPSIS
.nf
.ft B
#include <db.h>

int
txn_open(const char *dir,
.ti +5
u_int32_t flags, int mode, DB_ENV *dbenv, DB_TXNMGR **regionp);

int
txn_begin(DB_TXNMGR *txnp, DB_TXN *pid, DB_TXN **tid);

int
txn_prepare(DB_TXN *tid);

int
txn_commit(DB_TXN *tid);

int
txn_abort(DB_TXN *tid);

u_int32_t
txn_id(DB_TXN *tid);

int
txn_checkpoint(const DB_TXNMGR *txnp, u_int32_t kbyte, u_int32_t min);

int
txn_close(DB_TXNMGR *txnp);

int
txn_unlink(const char *dir, int force, DB_ENV *dbenv);

int
txn_stat(DB_TXNMGR *txnp,
.ti +5
DB_TXN_STAT **statp, void *(*db_malloc)(size_t));
.ft R
.fi
.SH DESCRIPTION
.Gn
.PP
This manual page describes the specific details of the DB transaction
support.
.PP
The
.I db_txn
functions are the library interface that provides transaction semantics.
Full transaction support is provided by a collection of modules that
provide interfaces to the services required for transaction processing.
These services are recovery (see
.IR db_log (3)),
concurrency control (see
.IR db_lock (3)),
and the management of shared data (see
.IR db_mpool (3)).
Transaction semantics can be applied to the access methods described in
.IR db_open (3)
through function call parameters.
.PP
The model intended for transactional use (and the one that is used by
the access methods) is write-ahead logging provided by
.IR db_log (3)
to record both before- and after-images.
Locking follows a two-phase protocol, with all locks being released
at transaction commit.
.PP
.Co "transaction region" txn
.PP
.Fm
.Ft txn_open DB_TXNMGR
.TP 5
DB_TXN_NOSYNC
On transaction commit, do not synchronously flush the log.
This means that transactions exhibit the ACI (atomicity,
consistency and isolation) properties, but not D (durability), i.e.,
database integrity will be maintained but it is possible that some number
of the most recently committed transactions may be undone during recovery
instead of being redone.
.sp
The number of transactions that are potentially at risk is governed by
how often the log is checkpointed (see
.IR db_checkpoint (1))
and how many log updates can fit on a single log page.
.PP
.Mo "transaction subsystem"
.PP
The transaction subsystem is configured
.En "txn_open" "txn_close"
.TP 5
.Se
.TP 5
DB_LOG *lg_info;
The logging region that is being used for this transaction environment.
The
.I lg_info
field contains a return value from the function
.IR log_open .
.ft B
Logging is required for transaction environments,
and it is an error to not specify a logging region.
.ft R
.TP 5
DB_LOCKTAB *lk_info;
The locking region that is being used for this transaction environment.
The
.I lk_info
field contains a return value from the function
.IR lock_open .
If
.I lk_info
is NULL, no locking is done in this transaction environment.
.TP 5
u_int32_t tx_max;
The maximum number of simultaneous transactions that are supported.
This bounds the size of backing files and is used to derive limits for
the size of the lock region and logfiles.
When there are more than
.I tx_max
concurrent transactions, calls to
.I txn_begin
may cause backing files to grow.
If
.I tx_max
is 0, a default value is used.
.TP 5
int (*tx_recover)(DB_LOG *logp, DBT *log_rec,
.ti +5
DB_LSN *lsnp, int redo, void *info);
.br
A function that is called by
.I txn_abort
during transaction abort.
This function takes five arguments:
.RS
.TP 5
logp
A pointer to the transaction log (DB_LOG *).
.TP 5
log_rec
A log record.
.TP 5
lsnp
A pointer to a log sequence number (DB_LSN *).
.TP 5
redo
An integer value that is set to one of the following values:
.RS
.TP 5
DB_TXN_BACKWARD_ROLL
The log is being read backward to determine which transactions have been
committed and which transactions were not (and should therefore be aborted
during recovery).
.TP 5
DB_TXN_FORWARD_ROLL
The log is being played forward, any transaction ids encountered that
have not been entered into the list referenced by
.I info
should be ignored.
.TP 5
DB_TXN_OPENFILES
The log is being read to open all the files required to perform recovery.
.TP 5
DB_TXN_REDO
Redo the operation described by the log record.
.TP 5
DB_TXN_UNDO
Undo the operation described by the log record.
.RE
.TP 5
info
An opaque pointer used to reference the list of transaction IDs encountered
during recovery.
.RE
.IP
If
.I tx_recover
is NULL,
the default is that only DB access method operations are transaction
protected,
and the default recover function will be used.
.PP
.Rt txn_open
.PP
.Fn txn_begin
The
.I txn_begin
function creates a new transaction in the designated transaction manager,
copying a pointer to a DB_TXN that uniquely identifies it into the memory
referenced by
.IR tid .
If the
.I pid
argument is non-NULL,
the new transaction is a nested transaction with the transaction indicated by
.I pid
as its parent.
.PP
Transactions may not span threads, i.e.,
each transaction must begin and end in the same thread,
and each transaction may only be used by a single thread.
.PP
.Rt txn_begin
.PP
.Fn txn_prepare
The
.I txn_prepare
function initiates the beginning of a two-phase commit.
In a distributed transaction environment,
.I db
can be used as a local transaction manager.
In this case,
the distributed transaction manager must send
.I prepare
messages to each local manager.
The local manager must then issue a
.I txn_prepare
and await its successful return before responding to the distributed
transaction manager.
Only after the distributed transaction manager receives successful
responses from all of its
.I prepare
messages should it issue any
.I commit
messages.
.PP
.Rt txn_prepare
.PP
.Fn txn_commit
The
.I txn_commit
function ends the transaction specified by the
.I tid
argument.
If DB_TXN_NOSYNC was not specified, a commit log record is written and
flushed to disk, as are all previously written log records.
If the transaction is nested, its locks are acquired by the parent
transaction, otherwise its locks are released.
Any applications that require strict two-phase locking must not
release any locks explicitly, leaving them all to be released by
.IR txn_commit .
.PP
.Rt txn_commit
.PP
.Fn txn_abort
The
.I txn_abort
function causes an abnormal termination of the transaction.
The log is played backwards and any necessary recovery operations are
initiated through the
.I recover
function specified to
.IR txn_open .
After recovery is completed, all locks held by the transaction are acquired
by the parent transaction in the case of a nested transaction or released
in the case of a non-nested transaction.
As is the case for
.IR txn_commit ,
applications that require strict two-phase locking should not explicitly
release any locks.
.PP
.Rt txn_abort
.PP
.Fn txn_id
The
.I txn_id
function returns the unique transaction id associated with the specified
transaction.
Locking calls made on behalf of this transaction should use the value
returned from
.I txn_id
as the locker parameter to the
.I lock_get
or
.I lock_vec
calls.
.PP
.Fn txn_close
The
.I txn_close
function detaches a process from the transaction environment specified
by the DB_TXNMGR pointer.
All mapped regions are unmapped and any allocated resources are freed.
Any uncommitted transactions are aborted.
.PP
.Cc txn
.PP
When multiple threads are using the DB_TXNMGR handle concurrently,
only a single thread may call the
.I txn_close
function.
.PP
.Rt txn_close
.PP
.Un "transaction region" txn
.PP
.Fn txn_checkpoint
The
.I txn_checkpoint
function syncs the underlying memory pool,
writes a checkpoint record to the log and then flushes the log.
.PP
If either
.I kbyte
or
.I min
is non-zero,
the checkpoint is only done if more than
.I min
minutes have passed since the last checkpoint,
or if more than
.I kbyte
kilobytes of log data have been written since the last checkpoint.
.PP
.Rc txn_checkpoint
and DB_INCOMPLETE if there were pages that needed to be written but that
.IR memp_sync (3)
was unable to write immediately.
In this case, the
.I txn_checkpoint
call should be retried.
.PP
.Uf txn_checkpoint db_checkpoint
.PP
.Fn txn_stat
The
.I txn_stat
function creates a statistical structure and copies a pointer to it into
the user-specified memory location.
.PP
.Ma "Statistical structure"
.PP
The transaction region statistics are stored in a structure of type
DB_TXN_STAT (typedef'd in <db.h>).
The following DB_TXN_STAT fields will be filled in:
.TP 5
u_int32_t st_refcnt;
The number of references to the region.
.Nt
u_int32_t st_regsize;
The size of the region.
.Nt
DB_LSN st_last_ckp;
The LSN of the last checkpoint.
.Nt
DB_LSN st_pending_ckp;
The LSN of any checkpoint that is currently in progress.
If
.I st_pending_ckp
is the same as
.I st_last_ckp
there is no checkpoint in progress.
.Nt
time_t st_time_ckp;
The time the last completed checkpoint finished (as returned by
.IR time (2)).
.Nt
u_int32_t st_last_txnid;
The last transaction ID allocated.
.Nt
u_int32_t st_maxtxns;
The maximum number of active transactions supported by the region.
.Nt
u_int32_t st_naborts;
The number of transactions that have aborted.
.Nt
u_int32_t st_nactive;
The number of transactions that are currently active.
.Nt
u_int32_t st_nbegins;
The number of transactions that have begun.
.Nt
u_int32_t st_ncommits;
The number of transactions that have committed.
.Nt
u_int32_t st_region_wait;
The number of times that a thread of control was forced to wait before
obtaining the region lock.
.Nt
u_int32_t st_region_nowait;
The number of times that a thread of control was able to obtain
the region lock without waiting.
.Nt
DB_TXN_ACTIVE *st_txnarray;
A pointer to an array of
.I st_nactive
DB_TXN_ACTIVE structures, describing the currently active transactions.
The following fields of the DB_TXN_ACTIVE structure (typedef'd in <db.h>)
will be filled in:
.sp
.RS
.TP 5
u_int32_t txnid;
The transaction ID as returned by
.IR txn_begin (3).
.Nt
DB_LSN lsn;
The LSN of the transaction-begin record.
.RE
.PP
.SH "TRANSACTIONS
Creating transaction protected applications using the DB access methods
requires little system customization.
In most cases,
the default parameters to the locking, logging, memory pool,
and transaction subsystems will suffice.
Applications can use
.IR db_appinit (3)
to perform this initialization, or they may do it explicitly.
.PP
Each database operation (i.e., any call to a function underlying the
handles returned by
.IR db_open (3)
and
.IR db_cursor (3))
is normally performed on behalf of a unique locker.
If multiple calls on behalf of the same locker are desired,
then transactions must be used.
.PP
Once the application has initialized the DB subsystems that it is using,
it may open the DB access method databases.
For applications performing transactions,
the databases must be opened after subsystem initialization,
and cannot be opened as part of a transaction.
Once the databases are opened, the application can group sets of
operations into transactions, by surrounding the operations
with the appropriate
.IR txn_begin ,
.I txn_commit
and
.I txn_abort
calls.
Databases accessed by a transaction must not be closed
during the transaction.
Note,
it is not necessary to transaction protect read-only transactions,
unless those transactions require repeatable reads.
.PP
The DB access methods will make the appropriate calls into the
lock, log and memory pool subsystems in order to guarantee that
transaction semantics are applied.
When the application is ready to exit, all outstanding transactions
should have been committed or aborted.
At this point, all open DB files should be closed.
Once the DB database files are closed,
the DB subsystems should be closed,
either explicitly or by calling
.IR db_appexit (3).
.PP
It is also possible to use the locking, logging and transaction subsystems
of DB to provide transaction semantics to objects other than those described
by the DB access methods.
In these cases, the application will need more explicit customization of
the subsystems as well as the development of appropriate
data-structure-specific recovery functions.
.PP
For example, consider an application that provides transaction semantics
to data stored in plain UNIX files accessed using the
.IR read (2)
and
.IR write (2)
system calls.
The operations for which transaction protection is desired are bracketed
by calls to
.I txn_begin
and
.IR txn_commit .
.PP
Before data are referenced,
the application must make a call to the lock manager,
.IR db_lock ,
for a lock of the appropriate type (e.g., read)
on the object being locked.
The object might be a page in the file, a byte, a range of bytes,
or some key.
It is up to the application to ensure that appropriate locks are acquired.
Before a write is performed, the application should acquire a write
lock on the object, by making an appropriate call to the lock
manager,
.IR db_lock .
Then, the application should make a call to the
log manager,
.IR db_log ,
to record enough information to redo the operation in case of
failure after commit and to undo the operation in case of abort.
As discussed in the
.IR db_log (3)
manual page,
the application is responsible for providing any necessary structure
to the log record.
For example, the application must understand what part of the log
record is an operation code, what part identifies the file being
modified, what part is redo information, and what
part is undo information.
.PP
After the log message is written, the application may issue the write system call.
After all requests are issued, the application may call
.IR txn_commit .
When
.I txn_commit
returns, the caller is guaranteed that all necessary log writes have
been written to disk.
.PP
At any time, the application may call
.IR txn_abort ,
which will result in the appropriate calls to the
.I recover
function to restore the ``database'' to a consistent pre-transaction
state.
(The recover function must be able to either re-apply or undo the update
depending on the context, for each different type of log record.)
.PP
If the application should crash, the recovery process uses the
.I db_log
interface to read the log and call the
.I recover
function to restore the database to a consistent state.
.PP
The
.I txn_prepare
function provides the core functionality to implement distributed
transactions,
but it does not manage the notification of distributed transaction managers.
The caller is responsible for issuing
.I txn_prepare
calls to all sites participating in the transaction.
If all responses are positive, the caller can issue a
.IR txn_commit .
If any of the responses are negative, the caller should issue a
.IR txn_abort .
In general, the
.I txn_prepare
call requires that the transaction log be flushed to disk.
.\"
.\" See the file LICENSE for redistribution information.
.\"
.\" Copyright (c) 1998
.\"	Sleepycat Software.  All rights reserved.
.\"
.\"	@(#)limits.so	8.1 (Sleepycat) 5/3/98
.\"
.de Ll
.SH "LOG FILE LIMITS"
Log file sizes impose a limit on the length of time a database
may be accessed under transaction protection, before it needs to be
dumped and reloaded (see
.IR db_dump (1)
and
.IR db_load (1)).
Unfortunately, the limits are potentially difficult to calculate.
.PP
The log file name consists of "log." followed by 5 digits, resulting
in a maximum of 99,999 log files.
Consider an application performing 600 transactions per second for
15 hours a day, writing to 10MB log files, where each transaction
logs approximately 100 bytes of data.  The calculation:
.PP
.nf
.RS
(10 * 2^20 * 99999) /
.ti +5
(600 * 60 * 60 * 15 * 100) = 323.63
.RE
.fi
.PP
indicates that the system will run out of log file space in
roughly 324 days.
If we increase the maximum size of the files from 10MB to 100MB,
the same calculation indicates that the application will run out
of log file space in roughly 9 years.
.PP
There is no way to reset the log file name space in Berkeley DB.
If your application is reaching the end of its log file name space,
you should:
.TP 5
1.
Archive your databases as if to prepare for catastrophic failure (see
.IR db_archive (1)
for more information).
.TP 5
2.
Dump and re-load
.B all
your databases (see
.IR db_dump (1)
and
.IR db_load (1)
for more information).
.TP 5
3.
Remove all of the log files from the database environment (see
.IR db_archive (1)
for more information).
.TP 5
4.
Restart your applications.
..
.de Tl
.SH "TRANSACTION ID LIMITS"
The transaction ID space in Berkeley DB is 2^31, or roughly 2 billion, entries.
It is possible that some environments may need to be aware of this
limitation.
Consider an application performing 600 transactions a second for 15
hours a day.
The transaction ID space will run out in roughly 66 days:
.PP
.nf
.RS
2^31 / (600 * 15 * 60 * 60) = 66
.RE
.fi
.PP
Doing only 100 transactions a second exhausts the transaction ID space
in roughly one year.
.PP
The transaction ID space is reset each time recovery is run.
If you reach the end of your transaction ID space,
shut down your applications and restart them after running recovery (see
.IR db_recover (1)
for more information).
The most recently allocated transaction ID is the
.I st_last_txnid
value in the transaction statistics information, and is displayed by the
.IR db_stat (1)
utility.
..
.Tl
.SH "ENVIRONMENT VARIABLES"
The following environment variables affect the execution of
.IR db_txn :
.TP 5
.Eh txn_open
.TP 5
.Ev "transaction region" txn
.SH ERRORS
.Ee txn_open
.na
.Nh
close(2), 
db_version(3), 
fcntl(2), 
fflush(3), 
lseek(2), 
malloc(3), 
memcpy(3), 
memset(3), 
mmap(2), 
munmap(2), 
open(2), 
sigfillset(3), 
sigprocmask(2), 
stat(2), 
strcpy(3), 
strdup(3), 
strerror(3), 
strlen(3), 
time(3), 
txn_unlink(3), 
unlink(2), 
and
write(2). 
.Hy
.ad
.PP
.Ec txn_open
.TP 5
.Ei
.sp
.Et
.sp
The
.I dbenv
parameter was NULL.
.TP 5
.Em
.PP
.Ee txn_begin
.na
.Nh
fcntl(2), 
fflush(3), 
log_put(3), 
lseek(2), 
malloc(3), 
memcpy(3), 
memset(3), 
mmap(2), 
munmap(2), 
strerror(3), 
and
write(2). 
.Hy
.ad
.PP
.Ec txn_begin
.TP 5
[ENOSPC]
The maximum number of concurrent transactions has been reached.
.PP
.Ee txn_prepare
.na
.Nh
fcntl(2), 
fflush(3), 
log_flush(3), 
and
strerror(3). 
.Hy
.ad
.PP
.Ee txn_commit
.na
.Nh
fcntl(2), 
fflush(3), 
lock_vec(3), 
log_put(3), 
malloc(3), 
memcpy(3), 
and
strerror(3). 
.Hy
.ad
.PP
.Ec txn_commit
.TP 5
[EINVAL]
The transaction was aborted.
.PP
.Ee txn_abort
.na
.Nh
DBenv->tx_recover(3), 
fcntl(2), 
fflush(3), 
lock_vec(3), 
log_get(3), 
memset(3), 
and
strerror(3). 
.Hy
.ad
.PP
.Ec txn_abort
.TP 5
[EINVAL]
The transaction was already aborted.
.PP
.Ee txn_checkpoint
.na
.Nh
fcntl(2), 
fflush(3), 
log_compare(3), 
log_put(3), 
malloc(3), 
memcpy(3), 
memp_sync(3), 
memset(3), 
strerror(3), 
and
time(3). 
.Hy
.ad
.PP
.Ec txn_checkpoint
.TP 5
.Ei
.PP
.Ee txn_close
.na
.Nh
close(2), 
fcntl(2), 
fflush(3), 
log_flush(3), 
munmap(2), 
strerror(3), 
and
txn_abort(3). 
.Hy
.ad
.PP
.Ee txn_unlink
.na
.Nh
close(2), 
fcntl(2), 
fflush(3), 
malloc(3), 
memcpy(3), 
memset(3), 
mmap(2), 
munmap(2), 
open(2), 
sigfillset(3), 
sigprocmask(2), 
stat(2), 
strcpy(3), 
strdup(3), 
strerror(3), 
strlen(3), 
and
unlink(2). 
.Hy
.ad
.PP
.Ec txn_unlink
.TP 5
.Eb
.PP
.Ee txn_stat
.na
.Nh
fcntl(2), 
and
malloc(3). 
.Hy
.ad
.SH "SEE ALSO"
.IR "LIBTP: Portable, Modular Transactions for UNIX" ,
Margo Seltzer, Michael Olson, USENIX proceedings, Winter 1992.
.SH BUGS
Nested transactions are not yet implemented.
.sp
.Sa