Added userspace db files.

This commit is contained in:
Jennifer Averett 2012-09-12 10:59:41 -05:00
parent a4e4484df6
commit e4e17463bb
30 changed files with 7083 additions and 0 deletions

View File

@ -16,6 +16,9 @@ CFLAGS += -I../services/librpc/include
CFLAGS += -I$(INSTALL_BASE)/include
#Only Needed for db files
CFLAGS += -D__DBINTERFACE_PRIVATE
CFLAGS += -w
CFLAGS += -std=gnu99
CFLAGS += -MT $@ -MD -MP -MF $(basename $@).d
@ -87,6 +90,32 @@ C_FILES += lib/libc/isc/ev_timers.c
C_FILES += lib/libc/stdio/fgetln.c
C_FILES += lib/libc/db/db/db.c
C_FILES += lib/libc/db/btree/bt_close.c
C_FILES += lib/libc/db/btree/bt_get.c
C_FILES += lib/libc/db/btree/bt_put.c
C_FILES += lib/libc/db/btree/bt_utils.c
C_FILES += lib/libc/db/btree/bt_conv.c
C_FILES += lib/libc/db/btree/bt_open.c
C_FILES += lib/libc/db/btree/bt_search.c
C_FILES += lib/libc/db/btree/bt_debug.c
C_FILES += lib/libc/db/btree/bt_overflow.c
C_FILES += lib/libc/db/btree/bt_seq.c
C_FILES += lib/libc/db/btree/bt_delete.c
C_FILES += lib/libc/db/btree/bt_page.c
C_FILES += lib/libc/db/btree/bt_split.c
C_FILES += lib/libc/db/recno/rec_close.c
C_FILES += lib/libc/db/recno/rec_get.c
C_FILES += lib/libc/db/recno/rec_put.c
C_FILES += lib/libc/db/recno/rec_seq.c
C_FILES += lib/libc/db/recno/rec_delete.c
C_FILES += lib/libc/db/recno/rec_open.c
C_FILES += lib/libc/db/recno/rec_search.c
C_FILES += lib/libc/db/recno/rec_utils.c
C_FILES += lib/libc/db/mpool/mpool.c
# RTEMS Specific Files
# C_FILES += rtems/rtems-net-setup.c
C_FILES += rtems/syslog.c

View File

@ -0,0 +1,113 @@
/*-
* Copyright (c) 1991, 1993, 1994
* The Regents of the University of California. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the University of
* California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)mpool.h 8.4 (Berkeley) 11/2/95
* $FreeBSD$
*/
#ifndef _MPOOL_H_
#define _MPOOL_H_
#include <sys/queue.h>
/*
* The memory pool scheme is a simple one. Each in-memory page is referenced
* by a bucket which is threaded in up to two of three ways. All active pages
* are threaded on a hash chain (hashed by page number) and an lru chain.
* Inactive pages are threaded on a free chain. Each reference to a memory
* pool is handed an opaque MPOOL cookie which stores all of this information.
*/
#define HASHSIZE 128
#define HASHKEY(pgno) ((pgno - 1 + HASHSIZE) % HASHSIZE)
/* The BKT structures are the elements of the queues. */
typedef struct _bkt {
TAILQ_ENTRY(_bkt) hq; /* hash queue */
TAILQ_ENTRY(_bkt) q; /* lru queue */
void *page; /* page */
pgno_t pgno; /* page number */
#define MPOOL_DIRTY 0x01 /* page needs to be written */
#define MPOOL_PINNED 0x02 /* page is pinned into memory */
#define MPOOL_INUSE 0x04 /* page address is valid */
u_int8_t flags; /* flags */
} BKT;
typedef struct MPOOL {
TAILQ_HEAD(_lqh, _bkt) lqh; /* lru queue head */
/* hash queue array */
TAILQ_HEAD(_hqh, _bkt) hqh[HASHSIZE];
pgno_t curcache; /* current number of cached pages */
pgno_t maxcache; /* max number of cached pages */
pgno_t npages; /* number of pages in the file */
unsigned long pagesize; /* file page size */
int fd; /* file descriptor */
/* page in conversion routine */
void (*pgin)(void *, pgno_t, void *);
/* page out conversion routine */
void (*pgout)(void *, pgno_t, void *);
void *pgcookie; /* cookie for page in/out routines */
#ifdef STATISTICS
unsigned long cachehit;
unsigned long cachemiss;
unsigned long pagealloc;
unsigned long pageflush;
unsigned long pageget;
unsigned long pagenew;
unsigned long pageput;
unsigned long pageread;
unsigned long pagewrite;
#endif
} MPOOL;
#define MPOOL_IGNOREPIN 0x01 /* Ignore if the page is pinned. */
#define MPOOL_PAGE_REQUEST 0x01 /* Allocate a new page with a
specific page number. */
#define MPOOL_PAGE_NEXT 0x02 /* Allocate a new page with the next
page number. */
__BEGIN_DECLS
MPOOL *mpool_open(void *, int, pgno_t, pgno_t);
void mpool_filter(MPOOL *, void (*)(void *, pgno_t, void *),
void (*)(void *, pgno_t, void *), void *);
void *mpool_new(MPOOL *, pgno_t *, unsigned int);
void *mpool_get(MPOOL *, pgno_t, unsigned int);
int mpool_delete(MPOOL *, void *);
int mpool_put(MPOOL *, void *, unsigned int);
int mpool_sync(MPOOL *);
int mpool_close(MPOOL *);
#ifdef STATISTICS
void mpool_stat(MPOOL *);
#endif
__END_DECLS
#endif

View File

@ -0,0 +1,180 @@
#include "port_before.h"
/*-
* Copyright (c) 1990, 1993, 1994
* The Regents of the University of California. All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* Mike Olson.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#if defined(LIBC_SCCS) && !defined(lint)
static char sccsid[] = "@(#)bt_close.c 8.7 (Berkeley) 8/17/94";
#endif /* LIBC_SCCS and not lint */
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include "namespace.h"
#include <sys/param.h>
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include "un-namespace.h"
#include <db.h>
#include "btree.h"
static int bt_meta(BTREE *);
/*
* BT_CLOSE -- Close a btree.
*
* Parameters:
* dbp: pointer to access method
*
* Returns:
* RET_ERROR, RET_SUCCESS
*/
int
__bt_close(DB *dbp)
{
BTREE *t;
int fd;
t = dbp->internal;
/* Toss any page pinned across calls. */
if (t->bt_pinned != NULL) {
mpool_put(t->bt_mp, t->bt_pinned, 0);
t->bt_pinned = NULL;
}
/* Sync the tree. */
if (__bt_sync(dbp, 0) == RET_ERROR)
return (RET_ERROR);
/* Close the memory pool. */
if (mpool_close(t->bt_mp) == RET_ERROR)
return (RET_ERROR);
/* Free random memory. */
if (t->bt_cursor.key.data != NULL) {
free(t->bt_cursor.key.data);
t->bt_cursor.key.size = 0;
t->bt_cursor.key.data = NULL;
}
if (t->bt_rkey.data) {
free(t->bt_rkey.data);
t->bt_rkey.size = 0;
t->bt_rkey.data = NULL;
}
if (t->bt_rdata.data) {
free(t->bt_rdata.data);
t->bt_rdata.size = 0;
t->bt_rdata.data = NULL;
}
fd = t->bt_fd;
free(t);
free(dbp);
return (_close(fd) ? RET_ERROR : RET_SUCCESS);
}
/*
* BT_SYNC -- sync the btree to disk.
*
* Parameters:
* dbp: pointer to access method
*
* Returns:
* RET_SUCCESS, RET_ERROR.
*/
int
__bt_sync(const DB *dbp, u_int flags)
{
BTREE *t;
int status;
t = dbp->internal;
/* Toss any page pinned across calls. */
if (t->bt_pinned != NULL) {
mpool_put(t->bt_mp, t->bt_pinned, 0);
t->bt_pinned = NULL;
}
/* Sync doesn't currently take any flags. */
if (flags != 0) {
errno = EINVAL;
return (RET_ERROR);
}
if (F_ISSET(t, B_INMEM | B_RDONLY) || !F_ISSET(t, B_MODIFIED))
return (RET_SUCCESS);
if (F_ISSET(t, B_METADIRTY) && bt_meta(t) == RET_ERROR)
return (RET_ERROR);
if ((status = mpool_sync(t->bt_mp)) == RET_SUCCESS)
F_CLR(t, B_MODIFIED);
return (status);
}
/*
* BT_META -- write the tree meta data to disk.
*
* Parameters:
* t: tree
*
* Returns:
* RET_ERROR, RET_SUCCESS
*/
static int
bt_meta(BTREE *t)
{
BTMETA m;
void *p;
if ((p = mpool_get(t->bt_mp, P_META, 0)) == NULL)
return (RET_ERROR);
/* Fill in metadata. */
m.magic = BTREEMAGIC;
m.version = BTREEVERSION;
m.psize = t->bt_psize;
m.free = t->bt_free;
m.nrecs = t->bt_nrecs;
m.flags = F_ISSET(t, SAVEMETA);
memmove(p, &m, sizeof(BTMETA));
mpool_put(t->bt_mp, p, MPOOL_DIRTY);
return (RET_SUCCESS);
}

View File

@ -0,0 +1,214 @@
#include "port_before.h"
/*-
* Copyright (c) 1990, 1993, 1994
* The Regents of the University of California. All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* Mike Olson.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#if defined(LIBC_SCCS) && !defined(lint)
static char sccsid[] = "@(#)bt_conv.c 8.5 (Berkeley) 8/17/94";
#endif /* LIBC_SCCS and not lint */
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <stdio.h>
#include <db.h>
#include "btree.h"
static void mswap(PAGE *);
/*
* __BT_BPGIN, __BT_BPGOUT --
* Convert host-specific number layout to/from the host-independent
* format stored on disk.
*
* Parameters:
* t: tree
* pg: page number
* h: page to convert
*/
void
__bt_pgin(void *t, pgno_t pg, void *pp)
{
PAGE *h;
indx_t i, top;
u_char flags;
char *p;
if (!F_ISSET(((BTREE *)t), B_NEEDSWAP))
return;
if (pg == P_META) {
mswap(pp);
return;
}
h = pp;
M_32_SWAP(h->pgno);
M_32_SWAP(h->prevpg);
M_32_SWAP(h->nextpg);
M_32_SWAP(h->flags);
M_16_SWAP(h->lower);
M_16_SWAP(h->upper);
top = NEXTINDEX(h);
if ((h->flags & P_TYPE) == P_BINTERNAL)
for (i = 0; i < top; i++) {
M_16_SWAP(h->linp[i]);
p = (char *)GETBINTERNAL(h, i);
P_32_SWAP(p);
p += sizeof(u_int32_t);
P_32_SWAP(p);
p += sizeof(pgno_t);
if (*(u_char *)p & P_BIGKEY) {
p += sizeof(u_char);
P_32_SWAP(p);
p += sizeof(pgno_t);
P_32_SWAP(p);
}
}
else if ((h->flags & P_TYPE) == P_BLEAF)
for (i = 0; i < top; i++) {
M_16_SWAP(h->linp[i]);
p = (char *)GETBLEAF(h, i);
P_32_SWAP(p);
p += sizeof(u_int32_t);
P_32_SWAP(p);
p += sizeof(u_int32_t);
flags = *(u_char *)p;
if (flags & (P_BIGKEY | P_BIGDATA)) {
p += sizeof(u_char);
if (flags & P_BIGKEY) {
P_32_SWAP(p);
p += sizeof(pgno_t);
P_32_SWAP(p);
}
if (flags & P_BIGDATA) {
p += sizeof(u_int32_t);
P_32_SWAP(p);
p += sizeof(pgno_t);
P_32_SWAP(p);
}
}
}
}
void
__bt_pgout(void *t, pgno_t pg, void *pp)
{
PAGE *h;
indx_t i, top;
u_char flags;
char *p;
if (!F_ISSET(((BTREE *)t), B_NEEDSWAP))
return;
if (pg == P_META) {
mswap(pp);
return;
}
h = pp;
top = NEXTINDEX(h);
if ((h->flags & P_TYPE) == P_BINTERNAL)
for (i = 0; i < top; i++) {
p = (char *)GETBINTERNAL(h, i);
P_32_SWAP(p);
p += sizeof(u_int32_t);
P_32_SWAP(p);
p += sizeof(pgno_t);
if (*(u_char *)p & P_BIGKEY) {
p += sizeof(u_char);
P_32_SWAP(p);
p += sizeof(pgno_t);
P_32_SWAP(p);
}
M_16_SWAP(h->linp[i]);
}
else if ((h->flags & P_TYPE) == P_BLEAF)
for (i = 0; i < top; i++) {
p = (char *)GETBLEAF(h, i);
P_32_SWAP(p);
p += sizeof(u_int32_t);
P_32_SWAP(p);
p += sizeof(u_int32_t);
flags = *(u_char *)p;
if (flags & (P_BIGKEY | P_BIGDATA)) {
p += sizeof(u_char);
if (flags & P_BIGKEY) {
P_32_SWAP(p);
p += sizeof(pgno_t);
P_32_SWAP(p);
}
if (flags & P_BIGDATA) {
p += sizeof(u_int32_t);
P_32_SWAP(p);
p += sizeof(pgno_t);
P_32_SWAP(p);
}
}
M_16_SWAP(h->linp[i]);
}
M_32_SWAP(h->pgno);
M_32_SWAP(h->prevpg);
M_32_SWAP(h->nextpg);
M_32_SWAP(h->flags);
M_16_SWAP(h->lower);
M_16_SWAP(h->upper);
}
/*
* MSWAP -- Actually swap the bytes on the meta page.
*
* Parameters:
* p: page to convert
*/
static void
mswap(PAGE *pg)
{
char *p;
p = (char *)pg;
P_32_SWAP(p); /* magic */
p += sizeof(u_int32_t);
P_32_SWAP(p); /* version */
p += sizeof(u_int32_t);
P_32_SWAP(p); /* psize */
p += sizeof(u_int32_t);
P_32_SWAP(p); /* free */
p += sizeof(u_int32_t);
P_32_SWAP(p); /* nrecs */
p += sizeof(u_int32_t);
P_32_SWAP(p); /* flags */
p += sizeof(u_int32_t);
}

View File

@ -0,0 +1,318 @@
#include "port_before.h"
/*-
* Copyright (c) 1990, 1993, 1994
* The Regents of the University of California. All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* Mike Olson.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#if defined(LIBC_SCCS) && !defined(lint)
static char sccsid[] = "@(#)bt_debug.c 8.5 (Berkeley) 8/17/94";
#endif /* LIBC_SCCS and not lint */
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <db.h>
#include "btree.h"
#ifdef DEBUG
/*
* BT_DUMP -- Dump the tree
*
* Parameters:
* dbp: pointer to the DB
*/
void
__bt_dump(DB *dbp)
{
BTREE *t;
PAGE *h;
pgno_t i;
char *sep;
t = dbp->internal;
(void)fprintf(stderr, "%s: pgsz %u",
F_ISSET(t, B_INMEM) ? "memory" : "disk", t->bt_psize);
if (F_ISSET(t, R_RECNO))
(void)fprintf(stderr, " keys %u", t->bt_nrecs);
#undef X
#define X(flag, name) \
if (F_ISSET(t, flag)) { \
(void)fprintf(stderr, "%s%s", sep, name); \
sep = ", "; \
}
if (t->flags != 0) {
sep = " flags (";
X(R_FIXLEN, "FIXLEN");
X(B_INMEM, "INMEM");
X(B_NODUPS, "NODUPS");
X(B_RDONLY, "RDONLY");
X(R_RECNO, "RECNO");
X(B_METADIRTY,"METADIRTY");
(void)fprintf(stderr, ")\n");
}
#undef X
for (i = P_ROOT;
(h = mpool_get(t->bt_mp, i, MPOOL_IGNOREPIN)) != NULL; ++i)
__bt_dpage(h);
}
/*
* BT_DMPAGE -- Dump the meta page
*
* Parameters:
* h: pointer to the PAGE
*/
void
__bt_dmpage(PAGE *h)
{
BTMETA *m;
char *sep;
m = (BTMETA *)h;
(void)fprintf(stderr, "magic %x\n", m->magic);
(void)fprintf(stderr, "version %u\n", m->version);
(void)fprintf(stderr, "psize %u\n", m->psize);
(void)fprintf(stderr, "free %u\n", m->free);
(void)fprintf(stderr, "nrecs %u\n", m->nrecs);
(void)fprintf(stderr, "flags %u", m->flags);
#undef X
#define X(flag, name) \
if (m->flags & flag) { \
(void)fprintf(stderr, "%s%s", sep, name); \
sep = ", "; \
}
if (m->flags) {
sep = " (";
X(B_NODUPS, "NODUPS");
X(R_RECNO, "RECNO");
(void)fprintf(stderr, ")");
}
}
/*
* BT_DNPAGE -- Dump the page
*
* Parameters:
* n: page number to dump.
*/
void
__bt_dnpage(DB *dbp, pgno_t pgno)
{
BTREE *t;
PAGE *h;
t = dbp->internal;
if ((h = mpool_get(t->bt_mp, pgno, MPOOL_IGNOREPIN)) != NULL)
__bt_dpage(h);
}
/*
* BT_DPAGE -- Dump the page
*
* Parameters:
* h: pointer to the PAGE
*/
void
__bt_dpage(PAGE *h)
{
BINTERNAL *bi;
BLEAF *bl;
RINTERNAL *ri;
RLEAF *rl;
indx_t cur, top;
char *sep;
(void)fprintf(stderr, " page %u: (", h->pgno);
#undef X
#define X(flag, name) \
if (h->flags & flag) { \
(void)fprintf(stderr, "%s%s", sep, name); \
sep = ", "; \
}
sep = "";
X(P_BINTERNAL, "BINTERNAL") /* types */
X(P_BLEAF, "BLEAF")
X(P_RINTERNAL, "RINTERNAL") /* types */
X(P_RLEAF, "RLEAF")
X(P_OVERFLOW, "OVERFLOW")
X(P_PRESERVE, "PRESERVE");
(void)fprintf(stderr, ")\n");
#undef X
(void)fprintf(stderr, "\tprev %2u next %2u", h->prevpg, h->nextpg);
if (h->flags & P_OVERFLOW)
return;
top = NEXTINDEX(h);
(void)fprintf(stderr, " lower %3d upper %3d nextind %d\n",
h->lower, h->upper, top);
for (cur = 0; cur < top; cur++) {
(void)fprintf(stderr, "\t[%03d] %4d ", cur, h->linp[cur]);
switch (h->flags & P_TYPE) {
case P_BINTERNAL:
bi = GETBINTERNAL(h, cur);
(void)fprintf(stderr,
"size %03d pgno %03d", bi->ksize, bi->pgno);
if (bi->flags & P_BIGKEY)
(void)fprintf(stderr, " (indirect)");
else if (bi->ksize)
(void)fprintf(stderr,
" {%.*s}", (int)bi->ksize, bi->bytes);
break;
case P_RINTERNAL:
ri = GETRINTERNAL(h, cur);
(void)fprintf(stderr, "entries %03d pgno %03d",
ri->nrecs, ri->pgno);
break;
case P_BLEAF:
bl = GETBLEAF(h, cur);
if (bl->flags & P_BIGKEY)
(void)fprintf(stderr,
"big key page %u size %u/",
*(pgno_t *)bl->bytes,
*(u_int32_t *)(bl->bytes + sizeof(pgno_t)));
else if (bl->ksize)
(void)fprintf(stderr, "%.*s/",
bl->ksize, bl->bytes);
if (bl->flags & P_BIGDATA)
(void)fprintf(stderr,
"big data page %u size %u",
*(pgno_t *)(bl->bytes + bl->ksize),
*(u_int32_t *)(bl->bytes + bl->ksize +
sizeof(pgno_t)));
else if (bl->dsize)
(void)fprintf(stderr, "%.*s",
(int)bl->dsize, bl->bytes + bl->ksize);
break;
case P_RLEAF:
rl = GETRLEAF(h, cur);
if (rl->flags & P_BIGDATA)
(void)fprintf(stderr,
"big data page %u size %u",
*(pgno_t *)rl->bytes,
*(u_int32_t *)(rl->bytes + sizeof(pgno_t)));
else if (rl->dsize)
(void)fprintf(stderr,
"%.*s", (int)rl->dsize, rl->bytes);
break;
}
(void)fprintf(stderr, "\n");
}
}
#endif
#ifdef STATISTICS
/*
* BT_STAT -- Gather/print the tree statistics
*
* Parameters:
* dbp: pointer to the DB
*/
void
__bt_stat(DB *dbp)
{
extern u_long bt_cache_hit, bt_cache_miss, bt_pfxsaved, bt_rootsplit;
extern u_long bt_sortsplit, bt_split;
BTREE *t;
PAGE *h;
pgno_t i, pcont, pinternal, pleaf;
u_long ifree, lfree, nkeys;
int levels;
t = dbp->internal;
pcont = pinternal = pleaf = 0;
nkeys = ifree = lfree = 0;
for (i = P_ROOT;
(h = mpool_get(t->bt_mp, i, MPOOL_IGNOREPIN)) != NULL; ++i)
switch (h->flags & P_TYPE) {
case P_BINTERNAL:
case P_RINTERNAL:
++pinternal;
ifree += h->upper - h->lower;
break;
case P_BLEAF:
case P_RLEAF:
++pleaf;
lfree += h->upper - h->lower;
nkeys += NEXTINDEX(h);
break;
case P_OVERFLOW:
++pcont;
break;
}
/* Count the levels of the tree. */
for (i = P_ROOT, levels = 0 ;; ++levels) {
h = mpool_get(t->bt_mp, i, MPOOL_IGNOREPIN);
if (h->flags & (P_BLEAF|P_RLEAF)) {
if (levels == 0)
levels = 1;
break;
}
i = F_ISSET(t, R_RECNO) ?
GETRINTERNAL(h, 0)->pgno :
GETBINTERNAL(h, 0)->pgno;
}
(void)fprintf(stderr, "%d level%s with %lu keys",
levels, levels == 1 ? "" : "s", nkeys);
if (F_ISSET(t, R_RECNO))
(void)fprintf(stderr, " (%u header count)", t->bt_nrecs);
(void)fprintf(stderr,
"\n%u pages (leaf %u, internal %u, overflow %u)\n",
pinternal + pleaf + pcont, pleaf, pinternal, pcont);
(void)fprintf(stderr, "%lu cache hits, %lu cache misses\n",
bt_cache_hit, bt_cache_miss);
(void)fprintf(stderr, "%lu splits (%lu root splits, %lu sort splits)\n",
bt_split, bt_rootsplit, bt_sortsplit);
pleaf *= t->bt_psize - BTDATAOFF;
if (pleaf)
(void)fprintf(stderr,
"%.0f%% leaf fill (%lu bytes used, %lu bytes free)\n",
((double)(pleaf - lfree) / pleaf) * 100,
pleaf - lfree, lfree);
pinternal *= t->bt_psize - BTDATAOFF;
if (pinternal)
(void)fprintf(stderr,
"%.0f%% internal fill (%lu bytes used, %lu bytes free\n",
((double)(pinternal - ifree) / pinternal) * 100,
pinternal - ifree, ifree);
if (bt_pfxsaved)
(void)fprintf(stderr, "prefix checking removed %lu bytes.\n",
bt_pfxsaved);
}
#endif

View File

@ -0,0 +1,637 @@
#include "port_before.h"
/*-
* Copyright (c) 1990, 1993, 1994
* The Regents of the University of California. All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* Mike Olson.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#if defined(LIBC_SCCS) && !defined(lint)
static char sccsid[] = "@(#)bt_delete.c 8.13 (Berkeley) 7/28/94";
#endif /* LIBC_SCCS and not lint */
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/types.h>
#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <db.h>
#include "btree.h"
static int __bt_bdelete(BTREE *, const DBT *);
static int __bt_curdel(BTREE *, const DBT *, PAGE *, u_int);
static int __bt_pdelete(BTREE *, PAGE *);
static int __bt_relink(BTREE *, PAGE *);
static int __bt_stkacq(BTREE *, PAGE **, CURSOR *);
/*
* __bt_delete
* Delete the item(s) referenced by a key.
*
* Return RET_SPECIAL if the key is not found.
*/
int
__bt_delete(const DB *dbp, const DBT *key, u_int flags)
{
BTREE *t;
CURSOR *c;
PAGE *h;
int status;
t = dbp->internal;
/* Toss any page pinned across calls. */
if (t->bt_pinned != NULL) {
mpool_put(t->bt_mp, t->bt_pinned, 0);
t->bt_pinned = NULL;
}
/* Check for change to a read-only tree. */
if (F_ISSET(t, B_RDONLY)) {
errno = EPERM;
return (RET_ERROR);
}
switch (flags) {
case 0:
status = __bt_bdelete(t, key);
break;
case R_CURSOR:
/*
* If flags is R_CURSOR, delete the cursor. Must already
* have started a scan and not have already deleted it.
*/
c = &t->bt_cursor;
if (F_ISSET(c, CURS_INIT)) {
if (F_ISSET(c, CURS_ACQUIRE | CURS_AFTER | CURS_BEFORE))
return (RET_SPECIAL);
if ((h = mpool_get(t->bt_mp, c->pg.pgno, 0)) == NULL)
return (RET_ERROR);
/*
* If the page is about to be emptied, we'll need to
* delete it, which means we have to acquire a stack.
*/
if (NEXTINDEX(h) == 1)
if (__bt_stkacq(t, &h, &t->bt_cursor))
return (RET_ERROR);
status = __bt_dleaf(t, NULL, h, c->pg.index);
if (NEXTINDEX(h) == 0 && status == RET_SUCCESS) {
if (__bt_pdelete(t, h))
return (RET_ERROR);
} else
mpool_put(t->bt_mp,
h, status == RET_SUCCESS ? MPOOL_DIRTY : 0);
break;
}
/* FALLTHROUGH */
default:
errno = EINVAL;
return (RET_ERROR);
}
if (status == RET_SUCCESS)
F_SET(t, B_MODIFIED);
return (status);
}
/*
* __bt_stkacq --
* Acquire a stack so we can delete a cursor entry.
*
* Parameters:
* t: tree
* hp: pointer to current, pinned PAGE pointer
* c: pointer to the cursor
*
* Returns:
* 0 on success, 1 on failure
*/
static int
__bt_stkacq(BTREE *t, PAGE **hp, CURSOR *c)
{
BINTERNAL *bi;
EPG *e;
EPGNO *parent;
PAGE *h;
indx_t idx;
pgno_t pgno;
recno_t nextpg, prevpg;
int exact, level;
/*
* Find the first occurrence of the key in the tree. Toss the
* currently locked page so we don't hit an already-locked page.
*/
h = *hp;
mpool_put(t->bt_mp, h, 0);
if ((e = __bt_search(t, &c->key, &exact)) == NULL)
return (1);
h = e->page;
/* See if we got it in one shot. */
if (h->pgno == c->pg.pgno)
goto ret;
/*
* Move right, looking for the page. At each move we have to move
* up the stack until we don't have to move to the next page. If
* we have to change pages at an internal level, we have to fix the
* stack back up.
*/
while (h->pgno != c->pg.pgno) {
if ((nextpg = h->nextpg) == P_INVALID)
break;
mpool_put(t->bt_mp, h, 0);
/* Move up the stack. */
for (level = 0; (parent = BT_POP(t)) != NULL; ++level) {
/* Get the parent page. */
if ((h = mpool_get(t->bt_mp, parent->pgno, 0)) == NULL)
return (1);
/* Move to the next index. */
if (parent->index != NEXTINDEX(h) - 1) {
idx = parent->index + 1;
BT_PUSH(t, h->pgno, idx);
break;
}
mpool_put(t->bt_mp, h, 0);
}
/* Restore the stack. */
while (level--) {
/* Push the next level down onto the stack. */
bi = GETBINTERNAL(h, idx);
pgno = bi->pgno;
BT_PUSH(t, pgno, 0);
/* Lose the currently pinned page. */
mpool_put(t->bt_mp, h, 0);
/* Get the next level down. */
if ((h = mpool_get(t->bt_mp, pgno, 0)) == NULL)
return (1);
idx = 0;
}
mpool_put(t->bt_mp, h, 0);
if ((h = mpool_get(t->bt_mp, nextpg, 0)) == NULL)
return (1);
}
if (h->pgno == c->pg.pgno)
goto ret;
/* Reacquire the original stack. */
mpool_put(t->bt_mp, h, 0);
if ((e = __bt_search(t, &c->key, &exact)) == NULL)
return (1);
h = e->page;
/*
* Move left, looking for the page. At each move we have to move
* up the stack until we don't have to change pages to move to the
* next page. If we have to change pages at an internal level, we
* have to fix the stack back up.
*/
while (h->pgno != c->pg.pgno) {
if ((prevpg = h->prevpg) == P_INVALID)
break;
mpool_put(t->bt_mp, h, 0);
/* Move up the stack. */
for (level = 0; (parent = BT_POP(t)) != NULL; ++level) {
/* Get the parent page. */
if ((h = mpool_get(t->bt_mp, parent->pgno, 0)) == NULL)
return (1);
/* Move to the next index. */
if (parent->index != 0) {
idx = parent->index - 1;
BT_PUSH(t, h->pgno, idx);
break;
}
mpool_put(t->bt_mp, h, 0);
}
/* Restore the stack. */
while (level--) {
/* Push the next level down onto the stack. */
bi = GETBINTERNAL(h, idx);
pgno = bi->pgno;
/* Lose the currently pinned page. */
mpool_put(t->bt_mp, h, 0);
/* Get the next level down. */
if ((h = mpool_get(t->bt_mp, pgno, 0)) == NULL)
return (1);
idx = NEXTINDEX(h) - 1;
BT_PUSH(t, pgno, idx);
}
mpool_put(t->bt_mp, h, 0);
if ((h = mpool_get(t->bt_mp, prevpg, 0)) == NULL)
return (1);
}
ret: mpool_put(t->bt_mp, h, 0);
return ((*hp = mpool_get(t->bt_mp, c->pg.pgno, 0)) == NULL);
}
/*
* __bt_bdelete --
* Delete all key/data pairs matching the specified key.
*
* Parameters:
* t: tree
* key: key to delete
*
* Returns:
* RET_ERROR, RET_SUCCESS and RET_SPECIAL if the key not found.
*/
static int
__bt_bdelete(BTREE *t, const DBT *key)
{
EPG *e;
PAGE *h;
int deleted, exact, redo;
deleted = 0;
/* Find any matching record; __bt_search pins the page. */
loop: if ((e = __bt_search(t, key, &exact)) == NULL)
return (deleted ? RET_SUCCESS : RET_ERROR);
if (!exact) {
mpool_put(t->bt_mp, e->page, 0);
return (deleted ? RET_SUCCESS : RET_SPECIAL);
}
/*
* Delete forward, then delete backward, from the found key. If
* there are duplicates and we reach either side of the page, do
* the key search again, so that we get them all.
*/
redo = 0;
h = e->page;
do {
if (__bt_dleaf(t, key, h, e->index)) {
mpool_put(t->bt_mp, h, 0);
return (RET_ERROR);
}
if (F_ISSET(t, B_NODUPS)) {
if (NEXTINDEX(h) == 0) {
if (__bt_pdelete(t, h))
return (RET_ERROR);
} else
mpool_put(t->bt_mp, h, MPOOL_DIRTY);
return (RET_SUCCESS);
}
deleted = 1;
} while (e->index < NEXTINDEX(h) && __bt_cmp(t, key, e) == 0);
/* Check for right-hand edge of the page. */
if (e->index == NEXTINDEX(h))
redo = 1;
/* Delete from the key to the beginning of the page. */
while (e->index-- > 0) {
if (__bt_cmp(t, key, e) != 0)
break;
if (__bt_dleaf(t, key, h, e->index) == RET_ERROR) {
mpool_put(t->bt_mp, h, 0);
return (RET_ERROR);
}
if (e->index == 0)
redo = 1;
}
/* Check for an empty page. */
if (NEXTINDEX(h) == 0) {
if (__bt_pdelete(t, h))
return (RET_ERROR);
goto loop;
}
/* Put the page. */
mpool_put(t->bt_mp, h, MPOOL_DIRTY);
if (redo)
goto loop;
return (RET_SUCCESS);
}
/*
* __bt_pdelete --
* Delete a single page from the tree.
*
* Parameters:
* t: tree
* h: leaf page
*
* Returns:
* RET_SUCCESS, RET_ERROR.
*
* Side-effects:
* mpool_put's the page
*/
static int
__bt_pdelete(BTREE *t, PAGE *h)
{
BINTERNAL *bi;
PAGE *pg;
EPGNO *parent;
indx_t cnt, idx, *ip, offset;
u_int32_t nksize;
char *from;
/*
* Walk the parent page stack -- a LIFO stack of the pages that were
* traversed when we searched for the page where the delete occurred.
* Each stack entry is a page number and a page index offset. The
* offset is for the page traversed on the search. We've just deleted
* a page, so we have to delete the key from the parent page.
*
* If the delete from the parent page makes it empty, this process may
* continue all the way up the tree. We stop if we reach the root page
* (which is never deleted, it's just not worth the effort) or if the
* delete does not empty the page.
*/
while ((parent = BT_POP(t)) != NULL) {
/* Get the parent page. */
if ((pg = mpool_get(t->bt_mp, parent->pgno, 0)) == NULL)
return (RET_ERROR);
idx = parent->index;
bi = GETBINTERNAL(pg, idx);
/* Free any overflow pages. */
if (bi->flags & P_BIGKEY &&
__ovfl_delete(t, bi->bytes) == RET_ERROR) {
mpool_put(t->bt_mp, pg, 0);
return (RET_ERROR);
}
/*
* Free the parent if it has only the one key and it's not the
* root page. If it's the rootpage, turn it back into an empty
* leaf page.
*/
if (NEXTINDEX(pg) == 1) {
if (pg->pgno == P_ROOT) {
pg->lower = BTDATAOFF;
pg->upper = t->bt_psize;
pg->flags = P_BLEAF;
} else {
if (__bt_relink(t, pg) || __bt_free(t, pg))
return (RET_ERROR);
continue;
}
} else {
/* Pack remaining key items at the end of the page. */
nksize = NBINTERNAL(bi->ksize);
from = (char *)pg + pg->upper;
memmove(from + nksize, from, (char *)bi - from);
pg->upper += nksize;
/* Adjust indices' offsets, shift the indices down. */
offset = pg->linp[idx];
for (cnt = idx, ip = &pg->linp[0]; cnt--; ++ip)
if (ip[0] < offset)
ip[0] += nksize;
for (cnt = NEXTINDEX(pg) - idx; --cnt; ++ip)
ip[0] = ip[1] < offset ? ip[1] + nksize : ip[1];
pg->lower -= sizeof(indx_t);
}
mpool_put(t->bt_mp, pg, MPOOL_DIRTY);
break;
}
/* Free the leaf page, as long as it wasn't the root. */
if (h->pgno == P_ROOT) {
mpool_put(t->bt_mp, h, MPOOL_DIRTY);
return (RET_SUCCESS);
}
return (__bt_relink(t, h) || __bt_free(t, h));
}
/*
* __bt_dleaf --
* Delete a single record from a leaf page.
*
* Parameters:
* t: tree
* key: referenced key
* h: page
* idx: index on page to delete
*
* Returns:
* RET_SUCCESS, RET_ERROR.
*/
int
__bt_dleaf(BTREE *t, const DBT *key, PAGE *h, u_int idx)
{
BLEAF *bl;
indx_t cnt, *ip, offset;
u_int32_t nbytes;
void *to;
char *from;
/* If this record is referenced by the cursor, delete the cursor. */
if (F_ISSET(&t->bt_cursor, CURS_INIT) &&
!F_ISSET(&t->bt_cursor, CURS_ACQUIRE) &&
t->bt_cursor.pg.pgno == h->pgno && t->bt_cursor.pg.index == idx &&
__bt_curdel(t, key, h, idx))
return (RET_ERROR);
/* If the entry uses overflow pages, make them available for reuse. */
to = bl = GETBLEAF(h, idx);
if (bl->flags & P_BIGKEY && __ovfl_delete(t, bl->bytes) == RET_ERROR)
return (RET_ERROR);
if (bl->flags & P_BIGDATA &&
__ovfl_delete(t, bl->bytes + bl->ksize) == RET_ERROR)
return (RET_ERROR);
/* Pack the remaining key/data items at the end of the page. */
nbytes = NBLEAF(bl);
from = (char *)h + h->upper;
memmove(from + nbytes, from, (char *)to - from);
h->upper += nbytes;
/* Adjust the indices' offsets, shift the indices down. */
offset = h->linp[idx];
for (cnt = idx, ip = &h->linp[0]; cnt--; ++ip)
if (ip[0] < offset)
ip[0] += nbytes;
for (cnt = NEXTINDEX(h) - idx; --cnt; ++ip)
ip[0] = ip[1] < offset ? ip[1] + nbytes : ip[1];
h->lower -= sizeof(indx_t);
/* If the cursor is on this page, adjust it as necessary. */
if (F_ISSET(&t->bt_cursor, CURS_INIT) &&
!F_ISSET(&t->bt_cursor, CURS_ACQUIRE) &&
t->bt_cursor.pg.pgno == h->pgno && t->bt_cursor.pg.index > idx)
--t->bt_cursor.pg.index;
return (RET_SUCCESS);
}
/*
* __bt_curdel --
* Delete the cursor.
*
* Parameters:
* t: tree
* key: referenced key (or NULL)
* h: page
* idx: index on page to delete
*
* Returns:
* RET_SUCCESS, RET_ERROR.
*/
static int
__bt_curdel(BTREE *t, const DBT *key, PAGE *h, u_int idx)
{
CURSOR *c;
EPG e;
PAGE *pg;
int curcopy, status;
/*
* If there are duplicates, move forward or backward to one.
* Otherwise, copy the key into the cursor area.
*/
c = &t->bt_cursor;
F_CLR(c, CURS_AFTER | CURS_BEFORE | CURS_ACQUIRE);
curcopy = 0;
if (!F_ISSET(t, B_NODUPS)) {
/*
* We're going to have to do comparisons. If we weren't
* provided a copy of the key, i.e. the user is deleting
* the current cursor position, get one.
*/
if (key == NULL) {
e.page = h;
e.index = idx;
if ((status = __bt_ret(t, &e,
&c->key, &c->key, NULL, NULL, 1)) != RET_SUCCESS)
return (status);
curcopy = 1;
key = &c->key;
}
/* Check previous key, if not at the beginning of the page. */
if (idx > 0) {
e.page = h;
e.index = idx - 1;
if (__bt_cmp(t, key, &e) == 0) {
F_SET(c, CURS_BEFORE);
goto dup2;
}
}
/* Check next key, if not at the end of the page. */
if (idx < NEXTINDEX(h) - 1) {
e.page = h;
e.index = idx + 1;
if (__bt_cmp(t, key, &e) == 0) {
F_SET(c, CURS_AFTER);
goto dup2;
}
}
/* Check previous key if at the beginning of the page. */
if (idx == 0 && h->prevpg != P_INVALID) {
if ((pg = mpool_get(t->bt_mp, h->prevpg, 0)) == NULL)
return (RET_ERROR);
e.page = pg;
e.index = NEXTINDEX(pg) - 1;
if (__bt_cmp(t, key, &e) == 0) {
F_SET(c, CURS_BEFORE);
goto dup1;
}
mpool_put(t->bt_mp, pg, 0);
}
/* Check next key if at the end of the page. */
if (idx == NEXTINDEX(h) - 1 && h->nextpg != P_INVALID) {
if ((pg = mpool_get(t->bt_mp, h->nextpg, 0)) == NULL)
return (RET_ERROR);
e.page = pg;
e.index = 0;
if (__bt_cmp(t, key, &e) == 0) {
F_SET(c, CURS_AFTER);
dup1: mpool_put(t->bt_mp, pg, 0);
dup2: c->pg.pgno = e.page->pgno;
c->pg.index = e.index;
return (RET_SUCCESS);
}
mpool_put(t->bt_mp, pg, 0);
}
}
e.page = h;
e.index = idx;
if (curcopy || (status =
__bt_ret(t, &e, &c->key, &c->key, NULL, NULL, 1)) == RET_SUCCESS) {
F_SET(c, CURS_ACQUIRE);
return (RET_SUCCESS);
}
return (status);
}
/*
* __bt_relink --
* Link around a deleted page.
*
* Parameters:
* t: tree
* h: page to be deleted
*/
static int
__bt_relink(BTREE *t, PAGE *h)
{
PAGE *pg;
if (h->nextpg != P_INVALID) {
if ((pg = mpool_get(t->bt_mp, h->nextpg, 0)) == NULL)
return (RET_ERROR);
pg->prevpg = h->prevpg;
mpool_put(t->bt_mp, pg, MPOOL_DIRTY);
}
if (h->prevpg != P_INVALID) {
if ((pg = mpool_get(t->bt_mp, h->prevpg, 0)) == NULL)
return (RET_ERROR);
pg->nextpg = h->nextpg;
mpool_put(t->bt_mp, pg, MPOOL_DIRTY);
}
return (0);
}

View File

@ -0,0 +1,101 @@
#include "port_before.h"
/*-
* Copyright (c) 1990, 1993, 1994
* The Regents of the University of California. All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* Mike Olson.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#if defined(LIBC_SCCS) && !defined(lint)
static char sccsid[] = "@(#)bt_get.c 8.6 (Berkeley) 7/20/94";
#endif /* LIBC_SCCS and not lint */
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/types.h>
#include <errno.h>
#include <stddef.h>
#include <stdio.h>
#include <db.h>
#include "btree.h"
/*
* __BT_GET -- Get a record from the btree.
*
* Parameters:
* dbp: pointer to access method
* key: key to find
* data: data to return
* flag: currently unused
*
* Returns:
* RET_ERROR, RET_SUCCESS and RET_SPECIAL if the key not found.
*/
int
__bt_get(const DB *dbp, const DBT *key, DBT *data, u_int flags)
{
BTREE *t;
EPG *e;
int exact, status;
t = dbp->internal;
/* Toss any page pinned across calls. */
if (t->bt_pinned != NULL) {
mpool_put(t->bt_mp, t->bt_pinned, 0);
t->bt_pinned = NULL;
}
/* Get currently doesn't take any flags. */
if (flags) {
errno = EINVAL;
return (RET_ERROR);
}
if ((e = __bt_search(t, key, &exact)) == NULL)
return (RET_ERROR);
if (!exact) {
mpool_put(t->bt_mp, e->page, 0);
return (RET_SPECIAL);
}
status = __bt_ret(t, e, NULL, NULL, data, &t->bt_rdata, 0);
/*
* If the user is doing concurrent access, we copied the
* key/data, toss the page.
*/
if (F_ISSET(t, B_DB_LOCK))
mpool_put(t->bt_mp, e->page, 0);
else
t->bt_pinned = e->page;
return (status);
}

View File

@ -0,0 +1,453 @@
#include "port_before.h"
/*-
* Copyright (c) 1990, 1993, 1994
* The Regents of the University of California. All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* Mike Olson.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#if defined(LIBC_SCCS) && !defined(lint)
static char sccsid[] = "@(#)bt_open.c 8.10 (Berkeley) 8/17/94";
#endif /* LIBC_SCCS and not lint */
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
/*
* Implementation of btree access method for 4.4BSD.
*
* The design here was originally based on that of the btree access method
* used in the Postgres database system at UC Berkeley. This implementation
* is wholly independent of the Postgres code.
*/
#include "namespace.h"
#include <sys/param.h>
#include <sys/stat.h>
#include <errno.h>
#include <fcntl.h>
#include <limits.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include "un-namespace.h"
#include <db.h>
#include "btree.h"
#ifdef DEBUG
#undef MINPSIZE
#define MINPSIZE 128
#endif
static int byteorder(void);
static int nroot(BTREE *);
static int tmp(void);
/*
* __BT_OPEN -- Open a btree.
*
* Creates and fills a DB struct, and calls the routine that actually
* opens the btree.
*
* Parameters:
* fname: filename (NULL for in-memory trees)
* flags: open flag bits
* mode: open permission bits
* b: BTREEINFO pointer
*
* Returns:
* NULL on failure, pointer to DB on success.
*
*/
DB *
__bt_open(const char *fname, int flags, int mode, const BTREEINFO *openinfo, int dflags)
{
struct stat sb;
BTMETA m;
BTREE *t;
BTREEINFO b;
DB *dbp;
pgno_t ncache;
ssize_t nr;
int machine_lorder, saved_errno;
t = NULL;
/*
* Intention is to make sure all of the user's selections are okay
* here and then use them without checking. Can't be complete, since
* we don't know the right page size, lorder or flags until the backing
* file is opened. Also, the file's page size can cause the cachesize
* to change.
*/
machine_lorder = byteorder();
if (openinfo) {
b = *openinfo;
/* Flags: R_DUP. */
if (b.flags & ~(R_DUP))
goto einval;
/*
* Page size must be indx_t aligned and >= MINPSIZE. Default
* page size is set farther on, based on the underlying file
* transfer size.
*/
if (b.psize &&
(b.psize < MINPSIZE || b.psize > MAX_PAGE_OFFSET + 1 ||
b.psize & (sizeof(indx_t) - 1) ))
goto einval;
/* Minimum number of keys per page; absolute minimum is 2. */
if (b.minkeypage) {
if (b.minkeypage < 2)
goto einval;
} else
b.minkeypage = DEFMINKEYPAGE;
/* If no comparison, use default comparison and prefix. */
if (b.compare == NULL) {
b.compare = __bt_defcmp;
if (b.prefix == NULL)
b.prefix = __bt_defpfx;
}
if (b.lorder == 0)
b.lorder = machine_lorder;
} else {
b.compare = __bt_defcmp;
b.cachesize = 0;
b.flags = 0;
b.lorder = machine_lorder;
b.minkeypage = DEFMINKEYPAGE;
b.prefix = __bt_defpfx;
b.psize = 0;
}
/* Check for the ubiquitous PDP-11. */
if (b.lorder != BIG_ENDIAN && b.lorder != LITTLE_ENDIAN)
goto einval;
/* Allocate and initialize DB and BTREE structures. */
if ((t = (BTREE *)calloc(1, sizeof(BTREE))) == NULL)
goto err;
t->bt_fd = -1; /* Don't close unopened fd on error. */
t->bt_lorder = b.lorder;
t->bt_order = NOT;
t->bt_cmp = b.compare;
t->bt_pfx = b.prefix;
t->bt_rfd = -1;
if ((t->bt_dbp = dbp = (DB *)calloc(1, sizeof(DB))) == NULL)
goto err;
if (t->bt_lorder != machine_lorder)
F_SET(t, B_NEEDSWAP);
dbp->type = DB_BTREE;
dbp->internal = t;
dbp->close = __bt_close;
dbp->del = __bt_delete;
dbp->fd = __bt_fd;
dbp->get = __bt_get;
dbp->put = __bt_put;
dbp->seq = __bt_seq;
dbp->sync = __bt_sync;
/*
* If no file name was supplied, this is an in-memory btree and we
* open a backing temporary file. Otherwise, it's a disk-based tree.
*/
if (fname) {
switch (flags & O_ACCMODE) {
case O_RDONLY:
F_SET(t, B_RDONLY);
break;
case O_RDWR:
break;
case O_WRONLY:
default:
goto einval;
}
if ((t->bt_fd = _open(fname, flags, mode)) < 0)
goto err;
} else {
if ((flags & O_ACCMODE) != O_RDWR)
goto einval;
if ((t->bt_fd = tmp()) == -1)
goto err;
F_SET(t, B_INMEM);
}
if (_fcntl(t->bt_fd, F_SETFD, 1) == -1)
goto err;
if (_fstat(t->bt_fd, &sb))
goto err;
if (sb.st_size) {
if ((nr = _read(t->bt_fd, &m, sizeof(BTMETA))) < 0)
goto err;
if (nr != sizeof(BTMETA))
goto eftype;
/*
* Read in the meta-data. This can change the notion of what
* the lorder, page size and flags are, and, when the page size
* changes, the cachesize value can change too. If the user
* specified the wrong byte order for an existing database, we
* don't bother to return an error, we just clear the NEEDSWAP
* bit.
*/
if (m.magic == BTREEMAGIC)
F_CLR(t, B_NEEDSWAP);
else {
F_SET(t, B_NEEDSWAP);
M_32_SWAP(m.magic);
M_32_SWAP(m.version);
M_32_SWAP(m.psize);
M_32_SWAP(m.free);
M_32_SWAP(m.nrecs);
M_32_SWAP(m.flags);
}
if (m.magic != BTREEMAGIC || m.version != BTREEVERSION)
goto eftype;
if (m.psize < MINPSIZE || m.psize > MAX_PAGE_OFFSET + 1 ||
m.psize & (sizeof(indx_t) - 1) )
goto eftype;
if (m.flags & ~SAVEMETA)
goto eftype;
b.psize = m.psize;
F_SET(t, m.flags);
t->bt_free = m.free;
t->bt_nrecs = m.nrecs;
} else {
/*
* Set the page size to the best value for I/O to this file.
* Don't overflow the page offset type.
*/
if (b.psize == 0) {
b.psize = sb.st_blksize;
if (b.psize < MINPSIZE)
b.psize = MINPSIZE;
if (b.psize > MAX_PAGE_OFFSET + 1)
b.psize = MAX_PAGE_OFFSET + 1;
}
/* Set flag if duplicates permitted. */
if (!(b.flags & R_DUP))
F_SET(t, B_NODUPS);
t->bt_free = P_INVALID;
t->bt_nrecs = 0;
F_SET(t, B_METADIRTY);
}
t->bt_psize = b.psize;
/* Set the cache size; must be a multiple of the page size. */
if (b.cachesize && b.cachesize & (b.psize - 1) )
b.cachesize += (~b.cachesize & (b.psize - 1) ) + 1;
if (b.cachesize < b.psize * MINCACHE)
b.cachesize = b.psize * MINCACHE;
/* Calculate number of pages to cache. */
ncache = (b.cachesize + t->bt_psize - 1) / t->bt_psize;
/*
* The btree data structure requires that at least two keys can fit on
* a page, but other than that there's no fixed requirement. The user
* specified a minimum number per page, and we translated that into the
* number of bytes a key/data pair can use before being placed on an
* overflow page. This calculation includes the page header, the size
* of the index referencing the leaf item and the size of the leaf item
* structure. Also, don't let the user specify a minkeypage such that
* a key/data pair won't fit even if both key and data are on overflow
* pages.
*/
t->bt_ovflsize = (t->bt_psize - BTDATAOFF) / b.minkeypage -
(sizeof(indx_t) + NBLEAFDBT(0, 0));
if (t->bt_ovflsize < NBLEAFDBT(NOVFLSIZE, NOVFLSIZE) + sizeof(indx_t))
t->bt_ovflsize =
NBLEAFDBT(NOVFLSIZE, NOVFLSIZE) + sizeof(indx_t);
/* Initialize the buffer pool. */
if ((t->bt_mp =
mpool_open(NULL, t->bt_fd, t->bt_psize, ncache)) == NULL)
goto err;
if (!F_ISSET(t, B_INMEM))
mpool_filter(t->bt_mp, __bt_pgin, __bt_pgout, t);
/* Create a root page if new tree. */
if (nroot(t) == RET_ERROR)
goto err;
/* Global flags. */
if (dflags & DB_LOCK)
F_SET(t, B_DB_LOCK);
if (dflags & DB_SHMEM)
F_SET(t, B_DB_SHMEM);
if (dflags & DB_TXN)
F_SET(t, B_DB_TXN);
return (dbp);
einval: errno = EINVAL;
goto err;
eftype: errno = EFTYPE;
goto err;
err: saved_errno = errno;
if (t) {
if (t->bt_dbp)
free(t->bt_dbp);
if (t->bt_fd != -1)
(void)_close(t->bt_fd);
free(t);
}
errno = saved_errno;
return (NULL);
}
/*
* NROOT -- Create the root of a new tree.
*
* Parameters:
* t: tree
*
* Returns:
* RET_ERROR, RET_SUCCESS
*/
static int
nroot(BTREE *t)
{
PAGE *meta, *root;
pgno_t npg;
if ((root = mpool_get(t->bt_mp, 1, 0)) != NULL) {
if (root->lower == 0 &&
root->pgno == 0 &&
root->linp[0] == 0) {
mpool_delete(t->bt_mp, root);
errno = EINVAL;
} else {
mpool_put(t->bt_mp, root, 0);
return (RET_SUCCESS);
}
}
if (errno != EINVAL) /* It's OK to not exist. */
return (RET_ERROR);
errno = 0;
if ((meta = mpool_new(t->bt_mp, &npg, MPOOL_PAGE_NEXT)) == NULL)
return (RET_ERROR);
if ((root = mpool_new(t->bt_mp, &npg, MPOOL_PAGE_NEXT)) == NULL)
return (RET_ERROR);
if (npg != P_ROOT)
return (RET_ERROR);
root->pgno = npg;
root->prevpg = root->nextpg = P_INVALID;
root->lower = BTDATAOFF;
root->upper = t->bt_psize;
root->flags = P_BLEAF;
memset(meta, 0, t->bt_psize);
mpool_put(t->bt_mp, meta, MPOOL_DIRTY);
mpool_put(t->bt_mp, root, MPOOL_DIRTY);
return (RET_SUCCESS);
}
static int
tmp(void)
{
sigset_t set, oset;
int fd, len;
char *envtmp = NULL;
char path[MAXPATHLEN];
if (issetugid() == 0)
envtmp = getenv("TMPDIR");
len = snprintf(path,
sizeof(path), "%s/bt.XXXXXXXXXX", envtmp ? envtmp : "/tmp");
if (len < 0 || len >= (int)sizeof(path)) {
errno = ENAMETOOLONG;
return(-1);
}
(void)sigfillset(&set);
(void)_sigprocmask(SIG_BLOCK, &set, &oset);
if ((fd = mkstemp(path)) != -1)
(void)unlink(path);
(void)_sigprocmask(SIG_SETMASK, &oset, NULL);
return(fd);
}
static int
byteorder(void)
{
u_int32_t x;
u_char *p;
x = 0x01020304;
p = (u_char *)&x;
switch (*p) {
case 1:
return (BIG_ENDIAN);
case 4:
return (LITTLE_ENDIAN);
default:
return (0);
}
}
int
__bt_fd(const DB *dbp)
{
BTREE *t;
t = dbp->internal;
/* Toss any page pinned across calls. */
if (t->bt_pinned != NULL) {
mpool_put(t->bt_mp, t->bt_pinned, 0);
t->bt_pinned = NULL;
}
/* In-memory database can't have a file descriptor. */
if (F_ISSET(t, B_INMEM)) {
errno = ENOENT;
return (-1);
}
return (t->bt_fd);
}

View File

@ -0,0 +1,218 @@
#include "port_before.h"
/*-
* Copyright (c) 1990, 1993, 1994
* The Regents of the University of California. All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* Mike Olson.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#if defined(LIBC_SCCS) && !defined(lint)
static char sccsid[] = "@(#)bt_overflow.c 8.5 (Berkeley) 7/16/94";
#endif /* LIBC_SCCS and not lint */
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <db.h>
#include "btree.h"
/*
* Big key/data code.
*
* Big key and data entries are stored on linked lists of pages. The initial
* reference is byte string stored with the key or data and is the page number
* and size. The actual record is stored in a chain of pages linked by the
* nextpg field of the PAGE header.
*
* The first page of the chain has a special property. If the record is used
* by an internal page, it cannot be deleted and the P_PRESERVE bit will be set
* in the header.
*
* XXX
* A single DBT is written to each chain, so a lot of space on the last page
* is wasted. This is a fairly major bug for some data sets.
*/
/*
* __OVFL_GET -- Get an overflow key/data item.
*
* Parameters:
* t: tree
* p: pointer to { pgno_t, u_int32_t }
* buf: storage address
* bufsz: storage size
*
* Returns:
* RET_ERROR, RET_SUCCESS
*/
int
__ovfl_get(BTREE *t, void *p, size_t *ssz, void **buf, size_t *bufsz)
{
PAGE *h;
pgno_t pg;
size_t nb, plen;
u_int32_t sz;
memmove(&pg, p, sizeof(pgno_t));
memmove(&sz, (char *)p + sizeof(pgno_t), sizeof(u_int32_t));
*ssz = sz;
#ifdef DEBUG
if (pg == P_INVALID || sz == 0)
abort();
#endif
/* Make the buffer bigger as necessary. */
if (*bufsz < sz) {
*buf = reallocf(*buf, sz);
if (*buf == NULL)
return (RET_ERROR);
*bufsz = sz;
}
/*
* Step through the linked list of pages, copying the data on each one
* into the buffer. Never copy more than the data's length.
*/
plen = t->bt_psize - BTDATAOFF;
for (p = *buf;; p = (char *)p + nb, pg = h->nextpg) {
if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL)
return (RET_ERROR);
nb = MIN(sz, plen);
memmove(p, (char *)h + BTDATAOFF, nb);
mpool_put(t->bt_mp, h, 0);
if ((sz -= nb) == 0)
break;
}
return (RET_SUCCESS);
}
/*
* __OVFL_PUT -- Store an overflow key/data item.
*
* Parameters:
* t: tree
* data: DBT to store
* pgno: storage page number
*
* Returns:
* RET_ERROR, RET_SUCCESS
*/
int
__ovfl_put(BTREE *t, const DBT *dbt, pgno_t *pg)
{
PAGE *h, *last;
void *p;
pgno_t npg;
size_t nb, plen;
u_int32_t sz;
/*
* Allocate pages and copy the key/data record into them. Store the
* number of the first page in the chain.
*/
plen = t->bt_psize - BTDATAOFF;
for (last = NULL, p = dbt->data, sz = dbt->size;;
p = (char *)p + plen, last = h) {
if ((h = __bt_new(t, &npg)) == NULL)
return (RET_ERROR);
h->pgno = npg;
h->nextpg = h->prevpg = P_INVALID;
h->flags = P_OVERFLOW;
h->lower = h->upper = 0;
nb = MIN(sz, plen);
memmove((char *)h + BTDATAOFF, p, nb);
if (last) {
last->nextpg = h->pgno;
mpool_put(t->bt_mp, last, MPOOL_DIRTY);
} else
*pg = h->pgno;
if ((sz -= nb) == 0) {
mpool_put(t->bt_mp, h, MPOOL_DIRTY);
break;
}
}
return (RET_SUCCESS);
}
/*
* __OVFL_DELETE -- Delete an overflow chain.
*
* Parameters:
* t: tree
* p: pointer to { pgno_t, u_int32_t }
*
* Returns:
* RET_ERROR, RET_SUCCESS
*/
int
__ovfl_delete(BTREE *t, void *p)
{
PAGE *h;
pgno_t pg;
size_t plen;
u_int32_t sz;
memmove(&pg, p, sizeof(pgno_t));
memmove(&sz, (char *)p + sizeof(pgno_t), sizeof(u_int32_t));
#ifdef DEBUG
if (pg == P_INVALID || sz == 0)
abort();
#endif
if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL)
return (RET_ERROR);
/* Don't delete chains used by internal pages. */
if (h->flags & P_PRESERVE) {
mpool_put(t->bt_mp, h, 0);
return (RET_SUCCESS);
}
/* Step through the chain, calling the free routine for each page. */
for (plen = t->bt_psize - BTDATAOFF;; sz -= plen) {
pg = h->nextpg;
__bt_free(t, h);
if (sz <= plen)
break;
if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL)
return (RET_ERROR);
}
return (RET_SUCCESS);
}

View File

@ -0,0 +1,96 @@
#include "port_before.h"
/*-
* Copyright (c) 1990, 1993, 1994
* The Regents of the University of California. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#if defined(LIBC_SCCS) && !defined(lint)
static char sccsid[] = "@(#)bt_page.c 8.3 (Berkeley) 7/14/94";
#endif /* LIBC_SCCS and not lint */
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/types.h>
#include <stdio.h>
#include <db.h>
#include "btree.h"
/*
* __bt_free --
* Put a page on the freelist.
*
* Parameters:
* t: tree
* h: page to free
*
* Returns:
* RET_ERROR, RET_SUCCESS
*
* Side-effect:
* mpool_put's the page.
*/
int
__bt_free(BTREE *t, PAGE *h)
{
/* Insert the page at the head of the free list. */
h->prevpg = P_INVALID;
h->nextpg = t->bt_free;
t->bt_free = h->pgno;
F_SET(t, B_METADIRTY);
/* Make sure the page gets written back. */
return (mpool_put(t->bt_mp, h, MPOOL_DIRTY));
}
/*
* __bt_new --
* Get a new page, preferably from the freelist.
*
* Parameters:
* t: tree
* npg: storage for page number.
*
* Returns:
* Pointer to a page, NULL on error.
*/
PAGE *
__bt_new(BTREE *t, pgno_t *npg)
{
PAGE *h;
if (t->bt_free != P_INVALID &&
(h = mpool_get(t->bt_mp, t->bt_free, 0)) != NULL) {
*npg = t->bt_free;
t->bt_free = h->nextpg;
F_SET(t, B_METADIRTY);
return (h);
}
return (mpool_new(t->bt_mp, npg, MPOOL_PAGE_NEXT));
}

View File

@ -0,0 +1,316 @@
#include "port_before.h"
/*-
* Copyright (c) 1990, 1993, 1994
* The Regents of the University of California. All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* Mike Olson.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#if defined(LIBC_SCCS) && !defined(lint)
static char sccsid[] = "@(#)bt_put.c 8.8 (Berkeley) 7/26/94";
#endif /* LIBC_SCCS and not lint */
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/types.h>
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <db.h>
#include "btree.h"
static EPG *bt_fast(BTREE *, const DBT *, const DBT *, int *);
/*
* __BT_PUT -- Add a btree item to the tree.
*
* Parameters:
* dbp: pointer to access method
* key: key
* data: data
* flag: R_NOOVERWRITE
*
* Returns:
* RET_ERROR, RET_SUCCESS and RET_SPECIAL if the key is already in the
* tree and R_NOOVERWRITE specified.
*/
int
__bt_put(const DB *dbp, DBT *key, const DBT *data, u_int flags)
{
BTREE *t;
DBT tkey, tdata;
EPG *e;
PAGE *h;
indx_t idx, nxtindex;
pgno_t pg;
u_int32_t nbytes, tmp;
int dflags, exact, status;
char *dest, db[NOVFLSIZE], kb[NOVFLSIZE];
t = dbp->internal;
/* Toss any page pinned across calls. */
if (t->bt_pinned != NULL) {
mpool_put(t->bt_mp, t->bt_pinned, 0);
t->bt_pinned = NULL;
}
/* Check for change to a read-only tree. */
if (F_ISSET(t, B_RDONLY)) {
errno = EPERM;
return (RET_ERROR);
}
switch (flags) {
case 0:
case R_NOOVERWRITE:
break;
case R_CURSOR:
/*
* If flags is R_CURSOR, put the cursor. Must already
* have started a scan and not have already deleted it.
*/
if (F_ISSET(&t->bt_cursor, CURS_INIT) &&
!F_ISSET(&t->bt_cursor,
CURS_ACQUIRE | CURS_AFTER | CURS_BEFORE))
break;
/* FALLTHROUGH */
default:
errno = EINVAL;
return (RET_ERROR);
}
/*
* If the key/data pair won't fit on a page, store it on overflow
* pages. Only put the key on the overflow page if the pair are
* still too big after moving the data to an overflow page.
*
* XXX
* If the insert fails later on, the overflow pages aren't recovered.
*/
dflags = 0;
if (key->size + data->size > t->bt_ovflsize) {
if (key->size > t->bt_ovflsize) {
storekey: if (__ovfl_put(t, key, &pg) == RET_ERROR)
return (RET_ERROR);
tkey.data = kb;
tkey.size = NOVFLSIZE;
memmove(kb, &pg, sizeof(pgno_t));
tmp = key->size;
memmove(kb + sizeof(pgno_t),
&tmp, sizeof(u_int32_t));
dflags |= P_BIGKEY;
key = &tkey;
}
if (key->size + data->size > t->bt_ovflsize) {
if (__ovfl_put(t, data, &pg) == RET_ERROR)
return (RET_ERROR);
tdata.data = db;
tdata.size = NOVFLSIZE;
memmove(db, &pg, sizeof(pgno_t));
tmp = data->size;
memmove(db + sizeof(pgno_t),
&tmp, sizeof(u_int32_t));
dflags |= P_BIGDATA;
data = &tdata;
}
if (key->size + data->size > t->bt_ovflsize)
goto storekey;
}
/* Replace the cursor. */
if (flags == R_CURSOR) {
if ((h = mpool_get(t->bt_mp, t->bt_cursor.pg.pgno, 0)) == NULL)
return (RET_ERROR);
idx = t->bt_cursor.pg.index;
goto delete;
}
/*
* Find the key to delete, or, the location at which to insert.
* Bt_fast and __bt_search both pin the returned page.
*/
if (t->bt_order == NOT || (e = bt_fast(t, key, data, &exact)) == NULL)
if ((e = __bt_search(t, key, &exact)) == NULL)
return (RET_ERROR);
h = e->page;
idx = e->index;
/*
* Add the key/data pair to the tree. If an identical key is already
* in the tree, and R_NOOVERWRITE is set, an error is returned. If
* R_NOOVERWRITE is not set, the key is either added (if duplicates are
* permitted) or an error is returned.
*/
switch (flags) {
case R_NOOVERWRITE:
if (!exact)
break;
mpool_put(t->bt_mp, h, 0);
return (RET_SPECIAL);
default:
if (!exact || !F_ISSET(t, B_NODUPS))
break;
/*
* !!!
* Note, the delete may empty the page, so we need to put a
* new entry into the page immediately.
*/
delete: if (__bt_dleaf(t, key, h, idx) == RET_ERROR) {
mpool_put(t->bt_mp, h, 0);
return (RET_ERROR);
}
break;
}
/*
* If not enough room, or the user has put a ceiling on the number of
* keys permitted in the page, split the page. The split code will
* insert the key and data and unpin the current page. If inserting
* into the offset array, shift the pointers up.
*/
nbytes = NBLEAFDBT(key->size, data->size);
if ((u_int32_t)(h->upper - h->lower) < nbytes + sizeof(indx_t)) {
if ((status = __bt_split(t, h, key,
data, dflags, nbytes, idx)) != RET_SUCCESS)
return (status);
goto success;
}
if (idx < (nxtindex = NEXTINDEX(h)))
memmove(h->linp + idx + 1, h->linp + idx,
(nxtindex - idx) * sizeof(indx_t));
h->lower += sizeof(indx_t);
h->linp[idx] = h->upper -= nbytes;
dest = (char *)h + h->upper;
WR_BLEAF(dest, key, data, dflags);
/* If the cursor is on this page, adjust it as necessary. */
if (F_ISSET(&t->bt_cursor, CURS_INIT) &&
!F_ISSET(&t->bt_cursor, CURS_ACQUIRE) &&
t->bt_cursor.pg.pgno == h->pgno && t->bt_cursor.pg.index >= idx)
++t->bt_cursor.pg.index;
if (t->bt_order == NOT) {
if (h->nextpg == P_INVALID) {
if (idx == NEXTINDEX(h) - 1) {
t->bt_order = FORWARD;
t->bt_last.index = idx;
t->bt_last.pgno = h->pgno;
}
} else if (h->prevpg == P_INVALID) {
if (idx == 0) {
t->bt_order = BACK;
t->bt_last.index = 0;
t->bt_last.pgno = h->pgno;
}
}
}
mpool_put(t->bt_mp, h, MPOOL_DIRTY);
success:
if (flags == R_SETCURSOR)
__bt_setcur(t, e->page->pgno, e->index);
F_SET(t, B_MODIFIED);
return (RET_SUCCESS);
}
#ifdef STATISTICS
u_long bt_cache_hit, bt_cache_miss;
#endif
/*
* BT_FAST -- Do a quick check for sorted data.
*
* Parameters:
* t: tree
* key: key to insert
*
* Returns:
* EPG for new record or NULL if not found.
*/
static EPG *
bt_fast(BTREE *t, const DBT *key, const DBT *data, int *exactp)
{
PAGE *h;
u_int32_t nbytes;
int cmp;
if ((h = mpool_get(t->bt_mp, t->bt_last.pgno, 0)) == NULL) {
t->bt_order = NOT;
return (NULL);
}
t->bt_cur.page = h;
t->bt_cur.index = t->bt_last.index;
/*
* If won't fit in this page or have too many keys in this page,
* have to search to get split stack.
*/
nbytes = NBLEAFDBT(key->size, data->size);
if ((u_int32_t)(h->upper - h->lower) < nbytes + sizeof(indx_t))
goto miss;
if (t->bt_order == FORWARD) {
if (t->bt_cur.page->nextpg != P_INVALID)
goto miss;
if (t->bt_cur.index != NEXTINDEX(h) - 1)
goto miss;
if ((cmp = __bt_cmp(t, key, &t->bt_cur)) < 0)
goto miss;
t->bt_last.index = cmp ? ++t->bt_cur.index : t->bt_cur.index;
} else {
if (t->bt_cur.page->prevpg != P_INVALID)
goto miss;
if (t->bt_cur.index != 0)
goto miss;
if ((cmp = __bt_cmp(t, key, &t->bt_cur)) > 0)
goto miss;
t->bt_last.index = 0;
}
*exactp = cmp == 0;
#ifdef STATISTICS
++bt_cache_hit;
#endif
return (&t->bt_cur);
miss:
#ifdef STATISTICS
++bt_cache_miss;
#endif
t->bt_order = NOT;
mpool_put(t->bt_mp, h, 0);
return (NULL);
}

View File

@ -0,0 +1,202 @@
#include "port_before.h"
/*-
* Copyright (c) 1990, 1993, 1994
* The Regents of the University of California. All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* Mike Olson.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#if defined(LIBC_SCCS) && !defined(lint)
static char sccsid[] = "@(#)bt_search.c 8.8 (Berkeley) 7/31/94";
#endif /* LIBC_SCCS and not lint */
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/types.h>
#include <stdio.h>
#include <db.h>
#include "btree.h"
static int __bt_snext(BTREE *, PAGE *, const DBT *, int *);
static int __bt_sprev(BTREE *, PAGE *, const DBT *, int *);
/*
* __bt_search --
* Search a btree for a key.
*
* Parameters:
* t: tree to search
* key: key to find
* exactp: pointer to exact match flag
*
* Returns:
* The EPG for matching record, if any, or the EPG for the location
* of the key, if it were inserted into the tree, is entered into
* the bt_cur field of the tree. A pointer to the field is returned.
*/
EPG *
__bt_search(BTREE *t, const DBT *key, int *exactp)
{
PAGE *h;
indx_t base, idx, lim;
pgno_t pg;
int cmp;
BT_CLR(t);
for (pg = P_ROOT;;) {
if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL)
return (NULL);
/* Do a binary search on the current page. */
t->bt_cur.page = h;
for (base = 0, lim = NEXTINDEX(h); lim; lim >>= 1) {
t->bt_cur.index = idx = base + (lim >> 1);
if ((cmp = __bt_cmp(t, key, &t->bt_cur)) == 0) {
if (h->flags & P_BLEAF) {
*exactp = 1;
return (&t->bt_cur);
}
goto next;
}
if (cmp > 0) {
base = idx + 1;
--lim;
}
}
/*
* If it's a leaf page, we're almost done. If no duplicates
* are allowed, or we have an exact match, we're done. Else,
* it's possible that there were matching keys on this page,
* which later deleted, and we're on a page with no matches
* while there are matches on other pages. If at the start or
* end of a page, check the adjacent page.
*/
if (h->flags & P_BLEAF) {
if (!F_ISSET(t, B_NODUPS)) {
if (base == 0 &&
h->prevpg != P_INVALID &&
__bt_sprev(t, h, key, exactp))
return (&t->bt_cur);
if (base == NEXTINDEX(h) &&
h->nextpg != P_INVALID &&
__bt_snext(t, h, key, exactp))
return (&t->bt_cur);
}
*exactp = 0;
t->bt_cur.index = base;
return (&t->bt_cur);
}
/*
* No match found. Base is the smallest index greater than
* key and may be zero or a last + 1 index. If it's non-zero,
* decrement by one, and record the internal page which should
* be a parent page for the key. If a split later occurs, the
* inserted page will be to the right of the saved page.
*/
idx = base ? base - 1 : base;
next: BT_PUSH(t, h->pgno, idx);
pg = GETBINTERNAL(h, idx)->pgno;
mpool_put(t->bt_mp, h, 0);
}
}
/*
* __bt_snext --
* Check for an exact match after the key.
*
* Parameters:
* t: tree
* h: current page
* key: key
* exactp: pointer to exact match flag
*
* Returns:
* If an exact match found.
*/
static int
__bt_snext(BTREE *t, PAGE *h, const DBT *key, int *exactp)
{
EPG e;
/*
* Get the next page. The key is either an exact
* match, or not as good as the one we already have.
*/
if ((e.page = mpool_get(t->bt_mp, h->nextpg, 0)) == NULL)
return (0);
e.index = 0;
if (__bt_cmp(t, key, &e) == 0) {
mpool_put(t->bt_mp, h, 0);
t->bt_cur = e;
*exactp = 1;
return (1);
}
mpool_put(t->bt_mp, e.page, 0);
return (0);
}
/*
* __bt_sprev --
* Check for an exact match before the key.
*
* Parameters:
* t: tree
* h: current page
* key: key
* exactp: pointer to exact match flag
*
* Returns:
* If an exact match found.
*/
static int
__bt_sprev(BTREE *t, PAGE *h, const DBT *key, int *exactp)
{
EPG e;
/*
* Get the previous page. The key is either an exact
* match, or not as good as the one we already have.
*/
if ((e.page = mpool_get(t->bt_mp, h->prevpg, 0)) == NULL)
return (0);
e.index = NEXTINDEX(e.page) - 1;
if (__bt_cmp(t, key, &e) == 0) {
mpool_put(t->bt_mp, h, 0);
t->bt_cur = e;
*exactp = 1;
return (1);
}
mpool_put(t->bt_mp, e.page, 0);
return (0);
}

View File

@ -0,0 +1,443 @@
#include "port_before.h"
/*-
* Copyright (c) 1990, 1993, 1994
* The Regents of the University of California. All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* Mike Olson.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#if defined(LIBC_SCCS) && !defined(lint)
static char sccsid[] = "@(#)bt_seq.c 8.7 (Berkeley) 7/20/94";
#endif /* LIBC_SCCS and not lint */
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/types.h>
#include <errno.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <db.h>
#include "btree.h"
static int __bt_first(BTREE *, const DBT *, EPG *, int *);
static int __bt_seqadv(BTREE *, EPG *, int);
static int __bt_seqset(BTREE *, EPG *, DBT *, int);
/*
* Sequential scan support.
*
* The tree can be scanned sequentially, starting from either end of the
* tree or from any specific key. A scan request before any scanning is
* done is initialized as starting from the least node.
*/
/*
* __bt_seq --
* Btree sequential scan interface.
*
* Parameters:
* dbp: pointer to access method
* key: key for positioning and return value
* data: data return value
* flags: R_CURSOR, R_FIRST, R_LAST, R_NEXT, R_PREV.
*
* Returns:
* RET_ERROR, RET_SUCCESS or RET_SPECIAL if there's no next key.
*/
int
__bt_seq(const DB *dbp, DBT *key, DBT *data, u_int flags)
{
BTREE *t;
EPG e;
int status;
t = dbp->internal;
/* Toss any page pinned across calls. */
if (t->bt_pinned != NULL) {
mpool_put(t->bt_mp, t->bt_pinned, 0);
t->bt_pinned = NULL;
}
/*
* If scan unitialized as yet, or starting at a specific record, set
* the scan to a specific key. Both __bt_seqset and __bt_seqadv pin
* the page the cursor references if they're successful.
*/
switch (flags) {
case R_NEXT:
case R_PREV:
if (F_ISSET(&t->bt_cursor, CURS_INIT)) {
status = __bt_seqadv(t, &e, flags);
break;
}
/* FALLTHROUGH */
case R_FIRST:
case R_LAST:
case R_CURSOR:
status = __bt_seqset(t, &e, key, flags);
break;
default:
errno = EINVAL;
return (RET_ERROR);
}
if (status == RET_SUCCESS) {
__bt_setcur(t, e.page->pgno, e.index);
status =
__bt_ret(t, &e, key, &t->bt_rkey, data, &t->bt_rdata, 0);
/*
* If the user is doing concurrent access, we copied the
* key/data, toss the page.
*/
if (F_ISSET(t, B_DB_LOCK))
mpool_put(t->bt_mp, e.page, 0);
else
t->bt_pinned = e.page;
}
return (status);
}
/*
* __bt_seqset --
* Set the sequential scan to a specific key.
*
* Parameters:
* t: tree
* ep: storage for returned key
* key: key for initial scan position
* flags: R_CURSOR, R_FIRST, R_LAST, R_NEXT, R_PREV
*
* Side effects:
* Pins the page the cursor references.
*
* Returns:
* RET_ERROR, RET_SUCCESS or RET_SPECIAL if there's no next key.
*/
static int
__bt_seqset(BTREE *t, EPG *ep, DBT *key, int flags)
{
PAGE *h;
pgno_t pg;
int exact;
/*
* Find the first, last or specific key in the tree and point the
* cursor at it. The cursor may not be moved until a new key has
* been found.
*/
switch (flags) {
case R_CURSOR: /* Keyed scan. */
/*
* Find the first instance of the key or the smallest key
* which is greater than or equal to the specified key.
*/
if (key->data == NULL || key->size == 0) {
errno = EINVAL;
return (RET_ERROR);
}
return (__bt_first(t, key, ep, &exact));
case R_FIRST: /* First record. */
case R_NEXT:
/* Walk down the left-hand side of the tree. */
for (pg = P_ROOT;;) {
if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL)
return (RET_ERROR);
/* Check for an empty tree. */
if (NEXTINDEX(h) == 0) {
mpool_put(t->bt_mp, h, 0);
return (RET_SPECIAL);
}
if (h->flags & (P_BLEAF | P_RLEAF))
break;
pg = GETBINTERNAL(h, 0)->pgno;
mpool_put(t->bt_mp, h, 0);
}
ep->page = h;
ep->index = 0;
break;
case R_LAST: /* Last record. */
case R_PREV:
/* Walk down the right-hand side of the tree. */
for (pg = P_ROOT;;) {
if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL)
return (RET_ERROR);
/* Check for an empty tree. */
if (NEXTINDEX(h) == 0) {
mpool_put(t->bt_mp, h, 0);
return (RET_SPECIAL);
}
if (h->flags & (P_BLEAF | P_RLEAF))
break;
pg = GETBINTERNAL(h, NEXTINDEX(h) - 1)->pgno;
mpool_put(t->bt_mp, h, 0);
}
ep->page = h;
ep->index = NEXTINDEX(h) - 1;
break;
}
return (RET_SUCCESS);
}
/*
* __bt_seqadvance --
* Advance the sequential scan.
*
* Parameters:
* t: tree
* flags: R_NEXT, R_PREV
*
* Side effects:
* Pins the page the new key/data record is on.
*
* Returns:
* RET_ERROR, RET_SUCCESS or RET_SPECIAL if there's no next key.
*/
static int
__bt_seqadv(BTREE *t, EPG *ep, int flags)
{
CURSOR *c;
PAGE *h;
indx_t idx;
pgno_t pg;
int exact;
/*
* There are a couple of states that we can be in. The cursor has
* been initialized by the time we get here, but that's all we know.
*/
c = &t->bt_cursor;
/*
* The cursor was deleted where there weren't any duplicate records,
* so the key was saved. Find out where that key would go in the
* current tree. It doesn't matter if the returned key is an exact
* match or not -- if it's an exact match, the record was added after
* the delete so we can just return it. If not, as long as there's
* a record there, return it.
*/
if (F_ISSET(c, CURS_ACQUIRE))
return (__bt_first(t, &c->key, ep, &exact));
/* Get the page referenced by the cursor. */
if ((h = mpool_get(t->bt_mp, c->pg.pgno, 0)) == NULL)
return (RET_ERROR);
/*
* Find the next/previous record in the tree and point the cursor at
* it. The cursor may not be moved until a new key has been found.
*/
switch (flags) {
case R_NEXT: /* Next record. */
/*
* The cursor was deleted in duplicate records, and moved
* forward to a record that has yet to be returned. Clear
* that flag, and return the record.
*/
if (F_ISSET(c, CURS_AFTER))
goto usecurrent;
idx = c->pg.index;
if (++idx == NEXTINDEX(h)) {
pg = h->nextpg;
mpool_put(t->bt_mp, h, 0);
if (pg == P_INVALID)
return (RET_SPECIAL);
if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL)
return (RET_ERROR);
idx = 0;
}
break;
case R_PREV: /* Previous record. */
/*
* The cursor was deleted in duplicate records, and moved
* backward to a record that has yet to be returned. Clear
* that flag, and return the record.
*/
if (F_ISSET(c, CURS_BEFORE)) {
usecurrent: F_CLR(c, CURS_AFTER | CURS_BEFORE);
ep->page = h;
ep->index = c->pg.index;
return (RET_SUCCESS);
}
idx = c->pg.index;
if (idx == 0) {
pg = h->prevpg;
mpool_put(t->bt_mp, h, 0);
if (pg == P_INVALID)
return (RET_SPECIAL);
if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL)
return (RET_ERROR);
idx = NEXTINDEX(h) - 1;
} else
--idx;
break;
}
ep->page = h;
ep->index = idx;
return (RET_SUCCESS);
}
/*
* __bt_first --
* Find the first entry.
*
* Parameters:
* t: the tree
* key: the key
* erval: return EPG
* exactp: pointer to exact match flag
*
* Returns:
* The first entry in the tree greater than or equal to key,
* or RET_SPECIAL if no such key exists.
*/
static int
__bt_first(BTREE *t, const DBT *key, EPG *erval, int *exactp)
{
PAGE *h;
EPG *ep, save;
pgno_t pg;
/*
* Find any matching record; __bt_search pins the page.
*
* If it's an exact match and duplicates are possible, walk backwards
* in the tree until we find the first one. Otherwise, make sure it's
* a valid key (__bt_search may return an index just past the end of a
* page) and return it.
*/
if ((ep = __bt_search(t, key, exactp)) == NULL)
return (0);
if (*exactp) {
if (F_ISSET(t, B_NODUPS)) {
*erval = *ep;
return (RET_SUCCESS);
}
/*
* Walk backwards, as long as the entry matches and there are
* keys left in the tree. Save a copy of each match in case
* we go too far.
*/
save = *ep;
h = ep->page;
do {
if (save.page->pgno != ep->page->pgno) {
mpool_put(t->bt_mp, save.page, 0);
save = *ep;
} else
save.index = ep->index;
/*
* Don't unpin the page the last (or original) match
* was on, but make sure it's unpinned if an error
* occurs.
*/
if (ep->index == 0) {
if (h->prevpg == P_INVALID)
break;
if (h->pgno != save.page->pgno)
mpool_put(t->bt_mp, h, 0);
if ((h = mpool_get(t->bt_mp,
h->prevpg, 0)) == NULL) {
if (h->pgno == save.page->pgno)
mpool_put(t->bt_mp,
save.page, 0);
return (RET_ERROR);
}
ep->page = h;
ep->index = NEXTINDEX(h);
}
--ep->index;
} while (__bt_cmp(t, key, ep) == 0);
/*
* Reach here with the last page that was looked at pinned,
* which may or may not be the same as the last (or original)
* match page. If it's not useful, release it.
*/
if (h->pgno != save.page->pgno)
mpool_put(t->bt_mp, h, 0);
*erval = save;
return (RET_SUCCESS);
}
/* If at the end of a page, find the next entry. */
if (ep->index == NEXTINDEX(ep->page)) {
h = ep->page;
pg = h->nextpg;
mpool_put(t->bt_mp, h, 0);
if (pg == P_INVALID)
return (RET_SPECIAL);
if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL)
return (RET_ERROR);
ep->index = 0;
ep->page = h;
}
*erval = *ep;
return (RET_SUCCESS);
}
/*
* __bt_setcur --
* Set the cursor to an entry in the tree.
*
* Parameters:
* t: the tree
* pgno: page number
* idx: page index
*/
void
__bt_setcur(BTREE *t, pgno_t pgno, u_int idx)
{
/* Lose any already deleted key. */
if (t->bt_cursor.key.data != NULL) {
free(t->bt_cursor.key.data);
t->bt_cursor.key.size = 0;
t->bt_cursor.key.data = NULL;
}
F_CLR(&t->bt_cursor, CURS_ACQUIRE | CURS_AFTER | CURS_BEFORE);
/* Update the cursor. */
t->bt_cursor.pg.pgno = pgno;
t->bt_cursor.pg.index = idx;
F_SET(&t->bt_cursor, CURS_INIT);
}

View File

@ -0,0 +1,798 @@
#include "port_before.h"
/*-
* Copyright (c) 1990, 1993, 1994
* The Regents of the University of California. All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* Mike Olson.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#if defined(LIBC_SCCS) && !defined(lint)
static char sccsid[] = "@(#)bt_split.c 8.10 (Berkeley) 1/9/95";
#endif /* LIBC_SCCS and not lint */
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/types.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <db.h>
#include "btree.h"
static int bt_broot(BTREE *, PAGE *, PAGE *, PAGE *);
static PAGE *bt_page(BTREE *, PAGE *, PAGE **, PAGE **, indx_t *, size_t);
static int bt_preserve(BTREE *, pgno_t);
static PAGE *bt_psplit(BTREE *, PAGE *, PAGE *, PAGE *, indx_t *, size_t);
static PAGE *bt_root(BTREE *, PAGE *, PAGE **, PAGE **, indx_t *, size_t);
static int bt_rroot(BTREE *, PAGE *, PAGE *, PAGE *);
static recno_t rec_total(PAGE *);
#ifdef STATISTICS
u_long bt_rootsplit, bt_split, bt_sortsplit, bt_pfxsaved;
#endif
/*
* __BT_SPLIT -- Split the tree.
*
* Parameters:
* t: tree
* sp: page to split
* key: key to insert
* data: data to insert
* flags: BIGKEY/BIGDATA flags
* ilen: insert length
* skip: index to leave open
*
* Returns:
* RET_ERROR, RET_SUCCESS
*/
int
__bt_split(BTREE *t, PAGE *sp, const DBT *key, const DBT *data, int flags,
size_t ilen, u_int32_t argskip)
{
BINTERNAL *bi;
BLEAF *bl, *tbl;
DBT a, b;
EPGNO *parent;
PAGE *h, *l, *r, *lchild, *rchild;
indx_t nxtindex;
u_int16_t skip;
u_int32_t n, nbytes, nksize;
int parentsplit;
char *dest;
/*
* Split the page into two pages, l and r. The split routines return
* a pointer to the page into which the key should be inserted and with
* skip set to the offset which should be used. Additionally, l and r
* are pinned.
*/
skip = argskip;
h = sp->pgno == P_ROOT ?
bt_root(t, sp, &l, &r, &skip, ilen) :
bt_page(t, sp, &l, &r, &skip, ilen);
if (h == NULL)
return (RET_ERROR);
/*
* Insert the new key/data pair into the leaf page. (Key inserts
* always cause a leaf page to split first.)
*/
h->linp[skip] = h->upper -= ilen;
dest = (char *)h + h->upper;
if (F_ISSET(t, R_RECNO))
WR_RLEAF(dest, data, flags)
else
WR_BLEAF(dest, key, data, flags)
/* If the root page was split, make it look right. */
if (sp->pgno == P_ROOT &&
(F_ISSET(t, R_RECNO) ?
bt_rroot(t, sp, l, r) : bt_broot(t, sp, l, r)) == RET_ERROR)
goto err2;
/*
* Now we walk the parent page stack -- a LIFO stack of the pages that
* were traversed when we searched for the page that split. Each stack
* entry is a page number and a page index offset. The offset is for
* the page traversed on the search. We've just split a page, so we
* have to insert a new key into the parent page.
*
* If the insert into the parent page causes it to split, may have to
* continue splitting all the way up the tree. We stop if the root
* splits or the page inserted into didn't have to split to hold the
* new key. Some algorithms replace the key for the old page as well
* as the new page. We don't, as there's no reason to believe that the
* first key on the old page is any better than the key we have, and,
* in the case of a key being placed at index 0 causing the split, the
* key is unavailable.
*
* There are a maximum of 5 pages pinned at any time. We keep the left
* and right pages pinned while working on the parent. The 5 are the
* two children, left parent and right parent (when the parent splits)
* and the root page or the overflow key page when calling bt_preserve.
* This code must make sure that all pins are released other than the
* root page or overflow page which is unlocked elsewhere.
*/
while ((parent = BT_POP(t)) != NULL) {
lchild = l;
rchild = r;
/* Get the parent page. */
if ((h = mpool_get(t->bt_mp, parent->pgno, 0)) == NULL)
goto err2;
/*
* The new key goes ONE AFTER the index, because the split
* was to the right.
*/
skip = parent->index + 1;
/*
* Calculate the space needed on the parent page.
*
* Prefix trees: space hack when inserting into BINTERNAL
* pages. Retain only what's needed to distinguish between
* the new entry and the LAST entry on the page to its left.
* If the keys compare equal, retain the entire key. Note,
* we don't touch overflow keys, and the entire key must be
* retained for the next-to-left most key on the leftmost
* page of each level, or the search will fail. Applicable
* ONLY to internal pages that have leaf pages as children.
* Further reduction of the key between pairs of internal
* pages loses too much information.
*/
switch (rchild->flags & P_TYPE) {
case P_BINTERNAL:
bi = GETBINTERNAL(rchild, 0);
nbytes = NBINTERNAL(bi->ksize);
break;
case P_BLEAF:
bl = GETBLEAF(rchild, 0);
nbytes = NBINTERNAL(bl->ksize);
if (t->bt_pfx && !(bl->flags & P_BIGKEY) &&
(h->prevpg != P_INVALID || skip > 1)) {
tbl = GETBLEAF(lchild, NEXTINDEX(lchild) - 1);
a.size = tbl->ksize;
a.data = tbl->bytes;
b.size = bl->ksize;
b.data = bl->bytes;
nksize = t->bt_pfx(&a, &b);
n = NBINTERNAL(nksize);
if (n < nbytes) {
#ifdef STATISTICS
bt_pfxsaved += nbytes - n;
#endif
nbytes = n;
} else
nksize = 0;
} else
nksize = 0;
break;
case P_RINTERNAL:
case P_RLEAF:
nbytes = NRINTERNAL;
break;
default:
abort();
}
/* Split the parent page if necessary or shift the indices. */
if ((u_int32_t)(h->upper - h->lower) < nbytes + sizeof(indx_t)) {
sp = h;
h = h->pgno == P_ROOT ?
bt_root(t, h, &l, &r, &skip, nbytes) :
bt_page(t, h, &l, &r, &skip, nbytes);
if (h == NULL)
goto err1;
parentsplit = 1;
} else {
if (skip < (nxtindex = NEXTINDEX(h)))
memmove(h->linp + skip + 1, h->linp + skip,
(nxtindex - skip) * sizeof(indx_t));
h->lower += sizeof(indx_t);
parentsplit = 0;
}
/* Insert the key into the parent page. */
switch (rchild->flags & P_TYPE) {
case P_BINTERNAL:
h->linp[skip] = h->upper -= nbytes;
dest = (char *)h + h->linp[skip];
memmove(dest, bi, nbytes);
((BINTERNAL *)dest)->pgno = rchild->pgno;
break;
case P_BLEAF:
h->linp[skip] = h->upper -= nbytes;
dest = (char *)h + h->linp[skip];
WR_BINTERNAL(dest, nksize ? nksize : bl->ksize,
rchild->pgno, bl->flags & P_BIGKEY);
memmove(dest, bl->bytes, nksize ? nksize : bl->ksize);
if (bl->flags & P_BIGKEY &&
bt_preserve(t, *(pgno_t *)bl->bytes) == RET_ERROR)
goto err1;
break;
case P_RINTERNAL:
/*
* Update the left page count. If split
* added at index 0, fix the correct page.
*/
if (skip > 0)
dest = (char *)h + h->linp[skip - 1];
else
dest = (char *)l + l->linp[NEXTINDEX(l) - 1];
((RINTERNAL *)dest)->nrecs = rec_total(lchild);
((RINTERNAL *)dest)->pgno = lchild->pgno;
/* Update the right page count. */
h->linp[skip] = h->upper -= nbytes;
dest = (char *)h + h->linp[skip];
((RINTERNAL *)dest)->nrecs = rec_total(rchild);
((RINTERNAL *)dest)->pgno = rchild->pgno;
break;
case P_RLEAF:
/*
* Update the left page count. If split
* added at index 0, fix the correct page.
*/
if (skip > 0)
dest = (char *)h + h->linp[skip - 1];
else
dest = (char *)l + l->linp[NEXTINDEX(l) - 1];
((RINTERNAL *)dest)->nrecs = NEXTINDEX(lchild);
((RINTERNAL *)dest)->pgno = lchild->pgno;
/* Update the right page count. */
h->linp[skip] = h->upper -= nbytes;
dest = (char *)h + h->linp[skip];
((RINTERNAL *)dest)->nrecs = NEXTINDEX(rchild);
((RINTERNAL *)dest)->pgno = rchild->pgno;
break;
default:
abort();
}
/* Unpin the held pages. */
if (!parentsplit) {
mpool_put(t->bt_mp, h, MPOOL_DIRTY);
break;
}
/* If the root page was split, make it look right. */
if (sp->pgno == P_ROOT &&
(F_ISSET(t, R_RECNO) ?
bt_rroot(t, sp, l, r) : bt_broot(t, sp, l, r)) == RET_ERROR)
goto err1;
mpool_put(t->bt_mp, lchild, MPOOL_DIRTY);
mpool_put(t->bt_mp, rchild, MPOOL_DIRTY);
}
/* Unpin the held pages. */
mpool_put(t->bt_mp, l, MPOOL_DIRTY);
mpool_put(t->bt_mp, r, MPOOL_DIRTY);
/* Clear any pages left on the stack. */
return (RET_SUCCESS);
/*
* If something fails in the above loop we were already walking back
* up the tree and the tree is now inconsistent. Nothing much we can
* do about it but release any memory we're holding.
*/
err1: mpool_put(t->bt_mp, lchild, MPOOL_DIRTY);
mpool_put(t->bt_mp, rchild, MPOOL_DIRTY);
err2: mpool_put(t->bt_mp, l, 0);
mpool_put(t->bt_mp, r, 0);
__dbpanic(t->bt_dbp);
return (RET_ERROR);
}
/*
* BT_PAGE -- Split a non-root page of a btree.
*
* Parameters:
* t: tree
* h: root page
* lp: pointer to left page pointer
* rp: pointer to right page pointer
* skip: pointer to index to leave open
* ilen: insert length
*
* Returns:
* Pointer to page in which to insert or NULL on error.
*/
static PAGE *
bt_page(BTREE *t, PAGE *h, PAGE **lp, PAGE **rp, indx_t *skip, size_t ilen)
{
PAGE *l, *r, *tp;
pgno_t npg;
#ifdef STATISTICS
++bt_split;
#endif
/* Put the new right page for the split into place. */
if ((r = __bt_new(t, &npg)) == NULL)
return (NULL);
r->pgno = npg;
r->lower = BTDATAOFF;
r->upper = t->bt_psize;
r->nextpg = h->nextpg;
r->prevpg = h->pgno;
r->flags = h->flags & P_TYPE;
/*
* If we're splitting the last page on a level because we're appending
* a key to it (skip is NEXTINDEX()), it's likely that the data is
* sorted. Adding an empty page on the side of the level is less work
* and can push the fill factor much higher than normal. If we're
* wrong it's no big deal, we'll just do the split the right way next
* time. It may look like it's equally easy to do a similar hack for
* reverse sorted data, that is, split the tree left, but it's not.
* Don't even try.
*/
if (h->nextpg == P_INVALID && *skip == NEXTINDEX(h)) {
#ifdef STATISTICS
++bt_sortsplit;
#endif
h->nextpg = r->pgno;
r->lower = BTDATAOFF + sizeof(indx_t);
*skip = 0;
*lp = h;
*rp = r;
return (r);
}
/* Put the new left page for the split into place. */
if ((l = (PAGE *)calloc(1, t->bt_psize)) == NULL) {
mpool_put(t->bt_mp, r, 0);
return (NULL);
}
l->pgno = h->pgno;
l->nextpg = r->pgno;
l->prevpg = h->prevpg;
l->lower = BTDATAOFF;
l->upper = t->bt_psize;
l->flags = h->flags & P_TYPE;
/* Fix up the previous pointer of the page after the split page. */
if (h->nextpg != P_INVALID) {
if ((tp = mpool_get(t->bt_mp, h->nextpg, 0)) == NULL) {
free(l);
/* XXX mpool_free(t->bt_mp, r->pgno); */
return (NULL);
}
tp->prevpg = r->pgno;
mpool_put(t->bt_mp, tp, MPOOL_DIRTY);
}
/*
* Split right. The key/data pairs aren't sorted in the btree page so
* it's simpler to copy the data from the split page onto two new pages
* instead of copying half the data to the right page and compacting
* the left page in place. Since the left page can't change, we have
* to swap the original and the allocated left page after the split.
*/
tp = bt_psplit(t, h, l, r, skip, ilen);
/* Move the new left page onto the old left page. */
memmove(h, l, t->bt_psize);
if (tp == l)
tp = h;
free(l);
*lp = h;
*rp = r;
return (tp);
}
/*
* BT_ROOT -- Split the root page of a btree.
*
* Parameters:
* t: tree
* h: root page
* lp: pointer to left page pointer
* rp: pointer to right page pointer
* skip: pointer to index to leave open
* ilen: insert length
*
* Returns:
* Pointer to page in which to insert or NULL on error.
*/
static PAGE *
bt_root(BTREE *t, PAGE *h, PAGE **lp, PAGE **rp, indx_t *skip, size_t ilen)
{
PAGE *l, *r, *tp;
pgno_t lnpg, rnpg;
#ifdef STATISTICS
++bt_split;
++bt_rootsplit;
#endif
/* Put the new left and right pages for the split into place. */
if ((l = __bt_new(t, &lnpg)) == NULL ||
(r = __bt_new(t, &rnpg)) == NULL)
return (NULL);
l->pgno = lnpg;
r->pgno = rnpg;
l->nextpg = r->pgno;
r->prevpg = l->pgno;
l->prevpg = r->nextpg = P_INVALID;
l->lower = r->lower = BTDATAOFF;
l->upper = r->upper = t->bt_psize;
l->flags = r->flags = h->flags & P_TYPE;
/* Split the root page. */
tp = bt_psplit(t, h, l, r, skip, ilen);
*lp = l;
*rp = r;
return (tp);
}
/*
* BT_RROOT -- Fix up the recno root page after it has been split.
*
* Parameters:
* t: tree
* h: root page
* l: left page
* r: right page
*
* Returns:
* RET_ERROR, RET_SUCCESS
*/
static int
bt_rroot(BTREE *t, PAGE *h, PAGE *l, PAGE *r)
{
char *dest;
/* Insert the left and right keys, set the header information. */
h->linp[0] = h->upper = t->bt_psize - NRINTERNAL;
dest = (char *)h + h->upper;
WR_RINTERNAL(dest,
l->flags & P_RLEAF ? NEXTINDEX(l) : rec_total(l), l->pgno);
h->linp[1] = h->upper -= NRINTERNAL;
dest = (char *)h + h->upper;
WR_RINTERNAL(dest,
r->flags & P_RLEAF ? NEXTINDEX(r) : rec_total(r), r->pgno);
h->lower = BTDATAOFF + 2 * sizeof(indx_t);
/* Unpin the root page, set to recno internal page. */
h->flags &= ~P_TYPE;
h->flags |= P_RINTERNAL;
mpool_put(t->bt_mp, h, MPOOL_DIRTY);
return (RET_SUCCESS);
}
/*
* BT_BROOT -- Fix up the btree root page after it has been split.
*
* Parameters:
* t: tree
* h: root page
* l: left page
* r: right page
*
* Returns:
* RET_ERROR, RET_SUCCESS
*/
static int
bt_broot(BTREE *t, PAGE *h, PAGE *l, PAGE *r)
{
BINTERNAL *bi;
BLEAF *bl;
u_int32_t nbytes;
char *dest;
/*
* If the root page was a leaf page, change it into an internal page.
* We copy the key we split on (but not the key's data, in the case of
* a leaf page) to the new root page.
*
* The btree comparison code guarantees that the left-most key on any
* level of the tree is never used, so it doesn't need to be filled in.
*/
nbytes = NBINTERNAL(0);
h->linp[0] = h->upper = t->bt_psize - nbytes;
dest = (char *)h + h->upper;
WR_BINTERNAL(dest, 0, l->pgno, 0);
switch (h->flags & P_TYPE) {
case P_BLEAF:
bl = GETBLEAF(r, 0);
nbytes = NBINTERNAL(bl->ksize);
h->linp[1] = h->upper -= nbytes;
dest = (char *)h + h->upper;
WR_BINTERNAL(dest, bl->ksize, r->pgno, 0);
memmove(dest, bl->bytes, bl->ksize);
/*
* If the key is on an overflow page, mark the overflow chain
* so it isn't deleted when the leaf copy of the key is deleted.
*/
if (bl->flags & P_BIGKEY &&
bt_preserve(t, *(pgno_t *)bl->bytes) == RET_ERROR)
return (RET_ERROR);
break;
case P_BINTERNAL:
bi = GETBINTERNAL(r, 0);
nbytes = NBINTERNAL(bi->ksize);
h->linp[1] = h->upper -= nbytes;
dest = (char *)h + h->upper;
memmove(dest, bi, nbytes);
((BINTERNAL *)dest)->pgno = r->pgno;
break;
default:
abort();
}
/* There are two keys on the page. */
h->lower = BTDATAOFF + 2 * sizeof(indx_t);
/* Unpin the root page, set to btree internal page. */
h->flags &= ~P_TYPE;
h->flags |= P_BINTERNAL;
mpool_put(t->bt_mp, h, MPOOL_DIRTY);
return (RET_SUCCESS);
}
/*
* BT_PSPLIT -- Do the real work of splitting the page.
*
* Parameters:
* t: tree
* h: page to be split
* l: page to put lower half of data
* r: page to put upper half of data
* pskip: pointer to index to leave open
* ilen: insert length
*
* Returns:
* Pointer to page in which to insert.
*/
static PAGE *
bt_psplit(BTREE *t, PAGE *h, PAGE *l, PAGE *r, indx_t *pskip, size_t ilen)
{
BINTERNAL *bi;
BLEAF *bl;
CURSOR *c;
RLEAF *rl;
PAGE *rval;
void *src;
indx_t full, half, nxt, off, skip, top, used;
u_int32_t nbytes;
int bigkeycnt, isbigkey;
/*
* Split the data to the left and right pages. Leave the skip index
* open. Additionally, make some effort not to split on an overflow
* key. This makes internal page processing faster and can save
* space as overflow keys used by internal pages are never deleted.
*/
bigkeycnt = 0;
skip = *pskip;
full = t->bt_psize - BTDATAOFF;
half = full / 2;
used = 0;
for (nxt = off = 0, top = NEXTINDEX(h); nxt < top; ++off) {
if (skip == off) {
nbytes = ilen;
isbigkey = 0; /* XXX: not really known. */
} else
switch (h->flags & P_TYPE) {
case P_BINTERNAL:
src = bi = GETBINTERNAL(h, nxt);
nbytes = NBINTERNAL(bi->ksize);
isbigkey = bi->flags & P_BIGKEY;
break;
case P_BLEAF:
src = bl = GETBLEAF(h, nxt);
nbytes = NBLEAF(bl);
isbigkey = bl->flags & P_BIGKEY;
break;
case P_RINTERNAL:
src = GETRINTERNAL(h, nxt);
nbytes = NRINTERNAL;
isbigkey = 0;
break;
case P_RLEAF:
src = rl = GETRLEAF(h, nxt);
nbytes = NRLEAF(rl);
isbigkey = 0;
break;
default:
abort();
}
/*
* If the key/data pairs are substantial fractions of the max
* possible size for the page, it's possible to get situations
* where we decide to try and copy too much onto the left page.
* Make sure that doesn't happen.
*/
if ((skip <= off && used + nbytes + sizeof(indx_t) >= full) ||
nxt == top - 1) {
--off;
break;
}
/* Copy the key/data pair, if not the skipped index. */
if (skip != off) {
++nxt;
l->linp[off] = l->upper -= nbytes;
memmove((char *)l + l->upper, src, nbytes);
}
used += nbytes + sizeof(indx_t);
if (used >= half) {
if (!isbigkey || bigkeycnt == 3)
break;
else
++bigkeycnt;
}
}
/*
* Off is the last offset that's valid for the left page.
* Nxt is the first offset to be placed on the right page.
*/
l->lower += (off + 1) * sizeof(indx_t);
/*
* If splitting the page that the cursor was on, the cursor has to be
* adjusted to point to the same record as before the split. If the
* cursor is at or past the skipped slot, the cursor is incremented by
* one. If the cursor is on the right page, it is decremented by the
* number of records split to the left page.
*/
c = &t->bt_cursor;
if (F_ISSET(c, CURS_INIT) && c->pg.pgno == h->pgno) {
if (c->pg.index >= skip)
++c->pg.index;
if (c->pg.index < nxt) /* Left page. */
c->pg.pgno = l->pgno;
else { /* Right page. */
c->pg.pgno = r->pgno;
c->pg.index -= nxt;
}
}
/*
* If the skipped index was on the left page, just return that page.
* Otherwise, adjust the skip index to reflect the new position on
* the right page.
*/
if (skip <= off) {
skip = MAX_PAGE_OFFSET;
rval = l;
} else {
rval = r;
*pskip -= nxt;
}
for (off = 0; nxt < top; ++off) {
if (skip == nxt) {
++off;
skip = MAX_PAGE_OFFSET;
}
switch (h->flags & P_TYPE) {
case P_BINTERNAL:
src = bi = GETBINTERNAL(h, nxt);
nbytes = NBINTERNAL(bi->ksize);
break;
case P_BLEAF:
src = bl = GETBLEAF(h, nxt);
nbytes = NBLEAF(bl);
break;
case P_RINTERNAL:
src = GETRINTERNAL(h, nxt);
nbytes = NRINTERNAL;
break;
case P_RLEAF:
src = rl = GETRLEAF(h, nxt);
nbytes = NRLEAF(rl);
break;
default:
abort();
}
++nxt;
r->linp[off] = r->upper -= nbytes;
memmove((char *)r + r->upper, src, nbytes);
}
r->lower += off * sizeof(indx_t);
/* If the key is being appended to the page, adjust the index. */
if (skip == top)
r->lower += sizeof(indx_t);
return (rval);
}
/*
* BT_PRESERVE -- Mark a chain of pages as used by an internal node.
*
* Chains of indirect blocks pointed to by leaf nodes get reclaimed when the
* record that references them gets deleted. Chains pointed to by internal
* pages never get deleted. This routine marks a chain as pointed to by an
* internal page.
*
* Parameters:
* t: tree
* pg: page number of first page in the chain.
*
* Returns:
* RET_SUCCESS, RET_ERROR.
*/
static int
bt_preserve(BTREE *t, pgno_t pg)
{
PAGE *h;
if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL)
return (RET_ERROR);
h->flags |= P_PRESERVE;
mpool_put(t->bt_mp, h, MPOOL_DIRTY);
return (RET_SUCCESS);
}
/*
* REC_TOTAL -- Return the number of recno entries below a page.
*
* Parameters:
* h: page
*
* Returns:
* The number of recno entries below a page.
*
* XXX
* These values could be set by the bt_psplit routine. The problem is that the
* entry has to be popped off of the stack etc. or the values have to be passed
* all the way back to bt_split/bt_rroot and it's not very clean.
*/
static recno_t
rec_total(PAGE *h)
{
recno_t recs;
indx_t nxt, top;
for (recs = 0, nxt = 0, top = NEXTINDEX(h); nxt < top; ++nxt)
recs += GETRINTERNAL(h, nxt)->nrecs;
return (recs);
}

View File

@ -0,0 +1,248 @@
#include "port_before.h"
/*-
* Copyright (c) 1990, 1993, 1994
* The Regents of the University of California. All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* Mike Olson.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#if defined(LIBC_SCCS) && !defined(lint)
static char sccsid[] = "@(#)bt_utils.c 8.8 (Berkeley) 7/20/94";
#endif /* LIBC_SCCS and not lint */
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <db.h>
#include "btree.h"
/*
* __bt_ret --
* Build return key/data pair.
*
* Parameters:
* t: tree
* e: key/data pair to be returned
* key: user's key structure (NULL if not to be filled in)
* rkey: memory area to hold key
* data: user's data structure (NULL if not to be filled in)
* rdata: memory area to hold data
* copy: always copy the key/data item
*
* Returns:
* RET_SUCCESS, RET_ERROR.
*/
int
__bt_ret(BTREE *t, EPG *e, DBT *key, DBT *rkey, DBT *data, DBT *rdata, int copy)
{
BLEAF *bl;
void *p;
bl = GETBLEAF(e->page, e->index);
/*
* We must copy big keys/data to make them contigous. Otherwise,
* leave the page pinned and don't copy unless the user specified
* concurrent access.
*/
if (key == NULL)
goto dataonly;
if (bl->flags & P_BIGKEY) {
if (__ovfl_get(t, bl->bytes,
&key->size, &rkey->data, &rkey->size))
return (RET_ERROR);
key->data = rkey->data;
} else if (copy || F_ISSET(t, B_DB_LOCK)) {
if (bl->ksize > rkey->size) {
p = realloc(rkey->data, bl->ksize);
if (p == NULL)
return (RET_ERROR);
rkey->data = p;
rkey->size = bl->ksize;
}
memmove(rkey->data, bl->bytes, bl->ksize);
key->size = bl->ksize;
key->data = rkey->data;
} else {
key->size = bl->ksize;
key->data = bl->bytes;
}
dataonly:
if (data == NULL)
return (RET_SUCCESS);
if (bl->flags & P_BIGDATA) {
if (__ovfl_get(t, bl->bytes + bl->ksize,
&data->size, &rdata->data, &rdata->size))
return (RET_ERROR);
data->data = rdata->data;
} else if (copy || F_ISSET(t, B_DB_LOCK)) {
/* Use +1 in case the first record retrieved is 0 length. */
if (bl->dsize + 1 > rdata->size) {
p = realloc(rdata->data, bl->dsize + 1);
if (p == NULL)
return (RET_ERROR);
rdata->data = p;
rdata->size = bl->dsize + 1;
}
memmove(rdata->data, bl->bytes + bl->ksize, bl->dsize);
data->size = bl->dsize;
data->data = rdata->data;
} else {
data->size = bl->dsize;
data->data = bl->bytes + bl->ksize;
}
return (RET_SUCCESS);
}
/*
* __BT_CMP -- Compare a key to a given record.
*
* Parameters:
* t: tree
* k1: DBT pointer of first arg to comparison
* e: pointer to EPG for comparison
*
* Returns:
* < 0 if k1 is < record
* = 0 if k1 is = record
* > 0 if k1 is > record
*/
int
__bt_cmp(BTREE *t, const DBT *k1, EPG *e)
{
BINTERNAL *bi;
BLEAF *bl;
DBT k2;
PAGE *h;
void *bigkey;
/*
* The left-most key on internal pages, at any level of the tree, is
* guaranteed by the following code to be less than any user key.
* This saves us from having to update the leftmost key on an internal
* page when the user inserts a new key in the tree smaller than
* anything we've yet seen.
*/
h = e->page;
if (e->index == 0 && h->prevpg == P_INVALID && !(h->flags & P_BLEAF))
return (1);
bigkey = NULL;
if (h->flags & P_BLEAF) {
bl = GETBLEAF(h, e->index);
if (bl->flags & P_BIGKEY)
bigkey = bl->bytes;
else {
k2.data = bl->bytes;
k2.size = bl->ksize;
}
} else {
bi = GETBINTERNAL(h, e->index);
if (bi->flags & P_BIGKEY)
bigkey = bi->bytes;
else {
k2.data = bi->bytes;
k2.size = bi->ksize;
}
}
if (bigkey) {
if (__ovfl_get(t, bigkey,
&k2.size, &t->bt_rdata.data, &t->bt_rdata.size))
return (RET_ERROR);
k2.data = t->bt_rdata.data;
}
return ((*t->bt_cmp)(k1, &k2));
}
/*
* __BT_DEFCMP -- Default comparison routine.
*
* Parameters:
* a: DBT #1
* b: DBT #2
*
* Returns:
* < 0 if a is < b
* = 0 if a is = b
* > 0 if a is > b
*/
int
__bt_defcmp(const DBT *a, const DBT *b)
{
size_t len;
u_char *p1, *p2;
/*
* XXX
* If a size_t doesn't fit in an int, this routine can lose.
* What we need is an integral type which is guaranteed to be
* larger than a size_t, and there is no such thing.
*/
len = MIN(a->size, b->size);
for (p1 = a->data, p2 = b->data; len--; ++p1, ++p2)
if (*p1 != *p2)
return ((int)*p1 - (int)*p2);
return ((int)a->size - (int)b->size);
}
/*
* __BT_DEFPFX -- Default prefix routine.
*
* Parameters:
* a: DBT #1
* b: DBT #2
*
* Returns:
* Number of bytes needed to distinguish b from a.
*/
size_t
__bt_defpfx(const DBT *a, const DBT *b)
{
u_char *p1, *p2;
size_t cnt, len;
cnt = 1;
len = MIN(a->size, b->size);
for (p1 = a->data, p2 = b->data; len--; ++p1, ++p2, ++cnt)
if (*p1 != *p2)
return (cnt);
/* a->size must be <= b->size, or they wouldn't be in this order. */
return (a->size < b->size ? a->size + 1 : a->size);
}

View File

@ -0,0 +1,380 @@
/*-
* Copyright (c) 1991, 1993, 1994
* The Regents of the University of California. All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* Mike Olson.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)btree.h 8.11 (Berkeley) 8/17/94
* $FreeBSD$
*/
/* Macros to set/clear/test flags. */
#define F_SET(p, f) (p)->flags |= (f)
#define F_CLR(p, f) (p)->flags &= ~(f)
#define F_ISSET(p, f) ((p)->flags & (f))
#include <mpool.h>
#define DEFMINKEYPAGE (2) /* Minimum keys per page */
#define MINCACHE (5) /* Minimum cached pages */
#define MINPSIZE (512) /* Minimum page size */
/*
* Page 0 of a btree file contains a copy of the meta-data. This page is also
* used as an out-of-band page, i.e. page pointers that point to nowhere point
* to page 0. Page 1 is the root of the btree.
*/
#define P_INVALID 0 /* Invalid tree page number. */
#define P_META 0 /* Tree metadata page number. */
#define P_ROOT 1 /* Tree root page number. */
/*
* There are five page layouts in the btree: btree internal pages (BINTERNAL),
* btree leaf pages (BLEAF), recno internal pages (RINTERNAL), recno leaf pages
* (RLEAF) and overflow pages. All five page types have a page header (PAGE).
* This implementation requires that values within structures NOT be padded.
* (ANSI C permits random padding.) If your compiler pads randomly you'll have
* to do some work to get this package to run.
*/
typedef struct _page {
pgno_t pgno; /* this page's page number */
pgno_t prevpg; /* left sibling */
pgno_t nextpg; /* right sibling */
#define P_BINTERNAL 0x01 /* btree internal page */
#define P_BLEAF 0x02 /* leaf page */
#define P_OVERFLOW 0x04 /* overflow page */
#define P_RINTERNAL 0x08 /* recno internal page */
#define P_RLEAF 0x10 /* leaf page */
#define P_TYPE 0x1f /* type mask */
#define P_PRESERVE 0x20 /* never delete this chain of pages */
u_int32_t flags;
indx_t lower; /* lower bound of free space on page */
indx_t upper; /* upper bound of free space on page */
indx_t linp[1]; /* indx_t-aligned VAR. LENGTH DATA */
} PAGE;
/* First and next index. */
#define BTDATAOFF \
(sizeof(pgno_t) + sizeof(pgno_t) + sizeof(pgno_t) + \
sizeof(u_int32_t) + sizeof(indx_t) + sizeof(indx_t))
#define NEXTINDEX(p) (((p)->lower - BTDATAOFF) / sizeof(indx_t))
/*
* For pages other than overflow pages, there is an array of offsets into the
* rest of the page immediately following the page header. Each offset is to
* an item which is unique to the type of page. The h_lower offset is just
* past the last filled-in index. The h_upper offset is the first item on the
* page. Offsets are from the beginning of the page.
*
* If an item is too big to store on a single page, a flag is set and the item
* is a { page, size } pair such that the page is the first page of an overflow
* chain with size bytes of item. Overflow pages are simply bytes without any
* external structure.
*
* The page number and size fields in the items are pgno_t-aligned so they can
* be manipulated without copying. (This presumes that 32 bit items can be
* manipulated on this system.)
*/
#define LALIGN(n) (((n) + sizeof(pgno_t) - 1) & ~(sizeof(pgno_t) - 1))
#define NOVFLSIZE (sizeof(pgno_t) + sizeof(u_int32_t))
/*
* For the btree internal pages, the item is a key. BINTERNALs are {key, pgno}
* pairs, such that the key compares less than or equal to all of the records
* on that page. For a tree without duplicate keys, an internal page with two
* consecutive keys, a and b, will have all records greater than or equal to a
* and less than b stored on the page associated with a. Duplicate keys are
* somewhat special and can cause duplicate internal and leaf page records and
* some minor modifications of the above rule.
*/
typedef struct _binternal {
u_int32_t ksize; /* key size */
pgno_t pgno; /* page number stored on */
#define P_BIGDATA 0x01 /* overflow data */
#define P_BIGKEY 0x02 /* overflow key */
u_char flags;
char bytes[1]; /* data */
} BINTERNAL;
/* Get the page's BINTERNAL structure at index indx. */
#define GETBINTERNAL(pg, indx) \
((BINTERNAL *)((char *)(pg) + (pg)->linp[indx]))
/* Get the number of bytes in the entry. */
#define NBINTERNAL(len) \
LALIGN(sizeof(u_int32_t) + sizeof(pgno_t) + sizeof(u_char) + (len))
/* Copy a BINTERNAL entry to the page. */
#define WR_BINTERNAL(p, size, pgno, flags) { \
*(u_int32_t *)p = size; \
p += sizeof(u_int32_t); \
*(pgno_t *)p = pgno; \
p += sizeof(pgno_t); \
*(u_char *)p = flags; \
p += sizeof(u_char); \
}
/*
* For the recno internal pages, the item is a page number with the number of
* keys found on that page and below.
*/
typedef struct _rinternal {
recno_t nrecs; /* number of records */
pgno_t pgno; /* page number stored below */
} RINTERNAL;
/* Get the page's RINTERNAL structure at index indx. */
#define GETRINTERNAL(pg, indx) \
((RINTERNAL *)((char *)(pg) + (pg)->linp[indx]))
/* Get the number of bytes in the entry. */
#define NRINTERNAL \
LALIGN(sizeof(recno_t) + sizeof(pgno_t))
/* Copy a RINTERAL entry to the page. */
#define WR_RINTERNAL(p, nrecs, pgno) { \
*(recno_t *)p = nrecs; \
p += sizeof(recno_t); \
*(pgno_t *)p = pgno; \
}
/* For the btree leaf pages, the item is a key and data pair. */
typedef struct _bleaf {
u_int32_t ksize; /* size of key */
u_int32_t dsize; /* size of data */
u_char flags; /* P_BIGDATA, P_BIGKEY */
char bytes[1]; /* data */
} BLEAF;
/* Get the page's BLEAF structure at index indx. */
#define GETBLEAF(pg, indx) \
((BLEAF *)((char *)(pg) + (pg)->linp[indx]))
/* Get the number of bytes in the entry. */
#define NBLEAF(p) NBLEAFDBT((p)->ksize, (p)->dsize)
/* Get the number of bytes in the user's key/data pair. */
#define NBLEAFDBT(ksize, dsize) \
LALIGN(sizeof(u_int32_t) + sizeof(u_int32_t) + sizeof(u_char) + \
(ksize) + (dsize))
/* Copy a BLEAF entry to the page. */
#define WR_BLEAF(p, key, data, flags) { \
*(u_int32_t *)p = key->size; \
p += sizeof(u_int32_t); \
*(u_int32_t *)p = data->size; \
p += sizeof(u_int32_t); \
*(u_char *)p = flags; \
p += sizeof(u_char); \
memmove(p, key->data, key->size); \
p += key->size; \
memmove(p, data->data, data->size); \
}
/* For the recno leaf pages, the item is a data entry. */
typedef struct _rleaf {
u_int32_t dsize; /* size of data */
u_char flags; /* P_BIGDATA */
char bytes[1];
} RLEAF;
/* Get the page's RLEAF structure at index indx. */
#define GETRLEAF(pg, indx) \
((RLEAF *)((char *)(pg) + (pg)->linp[indx]))
/* Get the number of bytes in the entry. */
#define NRLEAF(p) NRLEAFDBT((p)->dsize)
/* Get the number of bytes from the user's data. */
#define NRLEAFDBT(dsize) \
LALIGN(sizeof(u_int32_t) + sizeof(u_char) + (dsize))
/* Copy a RLEAF entry to the page. */
#define WR_RLEAF(p, data, flags) { \
*(u_int32_t *)p = data->size; \
p += sizeof(u_int32_t); \
*(u_char *)p = flags; \
p += sizeof(u_char); \
memmove(p, data->data, data->size); \
}
/*
* A record in the tree is either a pointer to a page and an index in the page
* or a page number and an index. These structures are used as a cursor, stack
* entry and search returns as well as to pass records to other routines.
*
* One comment about searches. Internal page searches must find the largest
* record less than key in the tree so that descents work. Leaf page searches
* must find the smallest record greater than key so that the returned index
* is the record's correct position for insertion.
*/
typedef struct _epgno {
pgno_t pgno; /* the page number */
indx_t index; /* the index on the page */
} EPGNO;
typedef struct _epg {
PAGE *page; /* the (pinned) page */
indx_t index; /* the index on the page */
} EPG;
/*
* About cursors. The cursor (and the page that contained the key/data pair
* that it referenced) can be deleted, which makes things a bit tricky. If
* there are no duplicates of the cursor key in the tree (i.e. B_NODUPS is set
* or there simply aren't any duplicates of the key) we copy the key that it
* referenced when it's deleted, and reacquire a new cursor key if the cursor
* is used again. If there are duplicates keys, we move to the next/previous
* key, and set a flag so that we know what happened. NOTE: if duplicate (to
* the cursor) keys are added to the tree during this process, it is undefined
* if they will be returned or not in a cursor scan.
*
* The flags determine the possible states of the cursor:
*
* CURS_INIT The cursor references *something*.
* CURS_ACQUIRE The cursor was deleted, and a key has been saved so that
* we can reacquire the right position in the tree.
* CURS_AFTER, CURS_BEFORE
* The cursor was deleted, and now references a key/data pair
* that has not yet been returned, either before or after the
* deleted key/data pair.
* XXX
* This structure is broken out so that we can eventually offer multiple
* cursors as part of the DB interface.
*/
typedef struct _cursor {
EPGNO pg; /* B: Saved tree reference. */
DBT key; /* B: Saved key, or key.data == NULL. */
recno_t rcursor; /* R: recno cursor (1-based) */
#define CURS_ACQUIRE 0x01 /* B: Cursor needs to be reacquired. */
#define CURS_AFTER 0x02 /* B: Unreturned cursor after key. */
#define CURS_BEFORE 0x04 /* B: Unreturned cursor before key. */
#define CURS_INIT 0x08 /* RB: Cursor initialized. */
u_int8_t flags;
} CURSOR;
/*
* The metadata of the tree. The nrecs field is used only by the RECNO code.
* This is because the btree doesn't really need it and it requires that every
* put or delete call modify the metadata.
*/
typedef struct _btmeta {
u_int32_t magic; /* magic number */
u_int32_t version; /* version */
u_int32_t psize; /* page size */
u_int32_t free; /* page number of first free page */
u_int32_t nrecs; /* R: number of records */
#define SAVEMETA (B_NODUPS | R_RECNO)
u_int32_t flags; /* bt_flags & SAVEMETA */
} BTMETA;
/* The in-memory btree/recno data structure. */
typedef struct _btree {
MPOOL *bt_mp; /* memory pool cookie */
DB *bt_dbp; /* pointer to enclosing DB */
EPG bt_cur; /* current (pinned) page */
PAGE *bt_pinned; /* page pinned across calls */
CURSOR bt_cursor; /* cursor */
#define BT_PUSH(t, p, i) { \
t->bt_sp->pgno = p; \
t->bt_sp->index = i; \
++t->bt_sp; \
}
#define BT_POP(t) (t->bt_sp == t->bt_stack ? NULL : --t->bt_sp)
#define BT_CLR(t) (t->bt_sp = t->bt_stack)
EPGNO bt_stack[50]; /* stack of parent pages */
EPGNO *bt_sp; /* current stack pointer */
DBT bt_rkey; /* returned key */
DBT bt_rdata; /* returned data */
int bt_fd; /* tree file descriptor */
pgno_t bt_free; /* next free page */
u_int32_t bt_psize; /* page size */
indx_t bt_ovflsize; /* cut-off for key/data overflow */
int bt_lorder; /* byte order */
/* sorted order */
enum { NOT, BACK, FORWARD } bt_order;
EPGNO bt_last; /* last insert */
/* B: key comparison function */
int (*bt_cmp)(const DBT *, const DBT *);
/* B: prefix comparison function */
size_t (*bt_pfx)(const DBT *, const DBT *);
/* R: recno input function */
int (*bt_irec)(struct _btree *, recno_t);
FILE *bt_rfp; /* R: record FILE pointer */
int bt_rfd; /* R: record file descriptor */
caddr_t bt_cmap; /* R: current point in mapped space */
caddr_t bt_smap; /* R: start of mapped space */
caddr_t bt_emap; /* R: end of mapped space */
size_t bt_msize; /* R: size of mapped region. */
recno_t bt_nrecs; /* R: number of records */
size_t bt_reclen; /* R: fixed record length */
u_char bt_bval; /* R: delimiting byte/pad character */
/*
* NB:
* B_NODUPS and R_RECNO are stored on disk, and may not be changed.
*/
#define B_INMEM 0x00001 /* in-memory tree */
#define B_METADIRTY 0x00002 /* need to write metadata */
#define B_MODIFIED 0x00004 /* tree modified */
#define B_NEEDSWAP 0x00008 /* if byte order requires swapping */
#define B_RDONLY 0x00010 /* read-only tree */
#define B_NODUPS 0x00020 /* no duplicate keys permitted */
#define R_RECNO 0x00080 /* record oriented tree */
#define R_CLOSEFP 0x00040 /* opened a file pointer */
#define R_EOF 0x00100 /* end of input file reached. */
#define R_FIXLEN 0x00200 /* fixed length records */
#define R_MEMMAPPED 0x00400 /* memory mapped file. */
#define R_INMEM 0x00800 /* in-memory file */
#define R_MODIFIED 0x01000 /* modified file */
#define R_RDONLY 0x02000 /* read-only file */
#define B_DB_LOCK 0x04000 /* DB_LOCK specified. */
#define B_DB_SHMEM 0x08000 /* DB_SHMEM specified. */
#define B_DB_TXN 0x10000 /* DB_TXN specified. */
u_int32_t flags;
} BTREE;
#include "extern.h"

View File

@ -0,0 +1,67 @@
/*-
* Copyright (c) 1991, 1993, 1994
* The Regents of the University of California. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)extern.h 8.10 (Berkeley) 7/20/94
* $FreeBSD$
*/
int __bt_close(DB *);
int __bt_cmp(BTREE *, const DBT *, EPG *);
int __bt_crsrdel(BTREE *, EPGNO *);
int __bt_defcmp(const DBT *, const DBT *);
size_t __bt_defpfx(const DBT *, const DBT *);
int __bt_delete(const DB *, const DBT *, u_int);
int __bt_dleaf(BTREE *, const DBT *, PAGE *, u_int);
int __bt_fd(const DB *);
int __bt_free(BTREE *, PAGE *);
int __bt_get(const DB *, const DBT *, DBT *, u_int);
PAGE *__bt_new(BTREE *, pgno_t *);
void __bt_pgin(void *, pgno_t, void *);
void __bt_pgout(void *, pgno_t, void *);
int __bt_push(BTREE *, pgno_t, int);
int __bt_put(const DB *dbp, DBT *, const DBT *, u_int);
int __bt_ret(BTREE *, EPG *, DBT *, DBT *, DBT *, DBT *, int);
EPG *__bt_search(BTREE *, const DBT *, int *);
int __bt_seq(const DB *, DBT *, DBT *, u_int);
void __bt_setcur(BTREE *, pgno_t, u_int);
int __bt_split(BTREE *, PAGE *,
const DBT *, const DBT *, int, size_t, u_int32_t);
int __bt_sync(const DB *, u_int);
int __ovfl_delete(BTREE *, void *);
int __ovfl_get(BTREE *, void *, size_t *, void **, size_t *);
int __ovfl_put(BTREE *, const DBT *, pgno_t *);
#ifdef DEBUG
void __bt_dnpage(DB *, pgno_t);
void __bt_dpage(PAGE *);
void __bt_dump(DB *);
#endif
#ifdef STATISTICS
void __bt_stat(DB *);
#endif

View File

@ -0,0 +1,96 @@
#include "port_before.h"
/*-
* Copyright (c) 1991, 1993
* The Regents of the University of California. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#if defined(LIBC_SCCS) && !defined(lint)
static char sccsid[] = "@(#)db.c 8.4 (Berkeley) 2/21/94";
#endif /* LIBC_SCCS and not lint */
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/types.h>
#include <errno.h>
#include <fcntl.h>
#include <stddef.h>
#include <stdio.h>
#include <db.h>
static int __dberr(void);
DB *
dbopen(const char *fname, int flags, int mode, DBTYPE type, const void *openinfo)
{
#define DB_FLAGS (DB_LOCK | DB_SHMEM | DB_TXN)
#define USE_OPEN_FLAGS \
(O_CREAT | O_EXCL | O_EXLOCK | O_NOFOLLOW | O_NONBLOCK | \
O_RDONLY | O_RDWR | O_SHLOCK | O_SYNC | O_TRUNC)
if ((flags & ~(USE_OPEN_FLAGS | DB_FLAGS)) == 0)
switch (type) {
case DB_BTREE:
return (__bt_open(fname, flags & USE_OPEN_FLAGS,
mode, openinfo, flags & DB_FLAGS));
case DB_HASH:
return (__hash_open(fname, flags & USE_OPEN_FLAGS,
mode, openinfo, flags & DB_FLAGS));
case DB_RECNO:
return (__rec_open(fname, flags & USE_OPEN_FLAGS,
mode, openinfo, flags & DB_FLAGS));
}
errno = EINVAL;
return (NULL);
}
static int
__dberr(void)
{
return (RET_ERROR);
}
/*
* __DBPANIC -- Stop.
*
* Parameters:
* dbp: pointer to the DB structure.
*/
void
__dbpanic(DB *dbp)
{
/* The only thing that can succeed is a close. */
dbp->del = (int (*)(const struct __db *, const DBT*, u_int))__dberr;
dbp->fd = (int (*)(const struct __db *))__dberr;
dbp->get = (int (*)(const struct __db *, const DBT*, DBT *, u_int))__dberr;
dbp->put = (int (*)(const struct __db *, DBT *, const DBT *, u_int))__dberr;
dbp->seq = (int (*)(const struct __db *, DBT *, DBT *, u_int))__dberr;
dbp->sync = (int (*)(const struct __db *, u_int))__dberr;
}

View File

@ -0,0 +1,43 @@
/*-
* Copyright (c) 2009 Xin LI <delphij@FreeBSD.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <db.h>
#include <mpool.h>
void *__mpool_new__44bsd(MPOOL *, pgno_t *);
void *
__mpool_new__44bsd(MPOOL *mp, pgno_t *pgnoaddr)
{
return (mpool_new(mp, pgnoaddr, MPOOL_PAGE_NEXT));
}
__sym_compat(mpool_new, __mpool_new_44bsd, FBSD_1.0);

View File

@ -0,0 +1,495 @@
#include "port_before.h"
/*-
* Copyright (c) 1990, 1993, 1994
* The Regents of the University of California. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#if defined(LIBC_SCCS) && !defined(lint)
static char sccsid[] = "@(#)mpool.c 8.7 (Berkeley) 11/2/95";
#endif /* LIBC_SCCS and not lint */
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include "namespace.h"
#include <sys/param.h>
#include <sys/queue.h>
#include <sys/stat.h>
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include "un-namespace.h"
#include <db.h>
#define __MPOOLINTERFACE_PRIVATE
#include <mpool.h>
static BKT *mpool_bkt(MPOOL *);
static BKT *mpool_look(MPOOL *, pgno_t);
static int mpool_write(MPOOL *, BKT *);
/*
* mpool_open --
* Initialize a memory pool.
*/
/* ARGSUSED */
MPOOL *
mpool_open(void *key, int fd, pgno_t pagesize, pgno_t maxcache)
{
struct stat sb;
MPOOL *mp;
int entry;
/*
* Get information about the file.
*
* XXX
* We don't currently handle pipes, although we should.
*/
if (_fstat(fd, &sb))
return (NULL);
if (!S_ISREG(sb.st_mode)) {
errno = ESPIPE;
return (NULL);
}
/* Allocate and initialize the MPOOL cookie. */
if ((mp = (MPOOL *)calloc(1, sizeof(MPOOL))) == NULL)
return (NULL);
TAILQ_INIT(&mp->lqh);
for (entry = 0; entry < HASHSIZE; ++entry)
TAILQ_INIT(&mp->hqh[entry]);
mp->maxcache = maxcache;
mp->npages = sb.st_size / pagesize;
mp->pagesize = pagesize;
mp->fd = fd;
return (mp);
}
/*
* mpool_filter --
* Initialize input/output filters.
*/
void
mpool_filter(MPOOL *mp, void (*pgin) (void *, pgno_t, void *),
void (*pgout) (void *, pgno_t, void *), void *pgcookie)
{
mp->pgin = pgin;
mp->pgout = pgout;
mp->pgcookie = pgcookie;
}
/*
* mpool_new --
* Get a new page of memory.
*/
void *
mpool_new(MPOOL *mp, pgno_t *pgnoaddr, u_int flags)
{
struct _hqh *head;
BKT *bp;
if (mp->npages == MAX_PAGE_NUMBER) {
(void)fprintf(stderr, "mpool_new: page allocation overflow.\n");
abort();
}
#ifdef STATISTICS
++mp->pagenew;
#endif
/*
* Get a BKT from the cache. Assign a new page number, attach
* it to the head of the hash chain, the tail of the lru chain,
* and return.
*/
if ((bp = mpool_bkt(mp)) == NULL)
return (NULL);
if (flags == MPOOL_PAGE_REQUEST) {
mp->npages++;
bp->pgno = *pgnoaddr;
} else
bp->pgno = *pgnoaddr = mp->npages++;
bp->flags = MPOOL_PINNED | MPOOL_INUSE;
head = &mp->hqh[HASHKEY(bp->pgno)];
TAILQ_INSERT_HEAD(head, bp, hq);
TAILQ_INSERT_TAIL(&mp->lqh, bp, q);
return (bp->page);
}
int
mpool_delete(MPOOL *mp, void *page)
{
struct _hqh *head;
BKT *bp;
bp = (BKT *)((char *)page - sizeof(BKT));
#ifdef DEBUG
if (!(bp->flags & MPOOL_PINNED)) {
(void)fprintf(stderr,
"mpool_delete: page %d not pinned\n", bp->pgno);
abort();
}
#endif
/* Remove from the hash and lru queues. */
head = &mp->hqh[HASHKEY(bp->pgno)];
TAILQ_REMOVE(head, bp, hq);
TAILQ_REMOVE(&mp->lqh, bp, q);
free(bp);
mp->curcache--;
return (RET_SUCCESS);
}
/*
* mpool_get
* Get a page.
*/
/* ARGSUSED */
void *
mpool_get(MPOOL *mp, pgno_t pgno,
u_int flags) /* XXX not used? */
{
struct _hqh *head;
BKT *bp;
off_t off;
int nr;
#ifdef STATISTICS
++mp->pageget;
#endif
/* Check for a page that is cached. */
if ((bp = mpool_look(mp, pgno)) != NULL) {
#ifdef DEBUG
if (!(flags & MPOOL_IGNOREPIN) && bp->flags & MPOOL_PINNED) {
(void)fprintf(stderr,
"mpool_get: page %d already pinned\n", bp->pgno);
abort();
}
#endif
/*
* Move the page to the head of the hash chain and the tail
* of the lru chain.
*/
head = &mp->hqh[HASHKEY(bp->pgno)];
TAILQ_REMOVE(head, bp, hq);
TAILQ_INSERT_HEAD(head, bp, hq);
TAILQ_REMOVE(&mp->lqh, bp, q);
TAILQ_INSERT_TAIL(&mp->lqh, bp, q);
/* Return a pinned page. */
bp->flags |= MPOOL_PINNED;
return (bp->page);
}
/* Get a page from the cache. */
if ((bp = mpool_bkt(mp)) == NULL)
return (NULL);
/* Read in the contents. */
off = mp->pagesize * pgno;
if ((nr = pread(mp->fd, bp->page, mp->pagesize, off)) != (ssize_t)mp->pagesize) {
switch (nr) {
case -1:
/* errno is set for us by pread(). */
free(bp);
mp->curcache--;
return (NULL);
case 0:
/*
* A zero-length read means you need to create a
* new page.
*/
memset(bp->page, 0, mp->pagesize);
break;
default:
/* A partial read is definitely bad. */
free(bp);
mp->curcache--;
errno = EINVAL;
return (NULL);
}
}
#ifdef STATISTICS
++mp->pageread;
#endif
/* Set the page number, pin the page. */
bp->pgno = pgno;
if (!(flags & MPOOL_IGNOREPIN))
bp->flags = MPOOL_PINNED;
bp->flags |= MPOOL_INUSE;
/*
* Add the page to the head of the hash chain and the tail
* of the lru chain.
*/
head = &mp->hqh[HASHKEY(bp->pgno)];
TAILQ_INSERT_HEAD(head, bp, hq);
TAILQ_INSERT_TAIL(&mp->lqh, bp, q);
/* Run through the user's filter. */
if (mp->pgin != NULL)
(mp->pgin)(mp->pgcookie, bp->pgno, bp->page);
return (bp->page);
}
/*
* mpool_put
* Return a page.
*/
/* ARGSUSED */
int
mpool_put(MPOOL *mp, void *page, u_int flags)
{
BKT *bp;
#ifdef STATISTICS
++mp->pageput;
#endif
bp = (BKT *)((char *)page - sizeof(BKT));
#ifdef DEBUG
if (!(bp->flags & MPOOL_PINNED)) {
(void)fprintf(stderr,
"mpool_put: page %d not pinned\n", bp->pgno);
abort();
}
#endif
bp->flags &= ~MPOOL_PINNED;
if (flags & MPOOL_DIRTY)
bp->flags |= flags & MPOOL_DIRTY;
return (RET_SUCCESS);
}
/*
* mpool_close
* Close the buffer pool.
*/
int
mpool_close(MPOOL *mp)
{
BKT *bp;
/* Free up any space allocated to the lru pages. */
while (!TAILQ_EMPTY(&mp->lqh)) {
bp = TAILQ_FIRST(&mp->lqh);
TAILQ_REMOVE(&mp->lqh, bp, q);
free(bp);
}
/* Free the MPOOL cookie. */
free(mp);
return (RET_SUCCESS);
}
/*
* mpool_sync
* Sync the pool to disk.
*/
int
mpool_sync(MPOOL *mp)
{
BKT *bp;
/* Walk the lru chain, flushing any dirty pages to disk. */
TAILQ_FOREACH(bp, &mp->lqh, q)
if (bp->flags & MPOOL_DIRTY &&
mpool_write(mp, bp) == RET_ERROR)
return (RET_ERROR);
/* Sync the file descriptor. */
return (_fsync(mp->fd) ? RET_ERROR : RET_SUCCESS);
}
/*
* mpool_bkt
* Get a page from the cache (or create one).
*/
static BKT *
mpool_bkt(MPOOL *mp)
{
struct _hqh *head;
BKT *bp;
/* If under the max cached, always create a new page. */
if (mp->curcache < mp->maxcache)
goto new;
/*
* If the cache is max'd out, walk the lru list for a buffer we
* can flush. If we find one, write it (if necessary) and take it
* off any lists. If we don't find anything we grow the cache anyway.
* The cache never shrinks.
*/
TAILQ_FOREACH(bp, &mp->lqh, q)
if (!(bp->flags & MPOOL_PINNED)) {
/* Flush if dirty. */
if (bp->flags & MPOOL_DIRTY &&
mpool_write(mp, bp) == RET_ERROR)
return (NULL);
#ifdef STATISTICS
++mp->pageflush;
#endif
/* Remove from the hash and lru queues. */
head = &mp->hqh[HASHKEY(bp->pgno)];
TAILQ_REMOVE(head, bp, hq);
TAILQ_REMOVE(&mp->lqh, bp, q);
#ifdef DEBUG
{ void *spage;
spage = bp->page;
memset(bp, 0xff, sizeof(BKT) + mp->pagesize);
bp->page = spage;
}
#endif
bp->flags = 0;
return (bp);
}
new: if ((bp = (BKT *)calloc(1, sizeof(BKT) + mp->pagesize)) == NULL)
return (NULL);
#ifdef STATISTICS
++mp->pagealloc;
#endif
bp->page = (char *)bp + sizeof(BKT);
bp->flags = 0;
++mp->curcache;
return (bp);
}
/*
* mpool_write
* Write a page to disk.
*/
static int
mpool_write(MPOOL *mp, BKT *bp)
{
off_t off;
#ifdef STATISTICS
++mp->pagewrite;
#endif
/* Run through the user's filter. */
if (mp->pgout)
(mp->pgout)(mp->pgcookie, bp->pgno, bp->page);
off = mp->pagesize * bp->pgno;
if (pwrite(mp->fd, bp->page, mp->pagesize, off) != (ssize_t)mp->pagesize)
return (RET_ERROR);
/*
* Re-run through the input filter since this page may soon be
* accessed via the cache, and whatever the user's output filter
* did may screw things up if we don't let the input filter
* restore the in-core copy.
*/
if (mp->pgin)
(mp->pgin)(mp->pgcookie, bp->pgno, bp->page);
bp->flags &= ~MPOOL_DIRTY;
return (RET_SUCCESS);
}
/*
* mpool_look
* Lookup a page in the cache.
*/
static BKT *
mpool_look(MPOOL *mp, pgno_t pgno)
{
struct _hqh *head;
BKT *bp;
head = &mp->hqh[HASHKEY(pgno)];
TAILQ_FOREACH(bp, head, hq)
if ((bp->pgno == pgno) &&
((bp->flags & MPOOL_INUSE) == MPOOL_INUSE)) {
#ifdef STATISTICS
++mp->cachehit;
#endif
return (bp);
}
#ifdef STATISTICS
++mp->cachemiss;
#endif
return (NULL);
}
#ifdef STATISTICS
/*
* mpool_stat
* Print out cache statistics.
*/
void
mpool_stat(MPOOL *mp)
{
BKT *bp;
int cnt;
char *sep;
(void)fprintf(stderr, "%lu pages in the file\n", mp->npages);
(void)fprintf(stderr,
"page size %lu, cacheing %lu pages of %lu page max cache\n",
mp->pagesize, mp->curcache, mp->maxcache);
(void)fprintf(stderr, "%lu page puts, %lu page gets, %lu page new\n",
mp->pageput, mp->pageget, mp->pagenew);
(void)fprintf(stderr, "%lu page allocs, %lu page flushes\n",
mp->pagealloc, mp->pageflush);
if (mp->cachehit + mp->cachemiss)
(void)fprintf(stderr,
"%.0f%% cache hit rate (%lu hits, %lu misses)\n",
((double)mp->cachehit / (mp->cachehit + mp->cachemiss))
* 100, mp->cachehit, mp->cachemiss);
(void)fprintf(stderr, "%lu page reads, %lu page writes\n",
mp->pageread, mp->pagewrite);
sep = "";
cnt = 0;
TAILQ_FOREACH(bp, &mp->lqh, q) {
(void)fprintf(stderr, "%s%d", sep, bp->pgno);
if (bp->flags & MPOOL_DIRTY)
(void)fprintf(stderr, "d");
if (bp->flags & MPOOL_PINNED)
(void)fprintf(stderr, "P");
if (++cnt == 10) {
sep = "\n";
cnt = 0;
} else
sep = ", ";
}
(void)fprintf(stderr, "\n");
}
#endif

View File

@ -0,0 +1,51 @@
/*-
* Copyright (c) 1991, 1993
* The Regents of the University of California. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)extern.h 8.3 (Berkeley) 6/4/94
* $FreeBSD$
*/
#include "../btree/extern.h"
int __rec_close(DB *);
int __rec_delete(const DB *, const DBT *, u_int);
int __rec_dleaf(BTREE *, PAGE *, u_int32_t);
int __rec_fd(const DB *);
int __rec_fmap(BTREE *, recno_t);
int __rec_fout(BTREE *);
int __rec_fpipe(BTREE *, recno_t);
int __rec_get(const DB *, const DBT *, DBT *, u_int);
int __rec_iput(BTREE *, recno_t, const DBT *, u_int);
int __rec_put(const DB *dbp, DBT *, const DBT *, u_int);
int __rec_ret(BTREE *, EPG *, recno_t, DBT *, DBT *);
EPG *__rec_search(BTREE *, recno_t, enum SRCHOP);
int __rec_seq(const DB *, DBT *, DBT *, u_int);
int __rec_sync(const DB *, u_int);
int __rec_vmap(BTREE *, recno_t);
int __rec_vout(BTREE *);
int __rec_vpipe(BTREE *, recno_t);

View File

@ -0,0 +1,184 @@
#include "port_before.h"
/*-
* Copyright (c) 1990, 1993, 1994
* The Regents of the University of California. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#if defined(LIBC_SCCS) && !defined(lint)
static char sccsid[] = "@(#)rec_close.c 8.6 (Berkeley) 8/18/94";
#endif /* LIBC_SCCS and not lint */
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include "namespace.h"
#include <sys/types.h>
#include <sys/uio.h>
#include <sys/mman.h>
#include <errno.h>
#include <limits.h>
#include <stdio.h>
#include <unistd.h>
#include "un-namespace.h"
#include <db.h>
#include "recno.h"
/*
* __REC_CLOSE -- Close a recno tree.
*
* Parameters:
* dbp: pointer to access method
*
* Returns:
* RET_ERROR, RET_SUCCESS
*/
int
__rec_close(DB *dbp)
{
BTREE *t;
int status;
t = dbp->internal;
/* Toss any page pinned across calls. */
if (t->bt_pinned != NULL) {
mpool_put(t->bt_mp, t->bt_pinned, 0);
t->bt_pinned = NULL;
}
if (__rec_sync(dbp, 0) == RET_ERROR)
return (RET_ERROR);
/* Committed to closing. */
status = RET_SUCCESS;
if (F_ISSET(t, R_MEMMAPPED) && munmap(t->bt_smap, t->bt_msize))
status = RET_ERROR;
if (!F_ISSET(t, R_INMEM)) {
if (F_ISSET(t, R_CLOSEFP)) {
if (fclose(t->bt_rfp))
status = RET_ERROR;
} else {
if (_close(t->bt_rfd))
status = RET_ERROR;
}
}
if (__bt_close(dbp) == RET_ERROR)
status = RET_ERROR;
return (status);
}
/*
* __REC_SYNC -- sync the recno tree to disk.
*
* Parameters:
* dbp: pointer to access method
*
* Returns:
* RET_SUCCESS, RET_ERROR.
*/
int
__rec_sync(const DB *dbp, u_int flags)
{
struct iovec iov[2];
BTREE *t;
DBT data, key;
off_t off;
recno_t scursor, trec;
int status;
t = dbp->internal;
/* Toss any page pinned across calls. */
if (t->bt_pinned != NULL) {
mpool_put(t->bt_mp, t->bt_pinned, 0);
t->bt_pinned = NULL;
}
if (flags == R_RECNOSYNC)
return (__bt_sync(dbp, 0));
if (F_ISSET(t, R_RDONLY | R_INMEM) || !F_ISSET(t, R_MODIFIED))
return (RET_SUCCESS);
/* Read any remaining records into the tree. */
if (!F_ISSET(t, R_EOF) && t->bt_irec(t, MAX_REC_NUMBER) == RET_ERROR)
return (RET_ERROR);
/* Rewind the file descriptor. */
if (lseek(t->bt_rfd, (off_t)0, SEEK_SET) != 0)
return (RET_ERROR);
/* Save the cursor. */
scursor = t->bt_cursor.rcursor;
key.size = sizeof(recno_t);
key.data = &trec;
if (F_ISSET(t, R_FIXLEN)) {
/*
* We assume that fixed length records are all fixed length.
* Any that aren't are either EINVAL'd or corrected by the
* record put code.
*/
status = (dbp->seq)(dbp, &key, &data, R_FIRST);
while (status == RET_SUCCESS) {
if (_write(t->bt_rfd, data.data, data.size) !=
(ssize_t)data.size)
return (RET_ERROR);
status = (dbp->seq)(dbp, &key, &data, R_NEXT);
}
} else {
iov[1].iov_base = &t->bt_bval;
iov[1].iov_len = 1;
status = (dbp->seq)(dbp, &key, &data, R_FIRST);
while (status == RET_SUCCESS) {
iov[0].iov_base = data.data;
iov[0].iov_len = data.size;
if (_writev(t->bt_rfd, iov, 2) != (ssize_t)(data.size + 1))
return (RET_ERROR);
status = (dbp->seq)(dbp, &key, &data, R_NEXT);
}
}
/* Restore the cursor. */
t->bt_cursor.rcursor = scursor;
if (status == RET_ERROR)
return (RET_ERROR);
if ((off = lseek(t->bt_rfd, (off_t)0, SEEK_CUR)) == -1)
return (RET_ERROR);
if (ftruncate(t->bt_rfd, off))
return (RET_ERROR);
F_CLR(t, R_MODIFIED);
return (RET_SUCCESS);
}

View File

@ -0,0 +1,189 @@
#include "port_before.h"
/*-
* Copyright (c) 1990, 1993, 1994
* The Regents of the University of California. All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* Mike Olson.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#if defined(LIBC_SCCS) && !defined(lint)
static char sccsid[] = "@(#)rec_delete.c 8.7 (Berkeley) 7/14/94";
#endif /* LIBC_SCCS and not lint */
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/types.h>
#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <db.h>
#include "recno.h"
static int rec_rdelete(BTREE *, recno_t);
/*
* __REC_DELETE -- Delete the item(s) referenced by a key.
*
* Parameters:
* dbp: pointer to access method
* key: key to delete
* flags: R_CURSOR if deleting what the cursor references
*
* Returns:
* RET_ERROR, RET_SUCCESS and RET_SPECIAL if the key not found.
*/
int
__rec_delete(const DB *dbp, const DBT *key, u_int flags)
{
BTREE *t;
recno_t nrec;
int status;
t = dbp->internal;
/* Toss any page pinned across calls. */
if (t->bt_pinned != NULL) {
mpool_put(t->bt_mp, t->bt_pinned, 0);
t->bt_pinned = NULL;
}
switch(flags) {
case 0:
if ((nrec = *(recno_t *)key->data) == 0)
goto einval;
if (nrec > t->bt_nrecs)
return (RET_SPECIAL);
--nrec;
status = rec_rdelete(t, nrec);
break;
case R_CURSOR:
if (!F_ISSET(&t->bt_cursor, CURS_INIT))
goto einval;
if (t->bt_nrecs == 0)
return (RET_SPECIAL);
status = rec_rdelete(t, t->bt_cursor.rcursor - 1);
if (status == RET_SUCCESS)
--t->bt_cursor.rcursor;
break;
default:
einval: errno = EINVAL;
return (RET_ERROR);
}
if (status == RET_SUCCESS)
F_SET(t, B_MODIFIED | R_MODIFIED);
return (status);
}
/*
* REC_RDELETE -- Delete the data matching the specified key.
*
* Parameters:
* tree: tree
* nrec: record to delete
*
* Returns:
* RET_ERROR, RET_SUCCESS and RET_SPECIAL if the key not found.
*/
static int
rec_rdelete(BTREE *t, recno_t nrec)
{
EPG *e;
PAGE *h;
int status;
/* Find the record; __rec_search pins the page. */
if ((e = __rec_search(t, nrec, SDELETE)) == NULL)
return (RET_ERROR);
/* Delete the record. */
h = e->page;
status = __rec_dleaf(t, h, e->index);
if (status != RET_SUCCESS) {
mpool_put(t->bt_mp, h, 0);
return (status);
}
mpool_put(t->bt_mp, h, MPOOL_DIRTY);
return (RET_SUCCESS);
}
/*
* __REC_DLEAF -- Delete a single record from a recno leaf page.
*
* Parameters:
* t: tree
* idx: index on current page to delete
*
* Returns:
* RET_SUCCESS, RET_ERROR.
*/
int
__rec_dleaf(BTREE *t, PAGE *h, u_int32_t idx)
{
RLEAF *rl;
indx_t *ip, cnt, offset;
u_int32_t nbytes;
char *from;
void *to;
/*
* Delete a record from a recno leaf page. Internal records are never
* deleted from internal pages, regardless of the records that caused
* them to be added being deleted. Pages made empty by deletion are
* not reclaimed. They are, however, made available for reuse.
*
* Pack the remaining entries at the end of the page, shift the indices
* down, overwriting the deleted record and its index. If the record
* uses overflow pages, make them available for reuse.
*/
to = rl = GETRLEAF(h, idx);
if (rl->flags & P_BIGDATA && __ovfl_delete(t, rl->bytes) == RET_ERROR)
return (RET_ERROR);
nbytes = NRLEAF(rl);
/*
* Compress the key/data pairs. Compress and adjust the [BR]LEAF
* offsets. Reset the headers.
*/
from = (char *)h + h->upper;
memmove(from + nbytes, from, (char *)to - from);
h->upper += nbytes;
offset = h->linp[idx];
for (cnt = &h->linp[idx] - (ip = &h->linp[0]); cnt--; ++ip)
if (ip[0] < offset)
ip[0] += nbytes;
for (cnt = &h->linp[NEXTINDEX(h)] - ip; --cnt; ++ip)
ip[0] = ip[1] < offset ? ip[1] + nbytes : ip[1];
h->lower -= sizeof(indx_t);
--t->bt_nrecs;
return (RET_SUCCESS);
}

View File

@ -0,0 +1,293 @@
#include "port_before.h"
/*-
* Copyright (c) 1990, 1993, 1994
* The Regents of the University of California. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#if defined(LIBC_SCCS) && !defined(lint)
static char sccsid[] = "@(#)rec_get.c 8.9 (Berkeley) 8/18/94";
#endif /* LIBC_SCCS and not lint */
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/types.h>
#include <errno.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <db.h>
#include "recno.h"
/*
* __REC_GET -- Get a record from the btree.
*
* Parameters:
* dbp: pointer to access method
* key: key to find
* data: data to return
* flag: currently unused
*
* Returns:
* RET_ERROR, RET_SUCCESS and RET_SPECIAL if the key not found.
*/
int
__rec_get(const DB *dbp, const DBT *key, DBT *data, u_int flags)
{
BTREE *t;
EPG *e;
recno_t nrec;
int status;
t = dbp->internal;
/* Toss any page pinned across calls. */
if (t->bt_pinned != NULL) {
mpool_put(t->bt_mp, t->bt_pinned, 0);
t->bt_pinned = NULL;
}
/* Get currently doesn't take any flags, and keys of 0 are illegal. */
if (flags || (nrec = *(recno_t *)key->data) == 0) {
errno = EINVAL;
return (RET_ERROR);
}
/*
* If we haven't seen this record yet, try to find it in the
* original file.
*/
if (nrec > t->bt_nrecs) {
if (F_ISSET(t, R_EOF | R_INMEM))
return (RET_SPECIAL);
if ((status = t->bt_irec(t, nrec)) != RET_SUCCESS)
return (status);
}
--nrec;
if ((e = __rec_search(t, nrec, SEARCH)) == NULL)
return (RET_ERROR);
status = __rec_ret(t, e, 0, NULL, data);
if (F_ISSET(t, B_DB_LOCK))
mpool_put(t->bt_mp, e->page, 0);
else
t->bt_pinned = e->page;
return (status);
}
/*
* __REC_FPIPE -- Get fixed length records from a pipe.
*
* Parameters:
* t: tree
* cnt: records to read
*
* Returns:
* RET_ERROR, RET_SUCCESS
*/
int
__rec_fpipe(BTREE *t, recno_t top)
{
DBT data;
recno_t nrec;
size_t len;
int ch;
u_char *p;
if (t->bt_rdata.size < t->bt_reclen) {
t->bt_rdata.data = reallocf(t->bt_rdata.data, t->bt_reclen);
if (t->bt_rdata.data == NULL)
return (RET_ERROR);
t->bt_rdata.size = t->bt_reclen;
}
data.data = t->bt_rdata.data;
data.size = t->bt_reclen;
for (nrec = t->bt_nrecs; nrec < top;) {
len = t->bt_reclen;
for (p = t->bt_rdata.data;; *p++ = ch)
if ((ch = getc(t->bt_rfp)) == EOF || !--len) {
if (ch != EOF)
*p = ch;
if (len != 0)
memset(p, t->bt_bval, len);
if (__rec_iput(t,
nrec, &data, 0) != RET_SUCCESS)
return (RET_ERROR);
++nrec;
break;
}
if (ch == EOF)
break;
}
if (nrec < top) {
F_SET(t, R_EOF);
return (RET_SPECIAL);
}
return (RET_SUCCESS);
}
/*
* __REC_VPIPE -- Get variable length records from a pipe.
*
* Parameters:
* t: tree
* cnt: records to read
*
* Returns:
* RET_ERROR, RET_SUCCESS
*/
int
__rec_vpipe(BTREE *t, recno_t top)
{
DBT data;
recno_t nrec;
size_t len;
size_t sz;
int bval, ch;
u_char *p;
bval = t->bt_bval;
for (nrec = t->bt_nrecs; nrec < top; ++nrec) {
for (p = t->bt_rdata.data,
sz = t->bt_rdata.size;; *p++ = ch, --sz) {
if ((ch = getc(t->bt_rfp)) == EOF || ch == bval) {
data.data = t->bt_rdata.data;
data.size = p - (u_char *)t->bt_rdata.data;
if (ch == EOF && data.size == 0)
break;
if (__rec_iput(t, nrec, &data, 0)
!= RET_SUCCESS)
return (RET_ERROR);
break;
}
if (sz == 0) {
len = p - (u_char *)t->bt_rdata.data;
t->bt_rdata.size += (sz = 256);
t->bt_rdata.data = reallocf(t->bt_rdata.data, t->bt_rdata.size);
if (t->bt_rdata.data == NULL)
return (RET_ERROR);
p = (u_char *)t->bt_rdata.data + len;
}
}
if (ch == EOF)
break;
}
if (nrec < top) {
F_SET(t, R_EOF);
return (RET_SPECIAL);
}
return (RET_SUCCESS);
}
/*
* __REC_FMAP -- Get fixed length records from a file.
*
* Parameters:
* t: tree
* cnt: records to read
*
* Returns:
* RET_ERROR, RET_SUCCESS
*/
int
__rec_fmap(BTREE *t, recno_t top)
{
DBT data;
recno_t nrec;
u_char *sp, *ep, *p;
size_t len;
if (t->bt_rdata.size < t->bt_reclen) {
t->bt_rdata.data = reallocf(t->bt_rdata.data, t->bt_reclen);
if (t->bt_rdata.data == NULL)
return (RET_ERROR);
t->bt_rdata.size = t->bt_reclen;
}
data.data = t->bt_rdata.data;
data.size = t->bt_reclen;
sp = (u_char *)t->bt_cmap;
ep = (u_char *)t->bt_emap;
for (nrec = t->bt_nrecs; nrec < top; ++nrec) {
if (sp >= ep) {
F_SET(t, R_EOF);
return (RET_SPECIAL);
}
len = t->bt_reclen;
for (p = t->bt_rdata.data;
sp < ep && len > 0; *p++ = *sp++, --len);
if (len != 0)
memset(p, t->bt_bval, len);
if (__rec_iput(t, nrec, &data, 0) != RET_SUCCESS)
return (RET_ERROR);
}
t->bt_cmap = (caddr_t)sp;
return (RET_SUCCESS);
}
/*
* __REC_VMAP -- Get variable length records from a file.
*
* Parameters:
* t: tree
* cnt: records to read
*
* Returns:
* RET_ERROR, RET_SUCCESS
*/
int
__rec_vmap(BTREE *t, recno_t top)
{
DBT data;
u_char *sp, *ep;
recno_t nrec;
int bval;
sp = (u_char *)t->bt_cmap;
ep = (u_char *)t->bt_emap;
bval = t->bt_bval;
for (nrec = t->bt_nrecs; nrec < top; ++nrec) {
if (sp >= ep) {
F_SET(t, R_EOF);
return (RET_SPECIAL);
}
for (data.data = sp; sp < ep && *sp != bval; ++sp);
data.size = sp - (u_char *)data.data;
if (__rec_iput(t, nrec, &data, 0) != RET_SUCCESS)
return (RET_ERROR);
++sp;
}
t->bt_cmap = (caddr_t)sp;
return (RET_SUCCESS);
}

View File

@ -0,0 +1,240 @@
#include "port_before.h"
/*-
* Copyright (c) 1990, 1993, 1994
* The Regents of the University of California. All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* Mike Olson.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#if defined(LIBC_SCCS) && !defined(lint)
static char sccsid[] = "@(#)rec_open.c 8.10 (Berkeley) 9/1/94";
#endif /* LIBC_SCCS and not lint */
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include "namespace.h"
#include <sys/types.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <errno.h>
#include <fcntl.h>
#include <limits.h>
#include <stddef.h>
#include <stdio.h>
#include <unistd.h>
#include "un-namespace.h"
#include <db.h>
#include "recno.h"
DB *
__rec_open(const char *fname, int flags, int mode, const RECNOINFO *openinfo,
int dflags)
{
BTREE *t;
BTREEINFO btopeninfo;
DB *dbp;
PAGE *h;
struct stat sb;
int rfd, sverrno;
/* Open the user's file -- if this fails, we're done. */
if (fname != NULL && (rfd = _open(fname, flags, mode)) < 0)
return (NULL);
/* Create a btree in memory (backed by disk). */
dbp = NULL;
if (openinfo) {
if (openinfo->flags & ~(R_FIXEDLEN | R_NOKEY | R_SNAPSHOT))
goto einval;
btopeninfo.flags = 0;
btopeninfo.cachesize = openinfo->cachesize;
btopeninfo.maxkeypage = 0;
btopeninfo.minkeypage = 0;
btopeninfo.psize = openinfo->psize;
btopeninfo.compare = NULL;
btopeninfo.prefix = NULL;
btopeninfo.lorder = openinfo->lorder;
dbp = __bt_open(openinfo->bfname,
O_RDWR, S_IRUSR | S_IWUSR, &btopeninfo, dflags);
} else
dbp = __bt_open(NULL, O_RDWR, S_IRUSR | S_IWUSR, NULL, dflags);
if (dbp == NULL)
goto err;
/*
* Some fields in the tree structure are recno specific. Fill them
* in and make the btree structure look like a recno structure. We
* don't change the bt_ovflsize value, it's close enough and slightly
* bigger.
*/
t = dbp->internal;
if (openinfo) {
if (openinfo->flags & R_FIXEDLEN) {
F_SET(t, R_FIXLEN);
t->bt_reclen = openinfo->reclen;
if (t->bt_reclen == 0)
goto einval;
}
t->bt_bval = openinfo->bval;
} else
t->bt_bval = '\n';
F_SET(t, R_RECNO);
if (fname == NULL)
F_SET(t, R_EOF | R_INMEM);
else
t->bt_rfd = rfd;
if (fname != NULL) {
/*
* In 4.4BSD, stat(2) returns true for ISSOCK on pipes.
* Unfortunately, that's not portable, so we use lseek
* and check the errno values.
*/
errno = 0;
if (lseek(rfd, (off_t)0, SEEK_CUR) == -1 && errno == ESPIPE) {
switch (flags & O_ACCMODE) {
case O_RDONLY:
F_SET(t, R_RDONLY);
break;
default:
goto einval;
}
slow: if ((t->bt_rfp = fdopen(rfd, "r")) == NULL)
goto err;
F_SET(t, R_CLOSEFP);
t->bt_irec =
F_ISSET(t, R_FIXLEN) ? __rec_fpipe : __rec_vpipe;
} else {
switch (flags & O_ACCMODE) {
case O_RDONLY:
F_SET(t, R_RDONLY);
break;
case O_RDWR:
break;
default:
goto einval;
}
if (_fstat(rfd, &sb))
goto err;
/*
* Kluge -- we'd like to test to see if the file is too
* big to mmap. Since, we don't know what size or type
* off_t's or size_t's are, what the largest unsigned
* integral type is, or what random insanity the local
* C compiler will perpetrate, doing the comparison in
* a portable way is flatly impossible. Hope that mmap
* fails if the file is too large.
*/
if (sb.st_size == 0)
F_SET(t, R_EOF);
else {
#ifdef MMAP_NOT_AVAILABLE
/*
* XXX
* Mmap doesn't work correctly on many current
* systems. In particular, it can fail subtly,
* with cache coherency problems. Don't use it
* for now.
*/
t->bt_msize = sb.st_size;
if ((t->bt_smap = mmap(NULL, t->bt_msize,
PROT_READ, MAP_PRIVATE, rfd,
(off_t)0)) == MAP_FAILED)
goto slow;
t->bt_cmap = t->bt_smap;
t->bt_emap = t->bt_smap + sb.st_size;
t->bt_irec = F_ISSET(t, R_FIXLEN) ?
__rec_fmap : __rec_vmap;
F_SET(t, R_MEMMAPPED);
#else
goto slow;
#endif
}
}
}
/* Use the recno routines. */
dbp->close = __rec_close;
dbp->del = __rec_delete;
dbp->fd = __rec_fd;
dbp->get = __rec_get;
dbp->put = __rec_put;
dbp->seq = __rec_seq;
dbp->sync = __rec_sync;
/* If the root page was created, reset the flags. */
if ((h = mpool_get(t->bt_mp, P_ROOT, 0)) == NULL)
goto err;
if ((h->flags & P_TYPE) == P_BLEAF) {
F_CLR(h, P_TYPE);
F_SET(h, P_RLEAF);
mpool_put(t->bt_mp, h, MPOOL_DIRTY);
} else
mpool_put(t->bt_mp, h, 0);
if (openinfo && openinfo->flags & R_SNAPSHOT &&
!F_ISSET(t, R_EOF | R_INMEM) &&
t->bt_irec(t, MAX_REC_NUMBER) == RET_ERROR)
goto err;
return (dbp);
einval: errno = EINVAL;
err: sverrno = errno;
if (dbp != NULL)
(void)__bt_close(dbp);
if (fname != NULL)
(void)_close(rfd);
errno = sverrno;
return (NULL);
}
int
__rec_fd(const DB *dbp)
{
BTREE *t;
t = dbp->internal;
/* Toss any page pinned across calls. */
if (t->bt_pinned != NULL) {
mpool_put(t->bt_mp, t->bt_pinned, 0);
t->bt_pinned = NULL;
}
/* In-memory database can't have a file descriptor. */
if (F_ISSET(t, R_INMEM)) {
errno = ENOENT;
return (-1);
}
return (t->bt_rfd);
}

View File

@ -0,0 +1,277 @@
#include "port_before.h"
/*-
* Copyright (c) 1990, 1993, 1994
* The Regents of the University of California. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#if defined(LIBC_SCCS) && !defined(lint)
static char sccsid[] = "@(#)rec_put.c 8.7 (Berkeley) 8/18/94";
#endif /* LIBC_SCCS and not lint */
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/types.h>
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <db.h>
#include "recno.h"
/*
* __REC_PUT -- Add a recno item to the tree.
*
* Parameters:
* dbp: pointer to access method
* key: key
* data: data
* flag: R_CURSOR, R_IAFTER, R_IBEFORE, R_NOOVERWRITE
*
* Returns:
* RET_ERROR, RET_SUCCESS and RET_SPECIAL if the key is
* already in the tree and R_NOOVERWRITE specified.
*/
int
__rec_put(const DB *dbp, DBT *key, const DBT *data, u_int flags)
{
BTREE *t;
DBT fdata, tdata;
recno_t nrec;
int status;
t = dbp->internal;
/* Toss any page pinned across calls. */
if (t->bt_pinned != NULL) {
mpool_put(t->bt_mp, t->bt_pinned, 0);
t->bt_pinned = NULL;
}
/*
* If using fixed-length records, and the record is long, return
* EINVAL. If it's short, pad it out. Use the record data return
* memory, it's only short-term.
*/
if (F_ISSET(t, R_FIXLEN) && data->size != t->bt_reclen) {
if (data->size > t->bt_reclen)
goto einval;
if (t->bt_rdata.size < t->bt_reclen) {
t->bt_rdata.data =
reallocf(t->bt_rdata.data, t->bt_reclen);
if (t->bt_rdata.data == NULL)
return (RET_ERROR);
t->bt_rdata.size = t->bt_reclen;
}
memmove(t->bt_rdata.data, data->data, data->size);
memset((char *)t->bt_rdata.data + data->size,
t->bt_bval, t->bt_reclen - data->size);
fdata.data = t->bt_rdata.data;
fdata.size = t->bt_reclen;
} else {
fdata.data = data->data;
fdata.size = data->size;
}
switch (flags) {
case R_CURSOR:
if (!F_ISSET(&t->bt_cursor, CURS_INIT))
goto einval;
nrec = t->bt_cursor.rcursor;
break;
case R_SETCURSOR:
if ((nrec = *(recno_t *)key->data) == 0)
goto einval;
break;
case R_IAFTER:
if ((nrec = *(recno_t *)key->data) == 0) {
nrec = 1;
flags = R_IBEFORE;
}
break;
case 0:
case R_IBEFORE:
if ((nrec = *(recno_t *)key->data) == 0)
goto einval;
break;
case R_NOOVERWRITE:
if ((nrec = *(recno_t *)key->data) == 0)
goto einval;
if (nrec <= t->bt_nrecs)
return (RET_SPECIAL);
break;
default:
einval: errno = EINVAL;
return (RET_ERROR);
}
/*
* Make sure that records up to and including the put record are
* already in the database. If skipping records, create empty ones.
*/
if (nrec > t->bt_nrecs) {
if (!F_ISSET(t, R_EOF | R_INMEM) &&
t->bt_irec(t, nrec) == RET_ERROR)
return (RET_ERROR);
if (nrec > t->bt_nrecs + 1) {
if (F_ISSET(t, R_FIXLEN)) {
if ((tdata.data =
(void *)malloc(t->bt_reclen)) == NULL)
return (RET_ERROR);
tdata.size = t->bt_reclen;
memset(tdata.data, t->bt_bval, tdata.size);
} else {
tdata.data = NULL;
tdata.size = 0;
}
while (nrec > t->bt_nrecs + 1)
if (__rec_iput(t,
t->bt_nrecs, &tdata, 0) != RET_SUCCESS)
return (RET_ERROR);
if (F_ISSET(t, R_FIXLEN))
free(tdata.data);
}
}
if ((status = __rec_iput(t, nrec - 1, &fdata, flags)) != RET_SUCCESS)
return (status);
switch (flags) {
case R_IAFTER:
nrec++;
break;
case R_SETCURSOR:
t->bt_cursor.rcursor = nrec;
break;
}
F_SET(t, R_MODIFIED);
return (__rec_ret(t, NULL, nrec, key, NULL));
}
/*
* __REC_IPUT -- Add a recno item to the tree.
*
* Parameters:
* t: tree
* nrec: record number
* data: data
*
* Returns:
* RET_ERROR, RET_SUCCESS
*/
int
__rec_iput(BTREE *t, recno_t nrec, const DBT *data, u_int flags)
{
DBT tdata;
EPG *e;
PAGE *h;
indx_t idx, nxtindex;
pgno_t pg;
u_int32_t nbytes;
int dflags, status;
char *dest, db[NOVFLSIZE];
/*
* If the data won't fit on a page, store it on indirect pages.
*
* XXX
* If the insert fails later on, these pages aren't recovered.
*/
if (data->size > t->bt_ovflsize) {
if (__ovfl_put(t, data, &pg) == RET_ERROR)
return (RET_ERROR);
tdata.data = db;
tdata.size = NOVFLSIZE;
*(pgno_t *)db = pg;
*(u_int32_t *)(db + sizeof(pgno_t)) = data->size;
dflags = P_BIGDATA;
data = &tdata;
} else
dflags = 0;
/* __rec_search pins the returned page. */
if ((e = __rec_search(t, nrec,
nrec > t->bt_nrecs || flags == R_IAFTER || flags == R_IBEFORE ?
SINSERT : SEARCH)) == NULL)
return (RET_ERROR);
h = e->page;
idx = e->index;
/*
* Add the specified key/data pair to the tree. The R_IAFTER and
* R_IBEFORE flags insert the key after/before the specified key.
*
* Pages are split as required.
*/
switch (flags) {
case R_IAFTER:
++idx;
break;
case R_IBEFORE:
break;
default:
if (nrec < t->bt_nrecs &&
__rec_dleaf(t, h, idx) == RET_ERROR) {
mpool_put(t->bt_mp, h, 0);
return (RET_ERROR);
}
break;
}
/*
* If not enough room, split the page. The split code will insert
* the key and data and unpin the current page. If inserting into
* the offset array, shift the pointers up.
*/
nbytes = NRLEAFDBT(data->size);
if ((u_int32_t)(h->upper - h->lower) < nbytes + sizeof(indx_t)) {
status = __bt_split(t, h, NULL, data, dflags, nbytes, idx);
if (status == RET_SUCCESS)
++t->bt_nrecs;
return (status);
}
if (idx < (nxtindex = NEXTINDEX(h)))
memmove(h->linp + idx + 1, h->linp + idx,
(nxtindex - idx) * sizeof(indx_t));
h->lower += sizeof(indx_t);
h->linp[idx] = h->upper -= nbytes;
dest = (char *)h + h->upper;
WR_RLEAF(dest, data, dflags);
++t->bt_nrecs;
F_SET(t, B_MODIFIED);
mpool_put(t->bt_mp, h, MPOOL_DIRTY);
return (RET_SUCCESS);
}

View File

@ -0,0 +1,123 @@
#include "port_before.h"
/*-
* Copyright (c) 1990, 1993
* The Regents of the University of California. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#if defined(LIBC_SCCS) && !defined(lint)
static char sccsid[] = "@(#)rec_search.c 8.4 (Berkeley) 7/14/94";
#endif /* LIBC_SCCS and not lint */
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/types.h>
#include <errno.h>
#include <stdio.h>
#include <db.h>
#include "recno.h"
/*
* __REC_SEARCH -- Search a btree for a key.
*
* Parameters:
* t: tree to search
* recno: key to find
* op: search operation
*
* Returns:
* EPG for matching record, if any, or the EPG for the location of the
* key, if it were inserted into the tree.
*
* Returns:
* The EPG for matching record, if any, or the EPG for the location
* of the key, if it were inserted into the tree, is entered into
* the bt_cur field of the tree. A pointer to the field is returned.
*/
EPG *
__rec_search(BTREE *t, recno_t recno, enum SRCHOP op)
{
indx_t idx;
PAGE *h;
EPGNO *parent;
RINTERNAL *r;
pgno_t pg;
indx_t top;
recno_t total;
int sverrno;
BT_CLR(t);
for (pg = P_ROOT, total = 0;;) {
if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL)
goto err;
if (h->flags & P_RLEAF) {
t->bt_cur.page = h;
t->bt_cur.index = recno - total;
return (&t->bt_cur);
}
for (idx = 0, top = NEXTINDEX(h);;) {
r = GETRINTERNAL(h, idx);
if (++idx == top || total + r->nrecs > recno)
break;
total += r->nrecs;
}
BT_PUSH(t, pg, idx - 1);
pg = r->pgno;
switch (op) {
case SDELETE:
--GETRINTERNAL(h, (idx - 1))->nrecs;
mpool_put(t->bt_mp, h, MPOOL_DIRTY);
break;
case SINSERT:
++GETRINTERNAL(h, (idx - 1))->nrecs;
mpool_put(t->bt_mp, h, MPOOL_DIRTY);
break;
case SEARCH:
mpool_put(t->bt_mp, h, 0);
break;
}
}
/* Try and recover the tree. */
err: sverrno = errno;
if (op != SEARCH)
while ((parent = BT_POP(t)) != NULL) {
if ((h = mpool_get(t->bt_mp, parent->pgno, 0)) == NULL)
break;
if (op == SINSERT)
--GETRINTERNAL(h, parent->index)->nrecs;
else
++GETRINTERNAL(h, parent->index)->nrecs;
mpool_put(t->bt_mp, h, MPOOL_DIRTY);
}
errno = sverrno;
return (NULL);
}

View File

@ -0,0 +1,129 @@
#include "port_before.h"
/*-
* Copyright (c) 1991, 1993, 1994
* The Regents of the University of California. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <sys/cdefs.h>
#ifndef lint
/* XXX use __SCCSID */
static char sccsid[] __unused = "@(#)rec_seq.c 8.3 (Berkeley) 7/14/94";
#endif /* not lint */
__FBSDID("$FreeBSD$");
#include <sys/types.h>
#include <errno.h>
#include <limits.h>
#include <stdio.h>
#include <string.h>
#include <db.h>
#include "recno.h"
/*
* __REC_SEQ -- Recno sequential scan interface.
*
* Parameters:
* dbp: pointer to access method
* key: key for positioning and return value
* data: data return value
* flags: R_CURSOR, R_FIRST, R_LAST, R_NEXT, R_PREV.
*
* Returns:
* RET_ERROR, RET_SUCCESS or RET_SPECIAL if there's no next key.
*/
int
__rec_seq(const DB *dbp, DBT *key, DBT *data, u_int flags)
{
BTREE *t;
EPG *e;
recno_t nrec;
int status;
t = dbp->internal;
/* Toss any page pinned across calls. */
if (t->bt_pinned != NULL) {
mpool_put(t->bt_mp, t->bt_pinned, 0);
t->bt_pinned = NULL;
}
switch(flags) {
case R_CURSOR:
if ((nrec = *(recno_t *)key->data) == 0)
goto einval;
break;
case R_NEXT:
if (F_ISSET(&t->bt_cursor, CURS_INIT)) {
nrec = t->bt_cursor.rcursor + 1;
break;
}
/* FALLTHROUGH */
case R_FIRST:
nrec = 1;
break;
case R_PREV:
if (F_ISSET(&t->bt_cursor, CURS_INIT)) {
if ((nrec = t->bt_cursor.rcursor - 1) == 0)
return (RET_SPECIAL);
break;
}
/* FALLTHROUGH */
case R_LAST:
if (!F_ISSET(t, R_EOF | R_INMEM) &&
t->bt_irec(t, MAX_REC_NUMBER) == RET_ERROR)
return (RET_ERROR);
nrec = t->bt_nrecs;
break;
default:
einval: errno = EINVAL;
return (RET_ERROR);
}
if (t->bt_nrecs == 0 || nrec > t->bt_nrecs) {
if (!F_ISSET(t, R_EOF | R_INMEM) &&
(status = t->bt_irec(t, nrec)) != RET_SUCCESS)
return (status);
if (t->bt_nrecs == 0 || nrec > t->bt_nrecs)
return (RET_SPECIAL);
}
if ((e = __rec_search(t, nrec - 1, SEARCH)) == NULL)
return (RET_ERROR);
F_SET(&t->bt_cursor, CURS_INIT);
t->bt_cursor.rcursor = nrec;
status = __rec_ret(t, e, nrec, key, data);
if (F_ISSET(t, B_DB_LOCK))
mpool_put(t->bt_mp, e->page, 0);
else
t->bt_pinned = e->page;
return (status);
}

View File

@ -0,0 +1,114 @@
#include "port_before.h"
/*-
* Copyright (c) 1990, 1993, 1994
* The Regents of the University of California. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#if defined(LIBC_SCCS) && !defined(lint)
static char sccsid[] = "@(#)rec_utils.c 8.6 (Berkeley) 7/16/94";
#endif /* LIBC_SCCS and not lint */
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <db.h>
#include "recno.h"
/*
* __rec_ret --
* Build return data.
*
* Parameters:
* t: tree
* e: key/data pair to be returned
* nrec: record number
* key: user's key structure
* data: user's data structure
*
* Returns:
* RET_SUCCESS, RET_ERROR.
*/
int
__rec_ret(BTREE *t, EPG *e, recno_t nrec, DBT *key, DBT *data)
{
RLEAF *rl;
void *p;
if (key == NULL)
goto dataonly;
/* We have to copy the key, it's not on the page. */
if (sizeof(recno_t) > t->bt_rkey.size) {
p = realloc(t->bt_rkey.data, sizeof(recno_t));
if (p == NULL)
return (RET_ERROR);
t->bt_rkey.data = p;
t->bt_rkey.size = sizeof(recno_t);
}
memmove(t->bt_rkey.data, &nrec, sizeof(recno_t));
key->size = sizeof(recno_t);
key->data = t->bt_rkey.data;
dataonly:
if (data == NULL)
return (RET_SUCCESS);
/*
* We must copy big keys/data to make them contigous. Otherwise,
* leave the page pinned and don't copy unless the user specified
* concurrent access.
*/
rl = GETRLEAF(e->page, e->index);
if (rl->flags & P_BIGDATA) {
if (__ovfl_get(t, rl->bytes,
&data->size, &t->bt_rdata.data, &t->bt_rdata.size))
return (RET_ERROR);
data->data = t->bt_rdata.data;
} else if (F_ISSET(t, B_DB_LOCK)) {
/* Use +1 in case the first record retrieved is 0 length. */
if (rl->dsize + 1 > t->bt_rdata.size) {
p = realloc(t->bt_rdata.data, rl->dsize + 1);
if (p == NULL)
return (RET_ERROR);
t->bt_rdata.data = p;
t->bt_rdata.size = rl->dsize + 1;
}
memmove(t->bt_rdata.data, rl->bytes, rl->dsize);
data->size = rl->dsize;
data->data = t->bt_rdata.data;
} else {
data->size = rl->dsize;
data->data = rl->bytes;
}
return (RET_SUCCESS);
}

View File

@ -0,0 +1,36 @@
/*-
* Copyright (c) 1991, 1993
* The Regents of the University of California. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)recno.h 8.1 (Berkeley) 6/4/93
* $FreeBSD$
*/
enum SRCHOP { SDELETE, SINSERT, SEARCH}; /* Rec_search operation. */
#include "../btree/btree.h"
#include "extern.h"