//-< BTREE.CPP >-----------------------------------------------------*--------*
// GigaBASE                  Version 1.0         (c) 1999  GARRET    *     ?  *
// (Post Relational Database Management System)                      *   /\|  *
//                                                                   *  /  \  *
//                          Created:      1-Jan-99    K.A. Knizhnik  * / [] \ *
//                          Last update:  7-Jan-99    K.A. Knizhnik  * GARRET *
//-------------------------------------------------------------------*--------*
// B-Tree implementation
//-------------------------------------------------------------------*--------*

#include "gigabase.h"
#include "btree.h"

// Largest packed key value: every bit below the top bit of pkey_t is set.
// It is stored as the key of the last entry of a newly created root page
// (see dbBtreePage::allocate) so that any packed key compares <= to it.
// NOTE(review): the value is computed in unsigned long, so this presumably
// assumes pkey_t is no wider than unsigned long - confirm.
const pkey_t maxPkey = (1UL << (sizeof(pkey_t)*8-1))-1;

static const keySize[] = {
    sizeof(bool),  // tpBool
    sizeof(int1),  // tpInt1
    sizeof(int2),  // tpInt2
    sizeof(int4),  // tpInt4
    sizeof(int8),  // tpInt8
    sizeof(real4), // tpReal4 
    sizeof(real8)  // tpReal8
};

// Three-way comparison of two keys of the given field type.
//   p, q - pointers to the key values (for tpString: zero-terminated strings)
//   type - dbField::tp* code selecting how the values are interpreted
// Returns a negative value if *p < *q, zero if they are equal and a positive
// value if *p > *q. Only the sign of the result is meaningful.
inline int keycmp(void* p, void* q, int type)
{
    switch (type) {
      case dbField::tpBool:
        // operands are promoted to int, so the difference cannot overflow
        return *(bool*)p - *(bool*)q;
      case dbField::tpInt1:
        return *(int1*)p - *(int1*)q;
      case dbField::tpInt2:
        return *(int2*)p - *(int2*)q;
      case dbField::tpInt4:
        // A plain subtraction overflows when the operands are far apart
        // (e.g. a large positive minus a large negative value) and then
        // reports the wrong sign, so compare explicitly instead.
        return *(int4*)p < *(int4*)q ? -1 : *(int4*)p == *(int4*)q ? 0 : 1;
      case dbField::tpInt8:
        return *(int8*)p < *(int8*)q ? -1 : *(int8*)p == *(int8*)q ? 0 : 1;
      case dbField::tpReal4:
        return *(real4*)p < *(real4*)q ? -1 : *(real4*)p == *(real4*)q ? 0 : 1;
      case dbField::tpReal8:
        return *(real8*)p < *(real8*)q ? -1 : *(real8*)p == *(real8*)q ? 0 : 1;
      case dbField::tpString:
        return strcmp((char*)p, (char*)q);
      default:
        assert(false);
        return 0;
    }
}

// Pack the leading bytes of a zero-terminated string into a pkey_t.
// The first byte lands in the most significant byte of the key; at most
// sizeof(pkey_t) bytes are used and packing stops at the terminating zero.
// When USE_LOCALE_SETTINGS is defined the string is first passed through
// strxfrm() into a buffer of sizeof(pkey_t) bytes.
inline pkey_t packStrKey(void* str) 
{
#ifdef USE_LOCALE_SETTINGS
    char buf[sizeof(pkey_t)];
    strxfrm(buf, (char*)str, sizeof buf);
    str = buf;
#endif
    byte* src = (byte*)str;
    pkey_t packed = 0;
    size_t shift = sizeof(pkey_t)*8; // bit position just above the next byte
    size_t pos = 0;
    while (pos < sizeof(pkey_t) && src[pos] != 0) { 
        shift -= 8;
        packed |= pkey_t(src[pos]) << shift;
        pos += 1;
    }
    // Packed keys are compared as signed values while strcmp compares
    // characters as unsigned, so the key is corrected by the value
    // pkey_t(-1) shifted into the top bit position.
    return packed - (pkey_t(-1) << (sizeof(pkey_t)*8-1));
}

// Pack the leading bytes of a zero-terminated string into a pkey_t without
// applying any locale transformation (it is called on key text already
// stored in a leaf page). The first byte occupies the most significant byte
// of the key; packing stops at the terminating zero or after
// sizeof(pkey_t) bytes.
inline pkey_t packStrFrmKey(void* str) 
{
    const size_t width = sizeof(pkey_t);
    byte* bytes = (byte*)str;
    pkey_t packed = 0;
    size_t n = 0;
    while (n < width && bytes[n] != 0) { 
        packed |= pkey_t(bytes[n]) << ((width-n-1)*8);
        n += 1;
    }
    // Packed keys are compared as signed values while strcmp compares
    // characters as unsigned, hence this correction.
    return packed - (pkey_t(-1) << (width*8-1));
}

// Convert a key value of the given field type into its packed form.
//   p    - pointer to the key value (for tpString: zero-terminated string)
//   type - dbField::tp* code
// Small integers are widened; 8-byte values keep only their upper 32 bits
// when pkey_t is narrower than int8; real values are packed from their raw
// bit pattern, not from their numeric value.
// NOTE(review): packing reals by bit pattern presumably does not preserve
// the numeric order of negative values - confirm this is intended.
inline pkey_t packKey(void* p, int type)
{
    const bool keyIsNarrow = sizeof(pkey_t) < sizeof(int8);
    switch (type) {
      case dbField::tpBool:
        return *(bool*)p;
      case dbField::tpInt1:
        return *(int1*)p;
      case dbField::tpInt2:
        return *(int2*)p;
      case dbField::tpInt4:
      case dbField::tpReal4:
        // real4 is read through an int4 pointer, i.e. as raw bits
        return *(int4*)p;
      case dbField::tpInt8:
      case dbField::tpReal8:
        // real8 is read through an int8 pointer, i.e. as raw bits
        if (keyIsNarrow) { 
            return pkey_t(*(int8*)p >> 32);
        }
        return pkey_t(*(int8*)p);
      case dbField::tpString:
        return packStrKey(p);
      default:
        assert(false);
        return 0;
    }  
}


// Run the search described by sc against the tree identified by treeId.
// An empty tree (root == 0) yields nothing. A tree of height 1 consists of
// a single leaf page, which is searched directly; for taller trees the
// packed forms of the search bounds are computed first, because inner pages
// are ordered by packed key.
void dbBtree::find(dbDatabase* db, oid_t treeId, dbSearchContext& sc)
{
    dbBtree* tree = (dbBtree*)db->get(treeId);
    const oid_t rootId = tree->root;
    const int   height = tree->height;
    if (rootId == 0) { 
        db->pool.unfix(tree);
        return;
    }
    if (height != 1) { 
        if (sc.firstKey != NULL) { 
            sc.firstPKey = packKey(sc.firstKey, sc.type);
        }
        if (sc.lastKey != NULL) { 
            sc.lastPKey = packKey(sc.lastKey, sc.type);
        }
        dbBtreePage* inner = (dbBtreePage*)db->get(rootId);
        inner->find(db, sc, height);
        db->pool.unfix(inner);
    } else { 
        dbBtreeLeafPage* leaf = (dbBtreeLeafPage*)db->get(rootId);
        leaf->find(db, sc);
        db->pool.unfix(leaf);
    }
    db->pool.unfix(tree);
}

// Create an empty B-tree header object and return its object identifier.
// The header records its own size and starts with no root page and
// height 0.
oid_t dbBtree::allocate(dbDatabase* db)
{
    const oid_t treeId = db->allocateId();
    db->setPos(treeId, db->allocate(sizeof(dbBtree)) | dbModifiedFlag);
    dbBtree* header = (dbBtree*)db->put(treeId);
    header->height = 0;
    header->root = 0;
    header->size = sizeof(dbBtree);
    db->pool.unfix(header);
    return treeId;
}

// Add record recordId to the tree treeId.
//   type - dbField::tp* code of the indexed field
//   offs - offset of the indexed field inside the record
// An empty tree gets a fresh leaf page as its root. Otherwise the insertion
// starts at the root; if the root reports overflow, a new root page is
// allocated above it and the height grows by one.
void dbBtree::insert(dbDatabase* db, oid_t treeId, oid_t recordId, 
                     int type, int offs)
{
    dbBtree* tree = (dbBtree*)db->get(treeId);
    const oid_t rootId = tree->root;
    const int   height = tree->height;
    if (rootId != 0) { 
        dbBtreePage::item ins;
        int result;
        dbGetTie recTie;
        byte* rec = (byte*)db->getRow(recTie, recordId);
        ins.oid = recordId;
        if (height != 1) { 
            // inner pages are ordered by packed key, so compute it up front
            if (type != dbField::tpString) { 
                ins.pkey = packKey(rec + offs, type);
            } else { 
                ins.pkey = packStrKey(rec + ((dbVarying*)(rec + offs))->offs);
            }
            result = dbBtreePage::insert(db, rootId, rootId, 
                                         type, offs, rec, ins, height);
        } else { 
            result = dbBtreeLeafPage::insert(db, rootId, rootId, 
                                             type, offs, rec, ins);
        }
        assert(result != not_found);
        if (result == dbBtree::overflow) { 
            // the old root was split: ins now describes the new sibling
            dbPutTie hdrTie;
            dbBtree* t = (dbBtree*)db->putRow(hdrTie, treeId);
            t->root = dbBtreePage::allocate(db, rootId, ins);
            t->height += 1;
        }
    } else {
        dbPutTie hdrTie;
        dbBtree* t = (dbBtree*)db->putRow(hdrTie, treeId);
        t->root = dbBtreeLeafPage::allocate(db, recordId, type, offs);
        t->height = 1;
    }
    db->pool.unfix(tree);
}


// Remove record recordId from the tree treeId.
//   type - dbField::tp* code of the indexed field
//   offs - offset of the indexed field inside the record
// When the root underflows the tree shrinks: an empty root leaf is freed
// and the tree becomes empty; an inner root left with a single entry is
// freed and that entry's page becomes the new root.
void dbBtree::remove(dbDatabase* db, oid_t treeId, oid_t recordId, 
                     int type, int offs)
{
    dbBtree* tree = (dbBtree*)db->get(treeId);
    dbBtreePage::item rem;
    dbGetTie recTie;
    const oid_t rootId = tree->root;
    const int   height = tree->height;
    rem.oid = recordId;
    byte* rec = (byte*)db->getRow(recTie, recordId);
    const bool rootIsLeaf = (height == 1);

    int result;
    if (rootIsLeaf) { 
        result = dbBtreeLeafPage::remove(db, rootId, rootId, 
                                         type, offs, rec, rem);
    } else { 
        // inner pages are ordered by packed key
        if (type != dbField::tpString) { 
            rem.pkey = packKey(rec + offs, type);
        } else { 
            rem.pkey = packStrKey(rec + ((dbVarying*)(rec + offs))->offs);
        }
        result = dbBtreePage::remove(db, rootId, rootId, type, offs, rec, 
                                     rem, height);
    }
    assert(result != not_found);

    if (result == underflow) { 
        if (rootIsLeaf) { 
            dbBtreeLeafPage* leaf = (dbBtreeLeafPage*)db->get(rootId);
            if (leaf->nItems == 0) { 
                dbPutTie hdrTie;
                dbBtree* t = (dbBtree*)db->putRow(hdrTie, treeId);
                t->root = 0;
                t->height = 0;
                db->freePage(rootId);
            }
            db->pool.unfix(leaf);
        } else { 
            dbBtreePage* inner = (dbBtreePage*)db->get(rootId);
            if (inner->m == dbBtreePage::maxItems-1) { 
                dbPutTie hdrTie;
                dbBtree* t = (dbBtree*)db->putRow(hdrTie, treeId);
                t->root = inner->e[dbBtreePage::maxItems-1].oid;
                t->height -= 1;
                db->freePage(rootId);
            }
            db->pool.unfix(inner);
        }
    }
    db->pool.unfix(tree);
}

// Release every page of the tree treeId and reset its header to the empty
// state (root 0, height 0). The header object itself is kept.
void dbBtree::purge(dbDatabase* db, oid_t treeId) 
{
    dbPutTie tie;
    dbBtree* tree = (dbBtree*)db->putRow(tie, treeId);
    if (tree->root == 0) { 
        return; // already empty
    }
    if (tree->height != 1) { 
        dbBtreePage::purge(db, tree->root, tree->height);
    } else { 
        // the root is a lone leaf page
        db->freePage(tree->root);
    }
    tree->root = 0;
    tree->height = 0;
}
    
// Destroy the tree treeId completely: purge its pages, then free the
// storage occupied by the header object and release its identifier.
void dbBtree::drop(dbDatabase* db, oid_t treeId) 
{
    const size_t headerSize = sizeof(dbBtree);
    dbBtree::purge(db, treeId);
    db->free(db->getPos(treeId) & ~dbFlagsMask, headerSize);
    db->freeId(treeId);
}
    
// Traverse the tree treeId in forward direction, handing cursor and
// condition to the root page's traverseForward(). An empty tree is skipped;
// a tree of height 1 has a leaf page as its root.
void dbBtree::traverseForward(dbDatabase* db, oid_t treeId,
                              dbAnyCursor* cursor, dbExprNode* condition)
{
    dbBtree* tree = (dbBtree*)db->get(treeId);
    const oid_t rootId = tree->root;
    if (rootId == 0) { 
        db->pool.unfix(tree);
        return;
    }
    if (tree->height != 1) { 
        dbBtreePage* inner = (dbBtreePage*)db->get(rootId);
        inner->traverseForward(db, cursor, condition, tree->height);
        db->pool.unfix(inner);
    } else { 
        dbBtreeLeafPage* leaf = (dbBtreeLeafPage*)db->get(rootId);
        leaf->traverseForward(db, cursor, condition);
        db->pool.unfix(leaf);
    }
    db->pool.unfix(tree);
}


// Traverse the tree treeId in backward direction, handing cursor and
// condition to the root page's traverseBackward(). An empty tree is
// skipped; a tree of height 1 has a leaf page as its root.
void dbBtree::traverseBackward(dbDatabase* db, oid_t treeId,
                               dbAnyCursor* cursor, dbExprNode* condition)
{
    dbBtree* tree = (dbBtree*)db->get(treeId);
    const oid_t rootId = tree->root;
    if (rootId == 0) { 
        db->pool.unfix(tree);
        return;
    }
    if (tree->height != 1) { 
        dbBtreePage* inner = (dbBtreePage*)db->get(rootId);
        inner->traverseBackward(db, cursor, condition, tree->height);
        db->pool.unfix(inner);
    } else { 
        dbBtreeLeafPage* leaf = (dbBtreeLeafPage*)db->get(rootId);
        leaf->traverseBackward(db, cursor, condition);
        db->pool.unfix(leaf);
    }
    db->pool.unfix(tree);
}




// Search the subtree below this inner page.
//   sc     - search context; firstPKey/lastPKey must hold the packed bounds
//            whenever firstKey/lastKey are set
//   height - height of the subtree rooted at this page
// Children are visited left to right, starting at the first entry whose
// packed key is >= sc.firstPKey (or at the first entry when there is no
// lower bound). Returns false as soon as a child reports that the search is
// finished or an entry with a packed key above sc.lastPKey has been
// visited; returns true when all entries were visited.
bool dbBtreePage::find(dbDatabase* db, dbSearchContext& sc, int height)
{
    sc.probes += 1;
    int lo = m, hi = maxItems-1;
    if (sc.firstKey != NULL) { 
        while (lo < hi)  {
            int mid = (lo+hi) >> 1;
            if (sc.firstPKey > e[mid].pkey) { 
                lo = mid+1; 
            } else { 
                hi = mid;
            }
        }
        assert(hi == lo && e[hi].pkey >= sc.firstPKey); 
    }
    const bool childrenAreLeaves = (--height == 1);
    const bool hasUpperBound = (sc.lastKey != NULL);
    for (; lo < maxItems; lo += 1) { 
        bool keepGoing;
        if (childrenAreLeaves) { 
            dbBtreeLeafPage* child = (dbBtreeLeafPage*)db->get(e[lo].oid);
            keepGoing = child->find(db, sc);
            db->pool.unfix(child);
        } else { 
            dbBtreePage* child = (dbBtreePage*)db->get(e[lo].oid);
            keepGoing = child->find(db, sc, height);
            db->pool.unfix(child);
        }
        if (!keepGoing) { 
            return false;
        }
        // every later child only holds keys above the upper bound
        if (hasUpperBound && sc.lastPKey < e[lo].pkey) { 
            return false;
        }
    }
    return true;
}


// Collect the records of this leaf page that fall into the range described
// by the search context.
//   sc.firstKey / sc.firstKeyInclusion - optional lower bound and whether
//                                        the bound itself is included
//   sc.lastKey  / sc.lastKeyInclusion  - optional upper bound, likewise
//   sc.condition                       - optional filter evaluated for each
//                                        candidate record
// For scalar keys the record id belonging to key number l is stored in
// record[maxItems-1-l]; for string keys it is keyStr[l].oid.
// Returns false when the caller should stop searching (a key beyond the
// upper bound was reached or sc.cursor->add() returned false) and true when
// the page was scanned to its end.
//
// Bounds on tpInt4/tpInt8 keys are tested with relational operators. A
// difference such as "key - keyInt4[i]" overflows when the operands are far
// apart and would misdirect the binary search and the upper-bound check.
// The inclusion flags are treated as booleans, as in the tpReal4/tpReal8
// cases.
bool dbBtreeLeafPage::find(dbDatabase* db, dbSearchContext& sc)
{
    int l = 0, n = nItems, r = n;
    dbTableDescriptor* table = &sc.cursor->table;
    switch (sc.type) { 
      case dbField::tpBool:
      case dbField::tpInt1:
        if (sc.firstKey != NULL) { 
            int1 key = *(int1*)sc.firstKey;
            // binary search for the first key inside the lower bound;
            // operands are promoted to int, so the difference is exact
            while (l < r)  {
                int i = (l+r) >> 1;
                if (key - keyInt1[i] >= sc.firstKeyInclusion) {
                    l = i+1; 
                } else { 
                    r = i;
                }
            }
            assert(r == l); 
        }
        if (sc.lastKey != NULL) { 
            int1 key = *(int1*)sc.lastKey;
            while (l < n) { 
                if (keyInt1[l] - key >= sc.lastKeyInclusion) { 
                    return false;
                }
                if (!sc.condition 
                    || db->evaluate(sc.condition, record[maxItems-1-l], table))
                {
                    if (!sc.cursor->add(record[maxItems-1-l])) { 
                        return false;
                    }
                }
                l += 1;
            }
            return true;
        }  
        break;
      case dbField::tpInt2:
        if (sc.firstKey != NULL) { 
            int2 key = *(int2*)sc.firstKey;
            while (l < r)  {
                int i = (l+r) >> 1;
                if (key - keyInt2[i] >= sc.firstKeyInclusion) {
                    l = i+1; 
                } else { 
                    r = i;
                }
            }
            assert(r == l); 
        }
        if (sc.lastKey != NULL) { 
            int2 key = *(int2*)sc.lastKey;
            while (l < n) { 
                if (keyInt2[l] - key >= sc.lastKeyInclusion) { 
                    return false;
                }
                if (!sc.condition 
                    || db->evaluate(sc.condition, record[maxItems-1-l], table))
                {
                    if (!sc.cursor->add(record[maxItems-1-l])) { 
                        return false;
                    }
                }
                l += 1;
            }
            return true;
        } 
        break;  
      case dbField::tpInt4:
        if (sc.firstKey != NULL) { 
            int4 key = *(int4*)sc.firstKey;
            while (l < r)  {
                int i = (l+r) >> 1;
                // overflow-free form of "key - keyInt4[i] >= inclusion"
                if (key > keyInt4[i]
                    || (key == keyInt4[i] && !sc.firstKeyInclusion)) 
                {
                    l = i+1; 
                } else { 
                    r = i;
                }
            }
            assert(r == l); 
        }
        if (sc.lastKey != NULL) { 
            int4 key = *(int4*)sc.lastKey;
            while (l < n) { 
                // overflow-free form of "keyInt4[l] - key >= inclusion"
                if (keyInt4[l] > key 
                    || (keyInt4[l] == key && !sc.lastKeyInclusion))
                { 
                    return false;
                }
                if (!sc.condition 
                    || db->evaluate(sc.condition, record[maxItems-1-l], table))
                {
                    if (!sc.cursor->add(record[maxItems-1-l])) { 
                        return false;
                    }
                }
                l += 1;
            }
            return true;
        } 
        break;
      case dbField::tpInt8:
        if (sc.firstKey != NULL) { 
            int8 key = *(int8*)sc.firstKey;
            while (l < r)  {
                int i = (l+r) >> 1;
                // overflow-free form of "key - keyInt8[i] >= inclusion"
                if (key > keyInt8[i]
                    || (key == keyInt8[i] && !sc.firstKeyInclusion)) 
                {
                    l = i+1; 
                } else { 
                    r = i;
                }
            }
            assert(r == l); 
        }
        if (sc.lastKey != NULL) { 
            int8 key = *(int8*)sc.lastKey;
            while (l < n) { 
                // overflow-free form of "keyInt8[l] - key >= inclusion"
                if (keyInt8[l] > key 
                    || (keyInt8[l] == key && !sc.lastKeyInclusion))
                { 
                    return false;
                }
                if (!sc.condition 
                    || db->evaluate(sc.condition, record[maxItems-1-l], table))
                {
                    if (!sc.cursor->add(record[maxItems-1-l])) { 
                        return false;
                    }
                }
                l += 1;
            }
            return true;
        } 
        break;
      case dbField::tpReal4:
        if (sc.firstKey != NULL) { 
            real4 key = *(real4*)sc.firstKey;
            while (l < r)  {
                int i = (l+r) >> 1;
                if (key > keyReal4[i]
                    || (key == keyReal4[i] && !sc.firstKeyInclusion)) 
                {
                    l = i+1; 
                } else { 
                    r = i;
                }
            }
            assert(r == l); 
        }
        if (sc.lastKey != NULL) { 
            real4 key = *(real4*)sc.lastKey;
            while (l < n) { 
                if (keyReal4[l] > key 
                    || (keyReal4[l] == key && !sc.lastKeyInclusion))
                { 
                    return false;
                }
                if (!sc.condition 
                    || db->evaluate(sc.condition, record[maxItems-1-l], table))
                {
                    if (!sc.cursor->add(record[maxItems-1-l])) { 
                        return false;
                    }
                }
                l += 1;
            }
            return true;
        }  
        break;
      case dbField::tpReal8:
        if (sc.firstKey != NULL) { 
            real8 key = *(real8*)sc.firstKey;
            while (l < r)  {
                int i = (l+r) >> 1;
                if (key > keyReal8[i]
                    || (key == keyReal8[i] && !sc.firstKeyInclusion)) 
                {
                    l = i+1; 
                } else { 
                    r = i;
                }
            }
            assert(r == l); 
        }
        if (sc.lastKey != NULL) { 
            real8 key = *(real8*)sc.lastKey;
            while (l < n) { 
                if (keyReal8[l] > key 
                    || (keyReal8[l] == key && !sc.lastKeyInclusion))
                { 
                    return false;
                }
                if (!sc.condition 
                    || db->evaluate(sc.condition, record[maxItems-1-l], table)) 
                {
                    if (!sc.cursor->add(record[maxItems-1-l])) { 
                        return false;
                    }
                }
                l += 1;
            }
            return true;
        }  
        break;
      case dbField::tpString:
        // String keys keep their record id next to the key descriptor, so
        // this case is handled completely here and never reaches the
        // generic tail below the switch.
        if (sc.firstKey != NULL) { 
            char* firstKey = sc.firstKey;
#ifdef USE_LOCALE_SETTINGS
            char buf[dbMaxKeyLen];
            strxfrm(buf, firstKey, sizeof buf);
            firstKey = buf;
#endif
            while (l < r)  {
                int i = (l+r) >> 1;
                if (strcmp(firstKey, &keyChar[keyStr[i].offs])
                    >= sc.firstKeyInclusion) 
                {
                    l = i + 1; 
                } else { 
                    r = i;
                }
            }
            assert(r == l); 
        }
        if (sc.lastKey != NULL) { 
            char* lastKey = sc.lastKey;
#ifdef USE_LOCALE_SETTINGS
            char buf[dbMaxKeyLen];
            strxfrm(buf, lastKey, sizeof buf);
            lastKey = buf;
#endif
            while (l < n) { 
                if (strcmp(&keyChar[keyStr[l].offs], 
                           (char*)lastKey) >= sc.lastKeyInclusion) 
                { 
                    return false;
                }
                if (!sc.condition 
                    || db->evaluate(sc.condition, keyStr[l].oid, table))
                {
                    if (!sc.cursor->add(keyStr[l].oid)) { 
                        return false;
                    }
                }
                l += 1;
            }
        } else { 
            if (sc.condition) { 
                while (l < n) { 
                    if (db->evaluate(sc.condition, keyStr[l].oid, table)) {
                        if (!sc.cursor->add(keyStr[l].oid)) { 
                            return false;
                        }
                    }
                    l += 1;
                }
            } else { 
                while (l < n) { 
                    if (!sc.cursor->add(keyStr[l].oid)) { 
                        return false;
                    }
                    l += 1;
                }
            }
        }
        return true;
      default:
        assert(false);
    }
    // Scalar key without upper bound: take everything from l to the end of
    // the page.
    if (sc.condition) { 
        while (l < n) { 
            if (db->evaluate(sc.condition, record[maxItems-1-l], table)) {
                if (!sc.cursor->add(record[maxItems-1-l])) { 
                    return false;
                }
            }
            l += 1;
        }
    } else { 
        while (l < n) { 
            if (!sc.cursor->add(record[maxItems-1-l])) { 
                return false;
            }
            l += 1;
        }
    }
    return true;
}


// Create a new root page above a root that has just been split.
//   root - the previous root page; it becomes the last entry and gets the
//          key maxPkey
//   ins  - entry describing the page that was split off; it is placed
//          immediately before the last entry
// Returns the identifier of the new page, which holds exactly two entries.
oid_t dbBtreePage::allocate(dbDatabase* db, oid_t root, item& ins)
{
    const oid_t newRootId = db->allocatePage();
    dbBtreePage* pg = (dbBtreePage*)db->put(newRootId);
    item& splitOff  = pg->e[maxItems-2];
    item& lastEntry = pg->e[maxItems-1];
    pg->m = maxItems-2;
    splitOff = ins;
    lastEntry.pkey = maxPkey;
    lastEntry.oid = root;
    db->pool.unfix(pg);
    return newRootId;
}


// Create a leaf page holding a single key taken from record recordId.
//   type - dbField::tp* code of the indexed field
//   offs - offset of the indexed field inside the record
// Scalar keys go to slot 0 of the matching key array and the record id to
// record[maxItems-1]. String keys are copied to the end of keyChar and
// described by keyStr[0].
// Returns the identifier of the new page.
//
// The "string fits" assertion is evaluated after the optional locale
// transformation, so that it checks the length that is actually stored.
// NOTE(review): when USE_LOCALE_SETTINGS is defined and the transformed key
// does not fit into buf, strxfrm() leaves buf unspecified - confirm that
// dbMaxKeyLen rules this out.
oid_t dbBtreeLeafPage::allocate(dbDatabase* db, oid_t recordId, 
                                int type, int offs)
{
    oid_t pageId = db->allocatePage();
    dbBtreeLeafPage* page = (dbBtreeLeafPage*)db->put(pageId);
    dbGetTie tie;
    byte* p = (byte*)db->getRow(tie, recordId);
    page->nItems = 1;
    page->size = 0;
    switch (type) { 
      case dbField::tpBool:
      case dbField::tpInt1:
        page->keyInt1[0] = *(int1*)(p+offs);
        page->record[maxItems-1] = recordId;
        break;
      case dbField::tpInt2:
        page->keyInt2[0] = *(int2*)(p+offs);
        page->record[maxItems-1] = recordId;
        break;
      case dbField::tpInt4:
        page->keyInt4[0] = *(int4*)(p+offs);
        page->record[maxItems-1] = recordId;
        break;
      case dbField::tpInt8:
        page->keyInt8[0] = *(int8*)(p+offs);
        page->record[maxItems-1] = recordId;
        break;
      case dbField::tpReal4:
        page->keyReal4[0] = *(real4*)(p+offs);
        page->record[maxItems-1] = recordId;
        break;
      case dbField::tpReal8:
        page->keyReal8[0] = *(real8*)(p+offs);
        page->record[maxItems-1] = recordId;
        break;
      case dbField::tpString:
      { 
        size_t len = ((dbVarying*)(p+offs))->size;
        char* key = (char*)p + ((dbVarying*)(p+offs))->offs;
#ifdef USE_LOCALE_SETTINGS
        char buf[dbMaxKeyLen];
        len = strxfrm(buf, key, sizeof buf) + 1;
        key = buf;
#endif
        // one descriptor plus the key text must fit into the key area
        assert(((void)"Strings fits in the page",
                len <= sizeof(page->keyChar) - sizeof(str)));
        page->size = len;
        page->keyStr[0].offs = sizeof(page->keyChar) - len;
        page->keyStr[0].size = len;
        page->keyStr[0].oid = recordId;
        strcpy(&page->keyChar[page->keyStr[0].offs], key); 
      }
    }
    db->pool.unfix(page);
    return pageId;
}


// Insert an entry into the subtree rooted at inner page pageId.
//   lastPageId - page id compared against pageId; when they are equal it is
//                replaced by the id of this page's last child before
//                descending
//   type, offs - field type and offset of the key inside the record p
//   ins        - in: entry to insert (oid and packed key);
//                out (on overflow): entry describing the page split off
//   height     - height of the subtree rooted at this page
// Returns dbBtree::done, dbBtree::overflow (this page was split and ins must
// be inserted into the parent) or dbBtree::not_found (no child accepted the
// entry).
int dbBtreePage::insert(dbDatabase* db, oid_t pageId, oid_t lastPageId, 
			int type, int offs, byte* p, item& ins, int height)
{
    dbBtreePage* pg = (dbBtreePage*)db->get(pageId);
    // used entries occupy e[m..maxItems-1]
    int i, m = pg->m, l = m, r = maxItems-1;
    int result;
    pkey_t pkey = ins.pkey;
    // binary search for the first entry whose packed key is >= pkey
    while (l < r)  {
	i = (l+r) >> 1;
	if (pkey > pg->e[i].pkey) l = i+1; else r = i;
    }
    assert(r == l && pg->e[r].pkey >= pkey); 
    if (pageId == lastPageId) {
	lastPageId = pg->e[maxItems-1].oid;
    }
    // Try the selected child; while a child answers not_found, move on to
    // the next entry to the right.
    if (--height == 1) { 
	dbPutTie tie;
	do { 
	    result = dbBtreeLeafPage::insert(db, pg->e[r].oid, lastPageId, 
					     type, offs, p, ins);
	} while (result == dbBtree::not_found && ++r < maxItems);
    } else { 
	do { 
	    result = insert(db, pg->e[r].oid, lastPageId, 
			    type, offs, p, ins, height);
	} while (result == dbBtree::not_found && ++r < maxItems);
    }
    db->pool.unfix(pg);
    if (result != dbBtree::overflow) { 
	return result;
    }

    // The child was split: ins now describes the new page and has to be
    // stored in this page, which is therefore reopened for update.
    dbPutTie tie;
    pg = (dbBtreePage*)db->put(tie, pageId); 
    
    // insert before e[r]
    if (m > 0) {
	// there is a free slot: shift e[m..r-1] down by one position
	memmove(&pg->e[m-1], &pg->e[m], (r - m)*sizeof(item));
	pg->m -= 1;
	pg->e[r-1] = ins;
	return dbBtree::done;
    } else { // page is full then divide page
        oid_t childId = db->allocatePage();
	dbBtreePage* b = (dbBtreePage*)db->put(childId);
	// the new page b receives n entries, stored at the end of b->e
	const int n = maxItems/2;
	if (r < n) {
	    // the new entry belongs to b: copy e[0..r-1], the new entry and
	    // e[r..n-2]
	    memcpy(&b->e[maxItems-n], pg->e, r*sizeof(item));
	    b->e[maxItems-n+r] = ins;
	    memcpy(&b->e[maxItems-n+r+1], &pg->e[r], (n-r-1)*sizeof(item));
	} else {
	    // the new entry stays in pg: b takes e[0..n-1], and pg makes
	    // room by shifting e[n..r-1] down by one position
	    memcpy(&b->e[maxItems-n], pg->e, n*sizeof(item));
	    memmove(&pg->e[n-1], &pg->e[n], (r-n)*sizeof(item));
	    pg->e[r-1] = ins;
	}
	// report the new page, keyed by its last entry, to the caller
	ins.pkey = b->e[maxItems-1].pkey;
	ins.oid = childId;
	pg->m = n - 1;
	b->m = maxItems - n;
	db->pool.unfix(b);
	return dbBtree::overflow;
    }
}


// Body of dbBtreeLeafPage::insert for one scalar key type.
//   KEY   - name of the key array member of the leaf page (e.g. keyInt4)
//   TYPE  - type of one key
//   PKEY  - integer type through which the key is read to build the packed
//           key that is reported on overflow
//   SHIFT - right shift applied to that integer value
// The expansion uses db, pageId, lastPageId, p, offs, ins, tie, pg, l, r and
// n from the enclosing function and returns from it on every path:
// not_found (key is greater than all keys of this page and the page is not
// lastPageId), done (key stored) or overflow (page was split; ins then
// describes the new page, which holds the smaller keys).
// The record id of key number i is kept in record[maxItems-1-i].
// Shifts inside the same page use memmove because the source and
// destination ranges overlap; memcpy is only used between two pages.
#define INSERT(KEY, TYPE, PKEY, SHIFT) {                                    \
    TYPE key = *(TYPE*)(p+offs);                                            \
    while (l < r)  {                                                        \
        int i = (l+r) >> 1;                                                 \
        if (key > pg->KEY[i]) l = i+1; else r = i;                          \
    }                                                                       \
    /* insert before e[r] */                                                \
    db->pool.unfix(pg);                                                     \
    if (l == n && pageId != lastPageId) {                                   \
        return dbBtree::not_found;                                          \
    }                                                                       \
    pg = (dbBtreeLeafPage*)db->put(tie, pageId);                            \
    const int max = sizeof(pg->KEY) / (sizeof(oid_t) + sizeof(TYPE));       \
    if (n < max) {                                                          \
        memmove(&pg->KEY[r+1], &pg->KEY[r], (n - r)*sizeof(TYPE));          \
        memmove(&pg->record[maxItems-n-1], &pg->record[maxItems-n],         \
                (n-r)*sizeof(oid_t));                                       \
        pg->KEY[r] = key;                                                   \
        pg->record[maxItems-r-1] = ins.oid;                                 \
        pg->nItems += 1;                                                    \
        return dbBtree::done;                                               \
    } else { /* page is full then divide page */                            \
        oid_t pageId = db->allocatePage();                                  \
        dbBtreeLeafPage* b = (dbBtreeLeafPage*)db->put(pageId);             \
        assert(n == max);                                                   \
        const int m = max/2;                                                \
        if (r < m) {                                                        \
            /* new key goes to b together with the first m-1 old keys */    \
            memcpy(b->KEY, pg->KEY, r*sizeof(TYPE));                        \
            b->KEY[r] = key;                                                \
            memcpy(&b->KEY[r+1], &pg->KEY[r], (m-r-1)*sizeof(TYPE));        \
            memmove(pg->KEY, &pg->KEY[m-1], (max-m+1)*sizeof(TYPE));        \
            memcpy(&b->record[maxItems-r], &pg->record[maxItems-r],         \
                   r*sizeof(oid_t));                                        \
            b->record[maxItems-r-1] = ins.oid;                              \
            memcpy(&b->record[maxItems-m], &pg->record[maxItems-m+1],       \
                   (m-r-1)*sizeof(oid_t));                                  \
            memmove(&pg->record[maxItems-max+m-1],&pg->record[maxItems-max],\
                    (max-m+1)*sizeof(oid_t));                               \
        } else {                                                            \
            /* b takes the first m old keys, new key stays in pg */         \
            memcpy(b->KEY, pg->KEY, m*sizeof(TYPE));                        \
            memmove(pg->KEY, &pg->KEY[m], (r-m)*sizeof(TYPE));              \
            pg->KEY[r-m] = key;                                             \
            memmove(&pg->KEY[r-m+1], &pg->KEY[r], (max-r)*sizeof(TYPE));    \
            memcpy(&b->record[maxItems-m], &pg->record[maxItems-m],         \
                   m*sizeof(oid_t));                                        \
            memmove(&pg->record[maxItems-r+m], &pg->record[maxItems-r],     \
                    (r-m)*sizeof(oid_t));                                   \
            pg->record[maxItems-r+m-1] = ins.oid;                           \
            memmove(&pg->record[maxItems-max+m-1],&pg->record[maxItems-max],\
                    (max-r)*sizeof(oid_t));                                 \
        }                                                                   \
        ins.pkey = pkey_t(*(PKEY*)(b->KEY + m - 1) >> SHIFT);               \
        ins.oid = pageId;                                                   \
        pg->nItems = max - m + 1;                                           \
        b->nItems = m;                                                      \
        b->size = 0;                                                        \
        db->pool.unfix(b);                                                  \
        return dbBtree::overflow;                                           \
    }                                                                       \
}


// Insert the key of record p into leaf page pageId.
//   lastPageId - if pageId differs from it and the key is greater than every
//                key of this page, nothing is inserted and not_found is
//                returned
//   type, offs - field type and offset of the key inside the record p
//   ins        - in: ins.oid is the record id to store;
//                out (on overflow): entry describing the page split off
// Returns dbBtree::done, dbBtree::overflow or dbBtree::not_found.
int dbBtreeLeafPage::insert(dbDatabase* db, oid_t pageId, oid_t lastPageId, 
			    int type, int offs, byte* p, 
			    dbBtreePage::item& ins)
{
    dbPutTie tie;
    dbBtreeLeafPage* pg = (dbBtreeLeafPage*)db->get(pageId);
    int l = 0, n = pg->nItems, r = n;
    switch (type) { 
      // every INSERT expansion returns on all of its paths, so the cases
      // below never fall through into each other
      case dbField::tpBool:
      case dbField::tpInt1:
	INSERT(keyInt1, int1, int1, 0);
      case dbField::tpInt2:
	INSERT(keyInt2, int2, int2, 0);
      case dbField::tpInt4:
	INSERT(keyInt4, int4, int4, 0);
      case dbField::tpInt8:
	INSERT(keyInt8, int8, int8, 32);
      case dbField::tpReal4:
	INSERT(keyReal4, real4, int4, 0);
      case dbField::tpReal8:
	INSERT(keyReal8, real8, int8, 32);
      case dbField::tpString:
      {
	// len is the number of bytes copied into the page for this key
	size_t len = ((dbVarying*)(p+offs))->size;
	char* key = (char*)p + ((dbVarying*)(p+offs))->offs;
#ifdef USE_LOCALE_SETTINGS
	char buf[dbMaxKeyLen];
	len = strxfrm(buf, key, sizeof buf) + 1;
	key = buf;
#endif
	// binary search for the first stored key that is not less than key
	while (l < r)  {
	    int i = (l+r) >> 1;
	    if (strcmp(key, &pg->keyChar[pg->keyStr[i].offs]) > 0) { 
		l = i+1; 
	    } else { 
		r = i;
	    }
	}
	db->pool.unfix(pg);
	if (l == n && pageId != lastPageId) {
	    return dbBtree::not_found;
	}
	pg = (dbBtreeLeafPage*)db->put(tie, pageId);
	// insert before e[r]
	// The descriptors (keyStr) and the key text share the keyChar area,
	// so both must fit together.
	if (pg->size + len + (n+1)*sizeof(str) <= sizeof(pg->keyChar)) { 
	    memmove(&pg->keyStr[r+1], &pg->keyStr[r], (n-r)*sizeof(str));
	    pg->size += len;
	    pg->keyStr[r].offs = sizeof(pg->keyChar) - pg->size;
	    pg->keyStr[r].size = len;
	    pg->keyStr[r].oid = ins.oid;
	    memcpy(&pg->keyChar[sizeof(pg->keyChar) - pg->size], key, len);
	    pg->nItems += 1;
	    return dbBtree::done;
	} else { // page is full then divide page
	    oid_t pageId = db->allocatePage();
	    dbBtreeLeafPage* b = (dbBtreeLeafPage*)db->put(pageId);
	    // moved    - bytes of key text already placed in b
	    // inserted - len until the new key has been reached, 0 afterwards
	    // bn       - number of keys placed in b so far
	    // i        - number of keys taken from the front of pg so far
	    size_t moved = 0;
	    size_t inserted = len;
	    for (int bn = 0, i = 0; ; bn += 1) { 
		size_t keyLen;
		if (bn == r) { 
		    keyLen = len;
		    inserted = 0;
		} else { 
		    keyLen =  pg->keyStr[i].size;
		}
		// Stop as soon as moving one more key would leave b with
		// more key text than remains for pg.
		if (moved != 0 
		    && moved + keyLen > pg->size - keyLen + inserted) 
		{ 
		    // drop the first i keys (now in b) from pg
		    pg->compactify(i);
		    if (bn <= r) { 
			// the new key was not placed in b: store it in pg at
			// position r-i
			memmove(&pg->keyStr[r-i+1], &pg->keyStr[r-i], 
				(n - r)*sizeof(str));
			pg->size += len;
			pg->nItems += 1;
			assert(((void)"String fits in the B-Tree page", 
				pg->size + pg->nItems*sizeof(str) 
				<= sizeof(pg->keyChar)));
			pg->keyStr[r-i].offs = sizeof(pg->keyChar) - pg->size;
			pg->keyStr[r-i].size = len;
			pg->keyStr[r-i].oid = ins.oid;
			memcpy(&pg->keyChar[pg->keyStr[r-i].offs], key, len);
		    }
		    // report b, keyed by its last string, to the caller
		    ins.pkey= packStrFrmKey(&b->keyChar[b->keyStr[bn-1].offs]);
		    ins.oid = pageId;
		    b->nItems = bn;
		    b->size = moved;
		    db->pool.unfix(b);
		    return dbBtree::overflow;
		}
		moved += keyLen;
		assert(((void)"String fits in the B-Tree page", 
			moved + bn*sizeof(str) <= sizeof(b->keyChar)));
		b->keyStr[bn].size = keyLen;
		b->keyStr[bn].offs = sizeof(b->keyChar) - moved;
		if (bn == r) { 
		    // slot r receives the new key itself
		    b->keyStr[bn].oid = ins.oid;
		    memcpy(&b->keyChar[b->keyStr[bn].offs], key, keyLen);
		} else { 
		    b->keyStr[bn].oid = pg->keyStr[i].oid;
		    memcpy(&b->keyChar[b->keyStr[bn].offs], 
			   &pg->keyChar[pg->keyStr[i].offs], keyLen);
		    pg->size -= keyLen;
		    i += 1;
		}
	    }
	}
      }
      default:
	assert(false);
    }
    return dbBtree::done;
}


// Remove string keys from one end of the page and repack the text of the
// remaining keys towards the end of keyChar.
//   m > 0  - drop the first m keys; the remaining descriptors move to the
//            front of keyStr
//   m < 0  - drop the last -m keys
//   m == 0 - nothing to do
// nItems is reduced accordingly; the size member is not touched here.
void dbBtreeLeafPage::compactify(int m)
{
    int i, j, offs, len, n = nItems;
    // Both tables are indexed by the END offset (offs + size) of a string
    // inside keyChar: size[] holds the string length and index[] the new
    // position of its descriptor, or -1 if the string is dropped.
    int size[dbPageSize];
    int index[dbPageSize];
    if (m == 0) { 
	return;
    }
    if (m < 0) {
	m = -m;
	for (i = 0; i < n-m; i++) { 
	    len = keyStr[i].size;
	    size[keyStr[i].offs + len] = len;
	    index[keyStr[i].offs + len] = i;
	}	
	for (; i < n; i++) { 
	    len = keyStr[i].size;
	    size[keyStr[i].offs + len] = len;
	    index[keyStr[i].offs + len] = -1;
	}
    } else { 
	for (i = 0; i < m; i++) { 
	    len = keyStr[i].size;
	    size[keyStr[i].offs + len] = len;
	    index[keyStr[i].offs + len] = -1;
	}
	for (; i < n; i++) { 
	    len = keyStr[i].size;
	    size[keyStr[i].offs + len] = len;
	    index[keyStr[i].offs + len] = i - m;
	    // move the descriptor to its new slot; offs is set below
	    keyStr[i-m].oid = keyStr[i].oid;
	    keyStr[i-m].size = len;
	}	
    }
    nItems = n -= m;
    // Walk the strings from the end of keyChar downwards (i is the end
    // offset of the current string) and pack every kept string directly
    // below the previously kept one (offs).
    // NOTE(review): the walk reads size[i]/index[i] for each end offset it
    // steps to, so it presumably relies on the strings being stored back to
    // back from the end of keyChar - confirm.
    for (offs = sizeof(keyChar), i = offs; n != 0; i -= len) { 
	len = size[i];
	j = index[i];
	if (j >= 0) {
	    offs -= len;
	    n -= 1;
	    keyStr[j].offs = offs;
	    if (offs != i - len) { 
		memmove(&keyChar[offs], &keyChar[i - len], len);
	    }
	}
    }
}


// Remove an entry from the subtree rooted at inner page pageId.
//   lastPageId - page id compared against pageId; when they are equal it is
//                replaced by the id of this page's last child before
//                descending
//   type, offs - field type and offset of the key inside the record p
//   rem        - in: entry to remove (oid and packed key); a child that
//                answers propagation has left its new key in rem.pkey
//   height     - height of the subtree rooted at this page
// Returns dbBtree::done, dbBtree::underflow, dbBtree::propagation (the key
// of this page's last entry changed) or dbBtree::not_found (no child
// contained the entry).
int dbBtreePage::remove(dbDatabase* db, oid_t pageId, oid_t lastPageId, 
                        int type, int offs, byte* p, item& rem, int height)

{
    pkey_t pkey = rem.pkey;
    dbBtreePage* pg = (dbBtreePage*)db->get(pageId);
    int i, l = pg->m, r = maxItems-1;
    dbPutTie tie;
    // binary search for the first entry whose packed key is >= pkey
    while (l < r) {
        i = (l+r) >> 1;
        if (pkey > pg->e[i].pkey) l = i+1; else r = i;
    }
    assert(r == l && pg->e[r].pkey >= pkey); 
    if (pageId == lastPageId) { 
        lastPageId = pg->e[maxItems-1].oid;
    }
    // Try the children from entry l onwards; a child that answers not_found
    // (no case below matches) makes the loop continue with the next entry.
    if (--height == 1) {
        while (l < maxItems) { 
            switch (dbBtreeLeafPage::remove(db, pg->e[l].oid, lastPageId, 
                                            type, offs, p, rem)) 
            {
              case dbBtree::underflow: 
                db->pool.unfix(pg);
                pg = (dbBtreePage*)db->put(tie, pageId);
                return pg->handleLeafPageUnderflow(db, l, type, lastPageId);
              case dbBtree::propagation: 
                db->pool.unfix(pg);
                pg = (dbBtreePage*)db->put(tie, pageId);
                pg->e[l].pkey = rem.pkey; 
                return l == maxItems-1 ? dbBtree::propagation : dbBtree::done;
              case dbBtree::done:
                db->pool.unfix(pg);
                return dbBtree::done;
            } 
            l += 1;
        }
    } else { 
        while (l < maxItems) { 
            switch(remove(db,pg->e[l].oid,lastPageId,type,offs,p,rem,height)) {
              case dbBtree::underflow: 
                db->pool.unfix(pg);
                pg = (dbBtreePage*)db->put(tie, pageId);
                return pg->handlePageUnderflow(db, l);
              case dbBtree::propagation: 
                db->pool.unfix(pg);
                pg = (dbBtreePage*)db->put(tie, pageId);
                pg->e[l].pkey = rem.pkey; 
                return l == maxItems-1 ? dbBtree::propagation : dbBtree::done;
              case dbBtree::done:
                db->pool.unfix(pg);
                return dbBtree::done;
            } 
            l += 1;
        }       
    }
    // No child contained the entry. The page obtained by db->get() above is
    // still fixed on this path, so release it like every other exit does.
    db->pool.unfix(pg);
    return dbBtree::not_found; 
}


// Repair an underflow of the inner child page referenced by entry r of this
// page. The child (a) is asserted to hold minItems-1 entries. It is
// combined with its right neighbour e[r+1] or, when r is the last entry,
// with its left neighbour e[maxItems-2]: entries are either redistributed
// between the two pages or the neighbour (b) is merged into a and freed.
// Returns dbBtree::done, dbBtree::propagation (the key of this page's last
// entry changed) or dbBtree::underflow (this page dropped below minItems
// entries after a merge).
// Entries of a are shifted inside a with memmove because the source and
// destination ranges can overlap; memcpy is only used between pages.
int dbBtreePage::handlePageUnderflow(dbDatabase* db, int r)
{
    const int am = maxItems - minItems + 1;
    dbPutTie tie;
    dbBtreePage* a = (dbBtreePage*)db->put(tie, e[r].oid);
    assert(a->m == am);
    if (r < maxItems-1) { // exists greater page
        dbBtreePage* b = (dbBtreePage*)db->get(e[r+1].oid);
        int bm = b->m; 
        if (maxItems > am + bm){ // reallocation of nodes between pages a and b
            // i entries are taken from the front of b
            int i = maxItems - bm - ((maxItems - bm + minItems - 1) >> 1);
            dbPutTie tie;
            db->pool.unfix(b);
            b = (dbBtreePage*)db->put(tie, e[r+1].oid);
            // make room at the end of a, then append b's first i entries
            memmove(&a->e[am-i], &a->e[am], (minItems-1)*sizeof(item));
            memcpy(&a->e[maxItems-i], &b->e[bm], i*sizeof(item));
            b->m += i;
            a->m -= i;
            e[r].pkey = a->e[maxItems-1].pkey;
            return dbBtree::done;
        } else { // merge page b to a  
            memmove(&a->e[am-maxItems+bm], &a->e[am], 
                    (minItems-1)*sizeof(item));
            memcpy(&a->e[bm], &b->e[bm], (maxItems-bm)*sizeof(item));
            db->pool.unfix(b);
            db->freePage(e[r+1].oid);
            // entry r+1 keeps its key but now refers to the merged page a;
            // entry r is then removed by shifting e[m..r-1] up by one
            e[r+1].oid = e[r].oid;
            memmove(&e[m+1], &e[m], (r-m)*sizeof(item));
            a->m = am - maxItems + bm;
            m += 1;
            return m > maxItems-minItems ? dbBtree::underflow : dbBtree::done;
        }
    } else { // page b is before a
        dbBtreePage* b = (dbBtreePage*)db->get(e[maxItems-2].oid);
        int bm = b->m; 
        if (maxItems > am + bm){ // reallocation of nodes between pages a and b
            // i entries are taken from the end of b
            int i = maxItems - bm - ((maxItems - bm + minItems - 1) >> 1);
            dbPutTie tie;
            db->pool.unfix(b);
            b = (dbBtreePage*)db->put(tie, e[maxItems-2].oid);
            memcpy(&a->e[am-i], &b->e[maxItems-i], i*sizeof(item));
            memmove(&b->e[bm+i], &b->e[bm], (maxItems-bm-i)*sizeof(item));
            e[maxItems-2].pkey = b->e[maxItems-1].pkey;
            b->m += i;
            a->m -= i;
            if (e[maxItems-1].pkey != a->e[maxItems-1].pkey) { 
                e[maxItems-1].pkey = a->e[maxItems-1].pkey;
                return dbBtree::propagation;
            }
            return dbBtree::done;
        } else { // merge page b to a
            memcpy(&a->e[bm-minItems+1], &b->e[bm], 
                   (maxItems-bm)*sizeof(item));
            db->pool.unfix(b);
            db->freePage(e[maxItems-2].oid);
            // remove entry maxItems-2 by shifting the entries before it up
            memmove(&e[m+1], &e[m], (maxItems-2-m)*sizeof(item));
            a->m = bm - minItems + 1;
            m += 1;
            if (e[maxItems-1].pkey != a->e[maxItems-1].pkey) { 
                e[maxItems-1].pkey = a->e[maxItems-1].pkey;
                return m > maxItems-minItems 
                    ? dbBtree::underflow : dbBtree::propagation;
            } else { 
                return m > maxItems-minItems 
                    ? dbBtree::underflow : dbBtree::done;
            }
        }
    }
}


// Repair the underflowed leaf page referenced by slot e[r] of this internal
// page (page "a"), using a neighbouring leaf (page "b"): the next leaf
// e[r+1] when r < maxItems-1, otherwise the previous leaf e[maxItems-2].
// If the contents of both leaves do not fit into one page, items are moved
// from b to a; otherwise b is merged into a, page b is freed and its slot
// is removed from this page (used slots are e[m..maxItems-1], so m grows).
//
// Parameters:
//   db         - database used to fetch, modify and free pages
//   r          - index in e[] of the underflowed leaf
//   type       - dbField type code of the key; tpString selects the
//                variable-length layout, other types index keySize[]
//   lastPageId - when e[maxItems-1].oid equals this id, its pkey is left
//                untouched
// Returns:
//   dbBtree::done        - no further action is needed in the caller
//   dbBtree::propagation - e[maxItems-1].pkey of this page has changed
//   dbBtree::underflow   - after a merge this page holds fewer than
//                          minItems slots (m > maxItems-minItems)
int dbBtreePage::handleLeafPageUnderflow(dbDatabase* db, int r, int type,
                                         oid_t lastPageId)
{
    dbPutTie tie;
    dbBtreeLeafPage* a = (dbBtreeLeafPage*)db->put(tie, e[r].oid);
    int an = a->nItems;
    if (type == dbField::tpString) { 
        if (r < maxItems-1) { // exists greater page
            dbBtreeLeafPage* b = (dbBtreeLeafPage*)db->get(e[r+1].oid);
            int bn = b->nItems; 
            if ((an+bn)*sizeof(dbBtreeLeafPage::str) + a->size + b->size 
                > sizeof(b->keyChar)) 
            {
                // reallocation of nodes between pages a and b
                int i;
                dbPutTie tie;
                db->pool.unfix(b);
                b = (dbBtreeLeafPage*)db->put(tie, e[r+1].oid);
                // append leading strings of b to a while a stays smaller
                for (i = 0; a->size + b->keyStr[i].size < b->size; i++) { 
                    int len = b->keyStr[i].size;
                    a->size += len;
                    b->size -= len;
                    a->keyStr[an].offs = sizeof(a->keyChar) - a->size;
                    a->keyStr[an].size = len;
                    a->keyStr[an].oid = b->keyStr[i].oid;
                    memcpy(a->keyChar + a->keyStr[an].offs,
                           b->keyChar + b->keyStr[i].offs, len);
                    an += 1;
                }
                a->nItems = an;
                e[r].pkey = packStrFrmKey(&a->keyChar[a->keyStr[an-1].offs]);
                b->compactify(i);
                return dbBtree::done;
            } else { // merge page b to a
                for (int i = 0; i < bn; i++, an++) { 
                    a->keyStr[an] = b->keyStr[i];
                    // string bodies of b are placed below those of a
                    a->keyStr[an].offs -= a->size;
                }
                a->size += b->size;
                a->nItems = an;
                memcpy(a->keyChar + sizeof(a->keyChar) - a->size,
                       b->keyChar + sizeof(b->keyChar) - b->size,
                       b->size);
                db->pool.unfix(b);
                db->freePage(e[r+1].oid);
                e[r+1].oid = e[r].oid;
                memmove(&e[m+1], &e[m], (r-m)*sizeof(item));
                m += 1;
                return m > maxItems-minItems 
                    ? dbBtree::underflow : dbBtree::done;
            }
        } else { // page b is before a
            dbBtreeLeafPage* b = (dbBtreeLeafPage*)db->get(e[maxItems-2].oid);
            int bn = b->nItems; 
            if ((an+bn)*sizeof(dbBtreeLeafPage::str) + a->size + b->size 
                > sizeof(b->keyChar)) 
            {
                // reallocation of nodes between pages a and b
                dbPutTie tie;
                int i = 0;
                int moved = 0;
                db->pool.unfix(b);
                b = (dbBtreeLeafPage*)db->put(tie, e[maxItems-2].oid);
                // Count how many trailing strings of b to move. The bytes
                // accumulated must be those of the trailing item that was
                // just tested (the original added the size of a leading
                // item, so the estimate did not match the items moved).
                while (a->size+b->keyStr[bn-i-1].size+moved < b->size-moved) {
                    moved += b->keyStr[bn-i-1].size;
                    i += 1;
                }
                memmove(&a->keyStr[i], a->keyStr, 
                        an*sizeof(dbBtreeLeafPage::str));
                for (int j = 0; j < i; j++) { 
                    int len = b->keyStr[bn-i+j].size;
                    a->size += len;
                    b->size -= len;
                    a->keyStr[j].offs = sizeof(a->keyChar) - a->size;
                    a->keyStr[j].size = len;
                    a->keyStr[j].oid = b->keyStr[bn-i+j].oid;
                    memcpy(a->keyChar + a->keyStr[j].offs,
                           b->keyChar + b->keyStr[bn-i+j].offs, len);
                }
                an += i;
                bn -= i;
                a->nItems = an;
                b->compactify(-i);
                e[maxItems-2].pkey = 
                    packStrFrmKey(&b->keyChar[b->keyStr[bn-1].offs]);
                if (e[maxItems-1].oid != lastPageId) { 
                    pkey_t pkey = 
                        packStrFrmKey(&a->keyChar[a->keyStr[an-1].offs]);
                    if (e[maxItems-1].pkey != pkey) { 
                        e[maxItems-1].pkey = pkey;
                        return dbBtree::propagation;
                    }
                }
                return dbBtree::done;
            } else { // merge page b to a
                memmove(a->keyStr + bn, a->keyStr, 
                        an*sizeof(dbBtreeLeafPage::str));
                for (int i = 0; i < bn; i++) { 
                    a->keyStr[i] = b->keyStr[i];
                    a->keyStr[i].offs -= a->size;
                }
                an += bn;
                a->nItems = an;
                a->size += b->size;
                memcpy(a->keyChar + sizeof(a->keyChar) - a->size,
                       b->keyChar + sizeof(b->keyChar) - b->size,
                       b->size);
                db->pool.unfix(b);
                db->freePage(e[maxItems-2].oid);
                memmove(&e[m+1], &e[m], (maxItems-2-m)*sizeof(item));
                m += 1;
                if (e[maxItems-1].oid != lastPageId) { 
                    pkey_t pkey = 
                        packStrFrmKey(&a->keyChar[a->keyStr[an-1].offs]);
                    if (e[maxItems-1].pkey != pkey) { 
                        e[maxItems-1].pkey = pkey;
                        return m > maxItems-minItems 
                            ? dbBtree::underflow : dbBtree::propagation;
                    }
                }  
                return m > maxItems-minItems 
                    ? dbBtree::underflow : dbBtree::done;
            }
        }
    } else { 
        // Fixed-size keys: keys are packed from the start of keyChar, the
        // oid of key k is stored at record[dbBtreeLeafPage::maxItems-1-k].
        int itemSize = keySize[type];
        if (r < maxItems-1) { // exists greater page
            dbBtreeLeafPage* b = (dbBtreeLeafPage*)db->get(e[r+1].oid);
            int bn = b->nItems; 
            assert(bn >= an);
            if ((an+bn)*(sizeof(oid_t) + itemSize) > sizeof(b->keyChar)) { 
                // reallocation of nodes between pages a and b
                int i = bn - ((an + bn) >> 1);
                dbPutTie tie;
                db->pool.unfix(b);
                b = (dbBtreeLeafPage*)db->put(tie, e[r+1].oid);
                memcpy(a->keyChar + an*itemSize, b->keyChar, i*itemSize);
                memmove(b->keyChar, b->keyChar + i*itemSize, (bn-i)*itemSize);
                memcpy(&a->record[dbBtreeLeafPage::maxItems-an-i], 
                       &b->record[dbBtreeLeafPage::maxItems-i], 
                       i*sizeof(oid_t));
                memmove(&b->record[dbBtreeLeafPage::maxItems-bn+i], 
                        &b->record[dbBtreeLeafPage::maxItems-bn], 
                        (bn-i)*sizeof(oid_t));
                e[r].pkey = packKey(a->keyChar+(an+i-1)*itemSize, type);
                b->nItems -= i;
                a->nItems += i;
                return dbBtree::done;
            } else { // merge page b to a  
                memcpy(a->keyChar + an*itemSize, b->keyChar, bn*itemSize);
                memcpy(&a->record[dbBtreeLeafPage::maxItems-an-bn],
                       &b->record[dbBtreeLeafPage::maxItems-bn],
                       bn*sizeof(oid_t));
                db->pool.unfix(b);
                db->freePage(e[r+1].oid);
                e[r+1].oid = e[r].oid;
                memmove(&e[m+1], &e[m], (r-m)*sizeof(item));
                a->nItems = an + bn;
                m += 1;
                // maxItems-m slots remain in use: underflow when that drops
                // below minItems (same test as the string branch above)
                return m > maxItems-minItems 
                    ? dbBtree::underflow : dbBtree::done;
            }
        } else { // page b is before a
            dbBtreeLeafPage* b = (dbBtreeLeafPage*)db->get(e[maxItems-2].oid);
            int bn = b->nItems; 
            assert(bn >= an);
            if ((an+bn)*(sizeof(oid_t) + itemSize) > sizeof(b->keyChar)) { 
                // reallocation of nodes between pages a and b
                int i = bn - ((an + bn) >> 1);
                dbPutTie tie;
                db->pool.unfix(b);
                b = (dbBtreeLeafPage*)db->put(tie, e[maxItems-2].oid);
                memmove(a->keyChar + i*itemSize, a->keyChar, an*itemSize);
                memcpy(a->keyChar, b->keyChar + (bn-i)*itemSize, i*itemSize);
                // source and destination lie in the same record[] array and
                // overlap when i < an, so memmove is required here
                memmove(&a->record[dbBtreeLeafPage::maxItems-an-i], 
                        &a->record[dbBtreeLeafPage::maxItems-an], 
                        an*sizeof(oid_t));
                memcpy(&a->record[dbBtreeLeafPage::maxItems-i], 
                       &b->record[dbBtreeLeafPage::maxItems-bn], 
                       i*sizeof(oid_t));
                e[maxItems-2].pkey = 
                    packKey(b->keyChar + (bn-i-1)*itemSize, type);
                b->nItems -= i;
                a->nItems += i;
                if (e[maxItems-1].oid != lastPageId) { 
                    pkey_t pkey = packKey(a->keyChar+(an+i-1)*itemSize, type);
                    if (e[maxItems-1].pkey != pkey) { 
                        e[maxItems-1].pkey = pkey;
                        return dbBtree::propagation;
                    }
                }
                return dbBtree::done;
            } else { // merge page b to a
                memmove(a->keyChar + bn*itemSize, a->keyChar, an*itemSize);
                memcpy(a->keyChar, b->keyChar, bn*itemSize);
                // same array on both sides: use memmove so the copy does
                // not depend on the bn >= an assertion for being disjoint
                memmove(&a->record[dbBtreeLeafPage::maxItems-an-bn],
                        &a->record[dbBtreeLeafPage::maxItems-an],
                        an*sizeof(oid_t));
                memcpy(&a->record[dbBtreeLeafPage::maxItems-bn], 
                       &b->record[dbBtreeLeafPage::maxItems-bn], 
                       bn*sizeof(oid_t));
                db->pool.unfix(b);
                db->freePage(e[maxItems-2].oid);
                memmove(&e[m+1], &e[m], (maxItems-2-m)*sizeof(item));
                a->nItems = an + bn;
                m += 1;
                if (e[maxItems-1].oid != lastPageId) { 
                    pkey_t pkey = packKey(a->keyChar+(an+bn-1)*itemSize, type);
                    if (e[maxItems-1].pkey != pkey) { 
                        e[maxItems-1].pkey = pkey;
                        return m > maxItems-minItems 
                            ? dbBtree::underflow : dbBtree::propagation;
                    }
                }
                return m > maxItems-minItems 
                    ? dbBtree::underflow : dbBtree::done;
            }
        }
    }
}


// REMOVE(KEY,TYPE): lower-bound binary search used inside the switch of
// dbBtreeLeafPage::remove. Reads the key of type TYPE located at p+offs and
// narrows [l,r) over the sorted array pg->KEY until l == r, which is then
// the first position whose key is not less than the searched one.
// Relies on the enclosing scope for p, offs, pg, i, l and r, and finishes
// with "break" to leave the enclosing switch.
#define REMOVE(KEY,TYPE) {                                                  \
    const TYPE probe = *(TYPE*)(p+offs);                                    \
    while (l < r)  {                                                        \
        i = (l+r) >> 1;                                                     \
        if (pg->KEY[i] < probe) {                                           \
            l = i+1;                                                        \
        } else {                                                            \
            r = i;                                                          \
        }                                                                   \
    }                                                                       \
    break;                                                                  \
}

// Remove the entry for object rem.oid from leaf page pageId.
//
// The key is taken from the record image p: for scalar types it is read at
// p+offs, for tpString p+offs holds a dbVarying whose offs field locates
// the string inside p. A binary search finds the first position whose key
// is not less than that key; the page is then scanned forward from there
// for the entry whose oid equals rem.oid.
//
// Parameters:
//   db         - database used to fetch and modify the page
//   pageId     - oid of the leaf page
//   lastPageId - when pageId equals this id, rem.pkey is never updated
//   type       - dbField type code of the key
//   offs       - offset of the key field inside p
//   p          - record image holding the key
//   rem        - in: rem.oid is the object to remove, rem.pkey the packed
//                key currently stored for this page in its parent;
//                out: rem.pkey is replaced when the last key of the page
//                was removed and the packed key of the new last entry
//                differs
// Returns:
//   dbBtree::not_found   - no entry with rem.oid at or after the position
//   dbBtree::underflow   - page now uses less than half of keyChar
//   dbBtree::propagation - rem.pkey was changed
//   dbBtree::done        - otherwise
int dbBtreeLeafPage::remove(dbDatabase* db, oid_t pageId, oid_t lastPageId, 
                            int type, int offs, byte* p, 
                            dbBtreePage::item& rem)
{
    dbBtreeLeafPage* pg = (dbBtreeLeafPage*)db->get(pageId);
    int i, n = pg->nItems, l = 0, r = n;
    oid_t oid = rem.oid;
    switch (type) { 
      case dbField::tpBool:
      case dbField::tpInt1:
        REMOVE(keyInt1, int1);
      case dbField::tpInt2:
        REMOVE(keyInt2, int2);
      case dbField::tpInt4:
        REMOVE(keyInt4, int4);
      case dbField::tpInt8:
        REMOVE(keyInt8, int8);
      case dbField::tpReal4:
        REMOVE(keyReal4, real4);
      case dbField::tpReal8:
        REMOVE(keyReal8, real8);
      case dbField::tpString:
      { 
        char* key = (char*)p + ((dbVarying*)(p+offs))->offs;
#ifdef USE_LOCALE_SETTINGS
        char buf[dbMaxKeyLen];
        strxfrm(buf, key, sizeof buf);
        key = buf;
#endif
        while (l < r)  {
            i = (l+r) >> 1;
            if (strcmp(key, &pg->keyChar[pg->keyStr[i].offs]) > 0) { 
                l = i+1; 
            } else { 
                r = i;
            }
        }
        while (r < n) { 
            if (pg->keyStr[r].oid == oid) { 
                dbPutTie tie;
                // re-acquire the page for modification
                db->pool.unfix(pg);
                pg = (dbBtreeLeafPage*)db->put(tie, pageId);
                int len = pg->keyStr[r].size;
                int strOffs = pg->keyStr[r].offs;
                // close the gap in the descriptor array; the ranges overlap
                // (shift by one slot), so memmove is required
                memmove(&pg->keyStr[r], &pg->keyStr[r+1], 
                        (n-r-1)*sizeof(str));
                // shift the string bodies located below the removed one up
                // by len bytes
                memmove(&pg->keyChar[sizeof(pg->keyChar) - pg->size + len],
                        &pg->keyChar[sizeof(pg->keyChar) - pg->size],
                        pg->size - sizeof(pg->keyChar) + strOffs);
                pg->nItems = --n;
                pg->size -= len;
                for (i = 0; i < n; i++) { 
                    if (pg->keyStr[i].offs < strOffs) { 
                        pg->keyStr[i].offs += len;
                    }
                }                   
                if (r == n && pageId != lastPageId) { // last key at the page
                    pkey_t pkey = 
                        packStrFrmKey(&pg->keyChar[pg->keyStr[n-1].offs]);
                    if (pkey != rem.pkey) { 
                        rem.pkey = pkey;
                        return n*sizeof(str) + pg->size < sizeof(pg->keyChar)/2
                            ? dbBtree::underflow : dbBtree::propagation;
                    }
                }
                return n*sizeof(str) + pg->size < sizeof(pg->keyChar)/2
                    ? dbBtree::underflow : dbBtree::done;
            }
            r += 1;
        }
        db->pool.unfix(pg);
        return dbBtree::not_found;
      }
      default:
        assert(false);
    }
    // scalar keys: the oid of key k is stored at record[maxItems-1-k]
    while (r < n) { 
        if (pg->record[maxItems-r-1] == oid) { 
            dbPutTie tie;
            db->pool.unfix(pg);
            pg = (dbBtreeLeafPage*)db->put(tie, pageId);
            size_t itemSize = keySize[type];
            // overlapping shift of the key array by one item: memmove
            memmove(pg->keyChar + r*itemSize, pg->keyChar + (r+1)*itemSize, 
                    (n - r - 1)*itemSize);
            memmove(&pg->record[maxItems-n+1], &pg->record[maxItems-n],
                    (n - r - 1)*sizeof(oid_t));
            pg->nItems = --n;
            if (r == n && pageId != lastPageId) { // last key at the page
                pkey_t pkey = packKey(pg->keyChar + (n-1)*itemSize, type);
                if (pkey != rem.pkey) { 
                    rem.pkey = pkey;
                    return n*(itemSize + sizeof(oid_t)) 
                        < sizeof(pg->keyChar)/2
                        ? dbBtree::underflow : dbBtree::propagation;
                }
            }
            return n*(itemSize + sizeof(oid_t)) < sizeof(pg->keyChar)/2
                ? dbBtree::underflow : dbBtree::done;
        }
        r += 1;
    }
    db->pool.unfix(pg);
    return dbBtree::not_found;
}


// Free the subtree rooted at internal page pageId. height is decremented
// first; when it becomes 1 the children e[m..maxItems-1] are freed
// directly, otherwise each child is purged recursively with the reduced
// height. The page itself is unfixed and freed last.
void dbBtreePage::purge(dbDatabase* db, oid_t pageId, int height)
{
    dbBtreePage* pg = (dbBtreePage*)db->get(pageId);
    const bool freeChildrenDirectly = (--height == 1);
    for (int slot = pg->m; slot < maxItems; slot++) { 
        if (freeChildrenDirectly) { 
            db->freePage(pg->e[slot].oid);
        } else { 
            purge(db, pg->e[slot].oid, height);
        }
    }
    db->pool.unfix(pg);
    db->freePage(pageId);
}

// Visit the children e[m..maxItems-1] of this internal page in ascending
// slot order. height is decremented first; when it becomes 1 the children
// are treated as leaf pages, otherwise as internal pages traversed with the
// reduced height. Each child is unfixed after its traversal. Returns false
// as soon as a child traversal returns false, true otherwise.
bool dbBtreePage::traverseForward(dbDatabase* db, dbAnyCursor* cursor, 
                                  dbExprNode* condition, int height)
{
    const bool childrenAreLeaves = (--height == 1);
    for (int slot = m; slot < maxItems; slot++) { 
        bool proceed;
        if (childrenAreLeaves) { 
            dbBtreeLeafPage* leaf = (dbBtreeLeafPage*)db->get(e[slot].oid);
            proceed = leaf->traverseForward(db, cursor, condition);
            db->pool.unfix(leaf);
        } else { 
            dbBtreePage* child = (dbBtreePage*)db->get(e[slot].oid);
            proceed = child->traverseForward(db, cursor, condition, height);
            db->pool.unfix(child);
        }
        if (!proceed) { 
            return false;
        }
    }
    return true;
}


// Visit the children of this internal page in descending slot order, from
// e[maxItems-1] down to e[m]. height is decremented first; when it becomes
// 1 the children are treated as leaf pages, otherwise as internal pages
// traversed with the reduced height. Each child is unfixed after its
// traversal. Returns false as soon as a child traversal returns false.
bool dbBtreePage::traverseBackward(dbDatabase* db, dbAnyCursor* cursor, 
                                   dbExprNode* condition, int height)
{
    const int firstUsed = this->m;
    const bool childrenAreLeaves = (--height == 1);
    for (int slot = maxItems - 1; slot >= firstUsed; slot--) { 
        bool proceed;
        if (childrenAreLeaves) { 
            dbBtreeLeafPage* leaf = (dbBtreeLeafPage*)db->get(e[slot].oid);
            proceed = leaf->traverseBackward(db, cursor, condition);
            db->pool.unfix(leaf);
        } else { 
            dbBtreePage* child = (dbBtreePage*)db->get(e[slot].oid);
            proceed = child->traverseBackward(db, cursor, condition, height);
            db->pool.unfix(child);
        }
        if (!proceed) { 
            return false;
        }
    }
    return true;
}

// Add the objects referenced by this leaf to the cursor in ascending
// position order. A page with size == 0 is read as a page of scalars (oid
// of position k at record[maxItems-1-k]); otherwise it is read as a page of
// strings (oid at keyStr[k].oid). When condition is not NULL only objects
// for which db->evaluate() succeeds are added. Returns false as soon as
// cursor->add() returns false, true otherwise.
bool dbBtreeLeafPage::traverseForward(dbDatabase* db, dbAnyCursor* cursor, 
                                      dbExprNode* condition)
{
    const int count = nItems;
    const bool scalarPage = (size == 0);
    dbTableDescriptor* table = NULL;
    if (condition != NULL) { 
        table = &cursor->table;
    }
    for (int pos = 0; pos < count; pos++) { 
        oid_t oid;
        if (scalarPage) { 
            oid = record[maxItems-1-pos];
        } else { 
            oid = keyStr[pos].oid;
        }
        if (condition != NULL && !db->evaluate(condition, oid, table)) { 
            continue;
        }
        if (!cursor->add(oid)) { 
            return false;
        }
    }
    return true;
}


// Add the objects referenced by this leaf to the cursor in descending
// position order (from nItems-1 down to 0). A page with size == 0 is read
// as a page of scalars (oid of position k at record[maxItems-1-k]);
// otherwise it is read as a page of strings (oid at keyStr[k].oid). When
// condition is not NULL only objects for which db->evaluate() succeeds are
// added. Returns false as soon as cursor->add() returns false.
bool dbBtreeLeafPage::traverseBackward(dbDatabase* db, dbAnyCursor* cursor, 
                                       dbExprNode* condition)
{
    const bool scalarPage = (size == 0);
    dbTableDescriptor* table = NULL;
    if (condition != NULL) { 
        table = &cursor->table;
    }
    for (int pos = nItems - 1; pos >= 0; pos--) { 
        oid_t oid;
        if (scalarPage) { 
            oid = record[maxItems-1-pos];
        } else { 
            oid = keyStr[pos].oid;
        }
        if (condition != NULL && !db->evaluate(condition, oid, table)) { 
            continue;
        }
        if (!cursor->add(oid)) { 
            return false;
        }
    }
    return true;
}

