Commit a062ee98 authored by Radford Neal's avatar Radford Neal

start changing symbol table to use lphash - linked in, but not used yet

parent c4d53fa9
......@@ -2686,6 +2686,7 @@ AC_CONFIG_FILES(
src/extra/matprod/Makefile
src/extra/helpers/Makefile
src/extra/sggc/Makefile
src/extra/lphash/Makefile
src/include/Makefile
src/include/Rmath.h0
src/include/R_ext/Makefile
......
......@@ -6,6 +6,9 @@ Copyright (C) 2013, 2014, 2015, 2016, 2017 by Radford M. Neal.
The library in src/extra/sggc is Copyright (C) 2016, 2017 Radford M. Neal,
and licensed under GPL2 or later.
The library in src/extra/lphash is Copyright (C) 2017 Radford M. Neal,
and licensed under GPL2 or later.
The library in src/extra/matprod is Copyright (C) 2013, 2014 Radford M. Neal,
and licensed under GPL2 or later.
......
Change symbol table to use lphash library. Allows removal of the next
link from symbol objects, making space for symbits.
......@@ -14,7 +14,7 @@ distdir = $(top_builddir)/$(PACKAGE)-$(VERSION)/$(subdir)
DISTFILES = Makefile.in
SUBDIRS = blas bzip2 pcre tre xdr xz zlib @USE_NLS_TRUE@ intl \
matprod helpers sggc
matprod helpers sggc lphash
SUBDIRS_WITH_NO_BUILD = graphapp trio tzone win_iconv @USE_NLS_FALSE@ intl
RBLAS =@BLAS_SHLIB_TRUE@ make.blas
......@@ -28,7 +28,8 @@ LIBXZ =@BUILD_XZ_TRUE@ make.xz
LIBMATPROD = make.matprod
LIBHELPERS = make.helpers
LIBSGGC = make.sggc
EXTRAS = $(RBLAS) $(LIBBZ2) $(LIBPCRE) $(LIBZ) $(LIBXDR) $(LIBINTL) $(LIBTRE) $(LIBXZ) $(LIBMATPROD) $(LIBHELPERS) $(LIBSGGC)
LIBLPHASH = make.lphash
EXTRAS = $(RBLAS) $(LIBBZ2) $(LIBPCRE) $(LIBZ) $(LIBXDR) $(LIBINTL) $(LIBTRE) $(LIBXZ) $(LIBMATPROD) $(LIBHELPERS) $(LIBSGGC) $(LIBLPHASH)
all: Makefile R
......@@ -71,6 +72,9 @@ make.helpers: Makefile
make.sggc: Makefile
@(cd sggc; $(MAKE))
make.lphash: Makefile
@(cd lphash; $(MAKE))
mostlyclean: clean
clean:
......
This diff is collapsed.
#
# ${R_HOME}/src/extra/lphash/Makefile
## Source and build tree locations; the placeholders are filled in when
## config.status generates the Makefile from this template.
VPATH = @srcdir@
srcdir = @srcdir@
top_srcdir = @top_srcdir@
top_builddir = ../../..
subdir = src/extra/lphash
R_HOME = $(top_builddir)
## Build settings shared across the tree.
include $(top_builddir)/Makeconf
## The source directory is searched first for headers (lphash.h, lphash-app.h).
LPHASH_CPPFLAGS = -I$(srcdir)
ALL_CPPFLAGS = $(LPHASH_CPPFLAGS) $(R_XTRA_CPPFLAGS) $(CPPFLAGS) $(DEFS)
## The library is a single C file plus two headers; the dependency (.d) and
## object (.o) file names are derived from the source list.
SOURCES = lphash.c
HEADERS = lphash.h lphash-app.h
DEPENDS = $(SOURCES:.c=.d)
OBJECTS = $(SOURCES:.c=.o)
## NOTE(review): presumably switches to the flags used for shared-library
## objects when R itself is built as a shared library - confirm in Makeconf.
@WANT_R_SHLIB_TRUE@ALL_CFLAGS = $(ALL_CFLAGS_LO)
## Destination and file list used by the distdir target.
distdir = $(top_builddir)/$(PACKAGE)-$(VERSION)/$(subdir)
DISTFILES = Makefile.in Makefile.win \
$(SOURCES) $(HEADERS)
## The static library built in this directory, with its sources and objects.
noinst_LIBRARIES = liblphash.a
liblphash_a_SOURCES = $(SOURCES)
liblphash_a_OBJECTS = $(OBJECTS)
## Default goal: build the library via the R target.
all: R
## Regenerate this Makefile from its template whenever the template,
## config.status, or a source file changes.  Regeneration also discards the
## dependency lines that the Makedeps target appends below.
Makefile: $(srcdir)/Makefile.in \
$(top_builddir)/config.status \
$(SOURCES)
@cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@
## Append the generated .d dependency files to the Makefile itself, then
## touch the Makedeps stamp file so this is done once per regeneration.
Makedeps: Makefile $(DEPENDS)
@cat $(DEPENDS) >> Makefile
@touch $@
## Two separate sub-makes: the first updates the dependencies, the second
## builds the library with those dependencies in effect.
R: Makefile
@$(MAKE) Makedeps
@$(MAKE) $(noinst_LIBRARIES)
## Build the archive from scratch (any old archive is removed first) and
## then run ranlib on it.
liblphash.a: $(liblphash_a_OBJECTS)
rm -f $@
$(AR) cr $@ $(liblphash_a_OBJECTS)
$(RANLIB) $@
## Cleaning targets, from least to most thorough.  mostlyclean is simply an
## alias for clean in this directory.
mostlyclean: clean
## Remove everything a build produces here: the dependency stamp file,
## dependency files, objects, and the library archive.  Errors are ignored.
clean:
	@-rm -f Makedeps *.d *.o *.lo *.a
## Additionally remove the Makefile generated by config.status.  (No other
## generated files exist in this directory, so nothing else is listed.)
distclean: clean
	@-rm -f Makefile
maintainer-clean: distclean
## Standard targets for which this directory has nothing to do.
install install-strip uninstall TAGS info dvi check:
## Populate the distribution directory: for each listed file not already
## present there, try a hard link first and fall back to copying it.
distdir: $(DISTFILES)
@for f in $(DISTFILES); do \
test -f $(distdir)/$${f} \
|| ln $(srcdir)/$${f} $(distdir)/$${f} 2>$(R_BIT_BUCKET) \
|| cp -p $(srcdir)/$${f} $(distdir)/$${f}; \
done
## Automagically generated dependencies:
#-*- Makefile -*-
## Rules and tool definitions shared by the Windows build.
include ../../gnuwin32/MkRules
DEFS=-DHAVE_CONFIG_H
CPPFLAGS=-I../../include -I. $(DEFS) $(OPENMP)
## done this way for parallel make
## (the dependency file is produced by one sub-make before a second sub-make
## builds the library)
all:
$(MAKE) -f Makefile.win makeMakedeps
$(MAKE) -f Makefile.win liblphash.a
CSOURCES = lphash.c
OBJS = $(CSOURCES:.c=.o)
## No recipe is given here.
## NOTE(review): presumably the archive rule comes from MkRules - confirm.
liblphash.a: $(OBJS)
## Remove objects, editor backup files, dependency files and Makedeps.
## The library archive itself is left in place.
clean:
@$(RM) *.o *~ *.d Makedeps
## Additionally remove the library archive.
distclean: clean
@$(RM) liblphash.a
# Dependencies
DEPS=$(CSOURCES:.c=.d)
## Rebuild Makedeps from scratch by concatenating the per-source .d files.
makeMakedeps: $(DEPS)
@$(RM) Makedeps
@cat $(DEPS) >> Makedeps
## The leading '-' means a missing Makedeps is not an error.
-include Makedeps
LOCALLY-PROBED HASHING FACILITY
Copyright 2017 Radford M. Neal. Distributed under the GPL version 2
or later. See COPYING for licence details.
The lphash library provides a facility for managing hash tables, such
as might be used to implement a symbol table in an implementation of a
language such as R.
See the file lphash-doc for documentation.
A simple set of tests for correctness is in test1, and a performance
test is in test2.
The source code repository for this software is at
https://gitlab.com/radfordneal/lphash
The stable versions of lphash below are tagged in the source code repository.
Version 2017-05-26
First complete version.
/*
* pqR : A Computer Language for Statistical Data Analysis
* Copyright (C) 2017 Radford M. Neal
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, a copy is available at
* http://www.r-project.org/Licenses/
*/
#ifndef LPHASH_APP_H_
#define LPHASH_APP_H_

/* Application-specific configuration of the lphash library.  This header
   defines the types, constants and hooks that lphash requires, and then
   includes lphash.h itself. */

typedef unsigned lphash_entry_t;  /* Compressed pointer to symbol object */
typedef char *lphash_key_t;       /* Symbol's print name */
typedef unsigned lphash_hash_t;   /* Character hash as stored in print name */

#define LPHASH_NO_ENTRY 0         /* R_NoObject */

/* Maximum load factor, unless already set (e.g., on the compiler command
   line). */

#ifndef LPHASH_MAX_LOAD
#define LPHASH_MAX_LOAD 0.7
#endif

/* Memory for tables comes straight from the C library allocator.  Both
   hooks are needed, since lphash.c calls lphash_malloc as well as
   lphash_free.  (malloc and free are declared by <stdlib.h>, which lphash.h
   includes.) */

#define lphash_malloc malloc
#define lphash_free free

/* With these definitions, lphash_match never reports a match, and
   lphash_make_entry always yields 0, which is LPHASH_NO_ENTRY.
   NOTE(review): presumably placeholders until the symbol table is actually
   switched over to lphash - confirm. */

#define lphash_match(e,k) 0
#define lphash_make_entry(k) 0

#include "lphash.h"

#endif
LPHASH - A LOCALLY-PROBED HASHING FACILITY
Copyright (c) 2017 Radford M. Neal.
The lphash library is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
INTRODUCTION
The lphash library of C functions supports non-chaining hash tables in
which the probes following an initial hash location are done locally,
to improve cache performance. Currently, only insertion and lookup
operations are supported - no deletion - as this is all that is
required for some applications, such as the symbol table in an
implementation of R. The size of the hash table is automatically
increased as needed.
The application using lphash defines the type of an entry in the hash
table, and the type of a key. It also defines the type of a hash
value, which is computed by the application from a key. It also
supplies functions or macros to create an entry with a given key and
to test whether an entry matches a key.
USING LPHASH IN AN APPLICATION
To use lphash, an application should create an lphash-app.h file,
which must define the constants and types below and then include the
lphash.h file. The lphash-app.h file must be included by application
modules that use the lphash library, and is also included in the C
source file for the lphash library routines.
The following types must be defined in lphash-app.h:
lphash_entry_t The type of an entry in the hash table, which must
be an integer or pointer type, so that comparison is
possible. (Larger types would also have a bad
effect on performance.)
lphash_key_t The type of a key that may be tested for matching
an entry in the hash table. Could be the actual key
value, or a pointer to it.
lphash_hash_t An unsigned integer type to be used for hash values.
The following constant must be defined in lphash-app.h:
LPHASH_NO_ENTRY A value of type lphash_entry_t that will not compare
equal to any actual entry, and which can therefore
be used to mark an empty hash bucket, or signal that
no entry was found.
The lphash.h file will define the following type:
lphash_table_t The type of a pointer to a hash table managed by
the lphash functions. The application should not
directly reference fields of this table.
The application must provide the following functions (or macros),
which will have prototypes declared for them as below in lphash.h (if
they are not macros):
int lphash_match (lphash_entry_t entry, lphash_key_t key)
Returns 1 if the 'key' matches 'entry', and 0 if not.
lphash_entry_t lphash_make_entry (lphash_key_t key)
Creates an entry for the given key. It should be the case that
lphash_match would return 1 if called with the new entry and this
key, but this is not checked.
void *lphash_malloc (size_t size)
Allocates 'size' bytes of memory (with undefined contents) to be
used as part of a hash table. Returns NULL if allocation fails.
void lphash_free (void *ptr)
Frees memory allocated by lphash_malloc.
An application might simply define lphash_malloc and lphash_free to be
the standard malloc and free functions from the C library, or might
define an lphash_malloc that tries to recover memory from elsewhere if
allocation initially fails.
The application may optionally define the following in lphash-app.h:
LPHASH_MAX_LOAD A real value greater than zero and less than one
giving the maximum ratio of occupied to total hash
buckets. When this maximum is exceeded, a larger
table is allocated, if possible (see below). If
not defined, it defaults to 0.75.
LPHASH_ALIGN The address boundary to which the array of hash
buckets is aligned. Must be a power of two. If
not defined, it defaults to 64, which is a common
size for a cache line.
LPHASH_LINEAR If defined (as anything), indexes of successive
probe locations will be found by adding, rather
than xor'ing (see below for details).
LPHASH_STATS If defined (as anything), some statistics will
be collected (at a small performance cost). The
details are subject to change, and may be found by
looking at the source code in lphash.c.
FUNCTIONS PROVIDED BY LPHASH TO THE APPLICATION
The lphash library provides an application with the functions (or
macros) below, which (if functions) have prototypes as shown:
lphash_table_t lphash_create (int initial_size)
Returns a pointer to a new hash table, which is initially empty,
and has the number of buckets specified by the argument, which
should be a power of two and at least 8. (The size is silently
increased to 8 if it is less, and is otherwise silently decreased
to the next lower power of two if it is not a power of two.)
The initial size specification is relevant only for performance.
The table size is increased as needed later, but if it is known
that many entries will be needed, allocating a large table
initially will reduce startup costs.
NULL is returned if a new table cannot be allocated.
lphash_entry_t lphash_lookup (lphash_table_t table, lphash_hash_t hash,
lphash_key_t key)
Looks for an entry in 'table' that has the given 'hash' and
matches the given 'key'. Returns the entry found, or returns
LPHASH_NO_ENTRY if no matching entry is present in 'table'.
lphash_entry_t lphash_insert (lphash_table_t table, lphash_hash_t hash,
lphash_key_t key)
Searches for an entry in 'table' that has the given 'hash' and
matches the given 'key'. If one is found, it is returned.
Otherwise, a new entry created by calling lphash_make_entry is
added to 'table', except that if adding a new entry would overflow
the table (see below), LPHASH_NO_ENTRY is returned instead.
void lphash_destroy (lphash_table_t table)
Destroys the given hash table, calling lphash_free for all memory
areas that it was using.
The size of a hash table will be doubled when an insertion would
increase the load ratio above LPHASH_MAX_LOAD, except that if memory
for the larger table cannot be allocated (lphash_malloc returns NULL),
the existing table will be used, until the load ratio exceeds
sqrt(LPHASH_MAX_LOAD), at which point an attempt to allocate a bigger
table will again be made, and if this fails, the table is considered
to have overflowed, and no new entries can be added.
LPHASH HASHING AND PROBING STRATEGY
The hash buckets used by lphash contain a C structure value consisting
of an lphash_hash_t value and an lphash_entry_t value.
The number of buckets in a hash table is always a power of two. The
hash value passed to lphash_lookup or lphash_insert is used to form an
initial index of a bucket in the table by simply taking the
appropriate number of low order bits of the hash value.
The bucket array is aligned to a LPHASH_ALIGN address boundary. If
LPHASH_ALIGN is at least the size of a cache line (assumed to be a
power of two), and the size of a bucket is a power of two no larger
than a cache line, each bucket will lie entirely within a single cache
line (that is, no bucket will straddle a cache line boundary).
If the initial bucket looked at by lphash_lookup or lphash_insert
based on the hash value is occupied by an entry that does not have the
same hash value or does not match the key, indexes of further buckets
to be probed in succession are found by exclusive or'ing the low-order
bits of the hash value with the binary representations of the integers
1, 2, 3, etc. Given the assumptions above, this probing order
guarantees that the probes will be confined to a single cache line
until all buckets in that cache line have been probed.
For experimental purposes, defining the symbol LPHASH_LINEAR (as
anything) in lphash-app.h will switch lphash to using the strategy of
probing successive buckets (modulo the table size) - i.e., probing by
adding 1, 2, 3, etc. (modulo the number of buckets) rather than
exclusive-or'ing.
/* LPHASH - LOCALLY-PROBED HASH TABLE FACILITY - FUNCTION DEFINITIONS
Copyright (c) 2017 Radford M. Neal.
The lphash library is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. */
/* See lphash-doc for general information on the lphash library. */
#include "lphash-app.h"
#include <stdint.h>
#include <limits.h>
#include <math.h>
/* DEFAULT VALUES FOR OPTIONS. */
#ifndef LPHASH_MAX_LOAD
#define LPHASH_MAX_LOAD 0.75
#endif
#ifndef LPHASH_ALIGN
#define LPHASH_ALIGN 64
#endif
/* ALLOCATE BUCKETS FOR A TABLE.  Returns 0 if space can't be allocated,
   including when the number of bytes needed for 'size' buckets plus the
   alignment slack cannot be represented as a size_t.  Otherwise returns 1
   after setting the fields in 'table' for the bucket array (initialized to
   LPHASH_NO_ENTRY) and the size and thresholds.

   When 0 is returned, 'table' has not been modified, so a bucket array
   it already refers to remains usable. */

static int allocate_buckets (lphash_table_t table, int size)
{
    char *m, *m_aligned;

    /* Refuse a size whose byte count would wrap around.  Without this
       check, a much-too-small area could be allocated and then overrun
       by the initialization loop at the end of this function. */

    if ((size_t)size > (SIZE_MAX - (LPHASH_ALIGN-1)) / sizeof (lphash_bucket_t))
    {   return 0;
    }

    /* Allocate LPHASH_ALIGN-1 extra bytes so that the start of the bucket
       array can be rounded up to a LPHASH_ALIGN boundary. */

    m = lphash_malloc ((size_t)size * sizeof (lphash_bucket_t) + LPHASH_ALIGN-1);
    if (m == NULL)
    {   return 0;
    }

    m_aligned = (char *) (((uintptr_t)m + LPHASH_ALIGN-1) & ~(LPHASH_ALIGN-1));

    /* The offset is recorded so that the address originally returned by
       lphash_malloc can be recovered when the buckets are freed. */

    table->buckets = (lphash_bucket_t *) m_aligned;
    table->buckets_offset = m_aligned - m;

    table->size = size;

    /* Threshold at which expansion is first attempted, kept within
       2 .. size-1. */

    table->threshold = (int) (size * LPHASH_MAX_LOAD);
    if (table->threshold < 2)
    {   table->threshold = 2;
    }
    if (table->threshold >= size)
    {   table->threshold = size-1;
    }

    /* Threshold at which expansion is attempted again, and overflow is
       declared if it fails; also kept within 2 .. size-1, so that at
       least one bucket always stays empty. */

    table->threshold2 = (int) (size * sqrt(LPHASH_MAX_LOAD));
    if (table->threshold2 < 2)
    {   table->threshold2 = 2;
    }
    if (table->threshold2 >= size)
    {   table->threshold2 = size-1;
    }

    /* Mark every bucket as empty.  The hash fields are left unset. */

    for (int i = 0; i < size; i++)
    {   table->buckets[i].entry = LPHASH_NO_ENTRY;
    }

    return 1;
}
/* CREATE A HASH TABLE.  The number of buckets is the largest power of two
   not exceeding 'initial_size', but never fewer than 8.  Returns the new
   (empty) table, or NULL if either the table header or its bucket array
   could not be allocated. */

lphash_table_t lphash_create (int initial_size)
{
    lphash_table_t new_table;
    int n_buckets;

    /* Start at the minimum and keep doubling for as long as the doubled
       value still does not exceed the request.  The comparison is done
       on unsigned values. */

    n_buckets = 8;
    while (initial_size > 8
            && (unsigned)n_buckets << 1 <= (unsigned)initial_size)
    {   n_buckets <<= 1;
    }

    new_table = lphash_malloc (sizeof *new_table);
    if (new_table == NULL)
    {   return NULL;
    }

    if (allocate_buckets (new_table, n_buckets) == 0)
    {   lphash_free (new_table);
        return NULL;
    }

    new_table->occupied = 0;

#   ifdef LPHASH_STATS
    new_table->searches = new_table->not_found = 0;
    new_table->probes = new_table->matches = 0;
#   endif

    return new_table;
}
/* DESTROY A HASH TABLE.  Frees the bucket array and then the table header.
   A NULL table (such as lphash_create returns on failure) is accepted and
   ignored, in the same way that free accepts NULL. */

void lphash_destroy (lphash_table_t table)
{
    if (table == NULL)
    {   return;
    }

    /* The buckets pointer was moved forward for alignment; step back by
       the recorded offset to get the address that was allocated. */

    lphash_free ((char *)table->buckets - table->buckets_offset);

    lphash_free (table);
}
/* SEARCH FOR A TABLE ENTRY WITH GIVEN HASH AND KEY.  Probing starts at the
   bucket indexed by the low-order bits of 'hash', and continues until
   either an empty bucket or a bucket with equal hash whose entry matches
   'key' is found.  Returns the index of that bucket - so the caller can
   tell whether the entry was found by checking whether the bucket's entry
   is LPHASH_NO_ENTRY, and if it is, the index is where a new entry should
   be stored. */

static inline int search (lphash_table_t table, lphash_hash_t hash,
                          lphash_key_t key)
{
    int home, step;

#   ifdef LPHASH_STATS
    table->searches += 1;
#   endif

    home = hash & (table->size-1);

    for (step = 0; step != table->size; step++)
    {
        lphash_bucket_t *bucket;
        int pos;

        /* Location of this probe: offset from the home bucket by adding
           (linear probing) or by exclusive-or (the default). */

#       ifdef LPHASH_LINEAR
        pos = (home+step) & (table->size-1);
#       else
        pos = home^step;
#       endif

        bucket = &table->buckets[pos];

#       ifdef LPHASH_STATS
        table->probes += 1;
#       endif

        if (bucket->entry == LPHASH_NO_ENTRY)
        {
#           ifdef LPHASH_STATS
            table->not_found += 1;
#           endif
            return pos;
        }

        /* The key comparison is made only when the full hashes agree. */

        if (bucket->hash != hash)
        {   continue;
        }

#       ifdef LPHASH_STATS
        table->matches += 1;
#       endif

        if (lphash_match (bucket->entry, key))
        {   return pos;
        }
    }

    /* As many probes as there are buckets were made without finding an
       empty bucket or a match. */

    abort(); /* shouldn't happen - table should always have an empty bucket */
}
/* TRY TO EXPAND A TABLE TO DOUBLE ITS SIZE.  Nothing is changed if the
   doubled size would not fit in an int, or if the new bucket array can't
   be allocated.  Otherwise, all entries are moved to the new bucket array
   and the old one is freed. */

static void expand_table (lphash_table_t table)
{
    lphash_bucket_t *prev_buckets;
    int prev_size, prev_offset, src;

    if (table->size > INT_MAX/2)
    {   return;
    }

    /* Remember the present bucket array, since allocate_buckets will
       overwrite the fields of 'table' that describe it. */

    prev_buckets = table->buckets;
    prev_size = table->size;
    prev_offset = table->buckets_offset;

    if (!allocate_buckets (table, prev_size*2))
    {   return;
    }

    /* Re-insert every occupied bucket, using the same probe sequence as
       a search, but looking only for an empty bucket. */

    for (src = 0; src < prev_size; src++)
    {
        lphash_hash_t h;
        int home, step, dst;

        if (prev_buckets[src].entry == LPHASH_NO_ENTRY)
        {   continue;
        }

        h = prev_buckets[src].hash;
        home = h & (table->size-1);

        for (step = 0; ; step++)
        {
#           ifdef LPHASH_LINEAR
            dst = (home+step) & (table->size-1);
#           else
            dst = home^step;
#           endif

            if (table->buckets[dst].entry == LPHASH_NO_ENTRY)
            {   break;
            }
        }

        table->buckets[dst] = prev_buckets[src];
    }

    /* Free the old array at the address at which it was allocated. */

    lphash_free ((char *)prev_buckets - prev_offset);
}
/* INSERT AN ENTRY IN A HASH TABLE.  If an entry with the given hash that
   matches the key is already present, it is returned.  Otherwise, a new
   entry made by lphash_make_entry is stored and returned.  LPHASH_NO_ENTRY
   is returned, with no entry added, if the table has overflowed, or if
   lphash_make_entry itself produces LPHASH_NO_ENTRY. */

lphash_entry_t lphash_insert (lphash_table_t table, lphash_hash_t hash,
                              lphash_key_t key)
{
    int ix = search (table, hash, key);
    lphash_entry_t entry = table->buckets[ix].entry;

    if (entry != LPHASH_NO_ENTRY)
    {   return entry;
    }

    /* Try expanding when exactly at either threshold.  The bucket to use
       must be found again afterwards, since the bucket array may have
       been replaced. */

    if (table->occupied == table->threshold
         || table->occupied == table->threshold2)
    {   expand_table (table);
        ix = search (table, hash, key);
    }

    /* Still at or past the second threshold (for the current size) means
       that expansion failed and the table has overflowed. */

    if (table->occupied >= table->threshold2)
    {   return LPHASH_NO_ENTRY;
    }

    entry = lphash_make_entry(key);

    /* An LPHASH_NO_ENTRY result can't be stored as an entry, since the
       bucket would still be seen as empty.  Return before touching the
       table, so that the count of occupied buckets stays accurate. */

    if (entry == LPHASH_NO_ENTRY)
    {   return LPHASH_NO_ENTRY;
    }

    table->buckets[ix].entry = entry;
    table->buckets[ix].hash = hash;

    table->occupied += 1;

    return entry;
}
/* SEARCH FOR AN ENTRY IN A HASH TABLE.  Returns the contents of the bucket
   at which the search stopped, which is either the matching entry or, if
   the search ended at an empty bucket, LPHASH_NO_ENTRY. */

lphash_entry_t lphash_lookup (lphash_table_t table, lphash_hash_t hash,
                              lphash_key_t key)
{
    int where = search (table, hash, key);
    lphash_bucket_t *bucket = table->buckets + where;

    return bucket->entry;
}
/* LPHASH - LOCALLY-PROBED HASH TABLE FACILITY - HEADER FILE.
Copyright (c) 2017 Radford M. Neal.
The lphash library is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. */
/* See lphash-doc for general information on the lphash library. */
#include <stdlib.h>
/* INTERNAL STRUCTURE OF A HASH TABLE. Should not be directly
referenced by the application, except for testing and performance
evaluation (using the statistics fields). */
/* One bucket of a hash table.  A bucket is empty when its 'entry' field is
   LPHASH_NO_ENTRY, in which case its 'hash' field is not meaningful. */

typedef struct
{ lphash_entry_t entry; /* Entry in this bucket, or LPHASH_NO_ENTRY */
lphash_hash_t hash; /* Full hash value for this entry (if present) */
} lphash_bucket_t;
typedef struct
{
int size; /* Number of buckets in table */
int occupied; /* Number of occupied buckets */
int threshold; /* Threshold for increasing table size */
int threshold2; /* Threshold for declaring overflow */
lphash_bucket_t *buckets; /* Pointer to an array of 'size' buckets */
int buckets_offset; /* Offset added to align buckets */
# ifdef LPHASH_STATS
int searches; /* Number of searches done */
int not_found; /* Number of searches where entry not found */
int probes; /* Number of buckets probed */
int matches; /* Number of calls of lphash_match */
# endif