Commit bb48551b authored by lars's avatar lars
Browse files

Initial CRM merge. Not compiled in by default, nothing changes if not

activated. Not functional yet!

CVS patchset: 2291
CVS date: 2004/01/14 12:00:30
parent 2cd0cea7
......@@ -52,11 +52,11 @@ MAINTAINERCLEANFILES = Makefile.in aclocal.m4 configure DRF/config-h.in \
SUBDIRS = libltdl doc debian replace include lib \
ldirectord heartbeat telecom\
cts linux-ha membership contrib config @SNMP_SUBAGENT_DIR@
cts linux-ha membership contrib config @SNMP_SUBAGENT_DIR@ @CRM_DIR@
DIST_SUBDIRS = libltdl doc debian replace include lib \
ldirectord heartbeat telecom\
cts linux-ha membership contrib config
cts linux-ha membership contrib config @CRM_DIR@
HANDY_DOCS = doc/ChangeLog doc/GettingStarted.html doc/DirectoryMap.txt
HBDOCS = doc/heartbeat_api.html
......
......@@ -8,7 +8,7 @@ dnl Initialiase, with sanity check of a unique file in the hierarchy
AC_INIT(heartbeat.spec.in)
AC_CONFIG_AUX_DIR(.)
AC_REVISION($Revision: 1.220 $) dnl cvs revision
AC_REVISION($Revision: 1.221 $) dnl cvs revision
AC_CANONICAL_HOST
......@@ -1301,6 +1301,41 @@ telecom/Makefile \
telecom/checkpointd/Makefile
)
dnl ---------------------------------------------------------------------
dnl Optional build of the new Cluster Resource Manager (CRM).
dnl
dnl Outputs:
dnl   WITH_CRM     config.h define, only when the CRM is enabled
dnl   CRM_DIR      "crm" when enabled, empty otherwise (used in SUBDIRS)
dnl   CRM_ENABLED  1 when enabled, 0 otherwise
dnl   CRM_LIBS     substituted as-is; nothing here assigns it yet
dnl ---------------------------------------------------------------------
AC_ARG_ENABLE([crm],
[ --enable-crm Compile the new Cluster Resource Manager.
This is still in very early developement stage. [default=no]])
dnl configure sets enable_crm for --enable-crm, --enable-crm=ARG and also
dnl for --disable-crm, where the value is "no".  Testing only whether the
dnl variable is set would therefore switch the CRM on for --disable-crm,
dnl so look at the value: anything other than empty or "no" enables it.
if test "x${enable_crm}" != "x" && test "x${enable_crm}" != "xno"; then
	echo "Enabling the new Cluster Resource Manager"
	AC_DEFINE_UNQUOTED(WITH_CRM, 1, Use the new Cluster Resource Manager)
	CRM_DIR="crm"
	dnl sinclude([config/crm_subagent.m4])
	dnl LIB_CRM
	CRM_ENABLED=1
else
	CRM_ENABLED=0
	CRM_DIR=""
fi
AC_SUBST(CRM_ENABLED)
AC_SUBST(CRM_LIBS)
AC_SUBST(CRM_DIR)
dnl testing again makes the eventual output a little easier to read
dnl Key off the value computed above so both decisions always agree.
if test "x${CRM_ENABLED}" = "x1"; then
AC_OUTPUT(crm/Makefile \
crm/common/Makefile \
crm/cib/Makefile \
crm/crmd/Makefile \
crm/lrmd/Makefile \
crm/pengine/Makefile \
crm/tengine/Makefile \
crm/admin/Makefile \
)
fi
dnl subpackages configuration - perhaps configure it properly some other time
dnl when it has been discussed at linux-ha-dev
dnl AC_CONFIG_SUBDIRS(stonith heartbeat)
......@@ -1348,6 +1383,16 @@ case x"$SNMP_SUBAGENT_DIR" in
;;
esac
dnl Report whether the new CRM will be built.  An empty CRM_DIR means no,
dnl "crm" means yes; any other value prints nothing.
if test x"$CRM_DIR" = x; then
	AC_MSG_RESULT([ Build New CRM = no])
elif test x"$CRM_DIR" = xcrm; then
	AC_MSG_RESULT([ Build New CRM = yes])
	dnl AC_MSG_RESULT([ CRM libraries = "${CRM_LIBS}"])
fi
AC_MSG_RESULT([ CC_WARNINGS = "${CC_WARNINGS}"])
AC_MSG_RESULT([ Mangled CFLAGS = "${CFLAGS}"])
#
# Copyright (C) 2004 Andrew Beekhof
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
#
# Makefile.in is generated by automake, so "make maintainer-clean" may remove it.
MAINTAINERCLEANFILES = Makefile.in
## Subdirectories of the CRM tree; make recurses into them in the order listed.
SUBDIRS = common cib crmd lrmd pengine tengine admin
CRM Admin Command-line Interface
##################################
Proposed Man Page
##################################
Usage
========================
crmadmin --daemon [-V] [daemon options]
--query [-V] [-o <object_type> [-t <type>]] [-i <id>]
--delete [-V] -o <object_type> -i <id>
--create [-V] -o <object_type> -i <id> [-D <desc>] [create_modify options]
--modify [-V] -o <object_type> -i <id> [-D <desc>] [create_modify options]
Top-level Options
========================
--daemon : Perform cluster admin functions
--query : Retrieve information from the CIB
--delete : Remove objects from the CIB
--create : Create objects in the CIB
--modify : Modify objects in the CIB
--verbose : Be verbose
Short equivalent: -V
--help : Show help and exit
Short equivalent: -?
Common Options
========================
--obj_type <object_type>
<object_type>: (node|resource|constraint)
Set the type of object we are interested in.
Short equivalent: -o
--id <id>
<id>: The unique identifier for a new or pre-existing object in the CIB.
Short equivalent: -i
--description <desc>
<desc>: A free-text description of the object.
Set a human-readable description to aid administration.
Short equivalent: -D
[daemon options]
========================
--recalc_tree
Recalculate the resource dependency tree.
Short equivalent: -R
--flush_recalc_tree
Like -R but recalculate (flush) the CIB first (Preferred).
Short equivalent: -F
--migrate_res <res>=<instance>
Migrate one resource, ie. instance <instance> of <res>, away from its current node.
Short equivalent: -I
--migrate_from <node>
Migrate all resources away from <node>.
Short equivalent: -M
--unload <node>
As per -M, but then unload Heartbeat on <node>.
Short equivalent: -U
--disconnect <node>
Disconnect <node> but don't unload resources and don't recalculate
the dependency tree. This is for the upgrade process. For now,
see lmb for details.
Short equivalent: -A
--status <node>
Display the status of <node>. Ie. Are all sub-systems running?
Is it the DC? What resources is it running? Health. Etc.
Short equivalent: -S
--health
Display the health of the cluster.
Short equivalent: -H
--clear <node>
Clear all system-generated constraints for <node>.
Ie. we've manually fixed the problem(s).
Short equivalent: -C
--elect_dc
Force the election of a new DC. May result in the same node
being elected. The only way to guarantee a different node is to
perform a -U on the DC.
Short equivalent: -E
--whois_dc
Tells which node the DC is running on.
Short equivalent: -W
[create_modify options]
========================
--obj_type node:
--subtype (node|ping)
What type of node is this.
Short equivalent: -t
--obj_type resource:
--subtype (none|IPAddr|Nfs|Apache|Drbd|Stonith|...)
What type of resource is this.
Short equivalent: -t
--max_instances <max_instances>
<max_instances>: Positive integer. The maximum instances of the
resource that should be running at any one time.
Default: 1.
Short equivalent: -m
--list_add <node>=<weight> (can be specified more than once)
Add a node/weight to the resource.
Deletes are handled before Adds.
<node> : The id of a pre-existing node in the CIB.
<weight>: Integer. Relative preference for running on that node.
Higher numbers translate into higher preference.
Short equivalent: -a
--list_del <node>=<weight> (can be specified more than once)
Delete a node/weight from the resource.
Deletes are handled before Adds.
Short equivalent: -d
--list_wipe
Wipe the complete list of nodes/weights from the resource
Short equivalent: -w
--obj_type constraint:
--subtype (None|StartAfter|SameNode|Block|...)
What type of constraint is this.
Short equivalent: -t
--constraint_del (never|stonith|active|localRestart|...)
When should the constraint be removed.
Default: never
Short equivalent: -c
--resource <resource> (can be specified a maximum of twice)
Overwrite the list of resources for the constraint.
<resource>: The id of a pre-existing resource in the CIB.
Short equivalent: -r
--list_add <var_name>=<var_value> (can be specified more than once)
Add a variable name/value pair to the constraint.
Deletes are handled before Adds.
<var_name> : The variable name.
<var_value>: The value the variable must take in order for the
constraint to be satisfied.
Possibly allow regular expressions here.
Short equivalent: -a
--list_del <var_name>=<var_value> (can be specified more than once)
Delete a variable name/value pair from the constraint.
Deletes are handled before Adds.
Short equivalent: -d
--list_wipe
Wipe the complete list of variable name/value pairs from the constraint.
Short equivalent: -w
#
# Copyright (C) 2004 Andrew Beekhof
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
#
# Makefile.in is generated by automake, so "make maintainer-clean" may remove it.
MAINTAINERCLEANFILES = Makefile.in
# Header search path: the include, libltdl and linux-ha directories, each
# looked up in both the build tree and the source tree.
INCLUDES = -I$(top_builddir)/include -I$(top_srcdir)/include \
-I$(top_builddir)/libltdl -I$(top_srcdir)/libltdl \
-I$(top_builddir)/linux-ha -I$(top_srcdir)/linux-ha
# Installation directories derived from the configure-time prefixes.
# halibdir is the install location for halib_PROGRAMS; hadir and commmoddir
# are not referenced again in the visible part of this file.
hadir = $(sysconfdir)/ha.d
halibdir = $(libdir)/@HB_PKG@
commmoddir = $(halibdir)/modules/comm
havarlibdir = $(localstatedir)/lib/@HB_PKG@
# Handed to the compiler as the PIDFILE string macro through AM_CFLAGS.
# NOTE(review): the file name is crmd.pid although this directory builds
# crmadmin -- confirm that this is intended.
PIDFILE = $(localstatedir)/run/crmd.pid
# libxml2 compile and link options; the backquoted xml2-config command is
# left for the shell to run when the compile/link command is executed.
XML_FLAGS = `xml2-config --cflags`
XML_LIBS = `xml2-config --libs`
# CRM directory below havarlibdir (original note: "fifos with path").
crmdir = $(havarlibdir)/crm
# NOTE(review): crmuid is taken from HA_CCMUID -- confirm that reusing the
# CCM user id is intended.  Neither value is used in the visible part of
# this file.
apigid = @HA_APIGID@
crmuid = @HA_CCMUID@
# Libraries linked into the program built here.  $(LIBRT) is assigned just
# after this list; that works because make expands variables on use.
COMMONLIBS = $(top_builddir)/lib/clplumbing/libplumb.la \
$(top_builddir)/$(CRM_DIR)/common/libcrmcommon.la \
$(top_builddir)/$(CRM_DIR)/cib/libcib.la \
$(top_builddir)/lib/apphb/libapphb.la \
$(top_builddir)/heartbeat/libhbclient.la \
$(GLIBLIB) \
$(LIBRT)
LIBRT = @LIBRT@
# Configure-selected CFLAGS plus the PIDFILE macro, quoted so that the
# compiler sees a C string literal.
AM_CFLAGS = @CFLAGS@ \
-DPIDFILE='"$(PIDFILE)"'
## libraries
# No libraries are built in this directory.
lib_LTLIBRARIES =
## binary progs
# crmadmin is installed into $(halibdir).
halib_PROGRAMS = crmadmin
## SOURCES
#noinst_HEADERS = config.h control.h crmd.h
noinst_HEADERS =
crmadmin_SOURCES = adminmain.c
crmadmin_CFLAGS = $(XML_FLAGS) -I$(top_builddir) -DHA_VARLIBDIR='"@HA_VARLIBDIR@"'
# Libraries (including the -l/-L options printed by xml2-config) go in
# LDADD rather than LDFLAGS: LDFLAGS is placed before the objects on the
# link line, so libraries listed there may fail to resolve symbols with
# static or --as-needed linking.  libxml2 follows the project libraries
# that use it.
crmadmin_LDADD = $(COMMONLIBS) \
	$(top_builddir)/libltdl/libltdlc.la \
	$(top_builddir)/replace/libreplace.la \
	$(XML_LIBS) \
	$(LIBRT)
# Remove run-time debris left behind by test runs.  This uses automake's
# clean-local hook: defining clean-generic directly would replace the rule
# automake generates under that name, and with it the CLEANFILES handling.
clean-local:
	rm -f *.log *.debug *.xml *~

# Placeholder hooks; nothing extra to install or uninstall yet.
install-exec-local:

uninstall-local:
This diff is collapsed.
CIB: Purpose
==========================
The CIB is described quite well in section 5 of lmb's crm.txt (checked into CVS in the crm directory) so I won't repeat everything here. Suffice to say that it stores the configuration and runtime data required for cluster-wide resource management.
CIB: External Representation
===========================
I believe there have been a number of discussions on this and I'm going to take it as given that we are all (even if grudgingly) agreed that the CIB will be passed around as XML.
CIB: Internal Representation
===========================
Internally, the CIB will be stored and manipulated as a libxml2 (as used by stonith code, so this is not a new dependency) XML document. This may not be the most efficient data representation but the trade-off is that we don't spend half our time encoding/decoding every time messages are sent/retrieved. It also means that fields can be added and deprecated more easily if required (ie. no encode/decode methods to rewrite) and, in theory, should allow for better compatibility between CIB versions.
CIB: Information Structure
===========================
The CIB is divided up into 4 main sections: nodes, resources, constraints and status. It contains a version string, a flag to indicate if this was read from a file or generated (possibly should be replaced with a generation number) and a timestamp.
All information carrying elements carry an "id" field which is an alpha-numeric string which must also be unique for that element type, a description to aid administration and a timestamp. The timestamps are required for the DC (Designated Controller) to determine the latest cluster-wide configuration.
Resources carry a "max_instances", an "op_timeout" and a "priority" field. The op_timeout is intended for use by the LRM and CRM in order to determine when to start worrying about the length of time (in seconds) a start, stop or restart is taking. The priority is used by the Policy Engine in order to determine the order in which to process dependencies and, more drastically, to know which resources to leave out if all dependencies cannot be satisfied. The max_instances field is intended to address the replicated resource issue. When a resource is started, it is told which instance number it has and it is then up to it to decide if it should be a master, a slave or some other variation, as the resource itself is the best equipped to do so (ie. the resource knows if there can be 1, 2 or 20 masters).
Constraints have their own section as many (such as startAfter, startBefore) will be relevant to more than one resource, making the resource the wrong place to store them. Constraints may be put in place by the CRM automatically, for example after a failed startup, and these will need to be cleared at some point. The clear_on attribute is intended to determine when this point should be.
Resource state information is also separated out as this section is "runtime" data and is "compiled" by the DC from information sent by all nodes in the cluster. An argument could also be made that the same could be done for node_status and health; however, the primary source of this data is the CCM (ie. not compiled) and information about "bad" nodes should persist even after all good nodes have exited. I will leave this as a matter for debate if people feel strongly about it.
In places, a "source" attribute has been added so that the CRM is able to know where this information came from. This is helpful during the merging process (performed by a new DC and perhaps periodically) as it allows the CRM to allow nodes to be authoritative about themselves if appropriate (ie. which resources it is running, but perhaps not always about its own health).
To avoid duplication of data, state entries only carry references to nodes and resources. Likewise resources and constraints only contain references to the nodes that they can (or explicitly cannot via a negative weight) run on.
CIB: Messaging
===========================
The rationale behind the "cib_operation" and "cib_section" elements of the command is to give the DC the ability, if desired, to optimise the CIB's performance by querying for and updating only certain sections of the CIB. In the initial prototype, this will most likely not be used.
CIB: Schema/DTD
===========================
<!--
Internally to the CIB and when written to disk, "cib" is used as the root element. However, when sent as a message to another node or sub-system (the schema describing this will be available shortly) this will not be the case. With this in mind, all functions have been written to operate on xmlNodePtr objects instead of xmlDocPtr objects.
-->
<!ELEMENT cib (nodes, resources, constraints, status)>
<!ATTLIST cib
version #CDATA '1'
generated (true|false) 'true'
timestamp #CDATA '0'>
<!-- list placeholders -->
<!ELEMENT nodes (node*)>
<!ATTLIST nodes>
<!ELEMENT resources (resource*)>
<!ATTLIST resources>
<!ELEMENT constraints (constraint*)>
<!ATTLIST constraints>
<!ELEMENT status (state*)>
<!ATTLIST status>
<!-- the information carrying elements -->
<!ELEMENT node>
<!ATTLIST node
id #CDATA
description #CDATA
node_type (node|ping) 'ping'
health (0|10|20|30|40|50|60|70|80|90|100) '0'
node_status (down|up|active|stonith|stonith_failed) 'down'
source #CDATA
timestamp #CDATA '0'>
<!-- the nv_pairs in this case correspond to arguments to the init scripts -->
<!ELEMENT resource (node_reference*,nv_pair*)>
<!ATTLIST resource
id #CDATA
description #CDATA
resource_cat (none|ocf|init|resource|...) 'none'
resource_type (none|IPAddr|Nfs|Apache|Drbd|Stonith|...) 'none'
op_timeout #CDATA
priority #CDATA
max_instances #CDATA '1'
timestamp #CDATA '0'>
<!ELEMENT node_reference>
<!--
action is only used during CIB updates. To know if the node_reference should be added or removed from the resource
-->
<!ATTLIST node_reference
id #CDATA
action? #CDATA
weight #CDATA
timestamp #CDATA '0'>
<!ELEMENT constraint (nv_pair)*>
<!--
r_id_2, var_name and var_value will only be valid/required depending on the value of constraint_type
-->
<!ATTLIST constraint
id #CDATA
description #CDATA
constraint_type (None|StartAfter|SameNode|Block|...) 'None'
r_id_1 #CDATA
r_id_2? #CDATA
clear_on (never|stonith|active|localRestart|...) 'never'
timestamp #CDATA '0'>
<!ELEMENT nv_pair>
<!--
for consistency elsewhere, id is the name of the variable.
type may be used later on to distinguish between variable scopes.
action is only used during CIB updates. To know if the nv_pair should be added or removed from the constraint
-->
<!ATTLIST nv_pair
id #CDATA
action? #CDATA
var_type #CDATA
var_value #CDATA>
<!ELEMENT state>
<!-- runtime data -->
<!ATTLIST state
id #CDATA
description #CDATA
res_id #CDATA
instance #CDATA
max_instances #CDATA '1'
node_id #CDATA
resource_status (stopped|starting|running|stopping|failed) 'stopped'
source #CDATA
timestamp #CDATA '0'>
CIB: Sample:
======================
<cib version="1" generated="false" timestamp="1071655595">
<nodes>
<node id="node1"
name="production server"
node_type="node"
health="0"
node_status="down"
source="unknown"
timestamp="1071655595"/>
<node id="node2"
name="an experimental 3GogoHz intel"
node_type="ping"
health="0"
node_status="down"
source="unknown"
timestamp="1071655595"/>
<node id="node3"
name="an old sparc in the corner"
node_type="ping"
health="0"
node_status="down"
source="unknown"
timestamp="1071655595"/>
<node id="node4"
name="my personal desktop"
node_type="node"
health="0"
node_status="down"
source="unknown"
timestamp="1071655595"/>
</nodes>
<resources>
<resource id="res1"
resource_type="apache"
description="my web site"
op_timeout="20"
priority="20"
max_instance="1"
timestamp="1071655595">
<node_reference id="node2"
weight="20"
timestamp="1071655595"/>
</resource>
<resource id="res2"
resource_type="drbd"
description="apache data"
op_timeout="60"
priority="20"
max_instance="2"
timestamp="1071655595">
<node_reference id="node1"
weight="10"
timestamp="1071656274"/>
<node_reference id="node2"
weight="100"
timestamp="1071656274"/>
<node_reference id="node3"
weight="5"
timestamp="1071656274"/>
</resource>
<resource id="res3"
resource_type="dhcp"
description="dhcp"
op_timeout="45"
priority="10"
max_instance="1"
timestamp="1071655595">
<node_reference id="node2"
weight="-1"
timestamp="1071655595"/>
<node_reference id="node4"
weight="10"
timestamp="1071655595"/>
</resource>
</resources>
<constraints>
<constraint id="con1"
description="start apache after drbd"
constraint_type="StartAfter"
clear_on="never"
timestamp="1071655595"
res_id_1="res1"
res_id_2="res2"/>
<constraint id="con2"
description="only start apache on systems where the kernel is 2.4.20-gentoo-r9"
constraint_type="SysVar"
clear_on="never"
timestamp="1071655595"
res_id_1="res1"
var_name="KERNEL_RELEASE"
var_value="2.4.20-gentoo-r9"/>
<constraint id="failed-node1-res2-1"
description="system generated: instance 1 of res2 failed on node1"
constraint_type="Block"
clear_on="stonith"
timestamp="1071655595"
res_id_1="res2"
var_name="blockHost"
var_value="node1"/>
</constraints>
<status>
<state id="res2-1"
res_id="res2"
instance="1"
max_instance="2"
node_id="node3"
resource_status="running"
source="none"
timestamp="1071656274"/>
<state id="res3-1"
res_id="res3"
instance="1"
max_instance="1"
node_id="node4"
resource_status="running"
source="none"
timestamp="1071656274"/>
<state id="res2-2"
res_id="res2"
instance="2"
max_instance="2"
node_id="node1"
resource_status="starting"
source="none"
timestamp="1071656274"/>
<state id="res1-1"
res_id="res1"
instance="1"
max_instance="1"
node_id="node1"
resource_status="starting"
source="none"
timestamp="1071656274"/>
</status>
</cib>
#
# Copyright (C) 2004 Andrew Beekhof
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
#
MAINTAINERCLEANFILES = Makefile.in
INCLUDES = -I$(top_builddir)/include -I$(top_srcdir)/include \
-I$(top_builddir)/libltdl -I$(top_srcdir)/libltdl \
-I$(top_builddir)/linux-ha -I$(top_srcdir)/linux-ha
hadir = $(sysconfdir)/ha.d
halibdir = $(libdir)/@HB_PKG@
commmoddir = $(halibdir)/modules/comm
havarlibdir = $(localstatedir)/lib/@HB_PKG@
PIDFILE = $(localstatedir)/run/crmd.pid
XML_FLAGS = `xml2-config --cflags`