Cfengine: Glasgow Worker Nodes
From GridPP Wiki
This is a cfengine excerpt from Glasgow showing how we manage our worker nodes with cfengine. Note that it's probably a good idea to split your cfengine file into sections once it gets big, but this is presented as a single cfagent.conf file for simplicity.
Note that worker nodes are in the following classes: worker, grid, torque, autofs, scientific_sl_3 and, of course, the built-in cfengine class "any", which matches every host.
##########################
#
# cfagent.conf for UKI-SCOTGRID-GLASGOW
#
# $Id: cfagent.conf 201 2006-11-22 12:43:00Z root $
#
##########################
# NOTE(review): this file was reflowed from a whitespace-collapsed wiki paste;
# declarations and comments are unchanged, only line breaks and comments added.

# Host class definitions: hardware ranges first, then per-service aliases,
# then compound roles built from them.
groups:
   worker  = ( HostRange(node,1-140) )
   gridsvr = ( HostRange(svr,016-023) )
   disksvr = ( HostRange(disk,032-041) )

   # Nicer names for grid servers
   ce       = ( svr016 )
   dpm      = ( svr018 )
   mon      = ( svr019 )
   ui       = ( svr020 )
   sitebdii = ( svr021 )

   ## Compound groups
   # Batch system nodes
   torque = ( ce worker )
   # Nodes which look at the autofs system
   autofs = ( worker ce ui )
   # All grid nodes
   grid = ( worker gridsvr disksvr )

# Global cfagent behaviour plus per-class variables used below
# (e.g. $(skel) is the master copy tree, $(java) the JDK directory name).
control:
   any::
      # copy runs before shellcommands, which the copy: section relies on
      actionsequence = ( directories files links editfiles packages copy shellcommands tidy )
      domain  = ( beowulf.cluster )
      skel    = ( /var/cfengine/inputs/skel )
      scripts = ( /var/cfengine/inputs/scripts )
      syslog  = ( on )
      ChecksumUpdates = ( on )
      DefaultPkgMgr     = ( rpm )
      RPMcommand        = ( /bin/rpm )
      RPMInstallCommand = ( "/usr/bin/yum -y install %s" )

   torque::
      torquesvr    = ( 10.141.255.16 )
      torquequeues = ( dteam:atlas:alice:cms:lhcb:biom:pheno:zeus:sixt:ilc:babar:dzero:ngs:ops:glpnp:glpppt:glee:glbio )
      # It would be nicer if this could be defined more dynamically...

   scientific_sl_3::
      java = ( j2sdk1.4.2_12 )

   !gridsvr::
      # cfengine will run once an hour, so splay the cluster across 50 minutes
      # to ensure the load on the master server is not too high
      splaytime = ( 50 )

   gridsvr::
      splaytime = ( 5 )

# Directories that must exist (with ownership/permissions) before copies run.
directories:
   grid::
      # We need to create the locations for files to be copied into - copy runs before shellcommands
      /opt/glite/yaim                 mode=0700 owner=root group=root
      /opt/glite/yaim/etc             mode=0755 owner=root group=root
      /opt/glite/yaim/functions/local mode=0755 owner=root group=root
      /etc/grid-security              mode=0755 owner=root group=root

   scientific_sl_3::
      /usr/java/$(java) mode=0755 owner=root group=root

   torque::
      /var/spool/pbs/mom_priv mode=0755 owner=root group=root
      /gridstorage            mode=0755 owner=root group=root
      /home                   mode=0755 owner=root group=root

# Symbolic links managed by cfengine.
links:
   grid.scientific_sl_3::
      # In YAIM we give java location as /usr/java/current, and link here
      # (It would be much better if grid stuff just used /etc/java.conf or JAVA_HOME, *sigh*)
      /usr/java/current -> /usr/java/j2sdk1.4.2_12

   torque::
      /gridstorage/exptsw -> /grid/exp_soft

# RPM installs; elsedefine raises a class on first install so the
# shellcommands section can perform one-shot follow-up configuration.
packages:
   any::
      ganglia-gmond action=install elsedefine=newgmon

   grid::
      lcg-CA action=install version=1.10
      # N.B. note that runyaim happens when the yaim package is first installed
      glite-yaim action=install elsedefine=runyaim

   worker::
      # Worker node meta package
      glite-WN action=install

   torque|ui::
      # Packages requested by VOs
      gcc            action=install
      gcc-ssa        action=install
      gcc-g77        action=install
      gcc-g77-ssa    action=install
      zsh            action=install
      zlib-devel     action=install
      compat-libstdc++ action=install

# Housekeeping: age is in days, so job scratch files survive long jobs.
tidy:
   any::
      # Make sure this is > max wallclock for the batch system!
      /tmp pattern=* age=12 recurse=inf

# File distribution from the $(skel) tree on the master; type=sum means
# "copy only when the checksum differs", and define= raises a restart class.
copy:
   # Master server is exempt from default files
   any.!svr031::
      # Root's environment
      $(skel)/common/root/.bash_profile         mode=0644 dest=/root/.bash_profile type=sum
      $(skel)/common/root/.bashrc               mode=0644 dest=/root/.bashrc type=sum
      $(skel)/common/root/.ssh/authorized_keys  mode=0644 dest=/root/.ssh/authorized_keys type=sum

      # Security for servers and ssh
      $(skel)/common/etc/ssh/ssh_known_hosts mode=644 dest=/etc/ssh/ssh_known_hosts type=sum
      $(skel)/common/etc/ssh/ssh_config      mode=644 dest=/etc/ssh/ssh_config define=newssh type=sum
      $(skel)/common/etc/ssh/sshd_config     mode=600 dest=/etc/ssh/sshd_config define=newssh type=sum

      # Time, time, time!
      $(skel)/common/etc/ntp.conf         mode=644 dest=/etc/ntp.conf define=newntp type=sum
      $(skel)/common/etc/ntp/step-tickers mode=644 dest=/etc/ntp/step-tickers define=newntp type=sum

      # Environment for interactive shells (and jobs)
      $(skel)/common/etc/profile.d/proxy.csh  mode=644 dest=/etc/profile.d/proxy.csh type=sum
      $(skel)/common/etc/profile.d/proxy.sh   mode=644 dest=/etc/profile.d/proxy.sh type=sum
      $(skel)/common/etc/profile.d/tmpdir.csh mode=644 dest=/etc/profile.d/tmpdir.csh type=sum
      $(skel)/common/etc/profile.d/tmpdir.sh  mode=644 dest=/etc/profile.d/tmpdir.sh type=sum

      # Post boot signaling script
      # This is an important part of Glasgow's auto install - it signals to the master server when the first boot
      # after kickstart has happened.
      $(skel)/common/etc/rc.d/rc.local mode=644 dest=/etc/rc.d/rc.local type=sum

   grid::
      # GridPP VOMS + YAIM setup for workers
      $(skel)/grid/etc/grid-security/vomsdir/voms.gridpp.ac.uk mode=0644 dest=/etc/grid-security/vomsdir/voms.gridpp.ac.uk type=sum
      $(skel)/yaim/site-info.def mode=600 dest=/opt/glite/yaim/etc/site-info.def type=sum
      $(skel)/yaim/groups.conf   mode=644 dest=/opt/glite/yaim/etc/groups.conf type=sum
      $(skel)/yaim/users.conf    mode=644 dest=/opt/glite/yaim/etc/users.conf type=sum
      # We don't let YAIM do users - so override to a blank function
      $(skel)/yaim/local/config_users mode=644 dest=/opt/glite/yaim/functions/local/config_users type=sum

   torque::
      $(skel)/torque/shosts.equiv mode=644 dest=/etc/ssh/shosts.equiv type=sum

   torque|ui|dpm|disksvr::
      # On torque hosts (and the ui) distribute the shadow and password files to avoid problems with account locking, etc.
      # DPM and disk servers need this for gridftp
      $(skel)/torque/passwd mode=644 dest=/etc/passwd define=localpoolaccounts type=sum
      $(skel)/torque/shadow mode=400 dest=/etc/shadow type=sum
      $(skel)/torque/group  mode=644 dest=/etc/group type=sum

   autofs::
      $(skel)/autofs/auto.cluster mode=0644 dest=/etc/auto.cluster define=newautofs type=sum
      $(skel)/autofs/auto.grid    mode=0644 dest=/etc/auto.grid define=newautofs type=sum
      $(skel)/autofs/auto.master  mode=0644 dest=/etc/auto.master define=newautofs type=sum

   gridsvr::
      $(skel)/gridsvr/etc/gmond.conf mode=0644 dest=/etc/gmond.conf define=newgmon type=sum

   worker::
      $(skel)/worker/etc/gmond.conf mode=0644 dest=/etc/gmond.conf define=newgmon type=sum
      # Worker nodes need to route directly to grid and disk servers even when their public IPs are given
      $(skel)/worker/etc/sysconfig/network-scripts/route-eth1 mode=0644 dest=/etc/sysconfig/network-scripts/route-eth1 define=needroute type=sum

# One-shot commands, gated on the classes raised by packages: and copy:
# above (service restarts after config changes, YAIM runs after install).
shellcommands:
   newgmon::
      "/sbin/service gmond restart" umask=022
   newssh::
      "/sbin/service sshd restart" umask=022
   newntp::
      "/sbin/service ntpd restart" umask=022
   newautofs::
      "/sbin/service autofs restart" umask=022
   newtorque::
      "/sbin/service pbs_server restart" umask=022
   newmaui::
      "/sbin/service maui restart" umask=022
   newhttp::
      "/sbin/service httpd restart" umask=022
   localpoolaccounts.!ui::
      "/var/cfengine/inputs/scripts/local_pool_accounts /etc/passwd" umask=022
   worker.needroute::
      "/sbin/ip route add 130.209.239.16/28 dev eth1" umask=022
      "/sbin/ip route add 130.209.239.32/28 dev eth1" umask=022
   worker.runyaim::
      # Only define startmom if this looks ok, otherwise withdraw from the batch system
      "/opt/glite/yaim/scripts/configure_node /opt/glite/yaim/etc/site-info.def WN" umask=022 define=startmom elsedefine=stopmom
   worker.startmom::
      "/sbin/chkconfig pbs_mom on" umask=022
      "/sbin/service pbs_mom restart" umask=022
   worker.stopmom::
      "/sbin/chkconfig pbs_mom off" umask=022
      "/sbin/service pbs_mom stop" umask=022