#!/usr/bin/perl -w $ID = q$Id: afs-balance,v 1.7 2007-10-16 00:01:53 eagle Exp $; # # afs-balance -- Balance AFS servers using AMPL. # # Written by Neil Crellin # and Russ Allbery # Copyright 1998, 1999, 2005 Board of Trustees, Leland Stanford Jr. University # # This program is free software; you may redistribute it and/or modify it # under the same terms as Perl itself. # # Takes a list of server/partition pairs as arguments (with . being a wildcard # representing all partitions on that server), and generates an output file # named vols.*.dat in the current directory (where * is a representation of # the partitions being balanced) that is valid AMPL input for the constraint # problem of the fewest volume transfers to provide a reasonable balance. # Also post-processes the output from AMPL and generates the necessary # commands to implement the balancing solution. # # Must be run on lsdb as a user with access to the password information for # the afsdb database. ############################################################################## # Site configuration ############################################################################## # 1 - allowable fudge in the balance solution (how much each partition can # differ in used space and accesses). $FUDGE = 0.975; ############################################################################## # Modules and declarations ############################################################################## require 5.004; use strict; use vars qw($FUDGE $ID); use Getopt::Long qw(GetOptions); use Stanford::LSDB::AFSDB; ############################################################################## # Utility functions ############################################################################## # Sort by machine name, doing a numeric sort on a trailing digit and a # lexicographic sort on the machine name. sub machinesort ( @ ) { map { $$_[0] } sort { $$a[1] cmp $$b[1] || $$a[2] <=> $$b[2] } map { [ $_, split (/(\d+)$/, $_, 2) ] } @_ } # Used for heredocs to make them more readable. sub unquote ( $ ) { my ($string) = @_; $string =~ s/^:\s+//mg; return $string; } # Build a useful filename from the hash of servers and partitions that we're # going to be looking at. Takes a reference to the server hash. sub filename ( $ ) { my ($servers) = @_; my $name = join ('+', map { my $server = $_; $server =~ s/^afssvr//; $$servers{$_}[0] eq 'vicep.' ? $server : map { my $part = $_; $part =~ s/^vicep//; $part =~ tr/\[\]//d; "$server$part" } @{ $$servers{$_} }; } machinesort keys %$servers); return $name; } ############################################################################## # Server disk usage gathering ############################################################################## # Given the comamnd-line arguments, build a hash of server names that we're # affecting, where the value of each key is an array of partition # specifications (which may include regex matches). Return the hash as a # list. sub server_list ( @ ) { my (@args) = @_; my %servers; while (@args) { my ($server, $part) = splice (@args, 0, 2); $server = 'afssvr' . $server if ($server =~ /^\d+$/); if ($part eq '.') { $part = 'vicep.'; } elsif ($part =~ /^[a-z]-[a-z]$/) { $part = "vicep[$part]"; } else { $part = '/vicep' . $part unless ($part =~ m%^/vicep%); die "$0: invalid partition $part\n" if ($part !~ m%^/vicep[a-z]+$%); $part =~ s%^/%%; } push (@{ $servers{$server} }, $part); } for (keys %servers) { @{ $servers{$_} } = sort @{ $servers{$_} } } return %servers; } # Given a hash of server names, the values being anonymous arrays of # partitions, populate hashes containing the partition capacities and the # current usage of the given server partitions. The keys of the latter two # hashes will be .. Return the total capacity of all # server partitions given. sub partinfo ( $$$ ) { my ($servers, $parts, $capacity) = @_; local *PARTINFO; my ($header, $totalcapacity); for my $server (machinesort keys %$servers) { my @partitions = @{ $$servers{$server} }; open (PARTINFO, "partinfo $server |") or die "$0: can't fork: $!\n"; while () { print, next if /^\s*Partition/ && !$header++; my ($partition) = m%\s/(vicep[a-z]+):% or next; next unless grep { $partition =~ /^$_$/ } @partitions; my $key = "$server.$partition"; my $total = (split)[2]; push (@$parts, $key); push (@$capacity, $total); $totalcapacity += $total; print; } close PARTINFO; } return $totalcapacity; } # Given a volume, return its size and the location of its RW copy as a list, # or the empty list on error. sub volinfo { my $volume = shift; local (*VEX, $_); my ($size, $server, $partition); open (VEX, "vos examine $volume |") or die "$0: can't fork vos: $!\n"; while () { if (/^\Q$volume\E\s+\d+\s+(?:RW|RO)\s+(\d+)/) { $size = $1; ($server, $partition) = split (' ', scalar ); $server =~ s/\.stanford\.edu//i or next; last; } } 1 while ; close VEX; return defined $size ? ($size, "$server $partition") : (); } # Given a list of server.partition names and a corresponding list of # capacities, modify the lists so that the first will instead be a list of # servers and the second will be a list of capacities of those servers. sub compress_parts ( $$ ) { my ($parts, $capacities) = @_; my %new; for (my $i = 0; $i < @$parts; $i++) { my ($server) = split (/\./, $$parts[$i]); $new{$server} += $$capacities[$i]; } my @newparts = sort keys %new; my @newcaps; for (@newparts) { push (@newcaps, $new{$_}); } @$parts = @newparts; @$capacities = @newcaps; } ############################################################################## # Database queries ############################################################################## # Submit a SQL query. sub query_database ( $$$$;$ ) { my ($dbh, $what, $where, $condition, $order) = @_; my $request = "select $what from $where where $condition"; $request .= " order by $order" if $order; return $dbh->selectall_arrayref ($request); } # Construct the constraint on our database queries to return only those # volumes that we're balancing. We first check to see if there are any # read/write volumes on the partitions that we're looking at. If there are, # we balance read/write volumes; otherwise, we'll balance RO volumes. # # Takes the database handle and list of server.partition strings. We return # the SQL where clause that will select only the right partitions. sub query_constraint ( $@ ) { my ($dbh, @parts) = @_; my $request = join (' or ', map { my ($server, $partition) = /^([^.]+)\.(.*)/; if ($partition eq 'vicep.') { "server = '$server'"; } elsif ($partition =~ /^vicep\[([a-z-]+)\]$/) { my ($start, $end) = split ('-', $1); my @parts; for ($start .. $end) { push (@parts, "'/vicep$_'"); } "(server = '$server' and part in (" . join (', ', @parts) . "))"; } else { "(server = '$server' and part = '/$partition')"; } } @parts); $request = "($request)"; my $result = query_database ($dbh, 'count(*)', 'volumes', "$request and type = 'RW'"); my $voltype = ($$result[0][0] > 0) ? 'RW' : 'RO'; $request .= " and type = '$voltype'"; return $request; } ############################################################################## # AMPL file generation ############################################################################## # Given a total, a list of factors, a list of locations, and a fudge value, # output a constraint listing each partition and a weighed ideal formed by # multiplying the total, factor, and fudge. If fudge is not provided, it # defaults to $FUDGE. sub constraint ( $\@\@;$ ) { my ($total, $factors, $parts, $fudge) = @_; $fudge ||= $FUDGE; my $output; for (my $i = 0; $i < @$parts; $i++) { $output .= "$$parts[$i] "; $output .= int ($fudge * $total * $$factors[$i]) . "\n"; } $output .= ";\n\n"; return $output; } # Given a reference to an array of results, iterate through the array and # print them all out given a printf format string to use. sub printf_result ( $$$ ) { my ($fh, $format, $result) = @_; for my $line (@$result) { printf $fh $format, @$line; } } # Print out the data for equal volumes in each location. We don't allow for # any fudge in this at all. Takes the database handle, the query, and the # locations and factors arrays. sub ampl_constraint_count ( $$\@\@ ) { my ($dbh, $request, $factors, $parts) = @_; # Calculate the ideal number of volumes per partition and output that to # the AMPL data file for use as a constraint. print VOLSDAT 'param: PARTS: eqnovols := ' . '# defines set "PARTS" and param "eqnovols"' . "\n"; my $result = query_database ($dbh, 'count(*)', 'volumes', $request); my $count = $$result[0][0]; print VOLSDAT constraint ($count, @$factors, @$parts, 1.0); # Output the current number of volumes per partition. print VOLSDAT "# current volumes per partition\n"; $result = query_database ($dbh, 'server, part, count(*)', 'volumes', $request . " group by server, part"); printf_result (\*VOLSDAT, "# %-12s %-8s: %5d\n", $result); print VOLSDAT "\n\n"; } # Print out the data for roughly even amounts of disk usage in each location. # Takes the database handle, the query, and the locations and factors arrays. sub ampl_constraint_size ( $$\@\@ ) { my ($dbh, $request, $factors, $parts) = @_; # Calculate roughly even amounts of disk usage per partition and output # that to the AMPL data file for use as a constraint. print VOLSDAT "param capacity := # capacity constraint values\n"; my $result = query_database ($dbh, 'sum(used)', 'volumes', $request); print VOLSDAT constraint ($$result[0][0], @$factors, @$parts); # Output the current disk usage per partition. print VOLSDAT "# current disk usage per partition\n"; $result = query_database ($dbh, 'server, part, sum(used)', 'volumes', $request . " group by server, part"); printf_result (\*VOLSDAT, "# %-12s %-8s: %10d\n", $result); print VOLSDAT "\n\n"; } # Print out the data for roughly even amounts of quota in each location. We # don't use this data currently; it over-constrains the problem too much and # makes the solution take too long. Takes the database handle, the query, and # the locations and factors arrays. sub ampl_constraint_quota ( $$\@\@ ) { my ($dbh, $request, $factors, $parts) = @_; # Calculate roughly even amounts of allocated quota per partition for use # as a constraint. This isn't used by default. print VOLSDAT "param quotasum := # total quota constraint values\n"; my $result = query_database ($dbh, 'sum(quota)', 'volumes', $request); print VOLSDAT constraint ($$result[0][0], @$factors, @$parts); # Output the current allocated quota per partition. print VOLSDAT "# current allocated quota per partition\n"; $result = query_database ($dbh, 'server, part, sum(quota)', 'volumes', $request . " group by server, part"); printf_result (\*VOLSDAT, "# %-12s %-8s: %10d\n", $result); print VOLSDAT "\n\n"; } # Print out data for roughly even numbers of accesses in each location. Takes # the database handle, the query, and the locations and factors arrays. sub ampl_constraint_accesses ( $$\@\@ ) { my ($dbh, $request, $factors, $parts) = @_; # Calculate roughly even numbers of accesses per partition for use as a # constraint. This assumes that large partitions can handle more # accesses, which isn't too bad but isn't quite what you want. print VOLSDAT "param eqaccess := # roughly equal accesses per partition\n"; my $result = query_database ($dbh, 'sum(accesses)', 'volumes', $request); print VOLSDAT constraint ($$result[0][0], @$factors, @$parts); # Output the current number of accesses per partition. print VOLSDAT "# current accesses per partition\n"; $result = query_database ($dbh, 'server, part, sum(accesses)', 'volumes', $request . " group by server, part"); printf_result (\*VOLSDAT, "# %-12s %-8s: %10d\n", $result); print VOLSDAT "\n\n"; } # Print out the current sizes, quotas, and accesses of volumes. Takes the # database handle and the query. sub ampl_volume_data ( $$ ) { my ($dbh, $request) = @_; # Output the names of all the affected volumes and their sizes. print VOLSDAT 'param: VOLS: size := # defines "VOLS" and "size"', "\n"; my $result = query_database ($dbh, 'volname, used', 'volumes', $request, 'used desc'); printf_result (\*VOLSDAT, "%-22s %12d\n", $result); print VOLSDAT ";\n\n"; # Output the names of all the affected volumes and their quotas. This # isn't used by default. print VOLSDAT "param quota := # quota of each volume\n"; $result = query_database ($dbh, 'volname, quota', 'volumes', $request, 'used desc'); printf_result (\*VOLSDAT, "%-22s %12d\n", $result); print VOLSDAT ";\n\n"; # Output the names of all affected volumes and the number of accesses they # received the previous day. print VOLSDAT "param accesses := # how many accesses per volume\n"; $result = query_database ($dbh, 'volname, accesses', 'volumes', $request, 'used desc'); printf_result (\*VOLSDAT, "%-22s %12d\n", $result); print VOLSDAT ";\n\n"; } # Print out the current locations of volumes, including server and partition. # Takes the database handle, the SQL filter, and the array of locations. sub ampl_locations ( $$\@ ) { my ($dbh, $request, $parts) = @_; # Build a table of location strings. A location string is a # space-separated set of 0s and 1s for each server/partition pair we're # dealing with, with a 1 in the column corresponding to that location and # 0s in all other columns. In other words, we're generating a zero-filled # matrix with a diagonal of 1s. my $vector = join (' ', 1, (0) x $#$parts); my %location; for (@$parts) { $location{$_} = $vector; } continue { $vector =~ s/1 0/0 1/; } # Output a table showing which partition each volume is located on. print VOLSDAT "param located (tr) : # where is volume now\n", join (' ', @$parts), " :=\n"; my $result = query_database ($dbh, 'volname, server, part', 'volumes', $request, 'used desc'); my ($volume, $server, $part); for my $line (@$result) { my ($volume, $server, $part) = @$line; for ($volume, $server, $part) { s/ //g; } $part =~ s%/%.%; if ($$parts[0] =~ /\./) { printf VOLSDAT "%-22s %s\n", $volume, $location{$server . $part}; } else { printf VOLSDAT "%-22s %s\n", $volume, $location{$server}; } } print VOLSDAT ";\n\n"; } # Generate an AMPL data file, listing accesses, size, and volume count. Takes # the file name, a flag saying whether to balance only by server, the # locations and capacity arrays, and the total capacity. sub ampl_write_data ( $$\@\@$ ) { my ($name, $server, $parts, $capacity, $total) = @_; open (VOLSDAT, "> vols.$name.dat") or die "$0: can't create vols.$name.dat: $!\n"; # Open a database connection and get the SQL constraint that selects just # the volumes we're dealing with. my $dbh = Stanford::LSDB::AFSDB->connect; my $request = query_constraint ($dbh, @$parts); # Modify the locations hash to only go by server if $server is set. if ($server) { compress_parts ($parts, $capacity); } # Determine the fraction of capacity represented by each location. my @factors; for (my $i = 0; $i < @$capacity; $i++) { $factors[$i] = $$capacity[$i] / $total; } # Print out the constraints on the solution. ampl_constraint_count ($dbh, $request, @factors, @$parts); ampl_constraint_size ($dbh, $request, @factors, @$parts); ampl_constraint_quota ($dbh, $request, @factors, @$parts); ampl_constraint_accesses ($dbh, $request, @factors, @$parts); # Print out the current volume data and locations. ampl_volume_data ($dbh, $request); ampl_locations ($dbh, $request, @$parts); # Done. close VOLSDAT; $dbh->disconnect; } # Generate the static portion of the AMPL input script. Takes the name to use # in the file name and the name of the model file to use. sub ampl_write_control ( $$ ) { my ($name, $control) = @_; open (AMPLIN, "> vols.$name.in") or die "$0: can't create vols.$name.in: $!\n"; print AMPLIN unquote (<<"EOA"); : model /afs/ir/service/afs/data/$control.mod; : data vols.$name.dat; : options solver cplexamp; : options show_stats 1; : options cplex_options 'prestats=1 mipdisplay=1 mipsolutions=2'; : display {i in PARTS} sum {j in VOLS} located[i,j]; : display {i in PARTS} sum {j in VOLS} located[i,j]*accesses[j]; : display {i in PARTS} sum {j in VOLS} located[i,j]*size[j]; : display {i in PARTS} sum {j in VOLS} located[i,j]*quota[j]; : solve; : display {i in PARTS} sum {j in VOLS} assigned[i,j]; : display {i in PARTS} sum {j in VOLS} assigned[i,j]*accesses[j]; : display {i in PARTS} sum {j in VOLS} assigned[i,j]*size[j]; : display {i in PARTS} sum {j in VOLS} assigned[i,j]*quota[j]; : display {i in PARTS, j in VOLS} assigned[i,j]*(1-located[i,j]); : quit; EOA close AMPLIN; } ############################################################################## # Solution parsing ############################################################################## # Expecting either a solution file in $ARGV[0] or the solution on standard # input, parse the AMPL solution to the balancing problem and print to # standard output a list of volumes that should be moved, their current # location as a whitespace-separated server and partition pair, and the # destination to which they should be moved as a whitespace-separated server # and partition pair. (This file can be processed with mvto -s -L.) sub ampl_parse_solution () { local $_; while (<>) { last if $_ eq "assigned[i,j]*(1 - located[i,j]) [*,*] (tr)\n"; } # If the next lines begin with #, they're mappings from codes to # server/partition pairs because all the pairs didn't fit across the # screen. The pairs look like: # # # $3 = afssvr1.vicepc # # Build a mapping table with them. my %mapping; while (<>) { last unless /^\#/; my ($variable, $value) = (split)[1,3]; $mapping{"'" . $variable . "'"} = $value; } # The next line will be the column headers that tell us what columns # represent what servers and partitions. The first column will be a ':'; # the rest will either be server.partition or a variable that's in the # mapping hash. my @columns = split; die "$0: can't parse solution at line $.\n" unless $columns[0] eq ':'; shift @columns; if ($columns[$#columns] eq ':=') { pop @columns; } else { die "$0: can't parse solution at line $.\n" unless (<> =~ /^\s*:=\s*$/); } for (@columns) { $_ = $mapping{$_} if defined $mapping{$_}; s%\.% /%; } # Now we have a whole set of volume names followed by columns of 0s and at # most one 1. If there are no 1s, the volume shouldn't be moved. # Otherwise, it should be moved from wherever it is now to the server and # partition indicated by the 1. while (<>) { last if /^;/; my ($volume, $movement) = split (' ', $_, 2); die "$0: can't parse solution at line $.\n" if $movement =~ /[^10\s]/; next if $movement !~ /^((?:0\s+)*)1/; my $prefix = $1; my $index = ($prefix =~ tr/0//); my $destination = $columns[$index]; if ($destination !~ / /) { $destination .= ' .'; } # Now, get the current location of the volume so that we can write # output suitable for processing with mvto -s. This currently doesn't # work for read-only volumes. my ($size, $location) = volinfo $volume; warn "$0: unable to examine volume $volume, skipping\n" if !$location; print "$volume $location $destination\n"; } } ############################################################################## # Main routine ############################################################################## my $fullpath = $0; $0 =~ s%^.*/%%; # Parse command-line options. my ($help, $partition, $results, $server, $version); Getopt::Long::config ('bundling'); GetOptions ('f|fudge=f' => \$FUDGE, 'h|help' => \$help, 'p|partition' => \$partition, 'r|results' => \$results, 's|server' => \$server, 'v|version' => \$version) or exit 1; if ($help) { print "Feeding myself to perldoc, please wait....\n"; exec ('perldoc', '-t', $fullpath); } elsif ($version) { my $version = join (' ', (split (' ', $ID))[1..3]); $version =~ s/,v\b//; $version =~ s/(\S+)$/($1)/; $version =~ tr%/%-%; print $version, "\n"; exit; } die "Usage: $0 [ ...]\n" unless (@ARGV % 2 == 0 || $results); die "$0: only one of -s or -p may be given\n" if ($server && $partition); die "$0: -r may not be combined with -p or -s\n" if ($results && ($server || $partition)); die "Usage: $0 -r []\n" if ($results && @ARGV > 1); # If called with -r, parse the AMPL results and generate the commands required # to implement the balance. Otherwise, read the servers and partitions from # the command line, get the usage, and then write the AMPL input and data # file. if ($results) { ampl_parse_solution; } else { my %servers = server_list (@ARGV); my (@parts, @capacity, @factors); my $totalcapacity = partinfo (\%servers, \@parts, \@capacity); my $name = filename (\%servers); ampl_write_data ($name, $server, @parts, @capacity, $totalcapacity); if ($partition) { ampl_write_control ($name, 'balance-size'); } else { ampl_write_control ($name, 'balance'); } } exit 0; __END__ ############################################################################## # Documentation ############################################################################## =head1 NAME afs-balance - Balance AFS servers using AMPL =head1 SYNOPSIS afs-balance [B<-hpsv>] [B<-f> I] I I [I I ...] afs-balance B<-r> [I] =head1 REQUIREMENTS The Stanford::LSDB::AFSDB Perl module (or some editing of the script to substitute in some other way of connecting to the database), AMPL with the CPLEX solver to solve the linear programming problem, B to obtain partition capacity information, and B to implement the solution. =head1 DESCRIPTION Drawing data about AFS volumes from the AFS Oracle database, B phrases an AFS server balancing problem as an AMPL problem so that it can be solved with linear programming tools. B takes a list of server and partition pairs on the command line and, for each volume on those servers and partitions, gathers the size, quota, and accesses in the past day from the information loaded the previous night into the AFS database. It also determines the total capacity available across all of those server partitions and then defines target goals for balancing, specifying that each partition must contain volumes that add up to at least 97.5% of its fair share of the total accesses and total size. (Quota is not used for balancing.) This 97.5% fudge factor can be adjusted with the B<-f> command-line argument. AFS servers may be specified as a number, in which case C will be implicitly added to the beginning of the number to form the system name. The partition may be a single partition, a partition range expressed as something like C, or C<.> which indicates all partitions on that server. All of this data is written out into a file whose name starts with C and ends with C<.dat> and contains an abbreviated list of the servers and partitions affected in the name. An AMPL control file, referencing one of the AMPL models used to specify the constraints, is then written out with the same file name but ending in C<.in>. AMPL should then be run and given as input the C<.in> file, saving its output to a C<.out> file. Finally, once AMPL generates a C<.out> file, B should be run on the C<.out> file with the B<-r> flag, saving its output in a separate file. This mode will parse the AMPL output and generate a list of volumes that have to be moved, where they should be moved from, and where they should be moved to. That output can then be used as input to C. See the L section for a detailed example. By default, B balances servers so that each partition has a number of volumes proportionate to its share of the total space, satisfies the size balance, and satisfies the accesses balance. This can take a very long time when attempting to balance across many partitions. In this situation, you may want to instead do the balance in two parts using B<-s> and then B<-p> as described below. If B is run with the B<-s> option, rather than balancing at the level of partitions, it will balance at the level of servers. Restrictions on which partitions on each server are involved in the balance will still be honored, but the AMPL solution will only indicate what server the volume should be moved to rather than server and partition, and will only attempt to equalize accesses, volumes, and space usage between the servers. The resulting output file, after processing with B<-r>, will tell B to put the volume on the least full partition on the destination srver. If B is run with the B<-p> option, the balancing will be done at the partition level but accesses will be ignored. Balancing accesses across partitions on a single server isn't particularly useful, and this will allow the balance solution to be found far faster. The combination of these two options can balance much larger cells. First, balance between some set of servers using B<-s> and implement that solution. Then, wait for the AFS database to update its data and balance between partitions using B<-p> on each of the individual servers involved. It will take longer and will mean more total volume moves, but will arrive at the same quality of solution as doing a full balance. =head1 OPTIONS =over 4 =item B<-f> I, B<--fudge>=I The ratio by which to reduce the size and access targets for each location in the balance. The default is 0.975. Note that accesses can vary widely and often some AFS volumes have significantly more accesses than others, so setting this value too high can cause AMPL to fail to find a solution. =item B<-h>, B<--help> Print out this documentation (which is done simply by feeding the script to C). =item B<-p>, B<--partition> Balance without regard to accesses, which is the mode that one should use when balancing across the partitions of a single server. The data file is the same, but a different model will be used that doesn't include a constraint on accesses. =item B<-r>, B<--results> Rather than generate the AMPL input files, process an AMPL output file and produce a list of volumes, source locations, and destination locations reflecting the volume moves that AMPL considered necessary. This output will go to standard output (although generally you want to redirect it to a file) and will contain one line for each volume, formatted as: where the first server/partition pair is where the volume is now, and the second pair is where it should be moved to. This output is suitable as input to C. =item B<-s>, B<--server> Balance by server, ignoring partitions. When these AMPL results are processed by B with the B<-r> flag, the resulting destination information will specify the partition as C<.>. (This means that if not all partitions of some servers are valid destinations for this balance, you will have to modify the output file before running B.) =item B<-v>, B<--version> Print the version of B and exit. =back =head1 EXAMPLES To do a full balance of all partitions on afssvr2, partitions /vicepa through /vicepc on afssvr5, and partition /vicepa on afssvr6, one would run the command: afs-balance 2 . 5 a-c 6 a B will write out two files: vols.2+5a-c+6a.dat vols.2+5a-c+6a.in To calculate the balancing solution, run: ampl < vols.2+5a-c+6a.in > vols.2+5a-c+6a.out Once B finishes, review the C<.out> file to make sure that AMPL found a valid solution. It starts by printing out data about the initial server state and then prints out status information about whether it found a solution. If it did find a valid solution, run: afs-balance -r vols.2+5a-c+6a.out > vols.2+5a-c+6a.list Review that list to make sure it looks reasonable and then apply the balancing solution with: mvto -s -L vols.2+5a-c+6a.list If, instead, you wished to balance all partitions between afssvr1, afssvr2, afssvr3, and partitions /vicepa through /vicepk on afssvr4, this may be too large of a problem for AMPL to solve in a reasonable amount of time. Instead, you can use the two-phase approach. Start by balancing between just the servers: afs-balance -s 1 . 2 . 3 . 4 a-k ampl < vols.1+2+3+4a-k.in > vols.1+2+3+4a-k.out Make sure that AMPL got a solution and then run: afs-balance -r vols.1+2+3+4a-k.out > vols.1+2+3+4a-k.list Normally, you could just apply this list with B, but note that only partitions /vicepa through /vicepk are participating in the balance on afssvr4 (the other partitions might be used for something else, like read-only replicas). B is not currently smart enough to figure that out, so you need to modify the output list to change any occurance of C to C. Once you've done that, run: mvto -s -L vols.1+2+3+4a-k.list to apply the server balance. Now, wait for a day for the nightly refresh of the AFS database to pick up the new volume locations, and then run individual server balances for each of the affected servers: afs-balance -p 1 . ampl < vols.1.in > vols.1.out afs-balance -r vols.1.out > vols.1.list mvto -s -L vols.1.out (checking the AMPL output first before applying the results), repeating this for each of the affected servers. =head1 FILES =over 4 =item F =item F The AMPL models used for balancing, which express the variables and constraints involved in the AMPL program. F is identical to F but doesn't include the constraint on accesses (it's used when B is invoked with the B<-p> option). =back =head1 NOTES Current, B always requests the CPLEX solver in its AMPL input file. This is the solver that we use, and it works well for solving this type of balancing problem. We have not experimented with using any other solver, but it's easy enough to change the script to specify a different one. Sometimes, AMPL will be unable to come up with any solution and will bail out saying that the problem isn't feasible, meaning that there is no possible arrangement of the volumes that satisfies the constraints. Usually this is due to one volume with an extremely high access count in the past day, such that there is no combination of other volumes that can balance that one out and give partitions roughly equal total access counts. This can also be caused by a single particularly large volume. When this happens, there are a few things that you can do: =over 4 =item 1. Wait for the next day and try the balance again. If the access spike is anomalous, this often works. =item 2. Reduce the fudge parameter using the B<-f> option. This tells AMPL to tolerate more deviation in the sizes and accesses of the locations across which it is balancing. AMPL will still equalize the number of volumes, but the solution won't be as nice in equal size and accesses. Note that this parameter affects both size and access count; there isn't a way to affect just one and not the other. =item 3. Manually edit the resulting C<.dat> file and reduce the threshold requirements for either size or accesses, whichever is causing problems (you can usually tell by scanning the volume information and looking for particular volumes with anomalously large values). =item 4. Lie to AMPL by editing the resulting C<.dat> file and reducing the access count or size of the offending volume. AMPL will then construct a solution based on the value that you told it. This is safe to do for accesses; be careful with doing this for size and don't overfill a partition. If you do this, you'll also need to reduce all the threshold values for that statistic accordingly, since otherwise since you've reduced the total access count (or total size) there won't be enough to spread around to meet the thresholds required for each partition. =back Unfortunately, none of the ways of working around AMPL's failure to find a solution are automated. They all require a bit of manual fiddling and manual investigation. =head1 BUGS B currently cannot handle balancing read-only replicas properly. It can prepare the AMPL problem, although the way it determines what type of volume to balance is by checking to see if there are any read/write volumes on the affected partitions and balancing only read/write volumes if there are, which may not be what is desired. It cannot, however, produce a list for B from the result, and the list that it does produce will cause B to do the wrong thing. Using data from a SQL database of AFS volume information is required. There is no way of doing a balance from vos listvol output, even though enough information would be available to do so. (You can always just load the vos listvol output into a database, but then you have to modify this script since the table names are hard-coded.) As mentioned above, B<-r> doesn't correctly handle the case where only some of a server's partitions are participating in a balance run with B<-s>. In particular, it will output instructions that will cause B to pick the least loaded partition across the entire server, not limited to just the participating partitions. The output has to be modified before using B. The B requirement isn't strictly necessary and is only nice for the pretty output when preparing the balance. B should cope if B isn't available. =head1 AUTHORS Written by Neil Crellin and Russ Allbery , based on an idea and an AMPL model by Neil Crellin. =head1 COPYRIGHT AND LICENSE Copyright 1998, 1999, 2005 Board of Trustees, Leland Stanford Jr. University. This program is free software; you may redistribute it and/or modify it under the same terms as Perl itself. =head1 SEE ALSO mvto(1), partinfo(1) The AMPL and CPLEX implementation that we've always used for balancing is the one from L. This is commercial software that has to be purchased. It may be possible to use this program with a free version of AMPL and a free solver, but we have not investigated doing so. B is available from L. B is available from L. The current version of this program is available its web page at L. =cut