diff options
Diffstat (limited to 'feature-server-pool/src/main/java/org/onap')
15 files changed, 11361 insertions, 0 deletions
diff --git a/feature-server-pool/src/main/java/org/onap/policy/drools/serverpool/Bucket.java b/feature-server-pool/src/main/java/org/onap/policy/drools/serverpool/Bucket.java new file mode 100644 index 00000000..2236506e --- /dev/null +++ b/feature-server-pool/src/main/java/org/onap/policy/drools/serverpool/Bucket.java @@ -0,0 +1,2495 @@ +/* + * ============LICENSE_START======================================================= + * feature-server-pool + * ================================================================================ + * Copyright (C) 2020 AT&T Intellectual Property. All rights reserved. + * ================================================================================ + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * ============LICENSE_END========================================================= + */ + +package org.onap.policy.drools.serverpool; + +import static org.onap.policy.drools.serverpool.ServerPoolProperties.BUCKET_CONFIRMED_TIMEOUT; +import static org.onap.policy.drools.serverpool.ServerPoolProperties.BUCKET_TIME_TO_LIVE; +import static org.onap.policy.drools.serverpool.ServerPoolProperties.BUCKET_UNCONFIRMED_GRACE_PERIOD; +import static org.onap.policy.drools.serverpool.ServerPoolProperties.BUCKET_UNCONFIRMED_TIMEOUT; +import static org.onap.policy.drools.serverpool.ServerPoolProperties.DEFAULT_BUCKET_CONFIRMED_TIMEOUT; +import static org.onap.policy.drools.serverpool.ServerPoolProperties.DEFAULT_BUCKET_TIME_TO_LIVE; +import static org.onap.policy.drools.serverpool.ServerPoolProperties.DEFAULT_BUCKET_UNCONFIRMED_GRACE_PERIOD; +import static org.onap.policy.drools.serverpool.ServerPoolProperties.DEFAULT_BUCKET_UNCONFIRMED_TIMEOUT; +import static org.onap.policy.drools.serverpool.ServerPoolProperties.getProperty; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.DataInputStream; +import java.io.DataOutputStream; +import java.io.IOException; +import java.io.PrintStream; +import java.io.Serializable; +import java.net.InetSocketAddress; +import java.nio.charset.StandardCharsets; +import java.security.MessageDigest; +import java.security.NoSuchAlgorithmException; +import java.util.Base64; +import java.util.Comparator; +import java.util.Date; +import java.util.HashMap; +import java.util.LinkedList; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.Queue; +import java.util.Random; +import java.util.TreeMap; +import java.util.TreeSet; +import java.util.UUID; +import java.util.concurrent.Callable; +import java.util.concurrent.ConcurrentLinkedQueue; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.FutureTask; 
+import java.util.concurrent.LinkedTransferQueue; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.TimeoutException; + +import javax.ws.rs.client.Entity; +import javax.ws.rs.client.WebTarget; +import javax.ws.rs.core.MediaType; +import javax.ws.rs.core.Response; + +import lombok.Getter; +import lombok.Setter; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * The server pool uses an algorithmic way to map things like transactions + * (identified by a 'requestID') and locks (identified by a string key) + * into a server handling that transaction or lock. It does this by mapping + * the string name into one of a set of predefined hash buckets, with each + * bucket being assigned to one of the active servers. + * In other words: + * string key -> hash bucket (fixed mapping, known to all servers) + * hash bucket -> server (assignments may change when servers go up or down, + * but remains fairly static when the system is stable) + * With this approach, there is no global dynamic table that needs to be + * updated as transactions, or other objects come and go. + * Each instance of class 'Bucket' corresponds to one of the hash buckets, + * there are static methods that provide the overall abstraction, as well + * as some supporting classes. 
+ */ + +@Getter +@Setter +public class Bucket { + private static Logger logger = LoggerFactory.getLogger(Bucket.class); + + /* + * Listener class to handle state changes that may lead to + * reassignments of buckets + */ + private static EventHandler eventHandler = new EventHandler(); + + // Used to hash keywords into buckets + private static MessageDigest messageDigest; + + static { + // register Listener class + Events.register(eventHandler); + + // create MD5 MessageDigest -- used to hash keywords + try { + messageDigest = MessageDigest.getInstance("MD5"); + } catch (NoSuchAlgorithmException e) { + throw new ExceptionInInitializerError(e); + } + } + + /* + * Values extracted from properties + */ + + private static String timeToLive; + private static long confirmedTimeout; + private static long unconfirmedTimeout; + private static long unconfirmedGracePeriod; + + /* + * Tags for encoding of bucket data + */ + private static final int END_OF_PARAMETERS_TAG = 0; + private static final int OWNER_UPDATE = 1; + private static final int OWNER_NULL = 2; + private static final int PRIMARY_BACKUP_UPDATE = 3; + private static final int PRIMARY_BACKUP_NULL = 4; + private static final int SECONDARY_BACKUP_UPDATE = 5; + private static final int SECONDARY_BACKUP_NULL = 6; + + // This is the table itself -- the current size is fixed at 1024 buckets + public static final int BUCKETCOUNT = 1024; + private static Bucket[] indexToBucket = new Bucket[BUCKETCOUNT]; + + static { + // create hash bucket entries, but there are no assignments yet + for (int i = 0 ; i < indexToBucket.length ; i += 1) { + Bucket bucket = new Bucket(i); + indexToBucket[i] = bucket; + } + } + + // this is a list of all objects registered for the 'Backup' interface + private static List<Backup> backupList = new LinkedList<>(); + + // 'rebalance' is a non-null value when rebalancing is in progress + private static Object rebalanceLock = new Object(); + private static Rebalance rebalance = null; + + // bucket 
number + private volatile int index; + + // owner of the bucket -- this is the host where messages should be directed + private volatile Server owner = null; + + // this host will take over as the owner if the current owner goes down, + // and may also contain backup data to support persistence + private volatile Server primaryBackup = null; + + // this is a secondary backup host, which can be used if both owner and + // primary backup go out in quick succession + private volatile Server secondaryBackup = null; + + // when we are in a transient state, certain events are forwarded to + // this object + private volatile State state = null; + + // storage for additional data + private Map<Class<?>, Object> adjuncts = new HashMap<Class<?>, Object>(); + + // BACKUP data (only buckets for where we are the owner, or a backup) + + // TBD: need fields for outgoing queues for application message transfers + + /** + * This method triggers registration of 'eventHandler', and also extracts + * property values. + */ + static void startup() { + int intTimeToLive = + getProperty(BUCKET_TIME_TO_LIVE, DEFAULT_BUCKET_TIME_TO_LIVE); + timeToLive = String.valueOf(intTimeToLive); + confirmedTimeout = + getProperty(BUCKET_CONFIRMED_TIMEOUT, DEFAULT_BUCKET_CONFIRMED_TIMEOUT); + unconfirmedTimeout = + getProperty(BUCKET_UNCONFIRMED_TIMEOUT, + DEFAULT_BUCKET_UNCONFIRMED_TIMEOUT); + unconfirmedGracePeriod = + getProperty(BUCKET_UNCONFIRMED_GRACE_PERIOD, + DEFAULT_BUCKET_UNCONFIRMED_GRACE_PERIOD); + } + + /** + * Constructor -- called when building the 'indexToBucket' table. + * + * @param index the bucket number + */ + private Bucket(int index) { + this.index = index; + } + + /** + * This method converts a String keyword into the corresponding bucket + * number. 
+ * + * @param value the keyword to be converted + * @return the bucket number + */ + public static int bucketNumber(String value) { + /* + * It would be possible to create a new 'MessageDigest' instance each + * It would be possible to create a new 'MessageDigest' instance each + * time this method is called, and avoid the need for synchronization. + * However, past experience has taught me that this might involve a + * considerable amount of computation, due to internal table + * initialization, so it shouldn't be done this way for performance + * reasons. + * If we start running into blocking issues because there are too many + * simultaneous calls to this method, we can initialize an array of these + * objects, and iterate over them using an AtomicInteger index. + */ + synchronized (messageDigest) { + /* + * Note that we only need the first two bytes of this, even though + * 16 bytes are produced. There may be other operations that can be + * used to more efficiently map keyword -> hash bucket. The only + * issue is the same algorithm must be used on all servers, and it + * should produce a fairly even distribution across all of the buckets. + */ + byte[] digest = messageDigest.digest(value.getBytes()); + return ((Byte.toUnsignedInt(digest[0]) << 8) + | Byte.toUnsignedInt(digest[1])) & 0x3ff; + } + } + + /** + * Fetch the server associated with a particular bucket number. + * + * @param bucketNumber a bucket number in the range 0-1023 + * @return the Server that currently handles the bucket, + * or 'null' if none is currently assigned + */ + public static Server bucketToServer(int bucketNumber) { + Bucket bucket = indexToBucket[bucketNumber]; + return bucket.getOwner(); + } + + /** + * Fetch the bucket object associated with a particular bucket number. 
+ * + * @param bucketNumber a bucket number in the range 0-1023 + * @return the Bucket associated with this bucket number + */ + public static Bucket getBucket(int bucketNumber) { + return indexToBucket[bucketNumber]; + } + + /** + * Fetch the bucket object associated with a particular keyword. + * + * @param value the keyword to be converted + * @return the Bucket associated with this keyword + */ + public static Bucket getBucket(String value) { + return indexToBucket[bucketNumber(value)]; + } + + /** + * Determine if the associated key is assigned to the current server. + * + * @param key the keyword to be hashed + * @return 'true' if the associated bucket is assigned to this server, + * 'false' if not + */ + public static boolean isKeyOnThisServer(String key) { + int bucketNumber = bucketNumber(key); + Bucket bucket = indexToBucket[bucketNumber]; + return bucket.getOwner() == Server.getThisServer(); + } + + /** + * This method is called to start a 'rebalance' operation in a background + * thread, but it only does this on the lead server. Being balanced means + * the following: + * 1) Each server owns approximately the same number of buckets + * 2) If any server were to fail, and the designated primaries take over + * for all of that server's buckets, all remaining servers would still + * own approximately the same number of buckets. + * 3) If any two servers were to fail, and the designated primaries were + * to take over for the failed server's buckets (secondaries would take + * for buckets where the owner and primary are OOS), all remaining + * servers would still own approximately the same number of buckets. + * 4) Each server should have approximately the same number of + * (primary-backup + secondary-backup) buckets that it is responsible for. + * 5) The primary backup for each bucket must be on the same site as the + * owner, and the secondary backup must be on a different site. 
+ */ + private static void rebalance() { + if (Leader.getLeader() == Server.getThisServer()) { + Rebalance rb = new Rebalance(); + synchronized (rebalanceLock) { + // the most recent 'Rebalance' instance is the only valid one + rebalance = rb; + } + + new Thread("BUCKET REBALANCER") { + @Override + public void run() { + /* + * copy bucket and host data, + * generating a temporary internal table. + */ + rb.copyData(); + + /* + * allocate owners for all buckets without an owner, + * and rebalance bucket owners, if necessary -- + * this takes card of item #1, above. + */ + rb.allocateBuckets(); + + /* + * make sure that primary backups always have the same site + * as the owner, and secondary backups always have a different + * site -- this takes care of #5, above. + */ + rb.checkSiteValues(); + + /* + * adjust primary backup lists to take care of item #2, above + * (taking #5 into account). + */ + rb.rebalancePrimaryBackups(); + + /* + * allocate secondary backups, and take care of items + * #3 and #4, above (taking #5 into account). + */ + rb.rebalanceSecondaryBackups(); + + try { + synchronized (rebalanceLock) { + /* + * if another 'Rebalance' instance has started in the + * mean time, don't do the update. + */ + if (rebalance == rb) { + /* + * build a message containing all of the updated bucket + * information, process it internally in this host + * (lead server), and send it out to others in the + * "notify list". + */ + rb.generateBucketMessage(); + rebalance = null; + } + } + } catch (IOException e) { + logger.error("Exception in Rebalance.generateBucketMessage", + e); + } + } + }.start(); + } + } + + /** + * Handle an incoming /bucket/update REST message. 
+ * + * @param data base64-encoded data, containing all bucket updates + */ + static void updateBucket(byte[] data) { + final byte[] packet = Base64.getDecoder().decode(data); + Runnable task = () -> { + try { + /* + * process the packet, handling any updates + */ + if (updateBucketInternal(packet)) { + /* + * updates have occurred -- forward this packet to + * all servers in the "notify list" + */ + logger.info("One or more bucket updates occurred"); + Entity<String> entity = + Entity.entity(new String(data, StandardCharsets.UTF_8), + MediaType.APPLICATION_OCTET_STREAM_TYPE); + for (Server server : Server.getNotifyList()) { + server.post("bucket/update", entity); + } + } + } catch (Exception e) { + logger.error("Exception in Bucket.updateBucket", e); + } + }; + MainLoop.queueWork(task); + } + + /** + * This method supports the 'updateBucket' method, and runs entirely within + * the 'MainLoop' thread. + */ + private static boolean updateBucketInternal(byte[] packet) throws IOException { + boolean changes = false; + + ByteArrayInputStream bis = new ByteArrayInputStream(packet); + DataInputStream dis = new DataInputStream(bis); + + // the packet contains a sequence of bucket updates + while (dis.available() != 0) { + // first parameter = bucket number + int index = dis.readUnsignedShort(); + + // locate the corresponding 'Bucket' object + Bucket bucket = indexToBucket[index]; + + // indicates whether changes occurred to the bucket + boolean bucketChanges = false; + + /* + * the remainder of the information for this bucket consists of + * a sequence of '<tag> [ <associated-data> ]' followed by the tag + * value 'END_OF_PARAMETERS_TAG'. 
+ */ + int tag; + while ((tag = dis.readUnsignedByte()) != END_OF_PARAMETERS_TAG) { + switch (tag) { + case OWNER_UPDATE: { + // <OWNER_UPDATE> <owner-uuid> -- owner UUID specified + bucketChanges = updateBucketInternalOwnerUpdate(bucket, dis, index); + break; + } + + case OWNER_NULL: { + // <OWNER_NULL> -- owner UUID should be set to 'null' + if (bucket.getOwner() != null) { + logger.info("Bucket {} owner: {}->null", + index, bucket.getOwner()); + bucketChanges = true; + synchronized (bucket) { + bucket.setOwner(null); + bucket.setState(null); + } + } + break; + } + + case PRIMARY_BACKUP_UPDATE: { + // <PRIMARY_BACKUP_UPDATE> <primary-backup-uuid> -- + // primary backup UUID specified + Server newPrimaryBackup = + Server.getServer(Util.readUuid(dis)); + if (bucket.primaryBackup != newPrimaryBackup) { + logger.info("Bucket {} primary backup: {}->{}", index, + bucket.primaryBackup, newPrimaryBackup); + bucketChanges = true; + bucket.primaryBackup = newPrimaryBackup; + } + break; + } + + case PRIMARY_BACKUP_NULL: { + // <PRIMARY_BACKUP_NULL> -- + // primary backup should be set to 'null' + if (bucket.primaryBackup != null) { + logger.info("Bucket {} primary backup: {}->null", + index, bucket.primaryBackup); + bucketChanges = true; + bucket.primaryBackup = null; + } + break; + } + + case SECONDARY_BACKUP_UPDATE: { + // <SECONDARY_BACKUP_UPDATE> <secondary-backup-uuid> -- + // secondary backup UUID specified + Server newSecondaryBackup = + Server.getServer(Util.readUuid(dis)); + if (bucket.secondaryBackup != newSecondaryBackup) { + logger.info("Bucket {} secondary backup: {}->{}", index, + bucket.secondaryBackup, newSecondaryBackup); + bucketChanges = true; + bucket.secondaryBackup = newSecondaryBackup; + } + break; + } + + case SECONDARY_BACKUP_NULL: { + // <SECONDARY_BACKUP_NULL> -- + // secondary backup should be set to 'null' + if (bucket.secondaryBackup != null) { + logger.info("Bucket {} secondary backup: {}->null", + index, bucket.secondaryBackup); + 
bucketChanges = true; + bucket.secondaryBackup = null; + } + break; + } + + default: + logger.error("Illegal tag: {}", tag); + break; + } + } + if (bucketChanges) { + // give audit a chance to run + changes = true; + bucket.stateChanged(); + } + } + return changes; + } + + /** + * Update bucket owner information. + * + * @param bucket the bucket in process + * @param dis data input stream contains the update + * @param index the bucket number + * @return a value indicate bucket changes + */ + private static boolean updateBucketInternalOwnerUpdate(Bucket bucket, DataInputStream dis, + int index) throws IOException { + boolean bucketChanges = false; + Server newOwner = Server.getServer(Util.readUuid(dis)); + if (bucket.getOwner() != newOwner) { + logger.info("Bucket {} owner: {}->{}", + index, bucket.getOwner(), newOwner); + bucketChanges = true; + + Server thisServer = Server.getThisServer(); + Server oldOwner = bucket.getOwner(); + bucket.setOwner(newOwner); + if (thisServer == oldOwner) { + // the current server is the old owner + if (bucket.getState() == null) { + bucket.state = bucket.new OldOwner(newOwner); + } + } else if (thisServer == newOwner) { + // the current server the new owner + if (bucket.getState() == null) { + bucket.state = bucket.new NewOwner(true, oldOwner); + } else { + // new owner has been confirmed + // orig bucket.state.newOwner(); + bucket.state.newOwner(); + } + } + } + return bucketChanges; + } + + /** + * Forward a message to the specified bucket number. If the bucket is + * in a transient state (the value of 'state' is not 'null'), the handling + * is determined by that state. + * + * @param bucketNumber the bucket number determined by extracting the + * keyword from 'message' + * @param message the message to be forwarded/processed + * @return a value of 'true' indicates the message has been "handled" + * (forwarded or queued), and 'false' indicates it has not, and needs + * to be handled locally. 
+ */ + public static boolean forward(int bucketNumber, Message message) { + Bucket bucket = indexToBucket[bucketNumber]; + Server server; + + synchronized (bucket) { + if (bucket.state != null) { + // we are in a transient state -- the handling is state-specific + return bucket.state.forward(message); + } + server = bucket.getOwner(); + } + + if (server == null || server == Server.getThisServer()) { + // this needs to be processed locally + return false; + } else { + // send message to remote server + message.sendToServer(server, bucketNumber); + return true; + } + } + + /** + * This is a convenience method, which forwards a message through the + * bucket associated with the specified keyword. + * + * @param keyword the keyword extracted from 'message' + * keyword from 'message' + * @param message the message to be forwarded/processed + * @return a value of 'true' indicates the message has been "handled" + * (forwarded or queued), and 'false' indicates it has not, and needs + * to be handled locally. + */ + public static boolean forward(String keyword, Message message) { + return forward(bucketNumber(keyword), message); + } + + /** + * Forward a message to the specified bucket number. If the bucket is + * in a transient state (the value of 'state' is not 'null'), the handling + * is determined by that state. This is a variant of the 'forward' method, + * which handles local processing, instead of just returning 'false'. + * + * @param bucketNumber the bucket number determined by extracting the + * keyword from 'message' + * @param message the message to be forwarded/processed + */ + public static void forwardAndProcess(int bucketNumber, Message message) { + if (!forward(bucketNumber, message)) { + message.process(); + } + } + + /** + * Forward a message to the specified bucket number. If the bucket is + * in a transient state (the value of 'state' is not 'null'), the handling + * is determined by that state. 
This is a variant of the 'forward' method, + * which handles local processing, instead of just returning 'false'. + * + * @param keyword the keyword extracted from 'message' + * keyword from 'message' + * @param message the message to be forwarded/processed + */ + public static void forwardAndProcess(String keyword, Message message) { + forwardAndProcess(bucketNumber(keyword), message); + } + + /** + * Handle an incoming /cmd/dumpBuckets REST message. + * + * @param out the 'PrintStream' to use for displaying information + */ + public static void dumpBuckets(final PrintStream out) { + /* + * we aren't really doing a 'Rebalance' here, but the 'copyData' method + * is useful for extracting the data, and determining the buckets + * associated with each server. + */ + Rebalance rb = new Rebalance(); + rb.copyData(); + + /* + * this method is not accessing anything in the 'Server' or 'Bucket' + * table, so it doesn't need to run within the 'MainLoop' thread. + */ + rb.dumpBucketsInternal(out); + } + + /** + * Handle an incoming /cmd/bucketMessage REST message -- this is only + * used for testing the routing of messages between servers. + * + * @param out the 'PrintStream' to use for displaying information + * @param keyword the keyword that is hashed to select the bucket number + * @param message the message to send to the remote end + * @throws IOException when error occurred + */ + public static void bucketMessage( + final PrintStream out, final String keyword, String message) throws IOException { + + if (keyword == null) { + out.println("'keyword' is mandatory"); + return; + } + if (message == null) { + message = "Message generated at " + new Date(); + } + final int bucketNumber = bucketNumber(keyword); + Server server = bucketToServer(bucketNumber); + + if (server == null) { + /* + * selected bucket has no server assigned -- this should only be a + * transient situation, until 'rebalance' is run. 
+ */ + out.println("Bucket is " + bucketNumber + ", which has no owner"); + } else if (server == Server.getThisServer()) { + /* + * the selected bucket is associated with this particular server -- + * no forwarding is needed. + */ + out.println("Bucket is " + bucketNumber + + ", which is owned by this server: " + server.getUuid()); + } else { + /* + * the selected bucket is assigned to a different server -- forward + * the message. + */ + out.println("Bucket is " + bucketNumber + ": sending from\n" + + " " + Server.getThisServer().getUuid() + " to \n" + + " " + server.getUuid()); + + // do a POST call of /bucket/bucketResponse to the remoote server + Entity<String> entity = + Entity.entity(new String(message.getBytes(), StandardCharsets.UTF_8), + MediaType.TEXT_PLAIN); + + /* + * the POST itself runs in a server-specific thread, and + * 'responseQueue' is used to pass back the response. + */ + final LinkedTransferQueue<Response> responseQueue = + new LinkedTransferQueue<>(); + + server.post("bucket/bucketResponse", entity, new Server.PostResponse() { + /** + * {@inheritDoc} + */ + @Override + public WebTarget webTarget(WebTarget webTarget) { + // we need to include the 'bucket' and 'keyword' parameters + // in the POST that we are sending out + return webTarget + .queryParam("bucket", bucketNumber) + .queryParam("keyword", keyword); + } + + /** + * {@inheritDoc} + */ + @Override + public void response(Response response) { + // this is the POST response -- + // pass it back to the calling thread + responseQueue.put(response); + } + }); + + try { + // this is the calling thread -- wait for the POST response + Response response = responseQueue.poll(60, TimeUnit.SECONDS); + if (response == null) { + out.println("Timed out waiting for a response"); + } else { + out.println("Received response code " + response.getStatus()); + out.println("Entity = " + response.readEntity(String.class)); + } + } catch (InterruptedException e) { + out.println(e); + throw new 
IOException(e); + } + } + } + + /** + * Handle an incoming /bucket/bucketResponse REST message -- this runs on + * the destination host, and is the continuation of an operation triggered + * by the /cmd/bucketMessage REST message running on the originating host. + * + * @param out the 'PrintStream' to use for passing back information + * in a human-readable form + * @param bucket the bucket number, which should be owned by this host + * if we are in sync with the sending host, and didn't get caught + * in a transient state + * @param keyword the keyword selected on the originating end, which should + * hash to 'bucket' + * @param message the message selected on the originating end + */ + public static void bucketResponse( + final PrintStream out, int bucket, String keyword, byte[] message) { + + Server thisServer = Server.getThisServer(); + Server server = bucketToServer(bucket); + + if (server != thisServer) { + /* + * this isn't expected, and either indicates we are out-of-sync with + * pthe originating server, or this operation was triggered while in + * a transient state. + */ + out.println("ERROR: " + thisServer.toString() + ": bucket " + bucket + + "is owned by\n " + server); + } else { + /* + * As expected, we are the owner of this bucket. Print out a message, + * which will be returned to the originating host, and displayed. + */ + out.println(thisServer.toString() + ":\n" + + " bucket = " + bucket + + "\n keyword = " + keyword + + "\n message = " + new String(message)); + } + } + + /** + * Handle an incoming /cmd/moveBucket REST message -- this is only + * used for testing bucket migration. It only works on the lead server. 
+ * + * @param out the 'PrintStream' to use for displaying information + * @param bucketNumber the bucket number to be moved + * @param newHostUuid the UUID of the destination host (if 'null', a + * destination host will be chosen at random) + */ + public static void moveBucket(PrintStream out, int bucketNumber, String newHostUuid) { + Server leader = Leader.getLeader(); + if (leader != Server.getThisServer()) { + out.println("This is not the lead server"); + return; + } + + if (bucketNumber < 0 || bucketNumber >= indexToBucket.length) { + out.println("Bucket number out of range"); + return; + } + + Rebalance rb = new Rebalance(); + rb.copyData(); + + TestBucket bucket = rb.buckets[bucketNumber]; + TestServer oldHost = bucket.owner; + + if (oldHost == rb.nullServer) { + out.println("Bucket " + bucketNumber + " is currently unassigned"); + return; + } + + TestServer newHost = null; + + if (newHostUuid != null) { + // the UUID of a destination host has been specified + newHost = rb.testServers.get(UUID.fromString(newHostUuid)); + if (newHost == null) { + out.println("Can't locate UUID " + newHostUuid); + return; + } + } else { + /* + * Choose a destination host at random, other than the current owner. + * Step a random count in the range of 1 to (n-1) relative to the + * current host. + */ + UUID key = oldHost.uuid; + for (int count = new Random().nextInt(rb.testServers.size() - 1) ; + count >= 0 ; count -= 1) { + key = rb.testServers.higherKey(key); + if (key == null) { + // wrap to the beginning of the list + key = rb.testServers.firstKey(); + } + } + newHost = rb.testServers.get(key); + } + out.println("Moving bucket " + bucketNumber + " from " + + oldHost + " to " + newHost); + + /* + * update the owner, and ensure that the primary and secondary backup + * remain different from the owner. 
+ */ + bucket.setOwner(newHost); + if (newHost == bucket.primaryBackup) { + out.println("Moving primary back from " + newHost + " to " + oldHost); + bucket.setPrimaryBackup(oldHost); + } else if (newHost == bucket.secondaryBackup) { + out.println("Moving secondary back from " + newHost + + " to " + oldHost); + bucket.setSecondaryBackup(oldHost); + } + + try { + /* + * build a message containing all of the updated bucket + * information, process it internally in this host + * (lead server), and send it out to others in the + * "notify list". + */ + rb.generateBucketMessage(); + } catch (IOException e) { + logger.error("Exception in Rebalance.generateBucketMessage", + e); + } + } + + /** + * This method is called when an incoming /bucket/sessionData message is + * received from the old owner of the bucket, which presumably means that + * we are the new owner of the bucket. + * + * @param bucketNumber the bucket number + * @param dest the UUID of the intended destination + * @param ttl similar to IP time-to-live -- it controls the number of hops + * the message may take + * @param data serialized data associated with this bucket, encoded using + * base64 + */ + + static void sessionData(int bucketNumber, UUID dest, int ttl, byte[] data) { + logger.info("Bucket.sessionData: bucket={}, data length={}", + bucketNumber, data.length); + + if (dest != null && !dest.equals(Server.getThisServer().getUuid())) { + // the message needs to be forwarded to the intended destination + Server server; + WebTarget webTarget; + + if ((ttl -= 1) > 0 + && (server = Server.getServer(dest)) != null + && (webTarget = server.getWebTarget("bucket/sessionData")) != null) { + logger.info("Forwarding 'bucket/sessionData' to uuid {}", + server.getUuid()); + Entity<String> entity = + Entity.entity(new String(data, StandardCharsets.UTF_8), + MediaType.APPLICATION_OCTET_STREAM_TYPE); + Response response = + webTarget + .queryParam("bucket", bucketNumber) + .queryParam("dest", dest) + 
.queryParam("ttl", String.valueOf(ttl)) + .request().post(entity); + logger.info("/bucket/sessionData response code = {}", + response.getStatus()); + } else { + logger.error("Couldn't forward 'bucket/sessionData' to uuid {}, ttl={}", + dest, ttl); + } + return; + } + + byte[] decodedData = Base64.getDecoder().decode(data); + Bucket bucket = indexToBucket[bucketNumber]; + + logger.info("Bucket.sessionData: decoded data length = {}", + decodedData.length); + + if (bucket.state == null) { + /* + * We received the serialized data prior to being notified + * that we are the owner -- this happens sometimes. Behave as + * though we are the new owner, but intidate it is unconfirmed. + */ + logger.info("Bucket {} session data received unexpectedly", + bucketNumber); + bucket.state = bucket.new NewOwner(false, bucket.getOwner()); + } + bucket.state.bulkSerializedData(decodedData); + } + + /** + * This method is called whenever the bucket's state has changed in a + * way that it should be audited. + */ + private synchronized void stateChanged() { + if (state != null) { + return; + } + // the audit should be run + Server thisServer = Server.getThisServer(); + boolean isOwner = (thisServer == owner); + boolean isBackup = (!isOwner && (thisServer == primaryBackup + || thisServer == secondaryBackup)); + + // invoke 'TargetLock' directly + TargetLock.auditBucket(this, isOwner, isBackup); + for (ServerPoolApi feature : ServerPoolApi.impl.getList()) { + feature.auditBucket(this, isOwner, isBackup); + } + } + + /** + * Returns an adjunct of the specified class + * (it is created if it doesn't exist). 
+ * + * @param clazz this is the class of the adjunct + * @return an adjunct of the specified class ('null' may be returned if + * the 'newInstance' method is unable to create the adjunct) + */ + public <T> T getAdjunct(Class<T> clazz) { + synchronized (adjuncts) { + // look up the adjunct in the table + Object adj = adjuncts.get(clazz); + if (adj == null) { + // lookup failed -- create one + try { + // create the adjunct (may trigger an exception) + adj = clazz.newInstance(); + + // update the table + adjuncts.put(clazz, adj); + } catch (Exception e) { + logger.error("Can't create adjunct of {}", clazz, e); + } + } + return clazz.cast(adj); + } + } + + /** + * Returns an adjunct of the specified class. + * + * @param clazz this is the class of the adjunct + * @return an adjunct of the specified class, if it exists, + * and 'null' if it does not + */ + public <T> T getAdjunctDontCreate(Class<T> clazz) { + synchronized (adjuncts) { + // look up the adjunct in the table + return clazz.cast(adjuncts.get(clazz)); + } + } + + /** + * Explicitly create an adjunct -- this is useful when the adjunct + * initialization requires that some parameters be passed. + * + * @param adj this is the adjunct to insert into the table + * @return the previous adjunct of this type ('null' if none) + */ + public Object putAdjunct(Object adj) { + synchronized (adjuncts) { + Class clazz = adj.getClass(); + return adjuncts.put(clazz, adj); + } + } + + /** + * Remove an adjunct. + * + * @param clazz this is the class of adjuncts to remove + * @return the object, if found, and 'null' if not + */ + public <T> T removeAdjunct(Class<T> clazz) { + synchronized (adjuncts) { + // remove the adjunct in the table + return clazz.cast(adjuncts.remove(clazz)); + } + } + + /** + * Dump out all buckets with adjuncts. 
+ * + * @param out the 'PrintStream' to use for displaying information + */ + public static void dumpAdjuncts(PrintStream out) { + boolean noneFound = true; + String format = "%6s %s\n"; + + for (Bucket bucket : indexToBucket) { + synchronized (bucket.adjuncts) { + if (bucket.adjuncts.size() != 0) { + if (noneFound) { + out.printf(format, "Bucket", "Adjunct Classes"); + out.printf(format, "------", "---------------"); + noneFound = false; + } + boolean first = true; + for (Class<?> clazz : bucket.adjuncts.keySet()) { + if (first) { + out.printf(format, bucket.index, clazz.getName()); + first = false; + } else { + out.printf(format, "", clazz.getName()); + } + } + } + } + } + } + + /* ============================================================ */ + + /** + * There is a single instance of this class (Bucket.eventHandler), which + * is registered to listen for notifications of state transitions. Note + * that all of these methods are running within the 'MainLoop' thread. + */ + private static class EventHandler implements Events { + /** + * {@inheritDoc} + */ + @Override + public void serverFailed(Server server) { + // remove this server from any bucket where it is referenced + + Server thisServer = Server.getThisServer(); + for (Bucket bucket : indexToBucket) { + synchronized (bucket) { + boolean changes = false; + if (bucket.getOwner() == server) { + // the failed server owns this bucket -- + // move to the primary backup + bucket.setOwner(bucket.getPrimaryBackup()); + bucket.primaryBackup = null; + changes = true; + + if (bucket.getOwner() == null) { + // bucket owner is still null -- presumably, we had no + // primary backup, so use the secondary backup instead + bucket.setOwner(bucket.getSecondaryBackup()); + bucket.setSecondaryBackup(null); + } + } + if (bucket.getPrimaryBackup() == server) { + // the failed server was a primary backup to this bucket -- + // mark the entry as 'null' + bucket.setPrimaryBackup(null); + changes = true; + } + if 
(bucket.getSecondaryBackup() == server) { + // the failed server was a secondary backup to this bucket -- + // mark the entry as 'null' + bucket.setSecondaryBackup(null); + changes = true; + } + + if (bucket.owner == thisServer && bucket.state == null) { + // the current server is the new owner + bucket.setState(bucket.new NewOwner(false, null)); + changes = true; + } + + if (changes) { + // may give audits a chance to run + bucket.stateChanged(); + } + } + } + + // trigger a rebalance (only happens if we are the lead server) + rebalance(); + } + + /** + * {@inheritDoc} + */ + @Override + public void newLeader(Server server) { + // trigger a rebalance (only happens if we are the new lead server) + rebalance(); + } + + /** + * {@inheritDoc} + */ + @Override + public void leaderConfirmed(Server server) { + // trigger a rebalance (only happens if we are the lead server) + rebalance(); + } + } + + /* ============================================================ */ + + /** + * Instances of this class are created as part of the 'rebalance' + * operation on the lead server, or as part of a 'dumpBuckets' operation + * on any server. + * Each instance of this class corresponds to a 'Bucket' instance. + */ + private static class TestBucket implements Comparable<TestBucket> { + // bucket number + int index; + + // owner of the bucket + TestServer owner; + + // primary backup for this bucket + + TestServer primaryBackup; + + // secondary backup for this bucket + TestServer secondaryBackup; + + /** + * Constructor -- initialize the 'TestBucket' instance. + * + * @param index the bucket number + */ + TestBucket(int index) { + this.index = index; + } + + /** + * Update the owner of a bucket, which also involves updating the + * backward links in the 'TestServer' instances. 
+ * + * @param newOwner the new owner of the bucket + */ + void setOwner(TestServer newOwner) { + if (owner != newOwner) { + // the 'owner' field does need to be changed + if (owner != null) { + // remove this bucket from the 'buckets' list of the old owner + owner.buckets.remove(this); + } + if (newOwner != null) { + // add this bucket to the 'buckets' list of the new owner + newOwner.buckets.add(this); + } + // update the 'owner' field in the bucket + owner = newOwner; + } + } + + /** + * Update the primary backup of a bucket, which also involves updating + * the backward links in the 'TestServer' instances. + * + * @param newPrimaryBackup the new primary of the bucket + */ + void setPrimaryBackup(TestServer newPrimaryBackup) { + if (primaryBackup != newPrimaryBackup) { + // the 'primaryBackup' field does need to be changed + if (primaryBackup != null) { + // remove this bucket from the 'buckets' list of the + // old primary backup + primaryBackup.primaryBackupBuckets.remove(this); + } + if (newPrimaryBackup != null) { + // add this bucket to the 'buckets' list of the + // new primary backup + newPrimaryBackup.primaryBackupBuckets.add(this); + } + // update the 'primaryBackup' field in the bucket + primaryBackup = newPrimaryBackup; + } + } + + /** + * Update the secondary backup of a bucket, which also involves updating + * the backward links in the 'TestServer' instances. 
+ * + * @param newSecondaryBackup the new secondary of the bucket + */ + void setSecondaryBackup(TestServer newSecondaryBackup) { + if (secondaryBackup != newSecondaryBackup) { + // the 'secondaryBackup' field does need to be changed + if (secondaryBackup != null) { + // remove this bucket from the 'buckets' list of the + // old secondary backup + secondaryBackup.secondaryBackupBuckets.remove(this); + } + if (newSecondaryBackup != null) { + // add this bucket to the 'buckets' list of the + // new secondary backup + newSecondaryBackup.secondaryBackupBuckets.add(this); + } + // update the 'secondaryBackup' field in the bucket + secondaryBackup = newSecondaryBackup; + } + } + + /*==================================*/ + /* Comparable<TestBucket> interface */ + /*==================================*/ + + /** + * Compare two 'TestBucket' instances, for use in a 'TreeSet'. + * + * @param other the other 'TestBucket' instance to compare to + */ + @Override + public int compareTo(TestBucket other) { + return index - other.index; + } + + /** + * Return a string representation of this 'TestBucket' instance. + * + * @return a string representation of this 'TestBucket' instance + */ + @Override + public String toString() { + return "TestBucket[" + index + "]"; + } + } + + /* ============================================================ */ + + /** + * Instances of this class are created as part of the 'rebalance' + * operation on the lead server, or as part of a 'dumpBuckets' operation + * on any server. + * Each instance of this class corresponds to a 'Server' instance. + * Unlike the actual 'Server' instances, each 'TestServer' instance + * contains back links to all of the buckets it is associated with. 
+ */ + private static class TestServer { + // unique id for this server + // (matches the one in the corresponding 'Server' entry) + final UUID uuid; + + // site socket information (matches 'Server' entry) + final InetSocketAddress siteSocketAddress; + + // the set of all 'TestBucket' instances, + // where this 'TestServer' is listed as 'owner' + final TreeSet<TestBucket> buckets = new TreeSet<>(); + + // the set of all 'TestBucket' instances, + // where this 'TestServer' is listed as 'primaryBackup' + final TreeSet<TestBucket> primaryBackupBuckets = new TreeSet<>(); + + // the set of all 'TestBucket' instances, + // where this 'TestServer' is listed as 'secondaryBackup' + final TreeSet<TestBucket> secondaryBackupBuckets = new TreeSet<>(); + + /** + * Constructor. + * + * @param uuid uuid of this 'TestServer' instance + * @param siteSocketAddress matches the value in the corresponding 'Server' + */ + TestServer(UUID uuid, InetSocketAddress siteSocketAddress) { + this.uuid = uuid; + this.siteSocketAddress = siteSocketAddress; + } + + /** + * Return a string representation of this 'TestServer' instance. + * + * @return a string representation of this 'TestServer' instance + */ + @Override + public String toString() { + return "TestServer[" + uuid + "]"; + } + } + + /* ============================================================ */ + + /** + * This class supports the 'rebalance' operation. Each instance is a wrapper + * around a 'TestServer' instance, as it would be if another specific + * server failed. + */ + private static class AdjustedTestServer + implements Comparable<AdjustedTestServer> { + TestServer server; + + // simulated fail on this server + TestServer failedServer; + + // expected bucket count if 'failedServer' fails + int bucketCount; + + // total number of primary backup buckets used by this host + int primaryBackupBucketCount; + + // total number of secondary backup buckets used by this host + int secondaryBackupBucketCount; + + /** + * Constructor. 
+ * + * @param server the server this 'AdjustedTestServer' instance represents + * @param failedServer the server going through a simulated failure -- + * the 'bucketCount' value is adjusted based upon this + */ + AdjustedTestServer(TestServer server, TestServer failedServer) { + this.server = server; + this.failedServer = failedServer; + + this.bucketCount = server.buckets.size(); + this.primaryBackupBucketCount = server.primaryBackupBuckets.size(); + this.secondaryBackupBucketCount = server.secondaryBackupBuckets.size(); + + // need to adjust 'bucketCount' for the case where the current + // host fails + for (TestBucket bucket : server.primaryBackupBuckets) { + if (bucket.owner == failedServer) { + bucketCount += 1; + // TBD: should 'primaryBackupBucketCount' be decremented? + } + } + + // need to adjust 'bucketCount' for the case where the current + // host fails + for (TestBucket bucket : server.secondaryBackupBuckets) { + if (bucket.owner == failedServer) { + bucketCount += 1; + // TBD: should 'secondaryBackupBucketCount' be decremented? 
+ } + } + } + + /********************************************/ + /* Comparable<AdjustedTestServer> interface */ + /********************************************/ + + /** + * {@inheritDoc} + */ + @Override + public int compareTo(AdjustedTestServer other) { + /* + * Comparison order: + * 1) minimal expected bucket count if current host fails + * (differences of 1 are treated as a match) + * 2) minimal backup bucket count + * 3) UUID order + */ + int rval = bucketCount - other.bucketCount; + if (rval <= 1 && rval >= -1) { + rval = (primaryBackupBucketCount + secondaryBackupBucketCount) + - (other.primaryBackupBucketCount + + other.secondaryBackupBucketCount); + if (rval == 0) { + rval = -Util.uuidComparator.compare(server.uuid, other.server.uuid); + } + } + return rval; + } + } + + /* ============================================================ */ + + /** + * This class is primarily used to do a 'Rebalance' operation on the + * lead server, which is then distributed to all of the other servers. + * Part of it is also useful for implementing the /cmd/dumpBuckets + * REST message handler. 
+ */ + private static class Rebalance { + // this table resembles the 'Bucket.indexToBucket' table + TestBucket[] buckets = new TestBucket[indexToBucket.length]; + + // this table resembles the 'Server.servers' table + TreeMap<UUID,TestServer> testServers = new TreeMap<>(Util.uuidComparator); + + /* this is a special server instance, which is allocated any + * owned, primary, or secondary buckets that haven't been allocated to + * any of the real servers + */ + TestServer nullServer = new TestServer(null, null); + + /** + * Copy all of the bucket data in the 'buckets' table, and also return + * a copy of the 'Server.servers' table + */ + void copyData() { + // will contain a copy of the 'Bucket' table + final Bucket[] bucketSnapshot = new Bucket[indexToBucket.length]; + + /* + * This method is running within the 'MainLoop' thread, + * and builds a copy of the 'Bucket' table, as well as the + * list of active servers -- these can then be examined + * in a different thread, without potentially distrupting + * the 'MainLoop' thread. + * + * @return 0 (the return value is not significant at present) + */ + Callable<Integer> callable = () -> { + // copy the 'Bucket' table + for (int i = 0 ; i < indexToBucket.length; i += 1) { + // makes a snapshot of the bucket information + Bucket bucket = indexToBucket[i]; + + Bucket tmpBucket = new Bucket(i); + tmpBucket.setOwner(bucket.getOwner()); + tmpBucket.setPrimaryBackup(bucket.getPrimaryBackup()); + tmpBucket.setSecondaryBackup(bucket.getSecondaryBackup()); + bucketSnapshot[i] = tmpBucket; + } + + /* + * At this point, 'bucketSnapshot' and 'servers' should be + * complete. The next step is to create a 'TestServer' entry + * that matches each 'Server' entry. 
+ */ + for (Server server : Server.getServers()) { + UUID uuid = server.getUuid(); + testServers.put(uuid, new TestServer(uuid, server.getSiteSocketAddress())); + } + + return 0; + }; + FutureTask<Integer> ft = new FutureTask(callable); + MainLoop.queueWork(ft); + try { + ft.get(60, TimeUnit.SECONDS); + } catch (InterruptedException | ExecutionException | TimeoutException e) { + logger.error("Exception in Rebalance.copyData", e); + return; + } + + /* + * Now, create a 'TestBucket' table that mirrors the 'Bucket' table. + * Unlike the standard 'Bucket' and 'Server' tables, the 'TestServer' + * entries contain links referring back to the 'TestBucket' entries. + * This information is useful when rebalancing. + */ + for (Bucket bucket : bucketSnapshot) { + int index = bucket.index; + TestBucket testBucket = new TestBucket(index); + + // populate the 'owner' field + if (bucket.getOwner() != null) { + testBucket.setOwner(testServers.get(bucket.getOwner().getUuid())); + } else { + testBucket.setOwner(nullServer); + } + + // populate the 'primaryBackup' field + if (bucket.primaryBackup != null) { + testBucket.setPrimaryBackup( + testServers.get(bucket.primaryBackup.getUuid())); + } else { + testBucket.setPrimaryBackup(nullServer); + } + + // populate the 'secondaryBackup' field + if (bucket.secondaryBackup != null) { + testBucket.setSecondaryBackup( + testServers.get(bucket.secondaryBackup.getUuid())); + } else { + testBucket.setSecondaryBackup(nullServer); + } + buckets[index] = testBucket; + } + } + + /** + * Allocate unowned 'TestBucket' entries across all of the 'TestServer' + * entries. When every 'TestBucket' has an owner, rebalance as needed, + * so the 'TestServer' entry with the most buckets has at most one more + * bucket than the 'TestServer' entry with the least. 
+ */ + void allocateBuckets() { + /* + * the following 'Comparator' is used to control the order of the + * 'needBuckets' TreeSet: those with the fewest buckets allocated are + * at the head of the list. + */ + Comparator<TestServer> bucketCount = new Comparator<TestServer>() { + @Override + public int compare(TestServer s1, TestServer s2) { + int rval = s1.buckets.size() - s2.buckets.size(); + if (rval == 0) { + rval = Util.uuidComparator.compare(s1.uuid, s2.uuid); + } + return rval; + } + }; + + // sort servers according to the order in which they can + // take on ownership of buckets + TreeSet<TestServer> needBuckets = new TreeSet<>(bucketCount); + for (TestServer ts : testServers.values()) { + needBuckets.add(ts); + } + + // go through all of the unowned buckets, and allocate them + for (TestBucket bucket : new LinkedList<TestBucket>(nullServer.buckets)) { + // take first entry off of sorted server list + TestServer ts = needBuckets.first(); + needBuckets.remove(ts); + + // add this bucket to the 'buckets' list + bucket.setOwner(ts); + + // place it back in the list, possibly in a new position + // (it's attributes have changed) + needBuckets.add(ts); + } + nullServer.buckets.clear(); + + // there may still be rebalancing needed -- no host should contain + // 2 or more buckets more than any other host + for ( ; ; ) { + TestServer first = needBuckets.first(); + TestServer last = needBuckets.last(); + + if (last.buckets.size() - first.buckets.size() <= 1) { + // no more rebalancing needed + break; + } + + // remove both from sorted list + needBuckets.remove(first); + needBuckets.remove(last); + + // take one bucket from 'last', and assign it to 'first' + last.buckets.first().setOwner(first); + + // place back in sorted list + needBuckets.add(first); + needBuckets.add(last); + } + } + + /** + * Make sure that the primary backups have the same site as the owner, + * and the secondary backups have a different site. 
+ */ + void checkSiteValues() { + for (TestBucket bucket : buckets) { + if (bucket.owner != null) { + InetSocketAddress siteSocketAddress = + bucket.owner.siteSocketAddress; + TestServer primaryBackup = bucket.primaryBackup; + TestServer secondaryBackup = bucket.secondaryBackup; + + validateSiteOwner(bucket, siteSocketAddress, + primaryBackup, secondaryBackup); + } + } + } + + /** + * Validate primary site owner and secondary site owner are valid. + * @param bucket TestBucket + * @param siteSocketAddress site socket address + * @param primaryBackup primary backups + * @param secondaryBackup secondary backups + */ + private void validateSiteOwner(TestBucket bucket, InetSocketAddress siteSocketAddress, + TestServer primaryBackup, TestServer secondaryBackup) { + if (primaryBackup != null + && !Objects.equals(siteSocketAddress, + primaryBackup.siteSocketAddress)) { + /** + * primary backup is from the wrong site -- see if we can + * use the secondary. + */ + if (secondaryBackup != null + && Objects.equals(siteSocketAddress, + secondaryBackup.siteSocketAddress)) { + // swap primary and secondary + bucket.setPrimaryBackup(secondaryBackup); + bucket.setSecondaryBackup(primaryBackup); + } else { + // just invalidate primary backup + bucket.setPrimaryBackup(null); + } + } else if (secondaryBackup != null + && Objects.equals(siteSocketAddress, + secondaryBackup.siteSocketAddress)) { + // secondary backup is from the wrong site + bucket.setSecondaryBackup(null); + if (primaryBackup == null) { + // we can use this as the primary + bucket.setPrimaryBackup(secondaryBackup); + } + } + } + + /** + * Allocate and rebalance the primary backups. 
+ */ + void rebalancePrimaryBackups() { + for (TestServer failedServer : testServers.values()) { + /* + * to allocate primary backups for this server, + * simulate a failure, and balance the backup hosts + */ + + // get siteSocketAddress from server + InetSocketAddress siteSocketAddress = failedServer.siteSocketAddress; + + // populate a 'TreeSet' of 'AdjustedTestServer' instances based + // the failure of 'failedServer' + TreeSet<AdjustedTestServer> adjustedTestServers = + new TreeSet<AdjustedTestServer>(); + for (TestServer server : testServers.values()) { + if (server == failedServer + || !Objects.equals(siteSocketAddress, + server.siteSocketAddress)) { + continue; + } + adjustedTestServers.add(new AdjustedTestServer(server, failedServer)); + } + + if (adjustedTestServers.isEmpty()) { + // this is presumably the only server -- there is no other server + // to act as primary backup, and no rebalancing can occur + continue; + } + + // we need a backup host for each bucket + for (TestBucket bucket : failedServer.buckets) { + if (bucket.primaryBackup == null + || bucket.primaryBackup == nullServer) { + // need a backup host for this bucket -- remove the first + // entry from 'adjustedTestServers', which is most favored + AdjustedTestServer backupHost = adjustedTestServers.first(); + adjustedTestServers.remove(backupHost); + + // update add this bucket to the list + bucket.setPrimaryBackup(backupHost.server); + + // update counts in 'AdjustedTestServer' + backupHost.bucketCount += 1; + backupHost.primaryBackupBucketCount += 1; + + // place it back in the table, possibly in a new position + // (it's attributes have changed) + adjustedTestServers.add(backupHost); + } + } + + // TBD: Is additional rebalancing needed? + } + } + + /** + * Allocate and rebalance the secondary backups. 
+ */ + void rebalanceSecondaryBackups() { + for (TestServer failedServer : testServers.values()) { + /* + * to allocate secondary backups for this server, + * simulate a failure, and balance the backup hosts + */ + + // get siteSocketAddress from server + InetSocketAddress siteSocketAddress = failedServer.siteSocketAddress; + + // populate a 'TreeSet' of 'AdjustedTestServer' instances based + // the failure of 'failedServer' + TreeSet<AdjustedTestServer> adjustedTestServers = + new TreeSet<AdjustedTestServer>(); + for (TestServer server : testServers.values()) { + if (server == failedServer + || Objects.equals(siteSocketAddress, + server.siteSocketAddress)) { + continue; + } + adjustedTestServers.add(new AdjustedTestServer(server, failedServer)); + } + + if (adjustedTestServers.isEmpty()) { + // this is presumably the only server -- there is no other server + // to act as secondary backup, and no rebalancing can occur + continue; + } + + // we need a backup host for each bucket + for (TestBucket bucket : failedServer.buckets) { + if (bucket.secondaryBackup == null + || bucket.secondaryBackup == nullServer) { + // need a backup host for this bucket -- remove the first + // entry from 'adjustedTestServers', which is most favored + AdjustedTestServer backupHost = adjustedTestServers.first(); + adjustedTestServers.remove(backupHost); + + // update add this bucket to the list + bucket.setSecondaryBackup(backupHost.server); + + // update counts in 'AdjustedTestServer' + backupHost.bucketCount += 1; + backupHost.secondaryBackupBucketCount += 1; + + // place it back in the table, possibly in a new position + // (it's attributes have changed) + adjustedTestServers.add(backupHost); + } + } + + // TBD: Is additional rebalancing needed? + } + } + + /** + * Generate a message with all of the bucket updates, process it locally, + * and send it to all servers in the "Notify List". 
+ */ + void generateBucketMessage() throws IOException { + ByteArrayOutputStream bos = new ByteArrayOutputStream(); + DataOutputStream dos = new DataOutputStream(bos); + + // go through the entire 'buckets' table + for (int i = 0 ; i < buckets.length ; i += 1) { + // fetch the 'TestBucket' associated with index 'i' + TestBucket testBucket = buckets[i]; + + /* + * Get the UUID of the owner, primary backup, and secondary backup + * for this bucket. If the associated value does not exist, 'null' + * is used. + */ + UUID newOwner = null; + UUID newPrimary = null; + UUID newSecondary = null; + + if (testBucket.owner != nullServer && testBucket.owner != null) { + newOwner = testBucket.owner.uuid; + } + if (testBucket.primaryBackup != nullServer + && testBucket.primaryBackup != null) { + newPrimary = testBucket.primaryBackup.uuid; + } + if (testBucket.secondaryBackup != nullServer + && testBucket.secondaryBackup != null) { + newSecondary = testBucket.secondaryBackup.uuid; + } + + // write bucket number + dos.writeShort(i); + + // 'owner' field + if (newOwner != null) { + dos.writeByte(OWNER_UPDATE); + Util.writeUuid(dos, newOwner); + } else { + dos.writeByte(OWNER_NULL); + } + + // 'primaryBackup' field + if (newPrimary != null) { + dos.writeByte(PRIMARY_BACKUP_UPDATE); + Util.writeUuid(dos, newPrimary); + } else { + dos.writeByte(PRIMARY_BACKUP_NULL); + } + + // 'secondaryBackup' field + if (newSecondary != null) { + dos.writeByte(SECONDARY_BACKUP_UPDATE); + Util.writeUuid(dos, newSecondary); + } else { + dos.writeByte(SECONDARY_BACKUP_NULL); + } + + dos.writeByte(END_OF_PARAMETERS_TAG); + } + + // get the unencoded 'packet' + final byte[] packet = bos.toByteArray(); + + // create an 'Entity' containing the encoded packet + final Entity<String> entity = + Entity.entity(new String(Base64.getEncoder().encode(packet), + StandardCharsets.UTF_8), MediaType.APPLICATION_OCTET_STREAM_TYPE); + /** + * This method is running within the 'MainLoop' thread. 
+ */ + Runnable task = () -> { + try { + /* + * update the buckets on this host, + * which is presumably the lead server. + */ + Bucket.updateBucketInternal(packet); + } catch (Exception e) { + logger.error("Exception updating buckets", e); + } + + // send a message to all servers on the notify list + for (Server server : Server.getNotifyList()) { + server.post("bucket/update", entity); + } + }; + MainLoop.queueWork(task); + } + + /** + * Supports the '/cmd/dumpBuckets' REST message -- this isn't part of + * a 'rebalance' operation, but it turned out to be a convenient way + * to dump out the bucket table. + * + * @param out the output stream + */ + private void dumpBucketsInternal(PrintStream out) { + // xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx xxxxxxxxx * + // UUID Type Buckets + String format = "%-36s %-9s %5s %s\n"; + + int totalOwner = 0; + int totalPrimary = 0; + int totalSecondary = 0; + + out.printf(format, "UUID", "Type", "Count", "Buckets"); + out.printf(format, "----", "----", "-----", "-------"); + for (TestServer ts : testServers.values()) { + // dump out 'owned' bucket information + if (ts.buckets.isEmpty()) { + // no buckets owned by this server + out.printf(format, ts.uuid, "Owned", 0, ""); + } else { + // dump out primary buckets information + totalOwner += + dumpBucketsSegment(out, format, ts.buckets, ts.uuid.toString(), "Owned"); + } + // optionally dump out primary buckets information + totalPrimary += + dumpBucketsSegment(out, format, ts.primaryBackupBuckets, "", "Primary"); + // optionally dump out secondary buckets information + totalSecondary += + dumpBucketsSegment(out, format, ts.secondaryBackupBuckets, "", "Secondary"); + } + + if (!nullServer.buckets.isEmpty() + || !nullServer.primaryBackupBuckets.isEmpty() + || !nullServer.secondaryBackupBuckets.isEmpty()) { + /* + * There are some owned, primary, or secondary buckets that are + * unassigned. 
It is displayed in a manner similar to buckets that + * do have a server, but the UUID field is marked as 'UNASSIGNED' + * in the first line of the display. + */ + String uuidField = "UNASSIGNED"; + + // optionally dump out unassigned owned buckets information + if (dumpBucketsSegment(out, format, nullServer.buckets, + uuidField, "Owned") != 0) { + uuidField = ""; + } + // optionally dump out unassigned primary backup buckets information + if (dumpBucketsSegment(out, format, nullServer.primaryBackupBuckets, + uuidField, "Primary") != 0) { + uuidField = ""; + } + // optionally dump out unassigned secondary backup buckets information + dumpBucketsSegment(out, format, nullServer.secondaryBackupBuckets, + uuidField, "Secondary"); + } + out.println("\nTotal assigned: owner = " + totalOwner + + ", primary = " + totalPrimary + + ", secondary = " + totalSecondary); + } + + /** + * Supports the 'dumpBucketsInternal' command, and indirectly, the + * '/cmd/dumpBuckets' REST message. It formats one segment of bucket data + * (owned, primary backup, or secondary backup), and dumps out the + * associated bucket data in segments of 8. Note: if the size of 'buckets' + * is 0, nothing is displayed. 
+ * + * @param out the output stream + * @param format the message format string + * @param buckets the entire set of buckets to be displayed + * @param uuid string to display under the 'UUID' header + * @param segmentDescription string to display under the 'Type' header + * @return the size of the 'buckets' set + */ + private static int dumpBucketsSegment( + PrintStream out, String format, TreeSet<TestBucket> buckets, + String uuid, String segmentDescription) { + + int size = buckets.size(); + if (size != 0) { + // generate a linked list of the bucket data to display + LinkedList<String> data = new LinkedList<String>(); + StringBuilder sb = new StringBuilder(); + int count = 8; + + for (TestBucket bucket : buckets) { + if (sb.length() != 0) { + // this is not the first bucket in the line -- + // prepend a space + sb.append(' '); + } + + // add the bucket number + sb.append(String.format("%4s", bucket.index)); + if ((count -= 1) <= 0) { + // filled up a row -- + // add it to the list, and start a new line + data.add(sb.toString()); + sb = new StringBuilder(); + count = 8; + } + } + if (sb.length() != 0) { + // there is a partial line remaining -- add it to the list + data.add(sb.toString()); + } + + /* + * The first line displayed includes the UUID and size information, + * and the first line of bucket data (owned, primary, or secondary). + * The remaining lines of bucket data are displayed alone, + * without any UUID or size information. + */ + out.printf(format, uuid, segmentDescription, buckets.size(), + data.removeFirst()); + while (!data.isEmpty()) { + out.printf(format, "", "", "", data.removeFirst()); + } + } + return size; + } + } + + /* ============================================================ */ + + /** + * This interface is an abstraction for all messages that are routed + * through buckets. 
It exists, so that messages may be queued while + * bucket migration is taking place, and it makes it possible to support + * multiple types of messages (routed UEB/DMAAP messages, or lock messages) + */ + public static interface Message { + /** + * Process the current message -- this may mean delivering it locally, + * or forwarding it. + */ + public void process(); + + /** + * Send the message to another host for processing. + * + * @param server the destination host (although it could end up being + * forwarded again) + * @param bucketNumber the bucket number determined by extracting the + * current message's keyword + */ + public void sendToServer(Server server, int bucketNumber); + } + + /* ============================================================ */ + + /** + * This interface implements a type of backup; for example, there is one + * for backing up Drools objects within sessions, and another for + * backing up lock data. + */ + public static interface Backup { + /** + * This method is called to add a 'Backup' instance to the registered list. + * + * @param backup an object implementing the 'Backup' interface + */ + public static void register(Backup backup) { + synchronized (backupList) { + if (!backupList.contains(backup)) { + backupList.add(backup); + } + } + } + + /** + * Generate Serializable backup data for the specified bucket. + * + * @param bucketNumber the bucket number to back up + * @return a Serializable object containing backkup data + */ + public Restore generate(int bucketNumber); + } + + /* ============================================================ */ + + /** + * Objects implementing this interface may be serialized, and restored + * on a different host. + */ + public static interface Restore extends Serializable { + /** + * Restore from deserialized data. 
+         *
+         * @param bucketNumber the bucket number being restored
+         */
+        void restore(int bucketNumber);
+    }
+
+    /* ============================================================ */
+
+    /**
+     * This interface corresponds to a transient state within a Bucket.
+     */
+    private interface State {
+        /**
+         * This method allows state-specific handling of the
+         * 'Bucket.forward()' methods.
+         *
+         * @param message the message to be forwarded/processed
+         * @return a value of 'true' indicates the message has been "handled"
+         *     (forwarded or queued), and 'false' indicates it has not, and needs
+         *     to be handled locally.
+         */
+        boolean forward(Message message);
+
+        /**
+         * This method indicates that the current server is the new owner
+         * of the current bucket.
+         */
+        void newOwner();
+
+        /**
+         * This method indicates that serialized data has been received,
+         * presumably from the old owner of the bucket. The data could correspond
+         * to Drools objects within sessions, as well as global locks.
+         *
+         * @param data serialized data associated with this bucket (at present,
+         *     this is assumed to be complete, all within a single message)
+         */
+        void bulkSerializedData(byte[] data);
+    }
+
+    /* ============================================================ */
+
+    /**
+     * Each state instance is associated with a bucket, and is used when
+     * that bucket is in a transient state where it is the new owner of a
+     * bucket, or is presumed to be the new owner, based upon other events
+     * that have occurred.
+     *
+     * <p>The instance is its own worker thread: the constructor calls
+     * 'start()', and 'run()' waits for the bulk data, restores it, and then
+     * drains the messages that 'forward()' queued in the meantime.
+     */
+    private class NewOwner extends Thread implements State {
+        /*
+         * this value is 'true' if we have explicitly received a 'newOwner'
+         * indication, and 'false' if there was another trigger for entering this
+         * transient state (e.g. receiving serialized data)
+         */
+        boolean confirmed;
+
+        // when 'System.currentTimeMillis()' reaches this value, we time out
+        long endTime;
+
+        // If not 'null', we are queueing messages for this bucket;
+        // otherwise, we are sending them through.
+        Queue<Message> messages = new ConcurrentLinkedQueue<>();
+
+        // this is used to signal the thread that we have data available
+        CountDownLatch dataAvailable = new CountDownLatch(1);
+
+        // this is the data ('null' until 'bulkSerializedData' stores it)
+        byte[] data = null;
+
+        // this is the old owner of the bucket
+        Server oldOwner;
+
+        /**
+         * Constructor - a transient state, where we are expecting to receive
+         * bulk data from the old owner. The thread is started before the
+         * constructor returns.
+         *
+         * @param confirmed 'true' if we were explicitly notified that we
+         *     are the new owner of the bucket, 'false' if not
+         * @param oldOwner the old owner of the bucket; if 'null', no data is
+         *     expected, and 0-length data is recorded immediately
+         */
+        NewOwner(boolean confirmed, Server oldOwner) {
+            super("New Owner for Bucket " + index);
+            this.confirmed = confirmed;
+            this.oldOwner = oldOwner;
+            if (oldOwner == null) {
+                // we aren't expecting any data -- this is indicated by 0-length data
+                bulkSerializedData(new byte[0]);
+            }
+            // the timeout that applies depends on whether ownership was confirmed
+            endTime = System.currentTimeMillis()
+                + (confirmed ? confirmedTimeout : unconfirmedTimeout);
+            start();
+        }
+
+        /**
+         * Return the 'confirmed' indicator.
+         *
+         * @return the 'confirmed' indicator
+         */
+        private boolean getConfirmed() {
+            synchronized (Bucket.this) {
+                return confirmed;
+            }
+        }
+
+        /**
+         * This returns the timeout delay, which will always be less than or
+         * equal to 1 second. This allows us to periodically check whether the
+         * old server is still active.
+         *
+         * @return the timeout delay, which is the difference between the
+         *     'endTime' value and the current time or 1 second
+         *     (whichever is less); the value is zero or negative once
+         *     'endTime' has been reached
+         */
+        private long getTimeout() {
+            long lclEndTime;
+            synchronized (Bucket.this) {
+                lclEndTime = endTime;
+            }
+            return Math.min(lclEndTime - System.currentTimeMillis(), 1000L);
+        }
+
+        /**
+         * Return the current value of the 'data' field.
+         *
+         * @return the current value of the 'data' field
+         */
+        private byte[] getData() {
+            synchronized (Bucket.this) {
+                return data;
+            }
+        }
+
+        /*********************/
+        /* 'State' interface */
+        /*********************/
+
+        /**
+         * {@inheritDoc}
+         */
+        @Override
+        public boolean forward(Message message) {
+            // the caller of this method is synchronized on 'Bucket.this'
+            if (messages != null && Thread.currentThread() != this) {
+                // just queue the message
+                messages.add(message);
+                return true;
+            } else {
+                /*
+                 * Either:
+                 *
+                 * 1) We are in a grace period, where 'state' is still set, but
+                 *    we are no longer forwarding messages.
+                 * 2) We are calling 'message.process()' from this thread
+                 *    in the 'finally' block of 'NewOwner.run()'.
+                 *
+                 * In either case, messages should be processed locally.
+                 */
+                return false;
+            }
+        }
+
+        /**
+         * {@inheritDoc}
+         */
+        @Override
+        public void newOwner() {
+            // the caller of this method is synchronized on 'Bucket.this'
+            if (!confirmed) {
+                confirmed = true;
+                // switch the deadline from the unconfirmed timeout
+                // to the confirmed one
+                endTime += (confirmedTimeout - unconfirmedTimeout);
+            }
+        }
+
+        /**
+         * {@inheritDoc}
+         */
+        @Override
+        public void bulkSerializedData(byte[] data) {
+            // the caller of this method is synchronized on 'Bucket.this'
+            // (NOTE(review): the constructor also calls this, before the
+            // thread is started -- confirm no lock is needed there)
+            if (this.data == null) {
+                // only the first delivery is kept; later ones are ignored
+                this.data = data;
+                dataAvailable.countDown();
+            }
+        }
+
+        /**********************/
+        /* 'Thread' interface */
+        /**********************/
+
+        /**
+         * {@inheritDoc}
+         */
+        @Override
+        public void run() {
+            logger.info("{}: 'run' method invoked", this);
+            try {
+                byte[] lclData;
+                long delay;
+
+                // wait until data has arrived, the old owner is no longer
+                // active, or the deadline has passed -- each wait is capped
+                // at 1 second by 'getTimeout()'
+                while ((lclData = getData()) == null
+                        && oldOwner.isActive()
+                        && (delay = getTimeout()) > 0) {
+                    // ignore return value -- 'data' will indicate the result
+                    dataAvailable.await(delay, TimeUnit.MILLISECONDS);
+                }
+                if (lclData == null) {
+                    // no data available -- log an error, and abort
+                    if (getConfirmed()) {
+                        // we never received the data, but we are the new owner
+                        logger.error("{}: never received session data", this);
+                    } else {
+                        /*
+                         * no data received, and it was never confirmed --
+                         * assume that the forwarded message that triggered this was
+                         * erroneous (the 'finally' block below still runs)
+                         */
+                        logger.error("{}: no confirmation or data received -- aborting", this);
+                        return;
+                    }
+                } else {
+                    logger.info("{}: {} bytes of data available",
+                                this, lclData.length);
+                }
+
+                // if we reach this point, this server is the new owner
+                if (lclData == null || lclData.length == 0) {
+                    // see if any features can do the restore
+                    for (ServerPoolApi feature : ServerPoolApi.impl.getList()) {
+                        feature.restoreBucket(Bucket.this);
+                    }
+                } else {
+                    // deserialize data
+                    Object obj = Util.deserialize(lclData);
+                    restoreBucketData(obj);
+                }
+            } catch (Exception e) {
+                // NOTE(review): this also catches an 'InterruptedException'
+                // from 'await' without restoring the interrupt status
+                logger.error("Exception in {}", this, e);
+            } finally {
+                /*
+                 * cleanly leave state -- we want to make sure that messages
+                 * are processed in order, so the queue needs to remain until
+                 * it is empty
+                 */
+                logger.info("{}: entering cleanup state", this);
+                for ( ; ; ) {
+                    Message message = messages.poll();
+                    if (message == null) {
+                        // no messages left, but this could change
+                        synchronized (Bucket.this) {
+                            // re-check while holding the lock that callers of
+                            // 'forward()' hold, so no message can slip in
+                            message = messages.poll();
+                            if (message == null) {
+                                // no messages left
+                                if (state == this) {
+                                    if (owner == Server.getThisServer()) {
+                                        // we can now exit the state
+                                        state = null;
+                                        stateChanged();
+                                    } else {
+                                        /*
+                                         * We need a grace period before we can
+                                         * remove the 'state' value (this can happen
+                                         * if we receive and process the bulk data
+                                         * before receiving official confirmation
+                                         * that we are owner of the bucket).
+                                         */
+                                        messages = null;
+                                    }
+                                }
+                                break;
+                            }
+                        }
+                    }
+                    // this doesn't work -- it ends up right back in the queue
+                    // if 'messages' is defined
+                    // NOTE(review): 'forward()' returns 'false' when invoked on
+                    // this thread, so the comment above may be out of date --
+                    // confirm
+                    message.process();
+                }
+                if (messages == null) {
+                    // this indicates we need to enter a grace period before cleanup,
+                    try {
+                        logger.info("{}: entering grace period before terminating",
+                                    this);
+                        Thread.sleep(unconfirmedGracePeriod);
+                    } catch (InterruptedException e) {
+                        // we are exiting in any case
+                        Thread.currentThread().interrupt();
+                    } finally {
+                        synchronized (Bucket.this) {
+                            // Do we need to confirm that we really are the owner?
+                            // What does it mean if we are not?
+                            if (state == this) {
+                                state = null;
+                                stateChanged();
+                            }
+                        }
+                    }
+                }
+                logger.info("{}: exiting cleanup state", this);
+            }
+        }
+
+        /**
+         * Return a useful value to display in log messages.
+         *
+         * @return a useful value to display in log messages
+         */
+        public String toString() {
+            return "Bucket.NewOwner(" + index + ")";
+        }
+    }
+
+    /**
+     * Restore bucket data. The data is expected to be a 'List' whose entries
+     * implement 'Restore'; anything else is logged as an error and skipped.
+     *
+     * @param obj deserialized bucket data
+     */
+    private void restoreBucketData(Object obj) {
+        if (obj instanceof List) {
+            for (Object entry : (List<?>)obj) {
+                if (entry instanceof Restore) {
+                    // entry-specific 'restore' operation
+                    ((Restore)entry).restore(this.index);
+                } else {
+                    // NOTE(review): a 'null' entry reaches 'getClass()' here
+                    // and throws 'NullPointerException'
+                    logger.error("{}: Expected '{}' but got '{}'",
+                                 this, Restore.class.getName(),
+                                 entry.getClass().getName());
+                }
+            }
+        } else {
+            // NOTE(review): a 'null' 'obj' reaches 'getClass()' here and
+            // throws 'NullPointerException'
+            logger.error("{}: expected 'List' but got '{}'",
+                         this, obj.getClass().getName());
+        }
+    }
+
+    /* ============================================================ */
+
+    /**
+     * Each state instance is associated with a bucket, and is used when
+     * that bucket is in a transient state where it is the old owner of
+     * a bucket, and the data is being transferred to the new owner.
+     */
+    private class OldOwner extends Thread implements State {
+        // the server that the bucket data is being transferred to
+        Server newOwner;
+
+        /**
+         * Constructor - record the new owner, and start the thread that
+         * transfers the bucket data to it.
+         *
+         * @param newOwner the server that is taking over this bucket
+         */
+        OldOwner(Server newOwner) {
+            super("Old Owner for Bucket " + index);
+            this.newOwner = newOwner;
+            start();
+        }
+
+        /*********************/
+        /* 'State' interface */
+        /*********************/
+
+        /**
+         * {@inheritDoc}
+         */
+        @Override
+        public boolean forward(Message message) {
+            // forward message to new owner
+            message.sendToServer(newOwner, index);
+            return true;
+        }
+
+        /**
+         * {@inheritDoc}
+         */
+        @Override
+        public void newOwner() {
+            // shouldn't happen -- just log an error
+            logger.error("{}: 'newOwner()' shouldn't be called in this state", this);
+        }
+
+        /**
+         * {@inheritDoc}
+         */
+        @Override
+        public void bulkSerializedData(byte[] data) {
+            // shouldn't happen -- just log an error
+            logger.error("{}: 'bulkSerializedData()' shouldn't be called in this state", this);
+        }
+
+        /**********************/
+        /* 'Thread' interface */
+        /**********************/
+
+        /**
+         * Collect the restore data from every registered 'Backup', serialize
+         * it, and post it to the new owner. Whatever happens, this state is
+         * cleared from the bucket on the way out (if it is still current).
+         */
+        @Override
+        public void run() {
+            logger.info("{}: 'run' method invoked", this);
+            try {
+                // 'Backup.register' modifies 'backupList' while holding its
+                // monitor, so take a snapshot under the same lock; the
+                // 'generate' calls are then made outside of the lock
+                List<Backup> backups;
+                synchronized (backupList) {
+                    backups = new LinkedList<>(backupList);
+                }
+
+                // go through all of the entries in the list, collecting restore data
+                List<Restore> restoreData = new LinkedList<>();
+                for (Backup backup : backups) {
+                    Restore restore = backup.generate(index);
+                    if (restore != null) {
+                        restoreData.add(restore);
+                    }
+                }
+
+                // serialize all of the objects,
+                // and send what we have to the new owner
+                // ('encodeToString' does not depend on the platform charset)
+                Entity<String> entity = Entity.entity(
+                    Base64.getEncoder().encodeToString(Util.serialize(restoreData)),
+                    MediaType.APPLICATION_OCTET_STREAM_TYPE);
+                newOwner.post("bucket/sessionData", entity, new Server.PostResponse() {
+                    @Override
+                    public WebTarget webTarget(WebTarget webTarget) {
+                        return webTarget
+                               .queryParam("bucket", index)
+                               .queryParam("dest", newOwner.getUuid())
+                               .queryParam("ttl", timeToLive);
+                    }
+
+                    @Override
+                    public void response(Response response) {
+                        logger.info("/bucket/sessionData response code = {}",
+                                    response.getStatus());
+                    }
+                });
+            } catch (Exception e) {
+                logger.error("Exception in {}", this, e);
+            } finally {
+                synchronized (Bucket.this) {
+                    // restore the state
+                    if (state == this) {
+                        state = null;
+                        stateChanged();
+                    }
+                }
+            }
+        }
+
+        /**
+         * Return a useful value to display in log messages.
+         *
+         * @return a useful value to display in log messages
+         */
+        @Override
+        public String toString() {
+            return "Bucket.OldOwner(" + index + ")";
+        }
+    }
+}
diff --git a/feature-server-pool/src/main/java/org/onap/policy/drools/serverpool/Discovery.java b/feature-server-pool/src/main/java/org/onap/policy/drools/serverpool/Discovery.java
new file mode 100644
index 00000000..c507e97d
--- /dev/null
+++ b/feature-server-pool/src/main/java/org/onap/policy/drools/serverpool/Discovery.java
@@ -0,0 +1,354 @@
+/*
+ * ============LICENSE_START=======================================================
+ * feature-server-pool
+ * ================================================================================
+ * Copyright (C) 2020 AT&T Intellectual Property. All rights reserved.
+ * ================================================================================
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * ============LICENSE_END=========================================================
+ */
+
+package org.onap.policy.drools.serverpool;
+
+import static org.onap.policy.drools.serverpool.ServerPoolProperties.DEFAULT_DISCOVERY_FETCH_LIMIT;
+import static org.onap.policy.drools.serverpool.ServerPoolProperties.DEFAULT_DISCOVERY_FETCH_TIMEOUT;
+import static org.onap.policy.drools.serverpool.ServerPoolProperties.DEFAULT_DISCOVER_PUBLISHER_LOOP_CYCLE_TIME;
+import static org.onap.policy.drools.serverpool.ServerPoolProperties.DISCOVERY_ALLOW_SELF_SIGNED_CERTIFICATES;
+import static org.onap.policy.drools.serverpool.ServerPoolProperties.DISCOVERY_API_KEY;
+import static org.onap.policy.drools.serverpool.ServerPoolProperties.DISCOVERY_API_SECRET;
+import static org.onap.policy.drools.serverpool.ServerPoolProperties.DISCOVERY_FETCH_LIMIT;
+import static org.onap.policy.drools.serverpool.ServerPoolProperties.DISCOVERY_FETCH_TIMEOUT;
+import static org.onap.policy.drools.serverpool.ServerPoolProperties.DISCOVERY_HTTPS;
+import static org.onap.policy.drools.serverpool.ServerPoolProperties.DISCOVERY_PASSWORD;
+import static org.onap.policy.drools.serverpool.ServerPoolProperties.DISCOVERY_SERVERS;
+import static org.onap.policy.drools.serverpool.ServerPoolProperties.DISCOVERY_TOPIC;
+import static org.onap.policy.drools.serverpool.ServerPoolProperties.DISCOVERY_USERNAME;
+import static org.onap.policy.drools.serverpool.ServerPoolProperties.DISCOVER_PUBLISHER_LOOP_CYCLE_TIME;
+import static org.onap.policy.drools.serverpool.ServerPoolProperties.getProperty;
+
+import com.google.gson.Gson;
+import com.google.gson.JsonObject;
+
+import java.io.ByteArrayOutputStream;
+import java.io.DataOutputStream;
+import java.nio.charset.StandardCharsets;
+import java.util.Base64;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Properties;
+
+import org.onap.policy.common.endpoints.event.comm.Topic.CommInfrastructure;
+import org.onap.policy.common.endpoints.event.comm.TopicEndpointManager;
+import org.onap.policy.common.endpoints.event.comm.TopicListener;
+import org.onap.policy.common.endpoints.event.comm.TopicSink;
+import org.onap.policy.common.endpoints.event.comm.TopicSource;
+import org.onap.policy.common.endpoints.properties.PolicyEndPointProperties;
+import org.onap.policy.common.utils.coder.CoderException;
+import org.onap.policy.common.utils.coder.StandardCoder;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * This class makes use of UEB/DMAAP to discover other servers in the pool.
+ * The discovery processes ordinarily run only on the lead server, but they
+ * run on other servers up until the point that they determine who the
+ * leader is.
+ */
+public class Discovery implements TopicListener {
+    private static Logger logger = LoggerFactory.getLogger(Discovery.class);
+
+    // used for JSON <-> String conversion
+    private static StandardCoder coder = new StandardCoder();
+
+    // the single instance, created by the first call to 'startDiscovery()'
+    private static Discovery discovery = null;
+
+    // the active publisher thread, or 'null' if publishing is stopped;
+    // 'Publisher.run()' exits once this no longer refers to it
+    private volatile Publisher publisherThread = null;
+
+    private List<TopicSource> consumers = null;
+    private List<TopicSink> publishers = null;
+
+    /**
+     * Constructor - convert the 'discovery.*' properties into properties
+     * that 'TopicEndpointManager' understands, and create the topic
+     * sources and sinks from them.
+     */
+    private Discovery() {
+        // we want to modify the properties we send to 'TopicManager'
+        PropBuilder builder = new PropBuilder(ServerPoolProperties.getProperties());
+        builder.convert(DISCOVERY_SERVERS, null,
+                        PolicyEndPointProperties.PROPERTY_TOPIC_SERVERS_SUFFIX);
+        builder.convert(DISCOVERY_USERNAME, null,
+                        PolicyEndPointProperties.PROPERTY_TOPIC_AAF_MECHID_SUFFIX);
+        builder.convert(DISCOVERY_PASSWORD, null,
+                        PolicyEndPointProperties.PROPERTY_TOPIC_AAF_PASSWORD_SUFFIX);
+        builder.convert(DISCOVERY_HTTPS, null,
+                        PolicyEndPointProperties.PROPERTY_HTTP_HTTPS_SUFFIX);
+        builder.convert(DISCOVERY_API_KEY, null,
+                        PolicyEndPointProperties.PROPERTY_TOPIC_API_KEY_SUFFIX);
+        builder.convert(DISCOVERY_API_SECRET, null,
+                        PolicyEndPointProperties.PROPERTY_TOPIC_API_SECRET_SUFFIX);
+        builder.convert(DISCOVERY_FETCH_TIMEOUT,
+                        String.valueOf(DEFAULT_DISCOVERY_FETCH_TIMEOUT),
+                        PolicyEndPointProperties.PROPERTY_TOPIC_SOURCE_FETCH_TIMEOUT_SUFFIX);
+        builder.convert(DISCOVERY_FETCH_LIMIT,
+                        String.valueOf(DEFAULT_DISCOVERY_FETCH_LIMIT),
+                        PolicyEndPointProperties.PROPERTY_TOPIC_SOURCE_FETCH_LIMIT_SUFFIX);
+        builder.convert(DISCOVERY_ALLOW_SELF_SIGNED_CERTIFICATES, null,
+                        PolicyEndPointProperties.PROPERTY_ALLOW_SELF_SIGNED_CERTIFICATES_SUFFIX);
+        Properties prop = builder.finish();
+        logger.debug("Discovery converted properties: {}", prop);
+
+        consumers = TopicEndpointManager.getManager().addTopicSources(prop);
+        publishers = TopicEndpointManager.getManager().addTopicSinks(prop);
+
+        if (consumers.isEmpty()) {
+            logger.error("No consumer topics");
+        }
+        if (publishers.isEmpty()) {
+            logger.error("No publisher topics");
+        }
+        logger.debug("Discovery: {} consumers, {} publishers",
+                     consumers.size(), publishers.size());
+    }
+
+    /**
+     * Start all consumers and publishers, and start the publisher thread.
+     */
+    static synchronized void startDiscovery() {
+        if (discovery == null) {
+            discovery = new Discovery();
+        }
+        discovery.start();
+    }
+
+    /**
+     * Stop all consumers and publishers, and stop the publisher thread.
+     */
+    static synchronized void stopDiscovery() {
+        if (discovery != null) {
+            discovery.stop();
+        }
+    }
+
+    /**
+     * Start all consumers and publishers, and start the publisher thread.
+     */
+    private void start() {
+        for (TopicSource consumer : consumers) {
+            consumer.register(this);
+            consumer.start();
+        }
+        for (TopicSink publisher : publishers) {
+            publisher.start();
+        }
+        if (publisherThread == null) {
+            // send thread wasn't running -- start it
+            publisherThread = new Publisher();
+            publisherThread.start();
+        }
+    }
+
+    /**
+     * Stop all consumers and publishers, and stop the publisher thread.
+     */
+    private void stop() {
+        // the publisher thread notices this on its next loop iteration
+        publisherThread = null;
+        for (TopicSink publisher : publishers) {
+            publisher.stop();
+        }
+        for (TopicSource consumer : consumers) {
+            consumer.unregister(this);
+            consumer.stop();
+        }
+    }
+
+    /*===========================*/
+    /* 'TopicListener' interface */
+    /*===========================*/
+
+    /**
+     * {@inheritDoc}
+     */
+    @Override
+    public void onTopicEvent(CommInfrastructure infra, String topic, String event) {
+        /*
+         * a JSON message has been received -- it should contain
+         * a single string parameter 'pingData', which contains the
+         * same format base64-encoded message that 'Server'
+         * instances periodically exchange
+         */
+        try {
+            Map<?, ?> map = coder.decode(event, LinkedHashMap.class);
+            Object pingData = (map == null) ? null : map.get("pingData");
+            if (!(pingData instanceof String)) {
+                // the message comes from a shared topic, so a missing or
+                // non-string 'pingData' is reported rather than allowed
+                // to raise a NullPointerException/ClassCastException
+                logger.error("Message has no 'pingData' string parameter: {}", event);
+                return;
+            }
+            String message = (String) pingData;
+            Server.adminRequest(message.getBytes(StandardCharsets.UTF_8));
+            logger.info("Received a message, server count={}", Server.getServerCount());
+        } catch (CoderException e) {
+            // the trailing exception is logged with its stack trace,
+            // and the placeholder is filled in by the message itself
+            logger.error("Can't decode message: {}", event, e);
+        }
+    }
+
+    /* ============================================================ */
+
+    /**
+     * This class is used to convert internal 'discovery.*' properties to
+     * properties that 'TopicEndpointManager' can use.
+     */
+    private static class PropBuilder {
+        // properties being incrementally modified
+        Properties prop;
+
+        // value from 'discovery.topic' parameter
+        String topic;
+
+        // 'true' only if both 'discovery.topic' and 'discovery.servers'
+        // have been defined
+        boolean doConversion = false;
+
+        // contains "ueb.source.topics" or "dmaap.source.topics"
+        String sourceTopicsName = null;
+
+        // contains "<TYPE>.source.topics.<TOPIC>" (<TYPE> = ueb|dmaap)
+        String sourcePrefix = null;
+
+        // contains "ueb.sink.topics" or "dmaap.sink.topics"
+        String sinkTopicsName = null;
+
+        // contains "<TYPE>.sink.topics.<TOPIC>" (<TYPE> = ueb|dmaap)
+        String sinkPrefix = null;
+
+        /**
+         * Constructor - decide whether we are going to do conversion or not,
+         * and initialize accordingly.
+         *
+         * @param prop the initial list of properties (used as the defaults
+         *     of the new list, so it is not modified itself)
+         */
+        PropBuilder(Properties prop) {
+            this.prop = new Properties(prop);
+            this.topic = prop.getProperty(DISCOVERY_TOPIC);
+            String servers = prop.getProperty(DISCOVERY_SERVERS);
+            if (topic != null && servers != null) {
+                // we do have property conversion to do
+                doConversion = true;
+                // a topic name containing '.' selects "dmaap", otherwise "ueb"
+                String type = topic.contains(".") ? "dmaap" : "ueb";
+                sourceTopicsName = type + ".source.topics";
+                sourcePrefix = sourceTopicsName + "." + topic;
+                sinkTopicsName = type + ".sink.topics";
+                sinkPrefix = sinkTopicsName + "." + topic;
+            }
+        }
+
+        /**
+         * If we are doing conversion, convert an internal property
+         * to something that 'TopicEndpointManager' can use.
+         *
+         * @param intName server pool property name (e.g. "discovery.servers")
+         * @param defaultValue value to use if property 'intName' is not specified
+         *     ('null' means the property is skipped when it is not specified)
+         * @param extSuffix TopicEndpointManager suffix, including leading "."
+         */
+        void convert(String intName, String defaultValue, String extSuffix) {
+            if (doConversion) {
+                String value = prop.getProperty(intName, defaultValue);
+                if (value != null) {
+                    prop.setProperty(sourcePrefix + extSuffix, value);
+                    prop.setProperty(sinkPrefix + extSuffix, value);
+                }
+            }
+        }
+
+        /**
+         * Generate/update the '*.source.topics' and '*.sink.topics' parameters.
+         *
+         * @return the updated properties list
+         */
+        Properties finish() {
+            if (doConversion) {
+                String currentValue = prop.getProperty(sourceTopicsName);
+                if (currentValue == null) {
+                    // '*.source.topics' is not defined -- set it
+                    prop.setProperty(sourceTopicsName, topic);
+                } else {
+                    // '*.source.topics' is defined -- append to it
+                    prop.setProperty(sourceTopicsName, currentValue + "," + topic);
+                }
+                currentValue = prop.getProperty(sinkTopicsName);
+                if (currentValue == null) {
+                    // '*.sink.topics' is not defined -- set it
+                    prop.setProperty(sinkTopicsName, topic);
+                } else {
+                    // '*.sink.topics' is defined -- append to it
+                    prop.setProperty(sinkTopicsName, currentValue + "," + topic);
+                }
+            }
+            return prop;
+        }
+    }
+
+    /* ============================================================ */
+
+    /**
+     * This is the sender thread, which periodically sends out 'ping' messages.
+     */
+    private class Publisher extends Thread {
+        /**
+         * Constructor -- assign the thread name.
+         */
+        Publisher() {
+            super("Discovery Publisher Thread");
+        }
+
+        /**
+         * Publish a 'ping' message on every sink, once per cycle, for as
+         * long as this thread is the current 'publisherThread'.
+         */
+        @Override
+        public void run() {
+            // this loop will terminate once 'publisherThread' is set to 'null',
+            // or some other 'Publisher' instance replaces it
+            long cycleTime = getProperty(DISCOVER_PUBLISHER_LOOP_CYCLE_TIME,
+                                         DEFAULT_DISCOVER_PUBLISHER_LOOP_CYCLE_TIME);
+            while (this == publisherThread) {
+                try {
+                    // wait 5 seconds (default)
+                    Thread.sleep(cycleTime);
+
+                    // generate a 'ping' message
+                    ByteArrayOutputStream bos = new ByteArrayOutputStream();
+                    DataOutputStream dos = new DataOutputStream(bos);
+
+                    // write the 'ping' data for this server
+                    Server thisServer = Server.getThisServer();
+                    thisServer.writeServerData(dos);
+                    // ('encodeToString' does not depend on the platform charset)
+                    String encodedData =
+                        Base64.getEncoder().encodeToString(bos.toByteArray());
+
+                    // base64-encoded value is passed as JSON parameter 'pingData'
+                    LinkedHashMap<String, String> map = new LinkedHashMap<>();
+                    map.put("pingData", encodedData);
+                    String jsonString = new Gson().toJson(map, Map.class);
+                    for (TopicSink publisher : publishers) {
+                        publisher.send(jsonString);
+                    }
+                } catch (InterruptedException e) {
+                    logger.error("Exception in Discovery.Publisher.run():", e);
+                    // keep the interrupt visible to whoever owns this thread
+                    Thread.currentThread().interrupt();
+                    return;
+                } catch (Exception e) {
+                    logger.error("Exception in Discovery.Publisher.run():", e);
+                    // grace period -- we don't want to get UEB upset at us
+                    try {
+                        Thread.sleep(15000);
+                    } catch (InterruptedException e2) {
+                        logger.error("Discovery.Publisher sleep interrupted");
+                        Thread.currentThread().interrupt();
+                    }
+                    // NOTE(review): the thread exits here while 'publisherThread'
+                    // still refers to it, so 'start()' will not create a
+                    // replacement until 'stop()' has been called -- confirm
+                    // that this is intended
+                    return;
+                }
+            }
+        }
+    }
+}
diff --git a/feature-server-pool/src/main/java/org/onap/policy/drools/serverpool/Events.java b/feature-server-pool/src/main/java/org/onap/policy/drools/serverpool/Events.java
new file mode 100644
index 00000000..176d39ac
--- /dev/null
+++ b/feature-server-pool/src/main/java/org/onap/policy/drools/serverpool/Events.java
@@ -0,0 +1,103 @@
+/*
+ * ============LICENSE_START=======================================================
+ * feature-server-pool
+ * 
================================================================================
+ * Copyright (C) 2020 AT&T Intellectual Property. All rights reserved.
+ * ================================================================================
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * ============LICENSE_END=========================================================
+ */
+
+package org.onap.policy.drools.serverpool;
+
+import java.util.Collection;
+import java.util.Queue;
+import java.util.concurrent.ConcurrentLinkedQueue;
+
+/**
+ * Listener interface for system event notifications, such as a new
+ * 'Server' instance appearing, or a server failing. Every notification
+ * method has an empty default body, so an implementation only overrides
+ * the events it is interested in.
+ */
+public interface Events {
+    // the registered listeners, in registration order
+    Queue<Events> listeners = new ConcurrentLinkedQueue<>();
+
+    /**
+     * Add a listener to the end of the listener queue. A listener that is
+     * already in the queue is removed from its old position first.
+     *
+     * @param handler the listener
+     */
+    static void register(Events handler) {
+        // drop any earlier registration of the same listener
+        listeners.remove(handler);
+
+        // the listener now goes last
+        listeners.add(handler);
+    }
+
+    /**
+     * Remove a listener from the listener queue.
+     *
+     * @param handler the listener
+     * @return the value returned by the queue's 'remove' call
+     */
+    static boolean unregister(Events handler) {
+        boolean removed = listeners.remove(handler);
+        return removed;
+    }
+
+    /**
+     * Return the listener queue itself (not a copy).
+     *
+     * @return the queue of registered listeners
+     */
+    static Collection<Events> getListeners() {
+        return listeners;
+    }
+
+    /* ============================================================ */
+
+    /**
+     * Notification that a new server has been discovered.
+     *
+     * @param server this is the new server
+     */
+    default void newServer(Server server) {
+    }
+
+    /**
+     * Notification that a server has failed.
+     *
+     * @param server this is the server that failed
+     */
+    default void serverFailed(Server server) {
+    }
+
+    /**
+     * Notification that a new lead server has been selected.
+     *
+     * @param server this is the new lead server
+     */
+    default void newLeader(Server server) {
+    }
+
+    /**
+     * Notification that the lead server has gone down.
+     *
+     * @param server the lead server that failed
+     */
+    default void leaderFailed(Server server) {
+    }
+
+    /**
+     * Notification that a new selection just completed, but the same
+     * leader has been chosen (this may be in response to a new server
+     * joining earlier).
+     *
+     * @param server the current leader, which has been confirmed
+     */
+    default void leaderConfirmed(Server server) {
+    }
+}
diff --git a/feature-server-pool/src/main/java/org/onap/policy/drools/serverpool/ExtendedObjectInputStream.java b/feature-server-pool/src/main/java/org/onap/policy/drools/serverpool/ExtendedObjectInputStream.java
new file mode 100644
index 00000000..5ec6f341
--- /dev/null
+++ b/feature-server-pool/src/main/java/org/onap/policy/drools/serverpool/ExtendedObjectInputStream.java
@@ -0,0 +1,70 @@
+/*
+ * ============LICENSE_START=======================================================
+ * feature-server-pool
+ * ================================================================================
+ * Copyright (C) 2020 AT&T Intellectual Property. All rights reserved.
+ * ================================================================================
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * ============LICENSE_END=========================================================
+ */
+
+package org.onap.policy.drools.serverpool;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.ObjectInputStream;
+import java.io.ObjectStreamClass;
+
+/**
+ * This class provides an 'ObjectInputStream' variant that uses the
+ * specified 'ClassLoader' instance.
+ */
+public class ExtendedObjectInputStream extends ObjectInputStream {
+    // the 'ClassLoader' to use when doing class lookups
+    private ClassLoader classLoader;
+
+    /**
+     * Constructor -- invoke the superclass, and save the 'ClassLoader'.
+     *
+     * @param in input stream to read from
+     * @param classLoader 'ClassLoader' to use when doing class lookups
+     *     (if 'null', the standard 'ObjectInputStream' lookup is used)
+     * @throws IOException if the superclass constructor fails to read
+     *     the stream header
+     */
+    public ExtendedObjectInputStream(InputStream in, ClassLoader classLoader) throws IOException {
+        super(in);
+        this.classLoader = classLoader;
+    }
+
+    /**
+     * Resolve a class by trying the parent of the configured class loader
+     * first, and then the configured class loader itself.
+     *
+     * @param desc the class descriptor read from the stream
+     * @return the class corresponding to 'desc'
+     * @throws IOException if the fallback to the superclass fails
+     * @throws ClassNotFoundException if neither class loader can supply
+     *     the class, and it is not a primitive type
+     */
+    @Override
+    protected Class<?> resolveClass(ObjectStreamClass desc) throws IOException, ClassNotFoundException {
+
+        // Standard ClassLoader implementations first attempt to load classes
+        // via the parent class loader, and then attempt to load it using the
+        // current class loader if that fails. For some reason, Drools container
+        // class loaders define a different order -- in theory, this is only a
+        // problem if different versions of the same class are accessible through
+        // different class loaders, which is exactly what happens in some Junit
+        // tests.
+        //
+        // This change restores the order, at least when deserializing objects
+        // into a Drools container.
+        if (classLoader == null) {
+            // no class loader was supplied -- use the standard lookup
+            return super.resolveClass(desc);
+        }
+
+        // 'Class.forName' is used rather than 'ClassLoader.loadClass',
+        // because 'loadClass' can't resolve array class names such as
+        // "[Ljava.lang.String;"
+        String name = desc.getName();
+        try {
+            // try the parent class loader first (a 'null' parent
+            // refers to the bootstrap class loader)
+            return Class.forName(name, false, classLoader.getParent());
+        } catch (ClassNotFoundException e) {
+            // not visible through the parent -- try the class loader itself
+        }
+        try {
+            return Class.forName(name, false, classLoader);
+        } catch (ClassNotFoundException e) {
+            // primitive types ("int", "void", ...) aren't available from
+            // any class loader
+            Class<?> primitive = primitiveClass(name);
+            if (primitive != null) {
+                return primitive;
+            }
+            throw e;
+        }
+    }
+
+    /**
+     * Map the name of a primitive type (or 'void') to its 'Class'.
+     *
+     * @param name the class name from the stream
+     * @return the matching primitive 'Class', or 'null' if 'name' isn't
+     *     the name of a primitive type
+     */
+    private static Class<?> primitiveClass(String name) {
+        switch (name) {
+            case "boolean":
+                return boolean.class;
+            case "byte":
+                return byte.class;
+            case "char":
+                return char.class;
+            case "short":
+                return short.class;
+            case "int":
+                return int.class;
+            case "long":
+                return long.class;
+            case "float":
+                return float.class;
+            case "double":
+                return double.class;
+            case "void":
+                return void.class;
+            default:
+                return null;
+        }
+    }
+}
diff --git a/feature-server-pool/src/main/java/org/onap/policy/drools/serverpool/FeatureServerPool.java b/feature-server-pool/src/main/java/org/onap/policy/drools/serverpool/FeatureServerPool.java
new file mode 100644
index 00000000..748a38f3
--- /dev/null
+++ b/feature-server-pool/src/main/java/org/onap/policy/drools/serverpool/FeatureServerPool.java
@@ -0,0 +1,986 @@
+/*
+ * ============LICENSE_START=======================================================
+ * feature-server-pool
+ * ================================================================================
+ * Copyright (C) 2020 AT&T Intellectual Property. All rights reserved.
+ * ================================================================================
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * ============LICENSE_END========================================================= + */ + +package org.onap.policy.drools.serverpool; + +import static org.onap.policy.drools.serverpool.ServerPoolProperties.BUCKET_DROOLS_TIMEOUT; +import static org.onap.policy.drools.serverpool.ServerPoolProperties.BUCKET_TIME_TO_LIVE; +import static org.onap.policy.drools.serverpool.ServerPoolProperties.DEFAULT_BUCKET_DROOLS_TIMEOUT; +import static org.onap.policy.drools.serverpool.ServerPoolProperties.DEFAULT_BUCKET_TIME_TO_LIVE; +import static org.onap.policy.drools.serverpool.ServerPoolProperties.getProperty; + +import com.google.gson.JsonElement; +import com.google.gson.JsonObject; + +import java.io.ByteArrayInputStream; +import java.io.IOException; +import java.io.Serializable; +import java.nio.charset.StandardCharsets; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Base64; +import java.util.Enumeration; +import java.util.HashMap; +import java.util.LinkedList; +import java.util.List; +import java.util.Properties; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.TimeoutException; + +import javax.ws.rs.client.Entity; +import javax.ws.rs.client.WebTarget; +import javax.ws.rs.core.MediaType; +import javax.ws.rs.core.Response; + +import lombok.AllArgsConstructor; + +import org.drools.core.definitions.InternalKnowledgePackage; +import org.drools.core.impl.KnowledgeBaseImpl; +import org.kie.api.runtime.KieSession; +import org.kie.api.runtime.rule.FactHandle; +import org.onap.policy.common.endpoints.event.comm.Topic.CommInfrastructure; +import org.onap.policy.common.endpoints.event.comm.TopicListener; +import org.onap.policy.common.utils.coder.CoderException; +import org.onap.policy.common.utils.coder.StandardCoder; +import org.onap.policy.common.utils.coder.StandardCoderObject; +import org.onap.policy.drools.control.api.DroolsPdpStateControlApi; 
+import org.onap.policy.drools.core.DroolsRunnable; +import org.onap.policy.drools.core.PolicyContainer; +import org.onap.policy.drools.core.PolicySession; +import org.onap.policy.drools.core.PolicySessionFeatureApi; +import org.onap.policy.drools.core.lock.PolicyResourceLockManager; +import org.onap.policy.drools.features.PolicyControllerFeatureApi; +import org.onap.policy.drools.features.PolicyEngineFeatureApi; +import org.onap.policy.drools.system.PolicyController; +import org.onap.policy.drools.system.PolicyControllerConstants; +import org.onap.policy.drools.system.PolicyEngine; +import org.onap.policy.drools.system.PolicyEngineConstants; +import org.onap.policy.drools.utils.Pair; +import org.onap.policy.drools.utils.PropertyUtil; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * </p> + * This class hooks the server pool implementation into DroolsPDP. + * <dl> + * <dt>PolicyEngineFeatureApi</dt><dd> - the <i>afterStart</i> hook is where we initialize.</dd> + * <dt>PolicyControllerFeatureApi</dt><dd> - the <i>beforeOffer</i> hook is used to look + * at incoming topic messages, and decide whether to process them + * on this host, or forward to another host.</dd> + * </dl> + */ +public class FeatureServerPool + implements PolicyEngineFeatureApi, PolicySessionFeatureApi, + PolicyControllerFeatureApi, DroolsPdpStateControlApi { + private static Logger logger = + LoggerFactory.getLogger(FeatureServerPool.class); + + // used for JSON <-> String conversion + private static StandardCoder coder = new StandardCoder(); + + private static final String configFile = + "config/feature-server-pool.properties"; + + /* + * Properties used when searching for keyword entries + * + * The following types are supported: + * + * 1) keyword.<topic>.path=<field-list> + * 2) keyword.path=<field-list> + * 3) ueb.source.topics.<topic>.keyword=<field-list> + * 4) ueb.source.topics.keyword=<field-list> + * 5) dmaap.source.topics.<topic>.keyword=<field-list> + * 6) 
dmaap.source.topics.keyword=<field-list> + * + * 1, 3, and 5 are functionally equivalent + * 2, 4, and 6 are functionally equivalent + */ + + static final String KEYWORD_PROPERTY_START_1 = "keyword."; + static final String KEYWORD_PROPERTY_END_1 = ".path"; + static final String KEYWORD_PROPERTY_START_2 = "ueb.source.topics."; + static final String KEYWORD_PROPERTY_END_2 = ".keyword"; + static final String KEYWORD_PROPERTY_START_3 = "dmaap.source.topics."; + static final String KEYWORD_PROPERTY_END_3 = ".keyword"; + + /* + * maps topic names to a keyword table derived from <field-list> (above) + * + * Example <field-list>: requestID,CommonHeader.RequestID + * + * Table generated from this example has length 2: + * table[0] = {"requestID"} + * table[1] = {"CommonHeader", "RequestID"} + */ + private static HashMap<String,String[][]> topicToPaths = new HashMap<>(); + + // this table is used for any topics that aren't in 'topicToPaths' + private static String[][] defaultPaths = new String[0][]; + + // extracted from properties + private static long droolsTimeoutMillis; + private static String timeToLiveSecond; + + /******************************/ + /* 'OrderedService' interface */ + /******************************/ + + /** + * {@inheritDoc} + */ + @Override + public int getSequenceNumber() { + // we need to make sure we have an early position in 'selectThreadModel' + // (in case there is feature that provides a thread model) + return -1000000; + } + + /**************************************/ + /* 'PolicyEngineFeatureApi' interface */ + /**************************************/ + + /** + * {@inheritDoc} + */ + @Override + public boolean afterStart(PolicyEngine engine) { + logger.info("Starting FeatureServerPool"); + Server.startup(configFile); + TargetLock.startup(); + droolsTimeoutMillis = + getProperty(BUCKET_DROOLS_TIMEOUT, DEFAULT_BUCKET_DROOLS_TIMEOUT); + int intTimeToLive = + getProperty(BUCKET_TIME_TO_LIVE, DEFAULT_BUCKET_TIME_TO_LIVE); + timeToLiveSecond = 
String.valueOf(intTimeToLive); + buildKeywordTable(); + Bucket.Backup.register(new DroolsSessionBackup()); + Bucket.Backup.register(new TargetLock.LockBackup()); + return false; + } + + /** + * {@inheritDoc} + */ + @Override + public PolicyResourceLockManager beforeCreateLockManager( + PolicyEngine engine, Properties properties) { + + return TargetLock.getLockFactory(); + } + + /*=====================================*/ + /* 'PolicySessionFeatureApi' interface */ + /*=====================================*/ + + /** + * {@inheritDoc} + */ + @Override + public boolean insertDrools( + final PolicySession session, final Object object) { + + final String keyword = Keyword.lookupKeyword(object); + if (keyword == null) { + // no keyword was found, so we process locally + KieSession kieSession = session.getKieSession(); + if (kieSession != null) { + kieSession.insert(object); + } + return true; + } + + /* + * 'keyword' determines the destination host, + * which may be local or remote + */ + Bucket.forwardAndProcess(keyword, new Bucket.Message() { + @Override + public void process() { + // if we reach this point, we process locally + KieSession kieSession = session.getKieSession(); + if (kieSession != null) { + kieSession.insert(object); + } + } + + @Override + public void sendToServer(Server server, int bucketNumber) { + // this object needs to sent to a remote host -- + // first, serialize the object + byte[] data = null; + try { + data = Util.serialize(object); + } catch (IOException e) { + logger.error("insertDrools: can't serialize object of {}", + object.getClass(), e); + return; + } + + // construct the message to insert remotely + Entity<String> entity = Entity.entity( + new String(Base64.getEncoder().encode(data), StandardCharsets.UTF_8), + MediaType.APPLICATION_OCTET_STREAM_TYPE); + server.post("session/insertDrools", entity, + new Server.PostResponse() { + @Override + public WebTarget webTarget(WebTarget webTarget) { + PolicyContainer pc = 
session.getPolicyContainer(); + String encodedSessionName = + pc.getGroupId() + ":" + pc.getArtifactId() + ":" + + session.getName(); + + return webTarget + .queryParam("keyword", keyword) + .queryParam("session", encodedSessionName) + .queryParam("bucket", bucketNumber) + .queryParam("ttl", timeToLiveSecond); + } + + @Override + public void response(Response response) { + logger.info("/session/insertDrools response code = {}", + response.getStatus()); + } + }); + } + }); + return true; + } + + /******************************************/ + /* 'PolicyControllerFeatureApi' interface */ + /******************************************/ + + /** + * This method is called from 'AggregatedPolicyController.onTopicEvent', + * and provides a way to intercept the message before it is decoded and + * delivered to a local Drools session. + * + * @param controller the PolicyController instance receiving the message + * @param protocol communication infrastructure type + * @param topic topic name + * @param event event message as a string + * @return 'false' if this event should be processed locally, or 'true' + * if the message has been forwarded to a remote host, so local + * processing should be bypassed + */ + @Override + public boolean beforeOffer(final PolicyController controller, + final CommInfrastructure protocol, + final String topic, + final String event) { + // choose the table, based upon the topic + String[][] table = topicToPaths.getOrDefault(topic, defaultPaths); + + // build a JSON object from the event + StandardCoderObject sco; + + try { + sco = coder.decode(event, StandardCoderObject.class); + } catch (CoderException e) { + return false; + } + String keyword = null; + + for (String[] path : table) { + /* + * Each entry in 'table' is a String[] containing an encoding + * of a possible keyword field. 
Suppose the value is 'a.b.c.d.e' -- + * 'path' would be encoded as 'String[] {"a", "b", "c", "d", "e"}' + */ + String fieldName = path[path.length - 1]; + String conversionFunctionName = null; + int index = fieldName.indexOf(':'); + + if (index > 0) { + conversionFunctionName = fieldName.substring(index + 1); + fieldName = fieldName.substring(0, index); + path = Arrays.copyOf(path, path.length); + path[path.length - 1] = fieldName; + } + keyword = sco.getString(path); + } + + if (keyword == null) { + // couldn't find any keywords -- just process this message locally + logger.warn("Can't locate bucket keyword within message"); + return false; + } + + /* + * build a message object implementing the 'Bucket.Message' interface -- + * it will be processed locally, forwarded, or queued based upon the + * current state. + */ + TopicMessage message = + new TopicMessage(keyword, controller, protocol, topic, event); + int bucketNumber = Bucket.bucketNumber(keyword); + if (Bucket.forward(bucketNumber, message)) { + // message was queued or forwarded -- abort local processing + return true; + } + + /* + * the bucket happens to be assigned to this server, and wasn't queued -- + * return 'false', so it will be processed locally + */ + logger.info("Keyword={}, bucket={} -- owned by this server", + keyword, bucketNumber); + return false; + } + + /** + * Incoming topic message has been forwarded from a remote host. 
+ * + * @param bucketNumber the bucket number calculated on the remote host + * @param keyword the keyword associated with the message + * @param controllerName the controller the message was directed to + * on the remote host + * @param protocol String value of the 'Topic.CommInfrastructure' value + * (UEB, DMAAP, NOOP, or REST -- NOOP and REST shouldn't be used + * here) + * @param topic the UEB/DMAAP topic name + * @param event this is the JSON message + */ + static void topicMessage( + int bucketNumber, String keyword, String controllerName, + String protocol, String topic, String event) { + + // @formatter:off + logger.info("Incoming topic message: Keyword={}, bucket={}\n" + + " controller = {}\n" + + " topic = {}", + keyword, bucketNumber, controllerName, topic); + // @formatter:on + + // locate the 'PolicyController' + PolicyController controller = PolicyControllerConstants.getFactory().get(controllerName); + if (controller == null) { + /* + * This controller existed on the sender's host, but doesn't exist + * on the destination. This is a problem -- we are counting on all + * hosts being configured with the same controllers. + */ + logger.error("Can't locate controller '{}' for incoming topic message", + controllerName); + } else if (controller instanceof TopicListener) { + /* + * This is the destination host -- repeat the 'onTopicEvent' + * method (the one that invoked 'beforeOffer' on the originating host). + * Note that this message could be forwarded again if the sender's + * bucket table was somehow different from ours -- perhaps there was + * an update in progress. + * + * TBD: it would be nice to limit the number of hops, in case we + * somehow have a loop. 
+ */ + ((TopicListener)controller).onTopicEvent( + CommInfrastructure.valueOf(protocol), topic, event); + } else { + /* + * This 'PolicyController' was also a 'TopicListener' on the sender's + * host -- it isn't on this host, and we are counting on them being + * config + */ + logger.error("Controller {} is not a TopicListener", controllerName); + } + } + + /** + * An incoming '/session/insertDrools' message was received. + * + * @param keyword the keyword associated with the incoming object + * @param sessionName encoded session name(groupId:artifactId:droolsSession) + * @param bucket the bucket associated with keyword + * @param ttl similar to IP time-to-live -- it controls the number of hops + * the message may take + * @param data base64-encoded serialized data for the object + */ + static void incomingInsertDrools( + String keyword, String sessionName, int bucket, int ttl, byte[] data) { + + logger.info("Incoming insertDrools: keyword={}, session={}, bucket={}, ttl={}", + keyword, sessionName, bucket, ttl); + + if (Bucket.isKeyOnThisServer(keyword)) { + // process locally + + // [0]="<groupId>" [1]="<artifactId>", [2]="<sessionName>" + String[] nameSegments = sessionName.split(":"); + + // locate the 'PolicyContainer' and 'PolicySession' + PolicySession policySession = locatePolicySession(nameSegments); + + if (policySession == null) { + logger.error("incomingInsertDrools: Can't find PolicySession={}", + sessionName); + } else { + KieSession kieSession = policySession.getKieSession(); + if (kieSession != null) { + try { + // deserialization needs to use the correct class loader + Object obj = Util.deserialize( + Base64.getDecoder().decode(data), + policySession.getPolicyContainer().getClassLoader()); + kieSession.insert(obj); + } catch (IOException | ClassNotFoundException + | IllegalArgumentException e) { + logger.error("incomingInsertDrools: failed to read data " + + "for session '{}'", sessionName, e); + } + } + } + } else if ((ttl -= 1) > 0) { + /* + * 
This host is not the intended destination -- this could happen + * if it was sent from another site. Forward the message in the + * same thread. + */ + forwardInsertDroolsMessage(bucket, keyword, sessionName, ttl, data); + } + } + + /** + * step through all 'PolicyContainer' instances looking + * for a matching 'artifactId' & 'groupId'. + * @param nameSegments name portion from sessionName + * @return policySession match artifactId and groupId + */ + private static PolicySession locatePolicySession(String[] nameSegments) { + PolicySession policySession = null; + if (nameSegments.length == 3) { + for (PolicyContainer pc : PolicyContainer.getPolicyContainers()) { + if (nameSegments[1].equals(pc.getArtifactId()) + && nameSegments[0].equals(pc.getGroupId())) { + policySession = pc.getPolicySession(nameSegments[2]); + break; + } + } + } + return policySession; + } + + /** + * Forward the insertDrools message in the same thread. + */ + private static void forwardInsertDroolsMessage(int bucket, String keyword, + String sessionName, int ttl, byte[] data) { + Server server = Bucket.bucketToServer(bucket); + WebTarget webTarget = server.getWebTarget("session/insertDrools"); + if (webTarget != null) { + logger.info("Forwarding 'session/insertDrools' " + + "(key={},session={},bucket={},ttl={})", + keyword, sessionName, bucket, ttl); + Entity<String> entity = + Entity.entity(new String(data, StandardCharsets.UTF_8), + MediaType.APPLICATION_OCTET_STREAM_TYPE); + webTarget + .queryParam("keyword", keyword) + .queryParam("session", sessionName) + .queryParam("bucket", bucket) + .queryParam("ttl", ttl) + .request().post(entity); + } + } + + /** + * This method builds the table that is used to locate the appropriate + * keywords within incoming JSON messages (e.g. 'requestID'). The + * associated values are then mapped into bucket numbers. 
+ */ + private static void buildKeywordTable() { + Properties prop = ServerPoolProperties.getProperties(); + + // iterate over all of the properties, picking out those we are + // interested in + for (String name : prop.stringPropertyNames()) { + String topic = null; + String begin; + String end; + + if (name.startsWith(KEYWORD_PROPERTY_START_1) + && name.endsWith(KEYWORD_PROPERTY_END_1)) { + // 1) keyword.<topic>.path=<field-list> + // 2) keyword.path=<field-list> + begin = KEYWORD_PROPERTY_START_1; + end = KEYWORD_PROPERTY_END_1; + } else if (name.startsWith(KEYWORD_PROPERTY_START_2) + && name.endsWith(KEYWORD_PROPERTY_END_2)) { + // 3) ueb.source.topics.<topic>.keyword=<field-list> + // 4) ueb.source.topics.keyword=<field-list> + begin = KEYWORD_PROPERTY_START_2; + end = KEYWORD_PROPERTY_END_2; + } else if (name.startsWith(KEYWORD_PROPERTY_START_3) + && name.endsWith(KEYWORD_PROPERTY_END_3)) { + // 5) dmaap.source.topics.<topic>.keyword=<field-list> + // 6) dmaap.source.topics.keyword=<field-list> + begin = KEYWORD_PROPERTY_START_3; + end = KEYWORD_PROPERTY_END_3; + } else { + // we aren't interested in this property + continue; + } + + int beginIndex = begin.length(); + int endIndex = name.length() - end.length(); + if (beginIndex < endIndex) { + // <topic> is specified, so this table is limited to this + // specific topic + topic = name.substring(beginIndex, endIndex); + } + + // now, process the value + // Example: requestID,CommonHeader.RequestID + String[] commaSeparatedEntries = prop.getProperty(name).split(","); + String[][] paths = new String[commaSeparatedEntries.length][]; + for (int i = 0 ; i < commaSeparatedEntries.length ; i += 1) { + paths[i] = commaSeparatedEntries[i].split("\\."); + } + + if (topic == null) { + // these paths are used for any topics not explicitly + // in the 'topicToPaths' table + defaultPaths = paths; + } else { + // these paths are specific to 'topic' + topicToPaths.put(topic, paths); + } + } + } + + 
/*======================================*/ + /* 'DroolsPdpStateControlApi' interface */ + /*======================================*/ + + /* + * Stop the processing of messages and server pool participation(non-Javadoc) + * Note: This is not static because it should only be used if feature-server-pool + * has been enabled. + * (non-Javadoc) + * @see org.onap.policy.drools.control.api.DroolsPdpStateControlApi#shutdown() + */ + @Override + public void shutdown() { + PolicyEngineConstants.getManager().deactivate(); + Server.shutdown(); + } + + /* + * Stop the processing of messages and server pool participation(non-Javadoc) + * Note: This is not static because it should only be used if feature-server-pool + * has been enabled. + * (non-Javadoc) + * @see org.onap.policy.drools.control.api.DroolsPdpStateControlApi#restart() + */ + @Override + public void restart() { + MainLoop.startThread(); + Discovery.startDiscovery(); + PolicyEngineConstants.getManager().activate(); + } + + /* ============================================================ */ + + /** + * This class implements the 'Bucket.Message' interface for UEB/DMAAP + * messages. + */ + @AllArgsConstructor + private static class TopicMessage implements Bucket.Message { + /* + * the keyword associated with this message + * (which determines the bucket number). + */ + private final String keyword; + + // the controller receiving this message + private final PolicyController controller; + + // enumeration: UEB or DMAAP + private final CommInfrastructure protocol; + + // UEB/DMAAP topic + private final String topic; + + // JSON message as a String + private final String event; + + /** + * Process this message locally using 'TopicListener.onTopicEvent' + * (the 'PolicyController' instance is assumed to implement + * the 'TopicListener' interface as well). 
+ */ + @Override + public void process() { + if (controller instanceof TopicListener) { + /* + * This is the destination host -- repeat the 'onTopicEvent' method + * (the one that invoked 'beforeOffer' on the originating host). + * Note that this message could be forwarded again if the sender's + * bucket table was somehow different from ours -- perhaps there was + * an update in progress. + * + * TBD: it would be nice to limit the number of hops, in case we + * somehow have a loop. + */ + ((TopicListener)controller).onTopicEvent(protocol, topic, event); + } else { + /* + * This 'PolicyController' was also a 'TopicListener' on the sender's + * host -- it isn't on this host, and we are counting on them being + * configured the same way. + */ + logger.error("Controller {} is not a TopicListener", + controller.getName()); + } + } + + /** + * Send this message to a remote server for processing (presumably, it + * is the destination host). + * + * @param server the Server instance to send the message to + * @param bucketNumber the bucket number to send it to + */ + @Override + public void sendToServer(Server server, int bucketNumber) { + // if we reach this point, we have determined the remote server + // that should process this message + + // @formatter:off + logger.info("Outgoing topic message: Keyword={}, bucket={}\n" + + " controller = {}" + + " topic = {}" + + " sender = {}" + + " receiver = {}", + keyword, bucketNumber, controller.getName(), topic, + Server.getThisServer().getUuid(), server.getUuid()); + // @formatter:on + + Entity<String> entity = Entity.entity(event, MediaType.APPLICATION_JSON); + server.post("bucket/topic", entity, new Server.PostResponse() { + @Override + public WebTarget webTarget(WebTarget webTarget) { + return webTarget + .queryParam("bucket", bucketNumber) + .queryParam("keyword", keyword) + .queryParam("controller", controller.getName()) + .queryParam("protocol", protocol.toString()) + .queryParam("topic", topic); + } + + @Override + 
public void response(Response response) { + // TODO: eventually, we will want to do something different + // based upon success/failure + } + }); + } + } + + /* ============================================================ */ + + /** + * Backup data associated with a Drools session. + */ + static class DroolsSessionBackup implements Bucket.Backup { + /** + * {@inheritDoc} + */ + @Override + public Bucket.Restore generate(int bucketNumber) { + // Go through all of the Drools sessions, and generate backup data. + // If there is no data to backup for this bucket, return 'null' + + DroolsSessionRestore restore = new DroolsSessionRestore(); + return restore.backup(bucketNumber) ? restore : null; + } + } + + /* ============================================================ */ + + /** + * This class is used to generate and restore backup Drools data. + */ + static class DroolsSessionRestore implements Bucket.Restore, Serializable { + // backup data for all Drools sessions on this host + private final List<SingleSession> sessions = new LinkedList<>(); + + /** + * {@inheritDoc} + */ + boolean backup(int bucketNumber) { + /* + * There may be multiple Drools sessions being backed up at the same + * time. There is one 'Pair' in the list for each session being + * backed up. 
+ */ + LinkedList<Pair<CompletableFuture<List<Object>>, PolicySession>> + pendingData = new LinkedList<>(); + for (PolicyContainer pc : PolicyContainer.getPolicyContainers()) { + for (PolicySession session : pc.getPolicySessions()) { + // Wraps list of objects, to be populated in the session + final CompletableFuture<List<Object>> droolsObjectsWrapper = + new CompletableFuture<>(); + + // 'KieSessionObject' + final KieSession kieSession = session.getKieSession(); + + logger.info("{}: about to fetch data for session {}", + this, session.getFullName()); + kieSession.insert(new DroolsRunnable() { + @Override + public void run() { + List<Object> droolsObjects = new ArrayList<>(); + for (FactHandle fh : kieSession.getFactHandles()) { + Object obj = kieSession.getObject(fh); + String keyword = Keyword.lookupKeyword(obj); + if (keyword != null + && Bucket.bucketNumber(keyword) == bucketNumber) { + // bucket matches -- include this object + droolsObjects.add(obj); + /* + * delete this factHandle from Drools memory + * this classes are used in bucket migration, + * so the delete is intentional. + */ + kieSession.delete(fh); + } + } + + // send notification that object list is complete + droolsObjectsWrapper.complete(droolsObjects); + } + }); + + // add pending operation to the list + pendingData.add(new Pair<>(droolsObjectsWrapper, session)); + } + } + + /** + * data copying can start as soon as we receive results + * from pending sessions (there may not be any) + */ + copyDataFromSession(pendingData); + return !sessions.isEmpty(); + } + + /** + * Copy data from pending sessions. 
+ * @param pendingData a list of policy sessions + */ + private void copyDataFromSession(List<Pair<CompletableFuture<List<Object>>, PolicySession>> + pendingData) { + long endTime = System.currentTimeMillis() + droolsTimeoutMillis; + + for (Pair<CompletableFuture<List<Object>>, PolicySession> pair : + pendingData) { + PolicySession session = pair.second(); + long delay = endTime - System.currentTimeMillis(); + if (delay < 0) { + /** + * we have already reached the time limit, so we will + * only process data that has already been received + */ + delay = 0; + } + try { + List<Object> droolsObjects = + pair.first().get(delay, TimeUnit.MILLISECONDS); + + // if we reach this point, session data read has completed + logger.info("{}: session={}, got {} object(s)", + this, session.getFullName(), + droolsObjects.size()); + if (!droolsObjects.isEmpty()) { + sessions.add(new SingleSession(session, droolsObjects)); + } + } catch (TimeoutException e) { + logger.error("{}: Timeout waiting for data from session {}", + this, session.getFullName()); + } catch (Exception e) { + logger.error("{}: Exception writing output data", this, e); + } + } + } + + /** + * {@inheritDoc} + */ + @Override + public void restore(int bucketNumber) { + /* + * There may be multiple Drools sessions being restored at the same + * time. There is one entry in 'sessionLatches' for each session + * being restored. 
+ */ + LinkedList<CountDownLatch> sessionLatches = new LinkedList<>(); + for (SingleSession session : sessions) { + try { + CountDownLatch sessionLatch = session.restore(); + if (sessionLatch != null) { + // there is a restore in progress -- add it to the list + sessionLatches.add(sessionLatch); + } + } catch (IOException | ClassNotFoundException e) { + logger.error("Exception in {}", this, e); + } + } + + // wait for all sessions to be updated + try { + for (CountDownLatch sessionLatch : sessionLatches) { + if (!sessionLatch.await(droolsTimeoutMillis, TimeUnit.MILLISECONDS)) { + logger.error("{}: timed out waiting for session latch", this); + } + } + } catch (InterruptedException e) { + logger.error("Exception in {}", this, e); + } + } + } + + /* ============================================================ */ + + /** + * Each instance of this class corresponds to a Drools session that has + * been backed up, or is being restored. + */ + static class SingleSession implements Serializable { + // the group id associated with the Drools container + String groupId; + + // the artifact id associated with the Drools container + String artifactId; + + // the session name within the Drools container + String sessionName; + + // serialized data associated with this session (and bucket) + byte[] data; + + /** + * Constructor - initialize the 'SingleSession' instance, so it can + * be serialized. 
+ * + * @param session the Drools session being backed up + * @param droolsObjects the Drools objects from this session associated + * with the bucket currently being backed up + */ + SingleSession(PolicySession session, List<Object> droolsObjects) throws IOException { + // 'groupId' and 'artifactId' are set from the 'PolicyContainer' + PolicyContainer pc = session.getPolicyContainer(); + groupId = pc.getGroupId(); + artifactId = pc.getArtifactId(); + + // 'sessionName' is set from the 'PolicySession' + sessionName = session.getName(); + + /* + * serialize the Drools objects -- we serialize them here, because they + * need to be deserialized within the scope of the Drools session + */ + data = Util.serialize(droolsObjects); + } + + CountDownLatch restore() throws IOException, ClassNotFoundException { + PolicySession session = null; + + // locate the 'PolicyContainer', and 'PolicySession' + for (PolicyContainer pc : PolicyContainer.getPolicyContainers()) { + if (artifactId.equals(pc.getArtifactId()) + && groupId.equals(pc.getGroupId())) { + session = pc.getPolicySession(sessionName); + return insertSessionData(session, new ByteArrayInputStream(data)); + } + } + logger.error("{}: unable to locate session name {}", this, sessionName); + return null; + } + + /** + * Deserialize session data, and insert the objects into the session + * from within the Drools session thread. 
+ * + * @param session the associated PolicySession instance + * @param bis the data to be deserialized + * @return a CountDownLatch, which will indicate when the operation has + * completed (null in case of failure) + * @throws IOException IO errors while creating or reading from + * the object stream + * @throws ClassNotFoundException class not found during deserialization + */ + private CountDownLatch insertSessionData(PolicySession session, ByteArrayInputStream bis) + throws IOException, ClassNotFoundException { + ClassLoader classLoader = session.getPolicyContainer().getClassLoader(); + ExtendedObjectInputStream ois = + new ExtendedObjectInputStream(bis, classLoader); + + /* + * associate the current thread with the session, + * and deserialize + */ + session.setPolicySession(); + Object obj = ois.readObject(); + + if (obj instanceof List) { + final List<?> droolsObjects = (List<?>)obj; + logger.info("{}: session={}, got {} object(s)", + this, session.getFullName(), droolsObjects.size()); + + // signal when session update is complete + final CountDownLatch sessionLatch = new CountDownLatch(1); + + // 'KieSession' object + final KieSession kieSession = session.getKieSession(); + + // run the following within the Drools session thread + kieSession.insert(new DroolsRunnable() { + @Override + public void run() { + try { + /* + * Insert all of the objects -- note that this is running + * in the session thread, so no other rules can fire + * until all of the objects are inserted. 
+ */ + for (Object obj : droolsObjects) { + kieSession.insert(obj); + } + } finally { + // send notification that the inserts have completed + sessionLatch.countDown(); + } + } + }); + return sessionLatch; + } else { + logger.error("{}: Invalid session data for session={}, type={}", + this, session.getFullName(), obj.getClass().getName()); + } + return null; + } + } +} diff --git a/feature-server-pool/src/main/java/org/onap/policy/drools/serverpool/Keyword.java b/feature-server-pool/src/main/java/org/onap/policy/drools/serverpool/Keyword.java new file mode 100644 index 00000000..6c88ebd0 --- /dev/null +++ b/feature-server-pool/src/main/java/org/onap/policy/drools/serverpool/Keyword.java @@ -0,0 +1,507 @@ +/* + * ============LICENSE_START======================================================= + * feature-server-pool + * ================================================================================ + * Copyright (C) 2020 AT&T Intellectual Property. All rights reserved. + * ================================================================================ + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * ============LICENSE_END========================================================= + */ + +package org.onap.policy.drools.serverpool; + +import java.lang.reflect.Field; +import java.lang.reflect.Method; +import java.util.HashMap; +import java.util.Map; +import java.util.Properties; +import java.util.concurrent.ConcurrentHashMap; +import java.util.function.Function; + +import lombok.AllArgsConstructor; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * This class supports the lookup of keywords from objects within Drools + * sessions. It maps the class of the object into an object implementing + * the 'Keyword.Lookup' interface. At present, this requires writing + * special code for each class that can exist in a Drools session that is + * assignable and relocatable via a bucket. In theory, it would be possible + * to populate this table through properties, which would use the reflective + * interface, and indicate the methods and fields to use to do this lookup. + */ +public class Keyword { + private static Logger logger = LoggerFactory.getLogger(Keyword.class); + + // this table can be used to map an object class into the method + // to invoke to do the lookup + private static ConcurrentHashMap<Class, Lookup> classToLookup = + new ConcurrentHashMap<>(); + + // this is a pre-defined 'Lookup' instance that always returns 'null' + private static Lookup nullLookup = new Lookup() { + @Override + public String getKeyword(Object obj) { + return null; + } + }; + + /** + * This method takes the object's class, looks it up in the 'classToLookup' + * table, and then performs the lookup to get the keyword. When a direct + * lookup on a class fails, it will attempt to find a match using inheritance + * rules -- if an appropriate match is found, the 'classToLookup' table is + * updated, so it will be easier next time. If no match is found, the table + * is also updated, but the 'value' will be 'nullLookup'. 
+ * + * @param obj object to to the lookup on + * @return a String keyword, if found; 'null' if not + */ + public static String lookupKeyword(Object obj) { + Lookup lu = classToLookup.get(obj.getClass()); + if (lu != null) { + return lu.getKeyword(obj); + } + // no entry for this class yet -- + // try to locate a matching entry using 'inheritance' rules + Class<?> thisClass = obj.getClass(); + Class<?> matchingClass = null; + for (Map.Entry<Class, Lookup> entry : classToLookup.entrySet()) { + if (entry.getKey().isAssignableFrom(thisClass) + && (matchingClass == null + || matchingClass.isAssignableFrom(entry.getKey()))) { + // we have a first match, or a more specific match + matchingClass = entry.getKey(); + lu = entry.getValue(); + } + } + + /* + * whether we found a match or not, update the table accordingly + * no match found -- see if the 'keyword.<CLASS-NAME>.lookup' + * properties can provide a solution. + */ + if (lu == null && (lu = buildReflectiveLookup(thisClass)) == null) { + lu = nullLookup; + } + + // update table + classToLookup.put(thisClass, lu); + return lu.getKeyword(obj); + } + + /** + * explicitly place an entry in the table. + * + * @param clazz the class to do the lookup on + * @param handler an instance implementing the 'Lookup' interface, + * can handle instances of type 'clazz' + */ + public static void setLookupHandler(Class<?> clazz, Lookup handler) { + classToLookup.put(clazz, handler); + } + + /* ============================================================ */ + + /** + * These are the interface that must be implemented by objects in the + * 'classToLookup' table. + */ + public interface Lookup { + /** + * Map the object into a keyword string. 
+ * + * @param obj the object to lookup, which should be an instance of the + * associated class in the 'classToLookup' table + * @return the keyword, if found; 'null' if not + */ + public String getKeyword(Object obj); + } + + /* ============================================================ */ + + // this table maps class name to a sequence of method calls and field + // references, based upon 'keyword.<CLASS-NAME>.lookup' entries found + // in the property list + private static Map<String,String> classNameToSequence = null; + + static final String KEYWORD_PROPERTY_START = "keyword."; + static final String KEYWORD_PROPERTY_END = ".lookup"; + + /** + * Attempt to build a 'Lookup' instance for a particular class based upon + * properties. + * + * @param clazz the class to build an entry for + * @return a 'Lookup' instance to do the lookup, or 'null' if one can't + * be generated from the available properties + */ + private static synchronized Lookup buildReflectiveLookup(Class<?> clazz) { + if (classNameToSequence == null) { + classNameToSequence = new HashMap<>(); + Properties prop = ServerPoolProperties.getProperties(); + + /* + * iterate over all of the properties, picking out those + * that match the name 'keyword.<CLASS-NAME>.lookup' + */ + for (String name : prop.stringPropertyNames()) { + if (name.startsWith(KEYWORD_PROPERTY_START) + && name.endsWith(KEYWORD_PROPERTY_END)) { + // this property matches -- locate the '<CLASS-NAME>' part + int beginIndex = KEYWORD_PROPERTY_START.length(); + int endIndex = name.length() + - KEYWORD_PROPERTY_END.length(); + if (beginIndex < endIndex) { + // add it to the table + classNameToSequence.put(name.substring(beginIndex, endIndex), + prop.getProperty(name)); + } + } + } + } + + return lookupClassByName(classNameToSequence, clazz); + } + + /** + * Look for the "best match" for class 'clazz' in the hash table. + * First, look for the name of 'clazz' itself, followed by all of + * interfaces. 
If no match is found, repeat with the superclass, + * and all the way up the superclass chain. + */ + private static Lookup lookupClassByName(Map<String, String> classNameToSequence, + Class<?> clazz) { + Class<?> keyClass = null; + for (Class<?> cl = clazz ; cl != null ; cl = cl.getSuperclass()) { + if (classNameToSequence.containsKey(cl.getName())) { + // matches the class + keyClass = cl; + break; + } + for (Class<?> intf : cl.getInterfaces()) { + if (classNameToSequence.containsKey(intf.getName())) { + // matches one of the interfaces + keyClass = intf; + break; + } + // interface can have superclass + for (Class<?> cla = clazz; cla != null; cla = intf.getSuperclass()) { + if (classNameToSequence.containsKey(cla.getName())) { + // matches the class + keyClass = cla; + break; + } + } + } + if (keyClass != null) { + break; + } + } + + if (keyClass == null) { + // no matching class name found + return null; + } + // we found a matching key in the table -- now, process the values + Class<?> currentClass = keyClass; + + /** + * there may potentially be a chain of entries if multiple + * field and/or method calls are in the sequence -- this is the first + */ + ReflectiveLookup first = null; + + // this is the last entry in the list + ReflectiveLookup last = null; + + /** + * split the value into segments, where each segment has the form + * 'FIELD-NAME' or 'METHOD-NAME()', with an optional ':CONVERSION' + * at the end + */ + String sequence = classNameToSequence.get(keyClass.getName()); + ConversionFunctionLookup conversionFunctionLookup = null; + int index = sequence.indexOf(':'); + if (index >= 0) { + // conversion function specified + conversionFunctionLookup = + new ConversionFunctionLookup(sequence.substring(index + 1)); + sequence = sequence.substring(0, index); + } + for (String segment : sequence.split("\\.")) { + ReflectiveLookup current = null; + ReflectiveOperationException error = null; + try { + if (segment.endsWith("()")) { + // this segment is a method 
lookup + current = new MethodLookup(currentClass, + segment.substring(0, segment.length() - 2)); + } else { + // this segment is a field lookup + current = new FieldLookup(currentClass, segment); + } + } catch (ReflectiveOperationException e) { + // presumably the field or method does not exist in this class + error = e; + } + if (current == null) { + logger.error("Keyword.buildReflectiveLookup: build error " + + "(class={},value={},segment={})", + clazz.getName(), + classNameToSequence.get(keyClass.getName()), + segment, + error); + return null; + } + + // if we reach this point, we processed this segment successfully + currentClass = current.nextClass(); + if (first == null) { + // the initial segment + first = current; + } else { + // link to the most recently created segment + last.next = current; + } + // update most recently created segment + last = current; + } + + // add optional conversion function ('null' if it doesn't exist) + last.next = conversionFunctionLookup; + + // successful - return the first 'Lookup' instance in the chain + return first; + } + + /* ============================================================ */ + + /** + * Abstract superclass of 'FieldLookup' and 'MethodLookup'. + */ + private abstract static class ReflectiveLookup implements Lookup { + // link to the next 'Lookup' instance in the chain + Lookup next = null; + + /** + * Return the next 'class' instance. + * + * @return the class associated with the return value of the + * field or method lookup + */ + abstract Class<?> nextClass(); + } + + /* ============================================================ */ + + /** + * This class is used to do a field lookup. + */ + private static class FieldLookup extends ReflectiveLookup { + // the reflective 'Field' instance associated with this lookup + Field field; + + /** + * Constructor. 
+ * + * @param clazz the 'class' we are doing the field lookup on + * @param segment a segment from the property value, which is just the + * field name + */ + FieldLookup(Class<?> clazz, String segment) throws NoSuchFieldException { + field = clazz.getField(segment); + } + + /********************************/ + /* 'ReflectiveLookup' interface */ + /********************************/ + + /** + * {@inheritDoc} + */ + @Override + Class<?> nextClass() { + return field.getType(); + } + + /**********************/ + /* 'Lookup' interface */ + /**********************/ + + /** + * {@inheritDoc} + */ + @Override + public String getKeyword(Object obj) { + try { + // do the field lookup + Object rval = field.get(obj); + if (rval == null) { + return null; + } + + // If there is no 'next' entry specified, this value is the + // keyword. Otherwise, move on to the next 'Lookup' entry in + // the chain. + return next == null ? rval.toString() : next.getKeyword(rval); + } catch (Exception e) { + logger.error("Keyword.FieldLookup error: field={}", + field, e); + return null; + } + } + } + + /* ============================================================ */ + + /** + * This class is used to do a method call on the target object. + */ + private static class MethodLookup extends ReflectiveLookup { + // the reflective 'Method' instance associated with this lookup + Method method; + + /** + * Constructor. 
+ * + * @param clazz the 'class' we are doing the method lookup on + * @param name a method name extracted from a segment from the + * property value, which is the + */ + MethodLookup(Class<?> clazz, String name) throws NoSuchMethodException { + method = clazz.getMethod(name); + } + + /*==============================*/ + /* 'ReflectiveLookup' interface */ + /*==============================*/ + + /** + * {@inheritDoc} + */ + @Override + Class<?> nextClass() { + return method.getReturnType(); + } + + /*====================*/ + /* 'Lookup' interface */ + /*====================*/ + + /** + * {@inheritDoc} + */ + @Override + public String getKeyword(Object obj) { + try { + // do the method call + Object rval = method.invoke(obj); + if (rval == null) { + return null; + } + + // If there is no 'next' entry specified, this value is the + // keyword. Otherwise, move on to the next 'Lookup' entry in + // the chain. + return next == null ? rval.toString() : next.getKeyword(rval); + } catch (Exception e) { + logger.error("Keyword.MethodLookup error: method={}", + method, e); + return null; + } + } + } + + /* ============================================================ */ + + /* + * Support for named "conversion functions", which take an input keyword, + * and return a possibly different keyword derived from it. The initial + * need is to take a string which consists of a UUID and a suffix, and + * return the base UUID. + */ + + // used to lookup optional conversion functions + private static Map<String, Function<String, String>> conversionFunction = + new ConcurrentHashMap<>(); + + // conversion function 'uuid': + // truncate strings to 36 characters(uuid length) + static final int UUID_LENGTH = 36; + + static { + conversionFunction.put("uuid", new Function<String, String>() { + @Override + public String apply(String value) { + // truncate strings to 36 characters + return value != null && value.length() > UUID_LENGTH + ? 
value.substring(0, UUID_LENGTH) : value; + } + }); + } + + /** + * Add a conversion function. + * + * @param name the conversion function name + * @param function the object that does the transformation + */ + public static void addConversionFunction(String name, Function<String, String> function) { + conversionFunction.put(name, function); + } + + /** + * Apply a named conversion function to a keyword. + * + * @param inputKeyword this is the keyword extracted from a message or object + * @param functionName this is the name of the conversion function to apply + * (if 'null', no conversion is done) + * @return the converted keyword + */ + public static String convertKeyword(String inputKeyword, String functionName) { + if (functionName == null || inputKeyword == null) { + // don't do any conversion -- just return the input keyword + return inputKeyword; + } + + // look up the function + Function<String, String> function = conversionFunction.get(functionName); + if (function == null) { + logger.error("{}: conversion function not found", functionName); + return null; + } + + // call the conversion function, and return the value + return function.apply(inputKeyword); + } + + /** + * This class is used to invoke a conversion function. + */ + @AllArgsConstructor + private static class ConversionFunctionLookup implements Lookup { + // the conversion function name + private final String functionName; + + /** + * {@inheritDoc} + */ + @Override + public String getKeyword(Object obj) { + return obj == null ? 
null : convertKeyword(obj.toString(), functionName); + } + } +} diff --git a/feature-server-pool/src/main/java/org/onap/policy/drools/serverpool/Leader.java b/feature-server-pool/src/main/java/org/onap/policy/drools/serverpool/Leader.java new file mode 100644 index 00000000..9d864bd7 --- /dev/null +++ b/feature-server-pool/src/main/java/org/onap/policy/drools/serverpool/Leader.java @@ -0,0 +1,573 @@ +/* + * ============LICENSE_START======================================================= + * feature-server-pool + * ================================================================================ + * Copyright (C) 2020 AT&T Intellectual Property. All rights reserved. + * ================================================================================ + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * ============LICENSE_END========================================================= + */ + +package org.onap.policy.drools.serverpool; + +import static org.onap.policy.drools.serverpool.ServerPoolProperties.DEFAULT_LEADER_STABLE_IDLE_CYCLES; +import static org.onap.policy.drools.serverpool.ServerPoolProperties.DEFAULT_LEADER_STABLE_VOTING_CYCLES; +import static org.onap.policy.drools.serverpool.ServerPoolProperties.LEADER_STABLE_IDLE_CYCLES; +import static org.onap.policy.drools.serverpool.ServerPoolProperties.LEADER_STABLE_VOTING_CYCLES; +import static org.onap.policy.drools.serverpool.ServerPoolProperties.getProperty; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.DataInputStream; +import java.io.DataOutputStream; +import java.io.IOException; +import java.io.PrintStream; +import java.nio.charset.StandardCharsets; +import java.util.Base64; +import java.util.HashSet; +import java.util.TreeMap; +import java.util.TreeSet; +import java.util.UUID; + +import javax.ws.rs.client.Entity; +import javax.ws.rs.core.MediaType; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * This class handles the election of the lead server. The lead server + * handles bucket assignments, and also is the server running the + * 'Discovery' procedure long-term (other servers do run the procedure + * until a leader is elected). + * Note that everything in this class is run under the 'MainLoop' thread, + * with the exception of the invocation and first two statements of the + * 'voteData' method. 
+ */ +class Leader { + private static Logger logger = LoggerFactory.getLogger(Leader.class); + + // Listener class to handle state changes that may lead to a new election + private static EventHandler eventHandler = new EventHandler(); + + static { + Events.register(eventHandler); + } + + // Server currently in the leader roll + private static Server leader = null; + + // Vote state machine -- it is null, unless a vote is in progress + private static VoteCycle voteCycle = null; + + private static UUID emptyUUID = new UUID(0L, 0L); + + /*==================================================*/ + /* Some properties extracted at initialization time */ + /*==================================================*/ + + // how many cycles of stability before voting starts + private static int stableIdleCycles; + + // how may cycles of stability before declaring a winner + private static int stableVotingCycles; + + /** + * Invoked at startup, or after some events -- immediately start a new vote. + */ + static void startup() { + // fetch some static properties + stableIdleCycles = getProperty(LEADER_STABLE_IDLE_CYCLES, + DEFAULT_LEADER_STABLE_IDLE_CYCLES); + stableVotingCycles = getProperty(LEADER_STABLE_VOTING_CYCLES, + DEFAULT_LEADER_STABLE_VOTING_CYCLES); + + startVoting(); + } + + /** + * start, or restart voting. + */ + private static void startVoting() { + if (voteCycle == null) { + voteCycle = new VoteCycle(); + MainLoop.addBackgroundWork(voteCycle); + } else { + voteCycle.serverChanged(); + } + } + + /** + * Return the current leader. + * + * @return the current leader ('null' if none has been selected) + */ + public static Server getLeader() { + return leader; + } + + /** + * Handle an incoming /vote REST message. 
+ * + * @param data base64-encoded data, containing vote data + */ + static void voteData(byte[] data) { + // decode base64 data + final byte[] packet = Base64.getDecoder().decode(data); + + MainLoop.queueWork(new Runnable() { + /** + * This method is running within the 'MainLoop' thread. + */ + @Override + public void run() { + // create the 'VoteCycle' state machine, if needed + if (voteCycle == null) { + voteCycle = new VoteCycle(); + MainLoop.addBackgroundWork(voteCycle); + } + try { + // pass data to 'VoteCycle' state machine + voteCycle.packetReceived(packet); + } catch (IOException e) { + logger.error("Exception in 'Leader.voteData", e); + } + } + }); + } + + /* ============================================================ */ + + /** + * There is a single instance of this class (Leader.eventHandler), which + * is registered to listen for notifications of state transitions. Note + * that all of these methods are running within the 'MainLoop' thread. + */ + private static class EventHandler implements Events { + /** + * {@inheritDoc} + */ + @Override + public void newServer(Server server) { + // a new server has joined -- start/restart the VoteCycle state machine + startVoting(); + } + + /** + * {@inheritDoc} + */ + @Override + public void serverFailed(Server server) { + if (server == leader) { + // the lead server has failed -- + // start/restart the VoteCycle state machine + leader = null; + startVoting(); + + // send out a notification that the lead server has failed + for (Events listener : Events.getListeners()) { + listener.leaderFailed(server); + } + } else if (voteCycle != null) { + // a vote is in progress -- restart the state machine + // (don't do anything if there is no vote in progress) + voteCycle.serverChanged(); + } + } + } + + /* ============================================================ */ + + /** + * This is the 'VoteCycle' state machine -- it runs as background work + * on the 'MainLoop' thread, and goes away when a leader is elected. 
+ */ + private static class VoteCycle implements Runnable { + enum State { + // server just started up -- 5 second grace period + STARTUP, + + // voting in progress -- changes have occurred in the last cycle + VOTING, + } + + // maps UUID voted for into the associated data + private final TreeMap<UUID, VoteData> uuidToVoteData = + new TreeMap<>(Util.uuidComparator); + + // maps voter UUID into the associated data + private final TreeMap<UUID, VoterData> uuidToVoterData = + new TreeMap<>(Util.uuidComparator); + + // sorted list of 'VoteData' (most preferable to least) + private final TreeSet<VoteData> voteData = new TreeSet<>(); + + // data to send out next cycle + private final HashSet<VoterData> updatedVotes = new HashSet<>(); + + private State state = State.STARTUP; + private int cycleCount = stableIdleCycles; + + /** + * Constructor - if there is no leader, or this server is the leader, + * start the 'Discovery' thread. + */ + VoteCycle() { + if (leader == null || leader == Server.getThisServer()) { + Discovery.startDiscovery(); + } + } + + /** + * A state change has occurred that invalidates any vote in progress -- + * restart the VoteCycle state machine. 
+ */ + void serverChanged() { + // clear all of the tables + uuidToVoteData.clear(); + uuidToVoterData.clear(); + voteData.clear(); + updatedVotes.clear(); + + // wait for things to stabilize before continuing + state = State.STARTUP; + cycleCount = stableIdleCycles; + } + + /** + * {@inheritDoc} + */ + @Override + public void run() { + switch (state) { + case STARTUP: { + // 5-second grace period -- wait for things to stablize before + // starting the vote + if ((cycleCount -= 1) <= 0) { + logger.info("VoteCycle: {} seconds have passed", + stableIdleCycles); + //MainLoop.removeBackgroundWork(this); + updateMyVote(); + sendOutUpdates(); + state = State.VOTING; + cycleCount = stableVotingCycles; + } + break; + } + + case VOTING: { + // need to be in the VOTING state without any vote changes + // for 5 seconds -- once this happens, the leader is chosen + if (sendOutUpdates()) { + // changes have occurred -- set the grace period to 5 seconds + cycleCount = stableVotingCycles; + } else if ((cycleCount -= 1) <= 0) { + // 5 second grace period has passed -- the leader is one with + // the most votes, which is the first entry in 'voteData' + Server oldLeader = leader; + leader = Server.getServer(voteData.first().uuid); + if (leader != oldLeader) { + // the leader has changed -- send out notifications + for (Events listener : Events.getListeners()) { + listener.newLeader(leader); + } + } else { + // the election is over, and the leader has been confirmed + for (Events listener : Events.getListeners()) { + listener.leaderConfirmed(leader); + } + } + if (leader == Server.getThisServer()) { + // this is the lead server -- + // make sure the 'Discovery' threads are running + Discovery.startDiscovery(); + } else { + // this is not the lead server -- stop 'Discovery' threads + Discovery.stopDiscovery(); + } + + // we are done with voting -- clean up, and report results + MainLoop.removeBackgroundWork(this); + voteCycle = null; + + ByteArrayOutputStream bos = new 
ByteArrayOutputStream(); + PrintStream out = new PrintStream(bos); + + out.println("Voting results:"); + + // x(36) xxxxx x(36) + // UUID Votes Voter + String format = "%-36s %5s %-36s\n"; + out.format(format, "UUID", "Votes", "Voter(s)"); + out.format(format, "----", "-----", "--------"); + + for (VoteData vote : voteData) { + if (vote.voters.isEmpty()) { + out.format(format, vote.uuid, 0, ""); + } else { + boolean headerNeeded = true; + for (VoterData voter : vote.voters) { + if (headerNeeded) { + out.format(format, vote.uuid, + vote.voters.size(), voter.uuid); + headerNeeded = false; + } else { + out.format(format, "", "", voter.uuid); + } + } + } + } + + logger.info(bos.toString()); + } + break; + } + default: + logger.error("Unknown state: {}", state); + break; + } + } + + /** + * Process an incoming /vote REST message. + * + * @param packet vote data, containing one or more votes + */ + private void packetReceived(byte[] packet) throws IOException { + DataInputStream dis = + new DataInputStream(new ByteArrayInputStream(packet)); + + while (dis.available() != 0) { + // message is a series of: + // 16-bytes voter UUID + // 16-bytes vote UUID + // 8-bytes timestamp + long tmp = dis.readLong(); // most significant bits + UUID voter = new UUID(tmp, dis.readLong()); + + tmp = dis.readLong(); + UUID vote = new UUID(tmp, dis.readLong()); + + long timestamp = dis.readLong(); + + // process the single vote + processVote(voter, vote, timestamp); + } + } + + /** + * Process a single incoming vote. 
+ * + * @param UUID voter the UUID of the Server making this vote + * @param UUID vote the UUID of the Server that 'voter' voted for + * @param timestamp the time when the vote was made + */ + private void processVote(UUID voter, UUID vote, long timestamp) { + // fetch old data for this voter + VoterData voterData = uuidToVoterData.computeIfAbsent(voter, + (key) -> new VoterData(voter, timestamp)); + if (timestamp >= voterData.timestamp) { + // this is a new vote for this voter -- update the timestamp + voterData.timestamp = timestamp; + } else { + // already processed vote, and it may even be obsolete + return; + } + + // fetch the old vote, if any, for this voter + VoteData oldVoteData = voterData.vote; + VoteData newVoteData = null; + + if (vote != null) { + newVoteData = uuidToVoteData.computeIfAbsent(vote, (key) -> new VoteData(vote)); + } + + if (oldVoteData != newVoteData) { + // the vote has changed -- update the 'voterData' entry, + // and include this in the next set of outgoing messages + logger.info("{} voting for {}", voter, vote); + voterData.vote = newVoteData; + updatedVotes.add(voterData); + + if (oldVoteData != null) { + // remove the old vote data + voteData.remove(oldVoteData); + oldVoteData.voters.remove(voterData); + if (oldVoteData.voters.isEmpty()) { + // no voters left -- remove the entry + uuidToVoteData.remove(oldVoteData.uuid); + } else { + // reinsert in a new position + voteData.add(oldVoteData); + } + } + + if (newVoteData != null) { + // update the new vote data + voteData.remove(newVoteData); + newVoteData.voters.add(voterData); + voteData.add(newVoteData); + } + } + } + + /** + * If any updates have occurred, send then out to all servers on + * the "notify list". 
+ * + * @return 'true' if one or more votes have changed, 'false' if not + */ + private boolean sendOutUpdates() { + try { + if (updatedVotes.isEmpty()) { + // no changes to send out + return false; + } + + // possibly change vote based on current information + updateMyVote(); + + // generate message to send out + ByteArrayOutputStream bos = new ByteArrayOutputStream(); + DataOutputStream dos = new DataOutputStream(bos); + + // go through all of the updated votes + for (VoterData voterData : updatedVotes) { + // voter UUID + dos.writeLong(voterData.uuid.getMostSignificantBits()); + dos.writeLong(voterData.uuid.getLeastSignificantBits()); + + // vote UUID + UUID vote = + (voterData.vote == null ? emptyUUID : voterData.vote.uuid); + dos.writeLong(vote.getMostSignificantBits()); + dos.writeLong(vote.getLeastSignificantBits()); + + // timestamp + dos.writeLong(voterData.timestamp); + } + updatedVotes.clear(); + + // create an 'Entity' that can be sent out to all hosts + Entity<String> entity = Entity.entity( + new String(Base64.getEncoder().encode(bos.toByteArray()), StandardCharsets.UTF_8), + MediaType.APPLICATION_OCTET_STREAM_TYPE); + + // send out to all servers on the notify list + for (Server server : Server.getNotifyList()) { + server.post("vote", entity); + } + return true; + } catch (IOException e) { + logger.error("Exception in VoteCycle.sendOutUpdates", e); + return false; + } + } + + /** + * (Possibly) change this servers vote, based upon votes of other voters. 
+ */ + private void updateMyVote() { + UUID myVote = null; + + if (uuidToVoterData.size() * 2 < Server.getServerCount()) { + // fewer than half of the nodes have voted + if (leader != null) { + // choose the current leader + myVote = leader.getUuid(); + } else { + // choose the first entry in the servers list + myVote = Server.getFirstServer().getUuid(); + } + } else { + // choose the first entry we know about + for (VoteData vote : voteData) { + if (Server.getServer(vote.uuid) != null) { + myVote = vote.uuid; + break; + } + } + } + if (myVote != null) { + // update the vote for this host, and include it in the list + processVote(Server.getThisServer().getUuid(), myVote, + System.currentTimeMillis()); + } + } + } + + /* ============================================================ */ + + /** + * This class corresponds to a single vote recipient -- + * the Server being voted for. + */ + private static class VoteData implements Comparable<VoteData> { + // uuid voted for + private UUID uuid; + + // the set of all voters that voted for this server + private HashSet<VoterData> voters = new HashSet<>(); + + /** + * Constructor -- set the UUID. + */ + VoteData(UUID uuid) { + this.uuid = uuid; + } + + /*================================*/ + /* Comparable<VoteData> interface */ + /*================================*/ + + /** + * {@inheritDoc} + */ + @Override + public int compareTo(VoteData other) { + // favor highest vote count + // in case of a tie, compare UUIDs (favor smallest) + + int rval = other.voters.size() - voters.size(); + if (rval == 0) { + // vote counts equal -- favor the smaller UUID + rval = Util.uuidComparator.compare(uuid, other.uuid); + } + return rval; + } + } + + /* ============================================================ */ + + /** + * This class corresponds to the vote of a single server. 
+ */ + private static class VoterData { + // voter UUID + private UUID uuid; + + // most recently cast vote from this voter + private VoteData vote = null; + + // time when the vote was cast + private long timestamp = 0; + + /** + * Constructor - store the UUID and timestamp. + */ + private VoterData(UUID uuid, long timestamp) { + this.uuid = uuid; + this.timestamp = timestamp; + } + } +} diff --git a/feature-server-pool/src/main/java/org/onap/policy/drools/serverpool/MainLoop.java b/feature-server-pool/src/main/java/org/onap/policy/drools/serverpool/MainLoop.java new file mode 100644 index 00000000..1ed7ecb2 --- /dev/null +++ b/feature-server-pool/src/main/java/org/onap/policy/drools/serverpool/MainLoop.java @@ -0,0 +1,186 @@ +/* + * ============LICENSE_START======================================================= + * feature-server-pool + * ================================================================================ + * Copyright (C) 2020 AT&T Intellectual Property. All rights reserved. + * ================================================================================ + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * ============LICENSE_END========================================================= + */ + +package org.onap.policy.drools.serverpool; + +import static org.onap.policy.drools.serverpool.ServerPoolProperties.DEFAULT_MAINLOOP_CYCLE; +import static org.onap.policy.drools.serverpool.ServerPoolProperties.MAINLOOP_CYCLE; +import static org.onap.policy.drools.serverpool.ServerPoolProperties.getProperty; + +import java.util.concurrent.ConcurrentLinkedQueue; +import java.util.concurrent.LinkedTransferQueue; +import java.util.concurrent.TimeUnit; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * This class provides a single thread that is used for 'Server' and 'Bucket' + * updates. This simplifies things because it greatly reduces the need for + * synchronization within these classes. + */ +class MainLoop extends Thread { + private static Logger logger = LoggerFactory.getLogger(MainLoop.class); + + // this queue is used to send work to the 'MainLoop' thread, for things + // like processing incoming messages + private static LinkedTransferQueue<Runnable> incomingWork = + new LinkedTransferQueue<>(); + + // this is used for work that should be invoked every cycle + private static ConcurrentLinkedQueue<Runnable> backgroundWork = + new ConcurrentLinkedQueue<>(); + + // this is the 'MainLoop' thread + private static volatile MainLoop mainLoop = null; + + // main loop cycle time + private static long cycleTime; + + /** + * If it isn't already running, start the 'MainLoop' thread. + */ + public static synchronized void startThread() { + cycleTime = getProperty(MAINLOOP_CYCLE, DEFAULT_MAINLOOP_CYCLE); + if (mainLoop == null) { + mainLoop = new MainLoop(); + mainLoop.start(); + } + } + + /** + * If it is currently running, stop the 'MainLoop' thread. 
+ */ + public static synchronized void stopThread() { + // this won't be immediate, but the thread should discover it shortly + MainLoop saveMainLoop = mainLoop; + + mainLoop = null; + if (saveMainLoop != null) { + saveMainLoop.interrupt(); + } + } + + /** + * Add some work to the 'incomingWork' queue -- this runs once, and is + * automatically removed from the queue. + * + * @param work this is the Runnable to invoke + */ + public static void queueWork(Runnable work) { + incomingWork.offer(work); + } + + /** + * Add some work to the 'backgroundWork' queue -- this runs every cycle, + * until it is manually removed. + * + * @param work this is the Runnable to invoke every cycle + */ + public static void addBackgroundWork(Runnable work) { + // if it is already here, remove it first + backgroundWork.remove(work); + + // add to the end of the queue + backgroundWork.add(work); + } + + /** + * Remove work from the 'backgroundWork' queue. + * + * @param work this is the Runnable to remove from the queue + * @return true if the background work was found, and removed + */ + public static boolean removeBackgroundWork(Runnable work) { + return backgroundWork.remove(work); + } + + /** + * Constructor. + */ + private MainLoop() { + super("Main Administrative Loop"); + } + + /** + * This is the main processing loop for "administrative" messages, which + * manage 'Server' states. + * 1) Process incoming messages (other threads are reading in and queueing + * the messages), making note of information that should forwarded to + * other servers. 
+ * 2) Send out updates to all servers on the 'notify' list + * 3) Go through list of all 'Server' entries, and see which ones have + * taken too long to respond -- those are treated as 'failed' + */ + @Override + public void run() { + while (this == mainLoop) { + try { + // the following reads in messages over a period of 1 second + handleIncomingWork(); + + // send out notifications to other hosts + Server.sendOutData(); + + // search for hosts which have taken too long to respond + Server.searchForFailedServers(); + + // work that runs every cycle + for (Runnable work : backgroundWork) { + try { + work.run(); + } catch (Exception e) { + logger.error("Exception in MainLoop background work", e); + } + } + } catch (Exception e) { + logger.error("Exception in MainLoop", e); + } + } + } + + /** + * Poll for and process incoming messages for up to 1 second. + */ + static void handleIncomingWork() throws InterruptedException { + long currentTime = System.currentTimeMillis();; + long wakeUpTime = currentTime + cycleTime; + long timeDiff; + + // receive incoming messages + while ((timeDiff = wakeUpTime - currentTime) > 0) { + try { + Runnable work = + incomingWork.poll(timeDiff, TimeUnit.MILLISECONDS); + if (work == null) { + // timeout -- we are done processing messages for now + return; + } + work.run(); + } catch (InterruptedException e) { + logger.error("Interrupted in MainLoop"); + throw(e); + } catch (Exception e) { + logger.error("Exception in MainLoop incoming work", e); + } + currentTime = System.currentTimeMillis(); + } + } +} diff --git a/feature-server-pool/src/main/java/org/onap/policy/drools/serverpool/RestServerPool.java b/feature-server-pool/src/main/java/org/onap/policy/drools/serverpool/RestServerPool.java new file mode 100644 index 00000000..1c4cc7ba --- /dev/null +++ b/feature-server-pool/src/main/java/org/onap/policy/drools/serverpool/RestServerPool.java @@ -0,0 +1,447 @@ +/* + * 
============LICENSE_START======================================================= + * feature-server-pool + * ================================================================================ + * Copyright (C) 2020 AT&T Intellectual Property. All rights reserved. + * ================================================================================ + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * ============LICENSE_END========================================================= + */ + +package org.onap.policy.drools.serverpool; + +import io.swagger.annotations.Api; +import io.swagger.annotations.ApiOperation; +import io.swagger.annotations.Info; +import io.swagger.annotations.SwaggerDefinition; +import io.swagger.annotations.Tag; + +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.PrintStream; +import java.nio.charset.StandardCharsets; +import java.util.UUID; + +import javax.ws.rs.Consumes; +import javax.ws.rs.GET; +import javax.ws.rs.POST; +import javax.ws.rs.Path; +import javax.ws.rs.Produces; +import javax.ws.rs.QueryParam; +import javax.ws.rs.core.MediaType; +import javax.ws.rs.core.Response; + +import org.onap.policy.drools.serverpool.Bucket; +import org.onap.policy.drools.serverpool.FeatureServerPool; +import org.onap.policy.drools.serverpool.Leader; +import org.onap.policy.drools.serverpool.Server; +import org.onap.policy.drools.serverpool.TargetLock; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * 
This class contains methods for processing incoming REST messages. + */ + +@Path("/") +@Api +@SwaggerDefinition( + info = @Info( + description = "PDP-D Server Pool Telemetry Service", + version = "v1.0", + title = "PDP-D Server Pool Telemetry" + ), + consumes = {MediaType.APPLICATION_JSON}, + produces = {MediaType.APPLICATION_JSON, MediaType.TEXT_PLAIN}, + schemes = {SwaggerDefinition.Scheme.HTTP}, + tags = { + @Tag(name = "pdp-d-server-pool-telemetry", description = "Drools PDP Server Pool Telemetry Operations") + } + ) +public class RestServerPool { + private static Logger logger = LoggerFactory.getLogger(RestServerPool.class); + + /** + * Handle the '/test' REST call. + */ + @GET + @Path("/test") + @ApiOperation( + value = "Perform an incoming /test request", + notes = "Provides an acknowledge message back to requestor", + response = String.class + ) + @Produces(MediaType.TEXT_PLAIN) + public String test() { + return "RestServerPool.test()"; + } + + /* ============================================================ */ + + /** + * Handle the '/admin' REST call. + */ + @POST + @Path("/admin") + @ApiOperation( + value = "Perform an incoming /admin request", + notes = "This rest call decodes incoming admin message (base-64-encoded) and " + + "send to main thread for processing" + ) + @Consumes(MediaType.APPLICATION_OCTET_STREAM) + public void adminRequest(byte[] data) { + Server.adminRequest(data); + } + + /** + * Handle the '/vote' REST call. + */ + @POST + @Path("/vote") + @ApiOperation( + value = "Perform an incoming /vote request", + notes = "The request data containing voter and vote data to be processed" + ) + @Consumes(MediaType.APPLICATION_OCTET_STREAM) + public void voteData(byte[] data) { + Leader.voteData(data); + } + + /** + * Handle the '/bucket/update' REST call. 
+ */ + @POST + @Path("/bucket/update") + @ApiOperation( + value = "Perform an incoming /bucket/update request", + notes = "The request data include owner, state, primaryBackup and secondaryBackup" + ) + @Consumes(MediaType.APPLICATION_OCTET_STREAM) + public void updateBucket(byte[] data) { + Bucket.updateBucket(data); + } + + /** + * Handle the '/bucket/topic' REST call. + */ + @POST + @Path("/bucket/topic") + @ApiOperation( + value = "Perform an incoming /bucket/topic request", + notes = "Forward an incoming topic message from a remote host, the request data include " + + "bucketNumber the bucket number calculated on the remote host, keyword the keyword " + + "associated with the message, controllerName the controller the message was directed to " + + "on the remote host, protocol String value of the topic value (UEB, DMAAP, NOOP, or REST " + + "-- NOOP and REST shouldn't be used here), topic the UEB/DMAAP topic name, event this is " + + "the JSON message" + ) + @Consumes(MediaType.APPLICATION_JSON) + public void topicMessage(@QueryParam("bucket") Integer bucket, + @QueryParam("keyword") String keyword, + @QueryParam("controller") String controllerName, + @QueryParam("protocol") String protocol, + @QueryParam("topic") String topic, + String event) { + FeatureServerPool.topicMessage(bucket, keyword, controllerName, protocol, topic, event); + } + + /** + * Handle the '/bucket/sessionData' REST call. 
+ */ + @POST + @Path("/bucket/sessionData") + @ApiOperation( + value = "Perform an incoming /bucket/sessionData request", + notes = "A message is received from the old owner of the bucket and send to new owner, " + + "the request data include bucketNumber the bucket number, dest the UUID of the intended " + + "destination, ttl controls the number of hops the message may take, data serialized data " + + "associated with this bucket, encoded using base64" + ) + @Consumes(MediaType.APPLICATION_OCTET_STREAM) + public void sessionData(@QueryParam("bucket") Integer bucket, + @QueryParam("dest") UUID dest, + @QueryParam("ttl") int ttl, + byte[] data) { + Bucket.sessionData(bucket, dest, ttl, data); + } + + /** + * Handle the '/session/insertDrools' REST call. + */ + @POST + @Path("/session/insertDrools") + @ApiOperation( + value = "Perform an incoming /session/insertDrools request", + notes = "An incoming /session/insertDrools message was received, the request data include " + + "keyword the keyword associated with the incoming object, sessionName encoded session name " + + "(groupId:artifactId:droolsSession), bucket the bucket associated with keyword, " + + "ttl controls the number of hops the message may take, data base64-encoded serialized data " + + "for the object" + ) + @Consumes(MediaType.APPLICATION_OCTET_STREAM) + public void insertDrools(@QueryParam("keyword") String keyword, + @QueryParam("session") String sessionName, + @QueryParam("bucket") int bucket, + @QueryParam("ttl") int ttl, + byte[] data) { + FeatureServerPool.incomingInsertDrools(keyword, sessionName, bucket, ttl, data); + } + + /** + * Handle the '/lock/lock' REST call. 
+ */ + @GET + @Path("/lock/lock") + @ApiOperation( + value = "Perform an incoming /lock/lock request", + notes = "An incoming /lock/lock REST message is received, the request data include " + + "key string identifying the lock, which must hash to a bucket owned by the current host, " + + "ownerKey string key identifying the owner, uuid the UUID that uniquely identifies " + + "the original 'TargetLock', waitForLock this controls the behavior when 'key' is already " + + "locked - 'true' means wait for it to be freed, 'false' means fail, ttl controls the number " + + "of hops the message may take, the response is the message should be passed back to the " + + "requestor" + ) + @Consumes(MediaType.APPLICATION_OCTET_STREAM) + @Produces(MediaType.APPLICATION_OCTET_STREAM) + public Response lock(@QueryParam("key") String key, + @QueryParam("owner") String keyOwner, + @QueryParam("uuid") UUID uuid, + @QueryParam("wait") boolean waitForLock, + @QueryParam("ttl") int ttl) { + return TargetLock.incomingLock(key, keyOwner, uuid, waitForLock, ttl); + } + + /** + * Handle the '/lock/free' REST call. 
+ */ + @GET + @Path("/lock/free") + @ApiOperation( + value = "Perform an incoming /lock/free request", + notes = "An incoming /lock/free REST message is received, the request data include " + + "key string identifying the lock, which must hash to a bucket owned by the current host, " + + "ownerKey string key identifying the owner, uuid the UUID that uniquely identifies " + + "the original 'TargetLock', ttl controls the number of hops the message may take, " + + "the response is the message should be passed back to the requestor" + ) + @Consumes(MediaType.APPLICATION_OCTET_STREAM) + @Produces(MediaType.APPLICATION_OCTET_STREAM) + public Response free(@QueryParam("key") String key, + @QueryParam("owner") String keyOwner, + @QueryParam("uuid") UUID uuid, + @QueryParam("ttl") int ttl) { + return TargetLock.incomingFree(key, keyOwner, uuid, ttl); + } + + /** + * Handle the '/lock/locked' REST call. + */ + @GET + @Path("/lock/locked") + @ApiOperation( + value = "Perform an incoming /lock/locked request, (this is a callback to an earlier " + + "requestor that the lock is now available)", + notes = "An incoming /lock/locked REST message is received, the request data include " + + "key string key identifying the lock, ownerKey string key identifying the owner " + + "which must hash to a bucket owned by the current host (it is typically a 'RequestID') " + + "uuid the UUID that uniquely identifies the original 'TargetLock', ttl controls the " + + "number of hops the message may take, the response is the message should be passed back " + + "to the requestor" + ) + @Consumes(MediaType.APPLICATION_OCTET_STREAM) + @Produces(MediaType.APPLICATION_OCTET_STREAM) + public Response locked(@QueryParam("key") String key, + @QueryParam("owner") String keyOwner, + @QueryParam("uuid") UUID uuid, + @QueryParam("ttl") int ttl) { + return TargetLock.incomingLocked(key, keyOwner, uuid, ttl); + } + + /** + * Handle the '/lock/audit' REST call. 
+ */ + @POST + @Path("/lock/audit") + @ApiOperation( + value = "Perform an incoming /lock/audit request", + notes = "An incoming /lock/audit REST message is received, the request data include " + + "serverUuid the UUID of the intended destination server, ttl controls the number of hops, " + + "encodedData base64-encoded data, containing a serialized 'AuditData' instance " + + "the response is a serialized and base64-encoded 'AuditData'" + ) + @Consumes(MediaType.APPLICATION_OCTET_STREAM) + @Produces(MediaType.APPLICATION_OCTET_STREAM) + public byte[] lockAudit(@QueryParam("server") UUID server, + @QueryParam("ttl") int ttl, + byte[] data) { + return TargetLock.Audit.incomingAudit(server, ttl, data); + } + + /* ============================================================ */ + + /** + * Handle the '/cmd/dumpHosts' REST call. + */ + @GET + @Path("/cmd/dumpHosts") + @ApiOperation( + value = "Perform an incoming /cmd/dumpHosts request", + notes = "Dump out the current 'servers' table in a human-readable table form" + ) + @Produces(MediaType.TEXT_PLAIN) + public String dumpHosts() { + ByteArrayOutputStream bos = new ByteArrayOutputStream(); + Server.dumpHosts(new PrintStream(bos, true)); + return bos.toString(StandardCharsets.UTF_8); + } + + /** + * Handle the '/cmd/dumpBuckets' REST call. + */ + @GET + @Path("/cmd/dumpBuckets") + @ApiOperation( + value = "Perform an incoming /cmd/dumpBuckets request", + notes = "Dump out buckets information in a human-readable form" + ) + @Produces(MediaType.TEXT_PLAIN) + public String dumpBuckets() { + ByteArrayOutputStream bos = new ByteArrayOutputStream(); + Bucket.dumpBuckets(new PrintStream(bos, true)); + return bos.toString(StandardCharsets.UTF_8); + } + + /** + * Handle the '/cmd/ping' REST call. 
+ */ + @GET + @Path("/cmd/ping") + @ApiOperation( + value = "Perform an incoming /cmd/ping request", + notes = "Send information about 'thisServer' to the list of hosts" + ) + @Produces(MediaType.TEXT_PLAIN) + public String ping(@QueryParam("hosts") String hosts) { + logger.info("Running '/cmd/ping', hosts={}", hosts); + + ByteArrayOutputStream bos = new ByteArrayOutputStream(); + Server.pingHosts(new PrintStream(bos, true), hosts); + return bos.toString(StandardCharsets.UTF_8); + } + + /** + * Handle the '/cmd/bucketMessage' REST call. + */ + @GET + @Path("/cmd/bucketMessage") + @ApiOperation( + value = "Perform an incoming /cmd/bucketMessage request", + notes = "This is only used for testing the routing of messages between servers" + ) + @Produces(MediaType.TEXT_PLAIN) + public String bucketMessage(@QueryParam("keyword") String keyword, + @QueryParam("message") String message) + throws IOException { + ByteArrayOutputStream bos = new ByteArrayOutputStream(); + Bucket.bucketMessage(new PrintStream(bos, true), keyword, message); + return bos.toString(StandardCharsets.UTF_8); + } + + /** + * Handle the '/bucket/bucketResponse' REST call. + */ + @POST + @Path("/bucket/bucketResponse") + @ApiOperation( + value = "Perform an incoming /cmd/bucketResponse request", + notes = "This runs on the destination host, and is the continuation of an operation " + + "triggered by the /cmd/bucketMessage REST message running on the originating host" + ) + @Consumes(MediaType.TEXT_PLAIN) + @Produces(MediaType.TEXT_PLAIN) + public String bucketResponse(@QueryParam("bucket") Integer bucket, + @QueryParam("keyword") String keyword, + byte[] data) { + ByteArrayOutputStream bos = new ByteArrayOutputStream(); + Bucket.bucketResponse(new PrintStream(bos, true), bucket, keyword, data); + return bos.toString(StandardCharsets.UTF_8); + } + + /** + * Handle the '/lock/moveBucket' REST call. 
+ */ + @GET + @Path("/cmd/moveBucket") + @ApiOperation( + value = "Perform an incoming /cmd/moveBucket request", + notes = "This is only used for testing bucket migration. It only works on the lead server" + ) + @Produces(MediaType.TEXT_PLAIN) + public String moveBucket(@QueryParam("bucket") Integer bucketNumber, + @QueryParam("host") String newHost) { + ByteArrayOutputStream bos = new ByteArrayOutputStream(); + Bucket.moveBucket(new PrintStream(bos, true), bucketNumber, newHost); + return bos.toString(StandardCharsets.UTF_8); + } + + /** + * Handle the '/lock/dumpBucketAdjuncts' REST call. + */ + @GET + @Path("/cmd/dumpBucketAdjuncts") + @ApiOperation( + value = "Perform an incoming /cmd/dumpBucketAdjuncts request", + notes = "Dump out all buckets with adjuncts" + ) + @Produces(MediaType.TEXT_PLAIN) + public String dumpBucketAdjuncts() { + ByteArrayOutputStream bos = new ByteArrayOutputStream(); + Bucket.dumpAdjuncts(new PrintStream(bos, true)); + return bos.toString(StandardCharsets.UTF_8); + } + + /** + * Handle the '/lock/dumpLocks' REST call. + */ + @GET + @Path("/cmd/dumpLocks") + @ApiOperation( + value = "Perform an incoming /cmd/dumpLocks request", + notes = "Dump out locks info, detail 'true' provides additional bucket and host information" + ) + @Produces(MediaType.TEXT_PLAIN) + public String dumpLocks(@QueryParam("detail") boolean detail) + throws IOException, InterruptedException, ClassNotFoundException { + + ByteArrayOutputStream bos = new ByteArrayOutputStream(); + TargetLock.DumpLocks.dumpLocks(new PrintStream(bos, true), detail); + return bos.toString(StandardCharsets.UTF_8); + } + + /** + * Handle the '/lock/dumpLocksData' REST call. 
+ */ + @GET + @Path("/lock/dumpLocksData") + @ApiOperation( + value = "Perform an incoming /cmd/dumpLocksData request", + notes = "Generate a byte stream containing serialized 'HostData'" + ) + @Produces(MediaType.APPLICATION_OCTET_STREAM) + public String dumpLocksData(@QueryParam("server") UUID server, + @QueryParam("ttl") int ttl) throws IOException { + return new String(TargetLock.DumpLocks.dumpLocksData(server, ttl), StandardCharsets.UTF_8); + } +} diff --git a/feature-server-pool/src/main/java/org/onap/policy/drools/serverpool/Server.java b/feature-server-pool/src/main/java/org/onap/policy/drools/serverpool/Server.java new file mode 100644 index 00000000..52e3d2dc --- /dev/null +++ b/feature-server-pool/src/main/java/org/onap/policy/drools/serverpool/Server.java @@ -0,0 +1,1352 @@ +/* + * ============LICENSE_START======================================================= + * feature-server-pool + * ================================================================================ + * Copyright (C) 2020 AT&T Intellectual Property. All rights reserved. + * ================================================================================ + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * ============LICENSE_END========================================================= + */ + +package org.onap.policy.drools.serverpool; + +import static org.onap.policy.drools.serverpool.ServerPoolProperties.DEFAULT_HTTPS; +import static org.onap.policy.drools.serverpool.ServerPoolProperties.DEFAULT_SELF_SIGNED_CERTIFICATES; +import static org.onap.policy.drools.serverpool.ServerPoolProperties.DEFAULT_SERVER_ADAPTIVE_GAP_ADJUST; +import static org.onap.policy.drools.serverpool.ServerPoolProperties.DEFAULT_SERVER_CONNECT_TIMEOUT; +import static org.onap.policy.drools.serverpool.ServerPoolProperties.DEFAULT_SERVER_INITIAL_ALLOWED_GAP; +import static org.onap.policy.drools.serverpool.ServerPoolProperties.DEFAULT_SERVER_IP_ADDRESS; +import static org.onap.policy.drools.serverpool.ServerPoolProperties.DEFAULT_SERVER_PORT; +import static org.onap.policy.drools.serverpool.ServerPoolProperties.DEFAULT_SERVER_READ_TIMEOUT; +import static org.onap.policy.drools.serverpool.ServerPoolProperties.DEFAULT_SERVER_THREADS_CORE_POOL_SIZE; +import static org.onap.policy.drools.serverpool.ServerPoolProperties.DEFAULT_SERVER_THREADS_KEEP_ALIVE_TIME; +import static org.onap.policy.drools.serverpool.ServerPoolProperties.DEFAULT_SERVER_THREADS_MAXIMUM_POOL_SIZE; +import static org.onap.policy.drools.serverpool.ServerPoolProperties.HOST_LIST; +import static org.onap.policy.drools.serverpool.ServerPoolProperties.SERVER_ADAPTIVE_GAP_ADJUST; +import static org.onap.policy.drools.serverpool.ServerPoolProperties.SERVER_CONNECT_TIMEOUT; +import static org.onap.policy.drools.serverpool.ServerPoolProperties.SERVER_HTTPS; +import static org.onap.policy.drools.serverpool.ServerPoolProperties.SERVER_INITIAL_ALLOWED_GAP; +import static org.onap.policy.drools.serverpool.ServerPoolProperties.SERVER_IP_ADDRESS; +import static org.onap.policy.drools.serverpool.ServerPoolProperties.SERVER_PORT; +import static org.onap.policy.drools.serverpool.ServerPoolProperties.SERVER_READ_TIMEOUT; +import static 
org.onap.policy.drools.serverpool.ServerPoolProperties.SERVER_SELF_SIGNED_CERTIFICATES; +import static org.onap.policy.drools.serverpool.ServerPoolProperties.SERVER_THREADS_CORE_POOL_SIZE; +import static org.onap.policy.drools.serverpool.ServerPoolProperties.SERVER_THREADS_KEEP_ALIVE_TIME; +import static org.onap.policy.drools.serverpool.ServerPoolProperties.SERVER_THREADS_MAXIMUM_POOL_SIZE; +import static org.onap.policy.drools.serverpool.ServerPoolProperties.SITE_IP_ADDRESS; +import static org.onap.policy.drools.serverpool.ServerPoolProperties.SITE_PORT; +import static org.onap.policy.drools.serverpool.ServerPoolProperties.getProperty; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.DataInputStream; +import java.io.DataOutputStream; +import java.io.IOException; +import java.io.PrintStream; +import java.io.StringReader; +import java.lang.reflect.Field; +import java.net.InetAddress; +import java.net.InetSocketAddress; +import java.net.UnknownHostException; +import java.nio.charset.StandardCharsets; +import java.security.KeyManagementException; +import java.security.NoSuchAlgorithmException; +import java.text.SimpleDateFormat; +import java.util.Arrays; +import java.util.Base64; +import java.util.Collection; +import java.util.Date; +import java.util.HashSet; +import java.util.LinkedList; +import java.util.Objects; +import java.util.Properties; +import java.util.TreeMap; +import java.util.TreeSet; +import java.util.UUID; +import java.util.concurrent.Callable; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.FutureTask; +import java.util.concurrent.LinkedTransferQueue; +import java.util.concurrent.ThreadPoolExecutor; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.TimeoutException; + +import javax.servlet.ServletException; +import javax.ws.rs.ProcessingException; +import javax.ws.rs.client.Client; +import javax.ws.rs.client.Entity; +import javax.ws.rs.client.WebTarget; 
+import javax.ws.rs.core.MediaType; +import javax.ws.rs.core.Response; + +import org.eclipse.jetty.server.ServerConnector; +import org.glassfish.jersey.client.ClientProperties; +import org.onap.policy.common.endpoints.event.comm.bus.internal.BusTopicParams; +import org.onap.policy.common.endpoints.http.client.HttpClient; +import org.onap.policy.common.endpoints.http.client.HttpClientConfigException; +import org.onap.policy.common.endpoints.http.client.HttpClientFactoryInstance; +import org.onap.policy.common.endpoints.http.server.HttpServletServer; +import org.onap.policy.common.endpoints.http.server.HttpServletServerFactoryInstance; +import org.onap.policy.drools.system.PolicyEngineConstants; +import org.onap.policy.drools.utils.PropertyUtil; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class Server implements Comparable<Server> { + private static Logger logger = LoggerFactory.getLogger(Server.class); + + // maps UUID to Server object for all known servers + private static TreeMap<UUID, Server> servers = + new TreeMap<>(Util.uuidComparator); + + // maps UUID to Server object for all failed servers + // (so they aren't accidentally restored, due to updates from other hosts) + private static TreeMap<UUID, Server> failedServers = + new TreeMap<>(Util.uuidComparator); + + // subset of servers to be notified (null means it needs to be rebuilt) + private static LinkedList<Server> notifyList = null; + + // data to be sent out to notify list + private static TreeSet<Server> updatedList = new TreeSet<>(); + + // the server associated with the current host + private static Server thisServer = null; + + // the current REST server + private static HttpServletServer restServer; + + // incoming packets from HTTP + private static LinkedTransferQueue<byte[]> incomingPackets = + new LinkedTransferQueue<>(); + + /*==================================================*/ + /* Some properties extracted at initialization time */ + 
/*==================================================*/ + + // initial value of gap to allow between pings + private static long initialAllowedGap; + + // used in adaptive calculation of allowed gap between pings + private static long adaptiveGapAdjust; + + // time to allow for TCP connect (long) + private static String connectTimeout; + + // time to allow before TCP read timeout (long) + private static String readTimeout; + + // outgoing per-server thread pool parameters + private static int corePoolSize; + private static int maximumPoolSize; + private static long keepAliveTime; + + // https-related parameters + private static boolean useHttps; + private static boolean useSelfSignedCertificates; + + // list of remote host names + private static String[] hostList = new String[0]; + + /*=========================================================*/ + /* Fields included in every 'ping' message between servers */ + /*=========================================================*/ + + // unique id for this server + private UUID uuid; + + // counter periodically incremented to indicate the server is "alive" + private int count; + + // 16 byte MD5 checksum over additional data that is NOT included in + // every 'ping' message -- used to determine whether the data is up-to-date + private byte[] checksum; + + /*========================================================================*/ + /* The following data is included in the checksum, and doesn't change too */ + /* frequently (some fields may change as servers go up and down) */ + /*========================================================================*/ + + // IP address and port of listener + private InetSocketAddress socketAddress; + + // site IP address and port + private InetSocketAddress siteSocketAddress = null; + + /*============================================*/ + /* Local information not included in checksum */ + /*============================================*/ + + // destination socket information + private 
InetSocketAddress destSocketAddress; + private String destName; + + // REST client fields + private HttpClient client; + private WebTarget target; + private ThreadPoolExecutor sendThreadPool = null; + + // time when the 'count' field was last updated + private long lastUpdateTime; + + // calculated field indicating the maximum time between updates + private long allowedGap = initialAllowedGap; + + // indicates whether the 'Server' instance is active or not (synchronized) + private boolean active = true; + + /* + * Tags for encoding of server data + */ + static final int END_OF_PARAMETERS_TAG = 0; + static final int SOCKET_ADDRESS_TAG = 1; + static final int SITE_SOCKET_ADDRESS_TAG = 2; + + /*==============================*/ + /* Comparable<Server> interface */ + /*==============================*/ + + /** + * Compare this instance to another one by comparing the 'uuid' field. + */ + @Override + public int compareTo(Server other) { + return Util.uuidComparator.compare(uuid, other.uuid); + } + + /** + * This method may be invoked from any thread, and is used as the main + * entry point when testing. + * + * @param args arguments contaning an '=' character are intepreted as + * a property, other arguments are presumed to be a property file. + */ + public static void main(String[] args) throws IOException { + Properties prop = new Properties(); + + for (String arg : args) { + // arguments with an '=' in them are a property definition; + // otherwise, they are a properties file name + + if (arg.contains("=")) { + prop.load(new StringReader(arg)); + } else { + prop.putAll(PropertyUtil.getProperties(arg)); + } + } + + String rval = startup(prop); + if (rval != null) { + logger.error("Server.startup failed: {}", rval); + } + } + + /** + * This method may be invoked from any thread, and performs initialization. 
+ * + * @param propertiesFile the name of a property file + */ + public static String startup(String propertiesFile) { + Properties properties; + try { + properties = PropertyUtil.getProperties(propertiesFile); + } catch (IOException e) { + logger.error("Server.startup: exception reading properties", e); + properties = new Properties(); + } + return startup(properties); + } + + /** + * This method may be invoked from any thread, and performs initialization. + * + * @param properties contains properties used by the server + */ + public static String startup(Properties properties) { + ServerPoolProperties.setProperties(properties); + logger.info("startup: properties={}", properties); + + // fetch some static properties + initialAllowedGap = getProperty(SERVER_INITIAL_ALLOWED_GAP, + DEFAULT_SERVER_INITIAL_ALLOWED_GAP); + adaptiveGapAdjust = getProperty(SERVER_ADAPTIVE_GAP_ADJUST, + DEFAULT_SERVER_ADAPTIVE_GAP_ADJUST); + connectTimeout = + String.valueOf(getProperty(SERVER_CONNECT_TIMEOUT, + DEFAULT_SERVER_CONNECT_TIMEOUT)); + readTimeout = String.valueOf(getProperty(SERVER_READ_TIMEOUT, + DEFAULT_SERVER_READ_TIMEOUT)); + corePoolSize = getProperty(SERVER_THREADS_CORE_POOL_SIZE, + DEFAULT_SERVER_THREADS_CORE_POOL_SIZE); + maximumPoolSize = getProperty(SERVER_THREADS_MAXIMUM_POOL_SIZE, + DEFAULT_SERVER_THREADS_MAXIMUM_POOL_SIZE); + keepAliveTime = getProperty(SERVER_THREADS_KEEP_ALIVE_TIME, + DEFAULT_SERVER_THREADS_KEEP_ALIVE_TIME); + useHttps = getProperty(SERVER_HTTPS, DEFAULT_HTTPS); + useSelfSignedCertificates = getProperty(SERVER_SELF_SIGNED_CERTIFICATES, + DEFAULT_SELF_SIGNED_CERTIFICATES); + String hostListNames = getProperty(HOST_LIST, null); + if (hostListNames != null) { + hostList = hostListNames.split(","); + } + + String possibleError = null; + try { + // fetch server information + String ipAddressString = + getProperty(SERVER_IP_ADDRESS, DEFAULT_SERVER_IP_ADDRESS); + int port = getProperty(SERVER_PORT, DEFAULT_SERVER_PORT); + + possibleError = "Unknown 
Host: " + ipAddressString; + InetAddress address = InetAddress.getByName(ipAddressString); + InetSocketAddress socketAddress = new InetSocketAddress(address, port); + + restServer = HttpServletServerFactoryInstance.getServerFactory().build( + "SERVER-POOL", // name + useHttps, // https + socketAddress.getAddress().getHostAddress(),// host (maybe 0.0.0.0) + port, // port (can no longer be 0) + null, // contextPath + false, // swagger + false); // managed + restServer.addServletClass(null, RestServerPool.class.getName()); + + // add any additional servlets + for (ServerPoolApi feature : ServerPoolApi.impl.getList()) { + Collection<Class<?>> classes = feature.servletClasses(); + if (classes != null) { + for (Class<?> clazz : classes) { + restServer.addServletClass(null, clazz.getName()); + } + } + } + + // we may not know the port until after the server is started + restServer.start(); + + // determine the address to use + if (DEFAULT_SERVER_IP_ADDRESS.contentEquals(address.getHostAddress())) { + address = InetAddress.getLocalHost(); + } + + thisServer = new Server(new InetSocketAddress(address, port)); + + // TBD: is this really appropriate? + thisServer.newServer(); + + // start background thread + MainLoop.startThread(); + MainLoop.queueWork(new Runnable() { + @Override + public void run() { + // run this in the 'MainLoop' thread + Leader.startup(); + Bucket.startup(); + } + }); + logger.info("Listening on port {}", port); + + return null; + } catch (UnknownHostException e) { + logger.error("Server.startup: exception start server", e); + if (possibleError == null) { + possibleError = e.toString(); + } + return possibleError; + } + } + + /** + * Shut down all threads associate with server pool. 
+ */ + public static void shutdown() { + Discovery.stopDiscovery(); + MainLoop.stopThread(); + TargetLock.shutdown(); + Util.shutdown(); + + HashSet<Server> allServers = new HashSet<>(); + allServers.addAll(servers.values()); + allServers.addAll(failedServers.values()); + + for (Server server : allServers) { + if (server.sendThreadPool != null) { + server.sendThreadPool.shutdown(); + } + } + if (restServer != null) { + restServer.shutdown(); + } + } + + /** + * Return the Server instance associated with the current host. + * + * @return the Server instance associated with the current host + */ + public static Server getThisServer() { + return thisServer; + } + + /** + * Return the first Server instance in the 'servers' list. + * + * @return the first Server instance in the 'servers' list + * (the one with the lowest UUID) + */ + public static Server getFirstServer() { + return servers.firstEntry().getValue(); + } + + /** + * Lookup a Server instance associated with a UUID. + * + * @param uuid the key to the lookup + @ @return the associated 'Server' instance, or 'null' if none + */ + public static Server getServer(UUID uuid) { + return servers.get(uuid); + } + + /** + * Return a count of the number of servers. + * + * @return a count of the number of servers + */ + public static int getServerCount() { + return servers.size(); + } + + /** + * Return the complete list of servers. + * + * @return the complete list of servers + */ + public static Collection<Server> getServers() { + return servers.values(); + } + + /** + * This method is invoked from the 'startup' thread, and creates a new + * 'Server' instance for the current server. 
/**
 * Build a temporary 'Server' instance by decoding the fields of one server
 * record from a 'DataInputStream': the UUID, the 4 byte counter, the
 * 16 byte checksum, and then a sequence of tagged optional parameters
 * ending with 'END_OF_PARAMETERS_TAG'.
 *
 * @param is the 'DataInputStream' positioned at the start of a record
 * @throws IOException if the stream ends early or cannot be read
 */
Server(DataInputStream is) throws IOException {
    // fixed-size header: UUID, counter, checksum
    this.uuid = Util.readUuid(is);
    this.count = is.readInt();
    this.checksum = new byte[16];
    is.readFully(this.checksum);

    // tagged optional parameters, up to the end marker
    for (int tag = is.readUnsignedByte();
            tag != END_OF_PARAMETERS_TAG;
            tag = is.readUnsignedByte()) {
        if (tag == SOCKET_ADDRESS_TAG) {
            this.socketAddress = readSocketAddress(is);
        } else if (tag == SITE_SOCKET_ADDRESS_TAG) {
            this.siteSocketAddress = readSocketAddress(is);
        } else {
            // unrecognized tag -- it is logged, and decoding continues
            logger.error("Illegal tag: {}", tag);
        }
    }
}
+ * + * @param is the 'DataInputStream' + * @return the 'InetSocketAddress' + */ + private static InetSocketAddress readSocketAddress(DataInputStream is) + throws IOException, UnknownHostException { + + byte[] ipAddress = new byte[4]; + is.read(ipAddress, 0, 4); + int port = is.readUnsignedShort(); + return new InetSocketAddress(InetAddress.getByAddress(ipAddress), port); + } + + /** + * {@inheritDoc} + */ + @Override + public String toString() { + return "Server[" + uuid + "]"; + } + + /** + * Return the UUID associated with this Server. + * + * @return the UUID associated with this Server + */ + public UUID getUuid() { + return uuid; + } + + /** + * Return the external InetSocketAddress of the site. + * + * @return the external InetSocketAddress of the site + * ('null' if it doesn't exist) + */ + public InetSocketAddress getSiteSocketAddress() { + return siteSocketAddress; + } + + /** + * This method may be called from any thread. + * + * @return 'true' if the this server is active, and 'false' if not + */ + public synchronized boolean isActive() { + return active; + } + + /** + * This method writes out the data associated with the current Server + * instance. + * + * @param os outout stream that should receive the data + */ + void writeServerData(DataOutputStream os) throws IOException { + // write out 16 byte UUID + Util.writeUuid(os, uuid); + + // write out 4 byte counter value + os.writeInt(count); + + // write out 16 byte MD5 checksum + // TBD: should this be implemented? + os.write(checksum == null ? 
new byte[16] : checksum); + + if (socketAddress != null) { + // write out socket address + os.writeByte(SOCKET_ADDRESS_TAG); + os.write(socketAddress.getAddress().getAddress(), 0, 4); + os.writeShort(socketAddress.getPort()); + } + + if (siteSocketAddress != null) { + // write out socket address + os.writeByte(SITE_SOCKET_ADDRESS_TAG); + os.write(siteSocketAddress.getAddress().getAddress(), 0, 4); + os.writeShort(siteSocketAddress.getPort()); + } + + os.writeByte(END_OF_PARAMETERS_TAG); + } + + /** + * Do any processing needed to create a new server. This method is invoked + * from the 'MainLoop' thread in every case except for the current server, + * in which case it is invoked in 'startup' prior to creating 'MainLoop'. + */ + private void newServer() { + Server failed = failedServers.get(uuid); + if (failed != null) { + // this one is on the failed list -- see if the counter has advanced + if ((count - failed.count) <= 0) { + // the counter has not advanced -- ignore + return; + } + + // the counter has advanced -- somehow, this server has returned + failedServers.remove(uuid); + synchronized (this) { + active = true; + } + logger.error("Server reawakened: {} ({})", uuid, socketAddress); + } + + lastUpdateTime = System.currentTimeMillis(); + servers.put(uuid, this); + updatedList.add(this); + + // notify list will need to be rebuilt + notifyList = null; + + if (socketAddress != null && this != thisServer) { + // initialize 'client' and 'target' fields + if (siteSocketAddress != null + && !siteSocketAddress.equals(thisServer.siteSocketAddress)) { + // destination is on a remote site + destSocketAddress = siteSocketAddress; + } else { + // destination is on the local site -- use direct addressing + destSocketAddress = socketAddress; + } + destName = socketAddressToName(destSocketAddress); + try { + // 'client' is used for REST messages to the destination + client = buildClient(uuid.toString(), destSocketAddress, destName); + + // initialize the 'target' field + 
target = getTarget(client); + } catch (KeyManagementException | NoSuchAlgorithmException + | NoSuchFieldException | IllegalAccessException + | ClassNotFoundException | HttpClientConfigException e) { + logger.error("Server.newServer: problems creating 'client'", e); + } + } + logger.info("New server: {} ({})", uuid, socketAddress); + for (Events listener : Events.getListeners()) { + listener.newServer(this); + } + } + + /** + * Check the server state in response to some issue. At present, only the + * 'destName' information is checked. + */ + private void checkServer() { + // recalculate 'destName' (we have seen DNS issues) + String newDestName = socketAddressToName(destSocketAddress); + if (newDestName.equals(destName)) { + return; + } + logger.warn("Remote host name for {} has changed from {} to {}", + destSocketAddress, destName, newDestName); + + // shut down old client, and rebuild + client.shutdown(); + client = null; + target = null; + + // update 'destName', and rebuild the client + destName = newDestName; + try { + // 'client' is used for REST messages to the destination + client = buildClient(uuid.toString(), destSocketAddress, destName); + + // initialize the 'target' field + target = getTarget(client); + } catch (KeyManagementException | NoSuchAlgorithmException + | NoSuchFieldException | IllegalAccessException + | ClassNotFoundException | HttpClientConfigException e) { + logger.error("Server.checkServer: problems recreating 'client'", e); + } + } + + /** + * Update server data. 
+ * + * @param serverData this is a temporary 'Server' instance created from + * an incoming message, which is used to update fields within the + * 'Server' instance identified by 'this' + */ + private void updateServer(Server serverData) { + if (serverData.count > count) { + // an update has occurred + count = serverData.count; + + // TBD: calculate and verify checksum, more fields may be updated + + // adjust 'allowedGap' accordingly + long currentTime = System.currentTimeMillis(); + long gap = currentTime - lastUpdateTime; + + // adjust 'allowedGap' accordingly + // TBD: need properties to support overrides + gap = gap * 3 / 2 + adaptiveGapAdjust; + if (gap > allowedGap) { + // update 'allowedGap' immediately + allowedGap = gap; + } else { + // gradually pull the allowed gap down + // TBD: need properties to support overrides + allowedGap = (allowedGap * 15 + gap) / 16; + } + lastUpdateTime = currentTime; + + updatedList.add(this); + } + } + + /** + * a server has failed. + */ + private void serverFailed() { + // mark as inactive + synchronized (this) { + active = false; + } + + // remove it from the table + servers.remove(uuid); + + // add it to the failed servers table + failedServers.put(uuid, this); + + // clean up client information + if (client != null) { + client.shutdown(); + client = null; + target = null; + } + + // log an error message + logger.error("Server failure: {} ({})", uuid, socketAddress); + for (Events listener : Events.getListeners()) { + listener.serverFailed(this); + } + } + + /** + * Fetch, and possibily calculate, the "notify list" associated with this + * server. This is the list of servers to forward a server and bucket + * information to, and is approximately log2(n) in length, where 'n' is + * the total number of servers. + * It is calculated by starting with all of the servers sorted by UUID -- + * let's say the current server is at position 's'. 
The notify list will + * contain the server at positions: + * (s + 1) % n + * (s + 2) % n + * (s + 4) % n + * ... + * Using all powers of 2 less than 'n'. If the total server count is 50, + * this list has 6 entries. + * @return the notify list + */ + static Collection<Server> getNotifyList() { + // The 'notifyList' value is initially 'null', and it is reset to 'null' + // every time a new host joins, or one leaves. That way, it is marked for + // recalculation, but only when needed. + if (notifyList == null) { + // next index we are looking for + int dest = 1; + + // our current position in the Server table -- starting at 'thisServer' + UUID current = thisServer.uuid; + + // site socket address of 'current' + InetSocketAddress thisSiteSocketAddress = thisServer.siteSocketAddress; + + // hash set of all site socket addresses located + HashSet<InetSocketAddress> siteSocketAddresses = new HashSet<>(); + siteSocketAddresses.add(thisSiteSocketAddress); + + // the list we are building + notifyList = new LinkedList<Server>(); + + int index = 1; + for ( ; ; ) { + // move to the next key (UUID) -- if we hit the end of the table, + // wrap to the beginning + current = servers.higherKey(current); + if (current == null) { + current = servers.firstKey(); + } + if (current.equals(thisServer.uuid)) { + // we have looped through the entire list + break; + } + + // fetch associated server & site socket address + Server server = servers.get(current); + InetSocketAddress currentSiteSocketAddress = + server.siteSocketAddress; + + if (Objects.equals(thisSiteSocketAddress, + currentSiteSocketAddress)) { + // same site -- see if we should add this one + if (index == dest) { + // this is the next index we are looking for -- + // add the server + notifyList.add(server); + + // advance to the next offset (current-offset * 2) + dest = dest << 1; + } + index += 1; + } else if (!siteSocketAddresses.contains(currentSiteSocketAddress)) { + // we need at least one member from each site + 
notifyList.add(server); + siteSocketAddresses.add(currentSiteSocketAddress); + } + } + } + return notifyList; + } + + /** + * See if there is a host name associated with a destination socket address. + * + * @param dest the socket address of the destination + * @return the host name associated with the IP address, or the IP address + * if no associated host name is found. + */ + private static String socketAddressToName(InetSocketAddress dest) { + // destination IP address + InetAddress inetAddress = dest.getAddress(); + String destName = null; + + // go through the 'hostList' to see if there is a matching name + for (String hostName : hostList) { + try { + if (inetAddress.equals(InetAddress.getByName(hostName))) { + // this one matches -- use the name instead of the IP address + destName = hostName; + break; + } + } catch (UnknownHostException e) { + logger.debug("Server.socketAddressToName error", e); + } + } + + // default name = string value of IP address + return destName == null ? inetAddress.getHostAddress() : destName; + } + + /** + * Create an 'HttpClient' instance for a particular host. + * + * @param name of the host (currently a UUID or host:port string) + * @param dest the socket address of the destination + * @param destName the string name to use for the destination + */ + static HttpClient buildClient(String name, InetSocketAddress dest, String destName) + throws KeyManagementException, NoSuchAlgorithmException, + ClassNotFoundException, HttpClientConfigException { + + return HttpClientFactoryInstance.getClientFactory().build( + BusTopicParams.builder() + .clientName(name) // name + .useHttps(useHttps) // https + .allowSelfSignedCerts(useSelfSignedCertificates)// selfSignedCerts + .hostname(destName) // host + .port(dest.getPort()) // port + .managed(false) // managed + .build()); + } + + /** + * Extract the 'WebTarget' information from the 'HttpClient'. 
+ * + * @param client the associated HttpClient instance + * @return a WebTarget referring to the previously-specified 'baseUrl' + */ + static WebTarget getTarget(HttpClient client) + throws NoSuchFieldException, IllegalAccessException { + // need access to the internal field 'client' + // TBD: We need a way to get this information without reflection + Field field = client.getClass().getDeclaredField("client"); + field.setAccessible(true); + Client rsClient = (Client)field.get(client); + field.setAccessible(false); + + rsClient.property(ClientProperties.CONNECT_TIMEOUT, connectTimeout); + rsClient.property(ClientProperties.READ_TIMEOUT, readTimeout); + + // For performance reasons, the root 'WebTarget' is generated only once + // at initialization time for each remote host. + return rsClient.target(client.getBaseUrl()); + } + + /** + * This method may be invoked from any thread, and is used to send a + * message to the destination server associated with this 'Server' instance. + * + * @param path the path relative to the base URL + * @param entity the "request entity" containing the body of the + * HTTP POST request + */ + public void post(final String path, final Entity<?> entity) { + post(path, entity, null); + } + + /** + * This method may be invoked from any thread, and is used to send a + * message to the destination server associated with this 'Server' instance. + * + * @param path the path relative to the base URL + * @param entity the "request entity" containing the body of the + * HTTP POST request (if 'null', an HTTP GET is used instead) + * @param responseCallback if non-null, this callback may be used to + * modify the WebTarget, and/or receive the POST response message + */ + public void post(final String path, final Entity<?> entity, + PostResponse responseCallback) { + if (target == null) { + return; + } + + getThreadPool().execute(new Runnable() { + /** + * This method is running within the 'MainLoop' thread. 
+ */ + @Override + public void run() { + try { + WebTarget webTarget = target.path(path); + if (responseCallback != null) { + // give callback a chance to modify 'WebTarget' + webTarget = responseCallback.webTarget(webTarget); + + // send the response to the callback + Response response; + if (entity == null) { + response = webTarget.request().get(); + } else { + response = webTarget.request().post(entity); + } + responseCallback.response(response); + } else { + // just do the invoke, and ignore the response + if (entity == null) { + webTarget.request().get(); + } else { + webTarget.request().post(entity); + } + } + } catch (Exception e) { + logger.error("Failed to send to {} ({}, {})", + uuid, destSocketAddress, destName); + responseCallback.exceptionResponse(e); + MainLoop.queueWork(new Runnable() { + @Override + public void run() { + // the DNS cache may have been out-of-date when this server + // was first contacted -- fix the problem, if needed + checkServer(); + } + }); + } + } + }); + } + + /** + * This method may be invoked from any thread. + * + * @return the 'ThreadPoolExecutor' associated with this server + */ + public synchronized ThreadPoolExecutor getThreadPool() { + if (sendThreadPool == null) { + // build a thread pool for this Server + sendThreadPool = + new ThreadPoolExecutor(corePoolSize, maximumPoolSize, + keepAliveTime, TimeUnit.MILLISECONDS, + new LinkedTransferQueue<Runnable>()); + sendThreadPool.allowCoreThreadTimeOut(true); + } + return sendThreadPool; + } + + /** + * Lower-level method supporting HTTP, which requires that the caller's + * thread tolerate blocking. This method may be called from any thread. + * + * @param path the path relative to the base URL + * @return a 'WebTarget' instance pointing to this path + */ + public WebTarget getWebTarget(String path) { + return target == null ? 
null : target.path(path); + } + + /** + * This method may be invoked from any thread, but its real intent is + * to decode an incoming 'admin' message (which is Base-64-encoded), + * and send it to the 'MainLoop' thread for processing. + * + * @param data the base-64-encoded data + */ + static void adminRequest(byte[] data) { + final byte[] packet = Base64.getDecoder().decode(data); + Runnable task = () -> { + try { + ByteArrayInputStream bis = new ByteArrayInputStream(packet); + DataInputStream dis = new DataInputStream(bis); + + while (dis.available() != 0) { + Server serverData = new Server(dis); + + // TBD: Compare with current server + + Server server = servers.get(serverData.uuid); + if (server == null) { + serverData.newServer(); + } else { + server.updateServer(serverData); + } + } + } catch (Exception e) { + logger.error("Server.adminRequest: can't decode packet", e); + } + }; + MainLoop.queueWork(task); + } + + /** + * Send out information about servers 'updatedList' to all servers + * in 'notifyList' (may need to build or rebuild 'notifyList'). 
+ */ + static void sendOutData() throws IOException { + ByteArrayOutputStream bos = new ByteArrayOutputStream(); + DataOutputStream dos = new DataOutputStream(bos); + + // include 'thisServer' in the data -- first, advance the count + if ((thisServer.count += 1) == 0) { + /* + * counter wrapped (0 is a special case); + * actually, we could probably leave this out, because it would take + * more than a century to wrap if the increment is 1 second + */ + thisServer.count = 1; + } + thisServer.lastUpdateTime = System.currentTimeMillis(); + thisServer.writeServerData(dos); + + // include all hosts in the updated list + for (Server server : updatedList) { + server.writeServerData(dos); + } + updatedList.clear(); + + // create an 'Entity' that can be sent out to all hosts in the notify list + Entity<String> entity = Entity.entity( + new String(Base64.getEncoder().encode(bos.toByteArray()), StandardCharsets.UTF_8), + MediaType.APPLICATION_OCTET_STREAM_TYPE); + for (Server server : getNotifyList()) { + server.post("admin", entity); + } + } + + /** + * Search for servers which have taken too long to respond. + */ + static void searchForFailedServers() { + long currentTime = System.currentTimeMillis(); + + // used to build a list of newly-failed servers + LinkedList<Server> failed = new LinkedList<>(); + for (Server server : servers.values()) { + if (server == thisServer) { + continue; + } + long gap = currentTime - server.lastUpdateTime; + if (gap > server.allowedGap) { + // add it to the failed list -- we don't call 'serverFailed' yet, + // because this updates the server list, and leads to a + // 'ConcurrentModificationException' + failed.add(server); + } + } + + // remove servers from our list + if (!failed.isEmpty()) { + for (Server server : failed) { + server.serverFailed(); + } + notifyList = null; + } + } + + /** + * This method may be invoked from any thread: + * Send information about 'thisServer' to the list of hosts. 
+ * + * @param out the 'PrintStream' to use for displaying information + * @param hosts a comma-separated list of entries containing + * 'host:port' or just 'port' (current host is implied in this case) + */ + static void pingHosts(PrintStream out, String hosts) { + LinkedList<InetSocketAddress> addresses = new LinkedList<>(); + boolean error = false; + + for (String host : hosts.split(",")) { + try { + String[] segs = host.split(":"); + + switch (segs.length) { + case 1: + addresses.add(new InetSocketAddress(InetAddress.getLocalHost(), + Integer.parseInt(segs[0]))); + break; + case 2: + addresses.add(new InetSocketAddress(segs[0], + Integer.parseInt(segs[1]))); + break; + default: + out.println(host + ": Invalid host/port value"); + error = true; + break; + } + } catch (NumberFormatException e) { + out.println(host + ": Invalid port value"); + logger.error("Server.pingHosts error", e); + error = true; + } catch (UnknownHostException e) { + out.println(host + ": Unknown host"); + logger.error("Server.pingHosts error", e); + error = true; + } + } + if (!error) { + pingHosts(out, addresses); + } + } + + /** + * This method may be invoked from any thread: + * Send information about 'thisServer' to the list of hosts. 
+ * + * @param out the 'PrintStream' to use for displaying information + * @param hosts a collection of 'InetSocketAddress' instances, which are + * the hosts to send the information to + */ + static void pingHosts(final PrintStream out, + final Collection<InetSocketAddress> hosts) { + FutureTask<Integer> ft = new FutureTask<>(new Callable<Integer>() { + @Override + public Integer call() { + ByteArrayOutputStream bos = new ByteArrayOutputStream(); + DataOutputStream dos = new DataOutputStream(bos); + + // add information for this server only + try { + thisServer.writeServerData(dos); + + // create an 'Entity' that can be sent out to all hosts + Entity<String> entity = Entity.entity( + new String(Base64.getEncoder().encode(bos.toByteArray()), + StandardCharsets.UTF_8), + MediaType.APPLICATION_OCTET_STREAM_TYPE); + + // loop through hosts + for (InetSocketAddress host : hosts) { + HttpClient client = null; + + try { + client = buildClient(host.toString(), host, + socketAddressToName(host)); + getTarget(client).path("admin").request().post(entity); + client.shutdown(); + client = null; + } catch (KeyManagementException | NoSuchAlgorithmException e) { + out.println(host + ": Unable to create client connection"); + logger.error("Server.pingHosts error", e); + } catch (NoSuchFieldException | IllegalAccessException e) { + out.println(host + ": Unable to get link to target"); + logger.error("Server.pingHosts error", e); + } catch (Exception e) { + out.println(host + ": " + e); + logger.error("Server.pingHosts error", e); + } + if (client != null) { + client.shutdown(); + } + } + } catch (IOException e) { + out.println("Unable to generate 'ping' data: " + e); + logger.error("Server.pingHosts error", e); + } + return 0; + } + }); + + MainLoop.queueWork(ft); + try { + ft.get(60, TimeUnit.SECONDS); + } catch (InterruptedException | ExecutionException | TimeoutException e) { + logger.error("Server.pingHosts: error waiting for queued work", e); + } + } + + /** + * This method 
may be invoked from any thread: + * Dump out the current 'servers' table in a human-readable table form. + * + * @param out the 'PrintStream' to dump the table to + */ + public static void dumpHosts(final PrintStream out) { + FutureTask<Integer> ft = new FutureTask<Integer>(new Callable<Integer>() { + public Integer call() { + dumpHostsInternal(out); + return 0; + } + }); + MainLoop.queueWork(ft); + try { + ft.get(60, TimeUnit.SECONDS); + } catch (InterruptedException | ExecutionException | TimeoutException e) { + logger.error("Server.dumpHosts: error waiting for queued work", e); + } + } + + /** + * Dump out the current 'servers' table in a human-readable table form. + * + * @param out the 'PrintStream' to dump the table to + */ + private static void dumpHostsInternal(PrintStream out) { + // modifications to 'servers.values()' and 'notifyList'. + HashSet<Server> localNotifyList = new HashSet<>(getNotifyList()); + + // see if we have any site information + boolean siteData = false; + for (Server server : servers.values()) { + if (server.siteSocketAddress != null) { + siteData = true; + break; + } + } + + String format = "%1s %-36s %-15s %5s %5s %12s %7s %7s\n"; + SimpleDateFormat dateFormat = new SimpleDateFormat("kk:mm:ss.SSS"); + + if (siteData) { + format = "%1s %-36s %-15s %5s %-15s %5s %5s %12s %7s %7s\n"; + // @formatter:off + out.printf(format, "", "UUID", "IP Address", "Port", + "Site IP Address", "Port", + "Count", "Update Time", "Elapsed", "Allowed"); + out.printf(format, "", "----", "----------", "----", + "---------------", "----", + "-----", "-----------", "-------", "-------"); + // @formatter:on + } else { + // @formatter:off + out.printf(format, "", "UUID", "IP Address", "Port", + "Count", "Update Time", "Elapsed", "Allowed"); + out.printf(format, "", "----", "----------", "----", + "-----", "-----------", "-------", "-------"); + // @formatter:on + } + + long currentTime = System.currentTimeMillis(); + for (Server server : servers.values()) { + 
String thisOne = ""; + + if (server == thisServer) { + thisOne = "*"; + } else if (localNotifyList.contains(server)) { + thisOne = "n"; + } + /* + else if (newHosts.contains(server)) + { + thisOne = "N"; + } + */ + + if (siteData) { + String siteIp = ""; + String sitePort = ""; + if (server.siteSocketAddress != null) { + siteIp = + server.siteSocketAddress.getAddress().getHostAddress(); + sitePort = String.valueOf(server.siteSocketAddress.getPort()); + } + + out.printf(format, thisOne, server.uuid, + server.socketAddress.getAddress().getHostAddress(), + server.socketAddress.getPort(), + siteIp, sitePort, server.count, + dateFormat.format(new Date(server.lastUpdateTime)), + currentTime - server.lastUpdateTime, + server.allowedGap); + } else { + out.printf(format, thisOne, server.uuid, + server.socketAddress.getAddress().getHostAddress(), + server.socketAddress.getPort(), server.count, + dateFormat.format(new Date(server.lastUpdateTime)), + currentTime - server.lastUpdateTime, + server.allowedGap); + } + } + out.println("Count: " + servers.size()); + } + + /* ============================================================ */ + + /** + * This interface supports the 'post' method, and provides the opportunity + * to change the WebTarget and/or receive the POST response message. + */ + interface PostResponse { + /** + * Callback that can be used to modify 'WebTarget', and do things like + * add query parameters. + * + * @param webTarget the current WebTarget + * @return the updated WebTarget + */ + public default WebTarget webTarget(WebTarget webTarget) { + return webTarget; + } + + /** + * Callback that passes the POST response. + * + * @param response the POST response + */ + public default void response(Response response) { + } + + /** + * Callback that passes the POST exception response. 
+ * + */ + public default void exceptionResponse(Exception exception) { + Response.ResponseBuilder response; + response = Response.serverError(); + this.response(response.build()); + } + } +} diff --git a/feature-server-pool/src/main/java/org/onap/policy/drools/serverpool/ServerPoolApi.java b/feature-server-pool/src/main/java/org/onap/policy/drools/serverpool/ServerPoolApi.java new file mode 100644 index 00000000..c6337749 --- /dev/null +++ b/feature-server-pool/src/main/java/org/onap/policy/drools/serverpool/ServerPoolApi.java @@ -0,0 +1,79 @@ +/* + * ============LICENSE_START======================================================= + * feature-server-pool + * ================================================================================ + * Copyright (C) 2020 AT&T Intellectual Property. All rights reserved. + * ================================================================================ + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * ============LICENSE_END========================================================= + */ + +package org.onap.policy.drools.serverpool; + +import java.util.Collection; + +import org.onap.policy.common.utils.services.OrderedService; +import org.onap.policy.common.utils.services.OrderedServiceImpl; + +public interface ServerPoolApi extends OrderedService { + /** + * 'ServerPoolApi.impl.getList()' returns an ordered list of objects + * implementing the 'ServerPoolApi' interface. 
+ */ + public static OrderedServiceImpl<ServerPoolApi> impl = + new OrderedServiceImpl<>(ServerPoolApi.class); + + /** + * method gives all of the listening features the ability to add + * classes to the 'HttpServletServer'. + * + * @return a Collection of classes implementing REST methods + */ + public default Collection<Class<?>> servletClasses() { + return null; + } + + /** + * This is called in the case where no bucket migration data was received + * from the old owner of the bucket (such as if the old owner failed). + * It gives one or more features the opportunity to do the restore. + * + * @param bucket the bucket that needs restoring + */ + public default void restoreBucket(Bucket bucket) { + } + + /** + * This is called whenever a 'GlobalLocks' object is updated. It was added + * in order to support persistence, but may be used elsewhere as well. + * + * @param bucket the bucket containing the 'GlobalLocks' adjunct + * @param globalLocks the 'GlobalLocks' adjunct + */ + public default void lockUpdate(Bucket bucket, TargetLock.GlobalLocks globalLocks) { + } + + /** + * This is called when the state of a bucket has changed, but is currently + * stable, and it gives features the ability to do an audit. The intent is + * to make sure that the adjunct state is correct; in particular, to remove + * adjuncts that should no longer be there based upon the current state. + * Note that this method is called while being synchronized on the bucket. 
+ * + * @param bucket the bucket to audit + * @param isOwner 'true' if the current host owns the bucket + * @param isBackup 'true' if the current host is a backup for the bucket + */ + public default void auditBucket(Bucket bucket, boolean isOwner, boolean isBackup) { + } +} diff --git a/feature-server-pool/src/main/java/org/onap/policy/drools/serverpool/ServerPoolProperties.java b/feature-server-pool/src/main/java/org/onap/policy/drools/serverpool/ServerPoolProperties.java new file mode 100644 index 00000000..fb6a791e --- /dev/null +++ b/feature-server-pool/src/main/java/org/onap/policy/drools/serverpool/ServerPoolProperties.java @@ -0,0 +1,332 @@ +/* + * ============LICENSE_START======================================================= + * feature-server-pool + * ================================================================================ + * Copyright (C) 2020 AT&T Intellectual Property. All rights reserved. + * ================================================================================ + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * ============LICENSE_END========================================================= + */ + +package org.onap.policy.drools.serverpool; + +import java.util.Properties; + +import org.apache.commons.lang3.StringUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class ServerPoolProperties { + // 'Server' port listener + public static final String SERVER_IP_ADDRESS = "server.pool.server.ipAddress"; + public static final String SERVER_PORT = "server.pool.server.port"; + public static final String SERVER_HTTPS = "server.pool.server.https"; + public static final String SERVER_SELF_SIGNED_CERTIFICATES = + "server.pool.server.selfSignedCerts"; + + // 'site' information + public static final String SITE_IP_ADDRESS = "server.pool.server.site.ip"; + public static final String SITE_PORT = "server.pool.server.site.port"; + + // the default is to listen to all IP addresses on the host + public static final String DEFAULT_SERVER_IP_ADDRESS = "0.0.0.0"; + + // the default is to dynamically select a port + public static final int DEFAULT_SERVER_PORT = 0; + + // the default is to have HTTPS disabled + public static final boolean DEFAULT_HTTPS = false; + + // the default is to not use self-signed certificates + public static final boolean DEFAULT_SELF_SIGNED_CERTIFICATES = false; + + // list of remote server names to use in HTTP/HTTPS messages + // (instead of host names) + public static final String HOST_LIST = "server.pool.server.hostlist"; + + // 'Server' timeouts + public static final String SERVER_INITIAL_ALLOWED_GAP = "server.pool.server.allowedGap"; + public static final String SERVER_ADAPTIVE_GAP_ADJUST = + "server.adaptiveGapAdjust"; + public static final String SERVER_CONNECT_TIMEOUT = "server.pool.server.connectTimeout"; + public static final String SERVER_READ_TIMEOUT = "server.pool.server.readTimeout"; + + // at startup, initially allow 30 seconds between pings + public static final long DEFAULT_SERVER_INITIAL_ALLOWED_GAP = 30000; + + // when 
doing the adaptive calculation of the allowed gap between pings, + // adjust the time by adding 5 seconds (by default) + public static final long DEFAULT_SERVER_ADAPTIVE_GAP_ADJUST = 5000; + + // the default is to allow 10 seconds for a TCP connect + public static final long DEFAULT_SERVER_CONNECT_TIMEOUT = 10000; + + // the default is to allow 10 seconds for a TCP read response + public static final long DEFAULT_SERVER_READ_TIMEOUT = 10000; + + // outgoing per-server thread pool parameters + public static final String SERVER_THREADS_CORE_POOL_SIZE = + "server.pool.server.threads.corePoolSize"; + public static final String SERVER_THREADS_MAXIMUM_POOL_SIZE = + "server.pool.server.threads.maximumPoolSize"; + public static final String SERVER_THREADS_KEEP_ALIVE_TIME = + "server.pool.server.threads.keepAliveTime"; + + public static final int DEFAULT_SERVER_THREADS_CORE_POOL_SIZE = 5; + public static final int DEFAULT_SERVER_THREADS_MAXIMUM_POOL_SIZE = 10; + public static final long DEFAULT_SERVER_THREADS_KEEP_ALIVE_TIME = 5000; + + /*================*/ + /* Host Discovery */ + /*================*/ + + public static final String DISCOVERY_SERVERS = "server.pool.discovery.servers"; + public static final String DISCOVERY_TOPIC = "server.pool.discovery.topic"; + + // HTTP authentication + public static final String DISCOVERY_USERNAME = "server.pool.discovery.username"; + public static final String DISCOVERY_PASSWORD = "server.pool.discovery.password"; + + // Cambria authentication + public static final String DISCOVERY_API_KEY = "server.pool.discovery.apiKey"; + public static final String DISCOVERY_API_SECRET = "server.pool.discovery.apiSecret"; + + // timeouts + public static final String DISCOVERY_FETCH_TIMEOUT = + "server.pool.discovery.fetchTimeout"; + + // this value is passed to the UEB/DMAAP server, and controls how long + // a 'fetch' request will wait when there are no incoming messages + public static final String DEFAULT_DISCOVERY_FETCH_TIMEOUT = "60000"; + + // 
maximum message fetch limit + public static final String DISCOVERY_FETCH_LIMIT = "server.pool.discovery.fetchLimit"; + + // this value is passed to the UEB/DMAAP server, and controls how many + // requests may be returned in a single fetch + public static final String DEFAULT_DISCOVERY_FETCH_LIMIT = "100"; + + // publisher thread cycle time + public static final String DISCOVER_PUBLISHER_LOOP_CYCLE_TIME = + "discovery.publisherLoopCycleTime"; + + // default cycle time is 5 seconds + public static final long DEFAULT_DISCOVER_PUBLISHER_LOOP_CYCLE_TIME = 5000; + + // encryption + public static final String DISCOVERY_HTTPS = "server.pool.discovery.https"; + public static final String DISCOVERY_ALLOW_SELF_SIGNED_CERTIFICATES = + "server.pool.discovery.selfSignedCertificates"; + + /*============================*/ + /* Leader Election Parameters */ + /*============================*/ + + public static final String LEADER_STABLE_IDLE_CYCLES = + "server.pool.leader.stableIdleCycles"; + public static final String LEADER_STABLE_VOTING_CYCLES = + "server.pool.leader.stableVotingCycles"; + + // by default, wait for 5 cycles (seconds) of stability before voting starts + public static final int DEFAULT_LEADER_STABLE_IDLE_CYCLES = 5; + + // by default, wait for 5 cycles of stability before declaring a winner + public static final int DEFAULT_LEADER_STABLE_VOTING_CYCLES = 5; + + /*=====================*/ + /* MainLoop Parameters */ + /*=====================*/ + + public static final String MAINLOOP_CYCLE = "server.pool.mainLoop.cycle"; + + // by default, the main loop cycle is 1 second + public static final long DEFAULT_MAINLOOP_CYCLE = 1000; + + /*=============================*/ + /* Bucket Migration Parameters */ + /*=============================*/ + + // time-to-live controls how many hops a 'TargetLock' message can take + public static final String BUCKET_TIME_TO_LIVE = "bucket.ttl"; + + // bucket migration timeout when a server has been notified that it + // is the new owner of 
the bucket + public static final String BUCKET_CONFIRMED_TIMEOUT = + "bucket.confirmed.timeout"; + + // bucket migration timeout when a server has inferred that it may be + // the new owner, but it hasn't yet been confirmed + public static final String BUCKET_UNCONFIRMED_TIMEOUT = + "bucket.unconfirmed.timeout"; + + // timeout for operation run within a Drools session + public static final String BUCKET_DROOLS_TIMEOUT = + "bucket.drools.timeout"; + + // when a new owner of a bucket has completed the takeover of the + // bucket, but it hasn't yet been confirmed, there is an additional + // grace period before leaving the 'NewOwner' state + public static final String BUCKET_UNCONFIRMED_GRACE_PERIOD = + "bucket.unconfirmed.graceperiod"; + + // time-to-live = 5 hops + public static final int DEFAULT_BUCKET_TIME_TO_LIVE = 5; + + // 30 seconds timeout if it has been confirmed that we are the new owner + public static final long DEFAULT_BUCKET_CONFIRMED_TIMEOUT = 30000; + + // 10 seconds timeout if it has not been confirmed that we are the new owner + public static final long DEFAULT_BUCKET_UNCONFIRMED_TIMEOUT = 10000; + + // 10 seconds timeout waiting for a drools operation to complete + public static final long DEFAULT_BUCKET_DROOLS_TIMEOUT = 10000; + + // 10 seconds timeout waiting to be confirmed that we are the new owner + public static final long DEFAULT_BUCKET_UNCONFIRMED_GRACE_PERIOD = 10000; + + /*=======================*/ + /* TargetLock Parameters */ + /*=======================*/ + + // time-to-live controls how many hops a 'TargetLock' message can take + public static final String LOCK_TIME_TO_LIVE = "lock.ttl"; + + // how frequently should the audit run? + public static final String LOCK_AUDIT_PERIOD = "lock.audit.period"; + + // when the audit is rescheduled (e.g. due to a new server joining), this + // is the initial grace period, to allow time for bucket assignments, etc. 
+ public static final String LOCK_AUDIT_GRACE_PERIOD = + "lock.audit.gracePeriod"; + + // there may be audit mismatches detected that are only due to the transient + // nature of the lock state -- we check the mismatches on both sides after + // this delay to see if we are still out-of-sync + public static final String LOCK_AUDIT_RETRY_DELAY = "lock.audit.retryDelay"; + + // time-to-live = 5 hops + public static final int DEFAULT_LOCK_TIME_TO_LIVE = 5; + + // run the audit every 5 minutes + public static final long DEFAULT_LOCK_AUDIT_PERIOD = 300000; + + // wait at least 60 seconds after an event before running the audit + public static final long DEFAULT_LOCK_AUDIT_GRACE_PERIOD = 60000; + + // wait 5 seconds to see if the mismatches still exist + public static final long DEFAULT_LOCK_AUDIT_RETRY_DELAY = 5000; + + /* ============================================================ */ + + private static Logger logger = + LoggerFactory.getLogger(ServerPoolProperties.class); + + // save initial set of properties + private static Properties properties = new Properties(); + + /** + * Store the application properties values. + * + * @param properties the properties to save + */ + public static void setProperties(Properties properties) { + ServerPoolProperties.properties = properties; + } + + /** + * Return the properties used when starting this server. + * + * @return the properties used when starting this server. + */ + public static Properties getProperties() { + return properties; + } + + /** + * Convenience method to fetch a 'long' property. 
+ * + * @param name the property name + * @param defaultValue the value to use if the property is not defined, + * or has an illegal value + * @return the property value + */ + public static long getProperty(String name, long defaultValue) { + long rval = defaultValue; + String value = properties.getProperty(name); + if (StringUtils.isNotBlank(value)) { + // try to convert to a 'long' -- log a message in case of failure + try { + rval = Long.parseLong(value); + } catch (NumberFormatException e) { + logger.error("Property {}=\"{}\": illegal long -- " + + "using default of {}", name, value, defaultValue); + } + } + return rval; + } + + /** + * Convenience method to fetch an 'int' property. + * + * @param name the property name + * @param defaultValue the value to use if the property is not defined, + * or has an illegal value + * @return the property value + */ + public static int getProperty(String name, int defaultValue) { + int rval = defaultValue; + String value = properties.getProperty(name); + if (StringUtils.isNotBlank(value)) { + // try to convert to an 'int' -- log a message in case of failure + try { + rval = Integer.parseInt(value); + } catch (NumberFormatException e) { + logger.error("Property {}=\"{}\": illegal int -- " + + "using default of {}", name, value, defaultValue); + } + } + return rval; + } + + /** + * Convenience method to fetch a 'boolean' property. 
+ * + * @param name the property name + * @param defaultValue the value to use if the property is not defined, + * or has an illegal value + * @return the property value + */ + public static boolean getProperty(String name, boolean defaultValue) { + boolean rval = defaultValue; + String value = properties.getProperty(name); + if (StringUtils.isNotBlank(value)) { + // try to convert to an 'boolean' -- log a message in case of failure + rval = Boolean.parseBoolean(value); + } + return rval; + } + + /** + * Convenience method to fetch a 'String' property + * (provided for consistency with 'long' and 'int' versions). + * + * @param name the property name + * @param defaultValue the value to use if the property is not defined, + * or has an illegal value + * @return the property value + */ + public static String getProperty(String name, String defaultValue) { + String value = properties.getProperty(name); + return (StringUtils.isNotBlank(value)) ? value : defaultValue; + } +} diff --git a/feature-server-pool/src/main/java/org/onap/policy/drools/serverpool/TargetLock.java b/feature-server-pool/src/main/java/org/onap/policy/drools/serverpool/TargetLock.java new file mode 100644 index 00000000..7e4b795f --- /dev/null +++ b/feature-server-pool/src/main/java/org/onap/policy/drools/serverpool/TargetLock.java @@ -0,0 +1,2821 @@ +/* + * ============LICENSE_START======================================================= + * feature-server-pool + * ================================================================================ + * Copyright (C) 2020 AT&T Intellectual Property. All rights reserved. + * ================================================================================ + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * ============LICENSE_END========================================================= + */ + +package org.onap.policy.drools.serverpool; + +import static org.junit.Assert.assertTrue; +import static org.onap.policy.drools.serverpool.ServerPoolProperties.DEFAULT_LOCK_AUDIT_GRACE_PERIOD; +import static org.onap.policy.drools.serverpool.ServerPoolProperties.DEFAULT_LOCK_AUDIT_PERIOD; +import static org.onap.policy.drools.serverpool.ServerPoolProperties.DEFAULT_LOCK_AUDIT_RETRY_DELAY; +import static org.onap.policy.drools.serverpool.ServerPoolProperties.DEFAULT_LOCK_TIME_TO_LIVE; +import static org.onap.policy.drools.serverpool.ServerPoolProperties.LOCK_AUDIT_GRACE_PERIOD; +import static org.onap.policy.drools.serverpool.ServerPoolProperties.LOCK_AUDIT_PERIOD; +import static org.onap.policy.drools.serverpool.ServerPoolProperties.LOCK_AUDIT_RETRY_DELAY; +import static org.onap.policy.drools.serverpool.ServerPoolProperties.LOCK_TIME_TO_LIVE; +import static org.onap.policy.drools.serverpool.ServerPoolProperties.getProperty; + +import java.io.IOException; +import java.io.ObjectInputStream; +import java.io.ObjectOutputStream; +import java.io.PrintStream; +import java.io.Serializable; +import java.lang.ref.Reference; +import java.lang.ref.ReferenceQueue; +import java.lang.ref.WeakReference; +import java.util.ArrayList; +import java.util.Base64; +import java.util.Collection; +import java.util.Date; +import java.util.HashMap; +import java.util.IdentityHashMap; +import java.util.LinkedList; +import java.util.List; +import java.util.Map; +import java.util.Queue; 
+import java.util.TimerTask; +import java.util.TreeMap; +import java.util.UUID; +import java.util.concurrent.LinkedTransferQueue; +import java.util.concurrent.TimeUnit; +import javax.ws.rs.client.Entity; +import javax.ws.rs.client.WebTarget; +import javax.ws.rs.core.MediaType; +import javax.ws.rs.core.Response; + +import lombok.NonNull; +import org.onap.policy.drools.core.DroolsRunnable; +import org.onap.policy.drools.core.PolicyContainer; +import org.onap.policy.drools.core.PolicySession; +import org.onap.policy.drools.core.lock.Lock; +import org.onap.policy.drools.core.lock.LockCallback; +import org.onap.policy.drools.core.lock.PolicyResourceLockManager; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * This class provides a locking mechanism based upon a string key that + * identifies the lock, and another string key that identifies the owner. + * The existence of the 'TargetLock' instance doesn't mean that the + * corresponding lock has been acquired -- this is only the case if the + * instance is in the 'ACTIVE' state. + * A lock in the ACTIVE or WAITING state exists in two sets of tables, + * which may be on different hosts: + * LocalLocks - these two tables are associated with the owner key of the + * lock. They are in an adjunct to the bucket associated with this key, + * and the bucket is owned by the host containing the entry. + * GlobalLocks - this table is associated with the lock key. It is an + * adjunct to the bucket associated with this key, and the bucket is + * owned by the host containing the entry. 
+ */ +public class TargetLock implements Lock, Serializable { + private static Logger logger = LoggerFactory.getLogger(TargetLock.class); + + // Listener class to handle state changes that require restarting the audit + private static EventHandler eventHandler = new EventHandler(); + + static { + // register Listener class + Events.register(eventHandler); + } + + // this is used to locate ACTIVE 'TargetLock' instances that have been + // abandoned -- as the GC cleans up the 'WeakReference' instances referring + // to these locks, we use that information to clean them up + private static ReferenceQueue<TargetLock> abandoned = new ReferenceQueue<>(); + + // some status codes + static final int ACCEPTED = 202; //Response.Status.ACCEPTED.getStatusCode(); + static final int NO_CONTENT = 204; //Response.Status.NO_CONTENT.getStatusCode(); + static final int LOCKED = 423; + + // Values extracted from properties + + private static String timeToLive; + private static long auditPeriod; + private static long auditGracePeriod; + private static long auditRetryDelay; + + // lock states: + // WAITING - in line to acquire the lock + // ACTIVE - currently holding the lock + // FREE - WAITING/ACTIVE locks that were explicitly freed + // LOST - could occur when a de-serialized ACTIVE lock can't be made + // ACTIVE because there is already an ACTIVE holder of the lock + public enum State { + WAITING, ACTIVE, FREE, LOST + } + + // this contains information that is placed in the 'LocalLocks' tables, + // and has a one-to-one correspondence with the 'TargetLock' instance + private Identity identity; + + // this is the only field that can change after initialization + private State state; + + // this is used to notify the application when a lock is available, + // or if it is not available + private LockCallback owner; + + // This is what is actually called by the infrastructure to do the owner + // notification. 
The owner may be running in a Drools session, in which case + // the actual notification should be done within that thread -- the 'context' + // object ensures that it happens this way. + private LockCallback context; + + /** + * This method triggers registration of 'eventHandler', and also extracts + * property values. + */ + static void startup() { + int intTimeToLive = + getProperty(LOCK_TIME_TO_LIVE, DEFAULT_LOCK_TIME_TO_LIVE); + timeToLive = String.valueOf(intTimeToLive); + auditPeriod = getProperty(LOCK_AUDIT_PERIOD, DEFAULT_LOCK_AUDIT_PERIOD); + auditGracePeriod = + getProperty(LOCK_AUDIT_GRACE_PERIOD, DEFAULT_LOCK_AUDIT_GRACE_PERIOD); + auditRetryDelay = + getProperty(LOCK_AUDIT_RETRY_DELAY, DEFAULT_LOCK_AUDIT_RETRY_DELAY); + } + + /** + * Shutdown threads. + */ + static void shutdown() { + AbandonedHandler ah = abandonedHandler; + + if (ah != null) { + abandonedHandler = null; + ah.interrupt(); + } + } + + /** + * Constructor - initializes the 'TargetLock' instance, and tries to go + * ACTIVE. The lock is initially placed in the WAITING state, and the owner + * and the owner will be notified when the success or failure of the lock + * attempt is determined. + * + * @param key string key identifying the lock + * @param ownerKey string key identifying the owner, which must hash to + * a bucket owned by the current host (it is typically a 'RequestID') + * @param owner owner of the lock (will be notified when going from + * WAITING to ACTIVE) + */ + public TargetLock(String key, String ownerKey, LockCallback owner) { + this(key, ownerKey, owner, true); + } + + /** + * Constructor - initializes the 'TargetLock' instance, and tries to go + * ACTIVE. The lock is initially placed in the WAITING state, and the owner + * and the owner will be notified when the success or failure of the lock + * attempt is determined. 
+ * + * @param key string key identifying the lock + * @param ownerKey string key identifying the owner, which must hash to + * a bucket owned by the current host (it is typically a 'RequestID') + * @param owner owner of the lock (will be notified when going from + * WAITING to ACTIVE) + * @param waitForLock this controls the behavior when 'key' is already + * locked - 'true' means wait for it to be freed, 'false' means fail + */ + public TargetLock(final String key, final String ownerKey, + final LockCallback owner, final boolean waitForLock) { + if (key == null) { + throw(new IllegalArgumentException("TargetLock: 'key' can't be null")); + } + if (ownerKey == null) { + throw(new IllegalArgumentException("TargetLock: 'ownerKey' can't be null")); + } + if (!Bucket.isKeyOnThisServer(ownerKey)) { + // associated bucket is assigned to a different server + throw(new IllegalArgumentException("TargetLock: 'ownerKey=" + ownerKey + + "' not currently assigned to this server")); + } + if (owner == null) { + throw(new IllegalArgumentException("TargetLock: 'owner' can't be null")); + } + identity = new Identity(key, ownerKey); + state = State.WAITING; + this.owner = owner; + + // determine the context + PolicySession session = PolicySession.getCurrentSession(); + if (session != null) { + // deliver through a 'PolicySessionContext' class + Object lcontext = session.getAdjunct(PolicySessionContext.class); + if (lcontext == null || !(lcontext instanceof LockCallback)) { + context = new PolicySessionContext(session); + session.setAdjunct(PolicySessionContext.class, context); + } else { + context = (LockCallback)lcontext; + } + } else { + // no context to deliver through -- call back directly to owner + context = owner; + } + + // update 'LocalLocks' tables + final WeakReference<TargetLock> wr = new WeakReference<>(this, abandoned); + final LocalLocks localLocks = LocalLocks.get(ownerKey); + + synchronized (localLocks) { + localLocks.weakReferenceToIdentity.put(wr, identity); + 
localLocks.uuidToWeakReference.put(identity.uuid, wr); + } + + // The associated 'GlobalLocks' table may or may not be on a different + // host. Also, the following call may queue the message for later + // processing if the bucket is in a transient state. + Bucket.forwardAndProcess(key, new Bucket.Message() { + /** + * {@inheritDoc} + */ + @Override + public void process() { + // 'GlobalLocks' is on the same host + State newState = GlobalLocks.get(key).lock(key, ownerKey, identity.uuid, waitForLock); + logger.info("Lock lock request: key={}, owner={}, uuid={}, wait={} (resp={})", + key, ownerKey, identity.uuid, waitForLock, state); + + // The lock may now be ACTIVE, FREE, or WAITING -- we can notify + // the owner of the result now for ACTIVE or FREE. Also, the callback + // may occur while the constructor is still on the stack, although + // this won't happen in a Drools session. + setState(newState); + switch (newState) { + case ACTIVE: + // lock was successful - send notification + context.lockAvailable(TargetLock.this); + break; + case FREE: + // lock attempt failed - + // clean up local tables, and send notification + synchronized (localLocks) { + localLocks.weakReferenceToIdentity.remove(wr); + localLocks.uuidToWeakReference.remove(identity.uuid); + } + wr.clear(); + context.lockUnavailable(TargetLock.this); + break; + + case WAITING: + break; + + default: + logger.error("Unknown state: {}", newState); + break; + } + } + + /** + * {@inheritDoc} + */ + @Override + public void sendToServer(Server server, int bucketNumber) { + // actual lock is on a remote host -- send the request as + // a REST message + logger.info("Sending lock request to {}: key={}, owner={}, uuid={}, wait={}", + server, key, ownerKey, identity.uuid, waitForLock); + server.post("lock/lock", null, new Server.PostResponse() { + /** + * {@inheritDoc} + */ + @Override + public WebTarget webTarget(WebTarget webTarget) { + return webTarget + .queryParam("key", key) + .queryParam("owner", 
ownerKey) + .queryParam("uuid", identity.uuid.toString()) + .queryParam("wait", waitForLock) + .queryParam("ttl", timeToLive); + } + + /** + * {@inheritDoc} + */ + @Override + public void response(Response response) { + logger.info("Lock response={} (code={})", + response, response.getStatus()); + + /** + * there are three possible responses: + * 204 No Content - operation was successful + * 202 Accepted - operation is still in progress + * 423 (Locked) - lock in use, and 'waitForLock' is 'false' + */ + switch (response.getStatus()) { + case NO_CONTENT: { + // lock successful + setState(State.ACTIVE); + context.lockAvailable(TargetLock.this); + break; + } + + case LOCKED: { + // failed -- lock in use, and 'waitForLock == false' + setState(State.FREE); + synchronized (localLocks) { + localLocks.weakReferenceToIdentity.remove(wr); + localLocks.uuidToWeakReference.remove(identity.uuid); + } + wr.clear(); + context.lockUnavailable(TargetLock.this); + break; + } + + case ACCEPTED: + break; + + default: + logger.error("Unknown status: ", response.getStatus()); + break; + } + } + }); + } + }); + } + + /********************/ + /* 'Lock' Interface */ + /********************/ + + /** + * This method will free the current lock, or remove it from the waiting + * list if a response is pending. + * + * @return 'true' if successful, 'false' if it was not locked, or there + * appears to be corruption in 'LocalLocks' tables + */ + @Override + public boolean free() { + synchronized (this) { + if (state != State.ACTIVE && state != State.WAITING) { + // nothing to free + return false; + } + state = State.FREE; + } + + return identity.free(); + } + + /** + * Return 'true' if the lock is in the ACTIVE state. + * + * @return 'true' if the lock is in the ACTIVE state, and 'false' if not + */ + @Override + public synchronized boolean isActive() { + return state == State.ACTIVE; + } + + /** + * Return 'true' if the lock is not available. 
+ * + * @return 'true' if the lock is in the FREE or LOST state, + * and 'false' if not + */ + @Override + public synchronized boolean isUnavailable() { + return state == State.FREE || state == State.LOST; + } + + /** + * Return 'true' if the lock is in the WAITING state. + * + * @return 'true' if the lock is in the WAITING state, and 'false' if not + */ + public synchronized boolean isWaiting() { + return state == State.WAITING; + } + + /** + * Return the lock's key. + * + * @return the lock's key + */ + @Override + public String getResourceId() { + return identity.key; + } + + /** + * Return the owner key field. + * + * @return the owner key field + */ + @Override + public String getOwnerKey() { + return identity.ownerKey; + } + + /** + * Extends the lock's hold time (not implemented yet). + */ + @Override + public void extend(int holdSec, LockCallback callback) { + } + + /********************/ + + /** + * Update the state. + * + * @param newState the new state value + */ + private synchronized void setState(State newState) { + state = newState; + } + + /** + * Return the currentstate of the lock. + * + * @return the current state of the lock + */ + public synchronized State getState() { + return state; + } + + /** + * This method is called when an incoming /lock/lock REST message is received. 
+ * + * @param key string key identifying the lock, which must hash to a bucket + * owned by the current host + * @param ownerKey string key identifying the owner + * @param uuid the UUID that uniquely identifies the original 'TargetLock' + * @param waitForLock this controls the behavior when 'key' is already + * locked - 'true' means wait for it to be freed, 'false' means fail + * @param ttl similar to IP time-to-live -- it controls the number of hops + * the message may take + * @return the Response that should be passed back to the HTTP request + */ + static Response incomingLock(String key, String ownerKey, UUID uuid, boolean waitForLock, int ttl) { + if (!Bucket.isKeyOnThisServer(key)) { + // this is the wrong server -- forward to the correct one + // (we can use this thread) + if ((ttl -= 1) > 0) { + Server server = Bucket.bucketToServer(Bucket.bucketNumber(key)); + if (server != null) { + WebTarget webTarget = server.getWebTarget("lock/lock"); + if (webTarget != null) { + logger.warn("Forwarding 'lock/lock' to uuid {} " + + "(key={},owner={},uuid={},wait={},ttl={})", + server.getUuid(), key, ownerKey, uuid, + waitForLock, ttl); + return webTarget + .queryParam("key", key) + .queryParam("owner", ownerKey) + .queryParam("uuid", uuid.toString()) + .queryParam("wait", waitForLock) + .queryParam("ttl", String.valueOf(ttl)) + .request().get(); + } + } + } + + // if we reach this point, we didn't forward for some reason -- + // return failure by indicating it is locked and unavailable + logger.error("Couldn't forward 'lock/lock' " + + "(key={},owner={},uuid={},wait={},ttl={})", + key, ownerKey, uuid, waitForLock, ttl); + return Response.noContent().status(LOCKED).build(); + } + + State state = GlobalLocks.get(key).lock(key, ownerKey, uuid, waitForLock); + switch (state) { + case ACTIVE: + return Response.noContent().build(); + case WAITING: + return Response.noContent().status(Response.Status.ACCEPTED).build(); + default: + return 
Response.noContent().status(LOCKED).build(); + } + } + + /** + * This method is called when an incoming /lock/free REST message is received. + * + * @param key string key identifying the lock, which must hash to a bucket + * owned by the current host + * @param ownerKey string key identifying the owner + * @param uuid the UUID that uniquely identifies the original 'TargetLock' + * @param ttl similar to IP time-to-live -- it controls the number of hops + * the message may take + * @return the Response that should be passed back to the HTTP request + */ + static Response incomingFree(String key, String ownerKey, UUID uuid, int ttl) { + if (!Bucket.isKeyOnThisServer(key)) { + // this is the wrong server -- forward to the correct one + // (we can use this thread) + if ((ttl -= 1) > 0) { + Server server = Bucket.bucketToServer(Bucket.bucketNumber(key)); + if (server != null) { + WebTarget webTarget = server.getWebTarget("lock/free"); + if (webTarget != null) { + logger.warn("Forwarding 'lock/free' to uuid {} " + + "(key={},owner={},uuid={},ttl={})", + server.getUuid(), key, ownerKey, uuid, ttl); + return webTarget + .queryParam("key", key) + .queryParam("owner", ownerKey) + .queryParam("uuid", uuid.toString()) + .queryParam("ttl", String.valueOf(ttl)) + .request().get(); + } + } + } + + // if we reach this point, we didn't forward for some reason -- + // return failure by indicating it is locked and unavailable + logger.error("Couldn't forward 'lock/free' " + + "(key={},owner={},uuid={},ttl={})", + key, ownerKey, uuid, ttl); + return null; + } + + // TBD: should this return a more meaningful response? + GlobalLocks.get(key).unlock(key, uuid); + return null; + } + + /** + * This method is called when an incoming /lock/locked message is received + * (this is a callback to an earlier requestor that the lock is now + * available). 
+ * + * @param key string key identifying the lock + * @param ownerKey string key identifying the owner, which must hash to + * a bucket owned by the current host (it is typically a 'RequestID') + * @param uuid the UUID that uniquely identifies the original 'TargetLock' + * @param ttl similar to IP time-to-live -- it controls the number of hops + * the message may take + * @return the Response that should be passed back to the HTTP request + */ + static Response incomingLocked(String key, String ownerKey, UUID uuid, int ttl) { + if (!Bucket.isKeyOnThisServer(ownerKey)) { + // this is the wrong server -- forward to the correct one + // (we can use this thread) + if ((ttl -= 1) > 0) { + Server server = Bucket.bucketToServer(Bucket.bucketNumber(key)); + if (server != null) { + WebTarget webTarget = server.getWebTarget("lock/locked"); + if (webTarget != null) { + logger.warn("Forwarding 'lock/locked' to uuid {} " + + "(key={},owner={},uuid={},ttl={})", + server.getUuid(), key, ownerKey, uuid, ttl); + return webTarget + .queryParam("key", key) + .queryParam("owner", ownerKey) + .queryParam("uuid", uuid.toString()) + .queryParam("ttl", String.valueOf(ttl)) + .request().get(); + } + } + } + + // if we reach this point, we didn't forward for some reason -- + // return failure by indicating it is locked and unavailable + logger.error("Couldn't forward 'lock/locked' " + + "(key={},owner={},uuid={},ttl={})", + key, ownerKey, uuid, ttl); + return Response.noContent().status(LOCKED).build(); + } + + TargetLock targetLock = null; + LocalLocks localLocks = LocalLocks.get(ownerKey); + synchronized (localLocks) { + WeakReference<TargetLock> wr = + localLocks.uuidToWeakReference.get(uuid); + + if (wr != null) { + targetLock = wr.get(); + if (targetLock == null) { + // lock has been abandoned + // (AbandonedHandler should usually find this first) + localLocks.weakReferenceToIdentity.remove(wr); + localLocks.uuidToWeakReference.remove(uuid); + } else { + // the lock has been made 
available -- update the state + // TBD: This could be outside of 'synchronized (localLocks)' + synchronized (targetLock) { + if (targetLock.state == State.WAITING) { + targetLock.state = State.ACTIVE; + } else { + // will return a failure -- not sure how this happened + logger.error("incomingLocked: {} is in state {}", + targetLock, targetLock.state); + targetLock = null; + } + } + } + } else { + // clean up what we can + localLocks.uuidToWeakReference.remove(uuid); + } + } + if (targetLock == null) { + // We can't locate the target lock + // TBD: This probably isn't the best error code to use + return Response.noContent().status(LOCKED).build(); + } else { + targetLock.context.lockAvailable(targetLock); + return Response.noContent().build(); + } + } + + /** + * This is called when the state of a bucket has changed, but is currently + * stable. Note that this method is called while being synchronized on the + * bucket. + * + * @param bucket the bucket to audit + * @param owner 'true' if the current host owns the bucket + * @param backup 'true' if the current host is a backup for the bucket + */ + static void auditBucket(Bucket bucket, boolean isOwner, boolean isBackup) { + if (!isOwner) { + // we should not have any 'TargetLock' adjuncts + if (bucket.removeAdjunct(LocalLocks.class) != null) { + logger.warn("Bucket {}: Removed superfluous " + + "'TargetLock.LocalLocks' adjunct", + bucket.getIndex()); + } + if (bucket.removeAdjunct(GlobalLocks.class) != null) { + logger.warn("Bucket {}: Removed superfluous " + + "'TargetLock.GlobalLocks' adjunct", + bucket.getIndex()); + } + } + } + + /** + * {@inheritDoc} + */ + @Override + public String toString() { + return "TargetLock(key=" + identity.key + + ", ownerKey=" + identity.ownerKey + + ", uuid=" + identity.uuid + + ", state=" + state + ")"; + } + + /*****************/ + /* Serialization */ + /*****************/ + + /** + * This method modifies the behavior of 'TargetLock' deserialization by + * creating the 
corresponding 'LocalLocks' entries. + */ + private void readObject(java.io.ObjectInputStream in) throws IOException, ClassNotFoundException { + in.defaultReadObject(); + if (state == State.ACTIVE || state == State.WAITING) { + // need to build entries in 'LocalLocks' + LocalLocks localLocks = LocalLocks.get(identity.ownerKey); + WeakReference<TargetLock> wr = new WeakReference<>(this, abandoned); + + synchronized (localLocks) { + localLocks.weakReferenceToIdentity.put(wr, identity); + localLocks.uuidToWeakReference.put(identity.uuid, wr); + } + } + } + + /* ============================================================ */ + + private static class LockFactory implements PolicyResourceLockManager { + /*****************************************/ + /* 'PolicyResourceLockManager' interface */ + /*****************************************/ + + /** + * {@inheritDoc} + */ + @Override + public Lock createLock(String resourceId, String ownerKey, + int holdSec, LockCallback callback, + boolean waitForLock) { + // 'holdSec' isn't implemented yet + return new TargetLock(resourceId, ownerKey, callback, waitForLock); + } + + /*************************/ + /* 'Startable' interface */ + /*************************/ + + /** + * {@inheritDoc} + */ + @Override + public boolean start() { + return true; + } + + /** + * {@inheritDoc} + */ + @Override + public boolean stop() { + return false; + } + + /** + * {@inheritDoc} + */ + @Override + public void shutdown() { + } + + /** + * {@inheritDoc} + */ + @Override + public boolean isAlive() { + return true; + } + + /************************/ + /* 'Lockable' interface */ + /************************/ + + /** + * {@inheritDoc} + */ + @Override + public boolean lock() { + return false; + } + + /** + * {@inheritDoc} + */ + @Override + public boolean unlock() { + return true; + } + + /** + * {@inheritDoc} + */ + @Override + public boolean isLocked() { + return false; + } + } + + private static LockFactory lockFactory = new LockFactory(); + + public 
static PolicyResourceLockManager getLockFactory() { + return lockFactory; + } + + /* ============================================================ */ + + /** + * There is a single instance of class 'TargetLock.EventHandler', which is + * registered to listen for notifications of state transitions. + */ + private static class EventHandler implements Events { + /** + * {@inheritDoc} + */ + @Override + public void newServer(Server server) { + // with an additional server, the offset within the audit period changes + Audit.scheduleAudit(); + } + + /** + * {@inheritDoc} + */ + @Override + public void serverFailed(Server server) { + // when one less server, the offset within the audit period changes + Audit.scheduleAudit(); + } + } + + /* ============================================================ */ + + /** + * This class usually has a one-to-one correspondence with a 'TargetLock' + * instance, unless the 'TargetLock' has been abandoned. + */ + private static class Identity implements Serializable { + // this is the key associated with the lock + String key; + + // this is the key associated with the lock requestor + String ownerKey; + + // this is a unique identifier assigned to the 'TargetLock' + UUID uuid; + + /** + * Constructor - initializes the 'Identity' instance, including the + * generation of the unique identifier. + * + * @param key string key identifying the lock + * @param ownerKey string key identifying the owner, which must hash to + * a bucket owned by the current host (it is typically a 'RequestID') + */ + private Identity(String key, String ownerKey) { + this.key = key; + this.ownerKey = ownerKey; + this.uuid = UUID.randomUUID(); + } + + /** + * Constructor - initializes the 'Identity' instance, with the 'uuid' + * value passed at initialization time (only used for auditing). 
+ * + * @param key string key identifying the lock + * @param ownerKey string key identifying the owner, which must hash to + * @param uuid the UUID that uniquely identifies the original 'TargetLock' + */ + private Identity(String key, String ownerKey, UUID uuid) { + this.key = key; + this.ownerKey = ownerKey; + this.uuid = uuid; + } + + /** + * Free the lock associated with this 'Identity' instance. + * + * @return 'false' if the 'LocalLocks' data is not there, true' if it is + */ + private boolean free() { + // free the lock + Bucket.forwardAndProcess(key, new Bucket.Message() { + /** + * {@inheritDoc} + */ + @Override + public void process() { + // the global lock entry is also on this server + GlobalLocks.get(key).unlock(key, uuid); + } + + /** + * {@inheritDoc} + */ + @Override + public void sendToServer(Server server, int bucketNumber) { + logger.info("Sending free request to {}: key={}, owner={}, uuid={}", + server, key, ownerKey, uuid); + server.post("lock/free", null, new Server.PostResponse() { + @Override + public WebTarget webTarget(WebTarget webTarget) { + return webTarget + .queryParam("key", key) + .queryParam("owner", ownerKey) + .queryParam("uuid", uuid.toString()) + .queryParam("ttl", timeToLive); + } + + @Override + public void response(Response response) { + logger.info("Free response={} (code={})", + response, response.getStatus()); + switch (response.getStatus()) { + case NO_CONTENT: { + // free successful -- don't need to do anything + break; + } + + case LOCKED: { + // free failed + logger.error("TargetLock free failed, " + + "key={}, owner={}, uuid={}", + key, ownerKey, uuid); + break; + } + + default: + logger.error("Unknown status: ", response.getStatus()); + break; + } + } + }); + } + }); + + // clean up locallocks entry + LocalLocks localLocks = LocalLocks.get(ownerKey); + synchronized (localLocks) { + WeakReference<TargetLock> wr = + localLocks.uuidToWeakReference.get(uuid); + if (wr == null) { + return false; + } + + 
localLocks.weakReferenceToIdentity.remove(wr); + localLocks.uuidToWeakReference.remove(uuid); + wr.clear(); + } + return true; + } + + /***************************/ + /* 'Object' class override */ + /***************************/ + + /** + * {@inheritDoc} + */ + @Override + public boolean equals(Object other) { + if (other instanceof Identity) { + Identity identity = (Identity)other; + return uuid.equals(identity.uuid) + && key.equals(identity.key) + && ownerKey.equals(identity.ownerKey); + } + return false; + } + } + + /* ============================================================ */ + + /** + * An instance of this class is used for 'TargetLock.context' when the + * lock is allocated within a Drools session. Its purpose is to ensure that + * the callback to 'TargetLock.owner' runs within the Drools thread. + */ + private static class PolicySessionContext implements LockCallback, Serializable { + // the 'PolicySession' instance in question + PolicySession policySession; + + /** + * Constructor - initialize the 'policySession' field. 
+ * + * @param policySession the Drools session + */ + private PolicySessionContext(PolicySession policySession) { + this.policySession = policySession; + } + + /*********************/ + /* 'Owner' interface */ + /*********************/ + + /** + * {@inheritDoc} + */ + @Override + public void lockAvailable(final Lock lock) { + // Run 'owner.lockAvailable' within the Drools session + if (policySession != null) { + policySession.getKieSession().insert(new DroolsRunnable() { + @Override + public void run() { + ((TargetLock)lock).owner.lockAvailable(lock); + } + }); + } + } + + /** + * {@inheritDoc} + */ + @Override + public void lockUnavailable(Lock lock) { + // Run 'owner.unlockAvailable' within the Drools session + if (policySession != null) { + policySession.getKieSession().insert(new DroolsRunnable() { + @Override + public void run() { + ((TargetLock)lock).owner.lockUnavailable(lock); + } + }); + } + } + + /*****************/ + /* Serialization */ + /*****************/ + + /** + * Specializes serialization of 'PolicySessionContext'. + */ + private void writeObject(ObjectOutputStream out) throws IOException { + // 'PolicySession' can't be serialized directly -- + // store as 'groupId', 'artifactId', 'sessionName' + PolicyContainer pc = policySession.getPolicyContainer(); + + out.writeObject(pc.getGroupId()); + out.writeObject(pc.getArtifactId()); + out.writeObject(policySession.getName()); + } + + /** + * Specializes deserialization of 'PolicySessionContext'. 
+ */ + private void readObject(ObjectInputStream in) throws IOException, ClassNotFoundException { + // 'PolicySession' can't be serialized directly -- + // read in 'groupId', 'artifactId', 'sessionName' + String groupId = String.class.cast(in.readObject()); + String artifactId = String.class.cast(in.readObject()); + String sessionName = String.class.cast(in.readObject()); + + // locate the 'PolicySession' associated with + // 'groupId', 'artifactId', and 'sessionName' + for (PolicyContainer pc : PolicyContainer.getPolicyContainers()) { + if (artifactId.equals(pc.getArtifactId()) + && groupId.equals(pc.getGroupId())) { + // found 'PolicyContainer' -- look up the session + policySession = pc.getPolicySession(sessionName); + if (policySession == null) { + logger.error("TargetLock.PolicySessionContext.readObject: " + + "Can't find session {}:{}:{}", + groupId, artifactId, sessionName); + } + } + } + } + } + + /* ============================================================ */ + + /** + * This class contains two tables that have entries for any 'TargetLock' + * in the 'ACTIVE' or 'WAITING' state. This is the "client" end of the + * lock implementation. + */ + static class LocalLocks { + // this table makes it easier to clean up locks that have been + // abandoned (see 'AbandonedHandler') + private Map<WeakReference<TargetLock>, Identity> weakReferenceToIdentity = new IdentityHashMap<>(); + + // this table is used to locate a 'TargetLock' instance from a UUID + private Map<UUID, WeakReference<TargetLock>> uuidToWeakReference = + new HashMap<>(); + + /** + * Fetch the 'LocalLocks' entry associated with a particular owner key + * (it is created if necessary). 
+ * + * @param ownerKey string key identifying the owner, which must hash to + * a bucket owned by the current host (it is typically a 'RequestID') + * @return the associated 'LocalLocks' instance (it should never be 'null') + */ + private static LocalLocks get(String ownerKey) { + return Bucket.getBucket(ownerKey).getAdjunct(LocalLocks.class); + } + } + + /* ============================================================ */ + + /** + * This class contains the actual lock table, which is the "server" end + * of the lock implementation. + */ + public static class GlobalLocks implements Serializable { + // this is the lock table, mapping 'key' to 'LockEntry', which indicates + // the current lock holder, and all those waiting + private Map<String, LockEntry> keyToEntry = new HashMap<>(); + + /** + * Fetch the 'GlobalLocks' entry associated with a particular key + * (it is created if necessary). + * + * @param key string key identifying the lock + * @return the associated 'GlobalLocks' instance + * (it should never be 'null') + */ + private static GlobalLocks get(String key) { + return Bucket.getBucket(key).getAdjunct(GlobalLocks.class); + } + + /** + * Do the 'lock' operation -- lock immediately, if possible. If not, + * get on the waiting list, if requested. 
+ * + * @param key string key identifying the lock, which must hash to a bucket + * owned by the current host + * @param ownerKey string key identifying the owner + * @param uuid the UUID that uniquely identifies the original 'TargetLock' + * (on the originating host) + * @param waitForLock this controls the behavior when 'key' is already + * locked - 'true' means wait for it to be freed, 'false' means fail + * @return the lock State corresponding to the current request + */ + synchronized State lock(String key, String ownerKey, UUID uuid, boolean waitForLock) { + synchronized (keyToEntry) { + LockEntry entry = keyToEntry.get(key); + if (entry == null) { + // there is no existing entry -- create one, and return ACTIVE + entry = new LockEntry(key, ownerKey, uuid); + keyToEntry.put(key, entry); + sendUpdate(key); + return State.ACTIVE; + } + if (waitForLock) { + // the requestor is willing to wait -- get on the waiting list, + // and return WAITING + entry.waitingList.add(new Waiting(ownerKey, uuid)); + sendUpdate(key); + return State.WAITING; + } + + // the requestor is not willing to wait -- return FREE, + // which will be interpreted as a failure + return State.FREE; + } + } + + /** + * Free a lock or a pending lock request. 
+ * + * @param key string key identifying the lock + * @param uuid the UUID that uniquely identifies the original 'TargetLock' + */ + synchronized void unlock(String key, UUID uuid) { + synchronized (keyToEntry) { + final LockEntry entry = keyToEntry.get(key); + if (entry == null) { + logger.error("GlobalLocks.unlock: unknown lock, key={}, uuid={}", + key, uuid); + return; + } + if (entry.currentOwnerUuid.equals(uuid)) { + // this is the current lock holder + if (entry.waitingList.isEmpty()) { + // free this lock + keyToEntry.remove(key); + } else { + // pass it on to the next one in the list + Waiting waiting = entry.waitingList.remove(); + entry.currentOwnerKey = waiting.ownerKey; + entry.currentOwnerUuid = waiting.ownerUuid; + + entry.notifyNewOwner(this); + } + sendUpdate(key); + } else { + // see if one of the waiting entries is being freed + for (Waiting waiting : entry.waitingList) { + if (waiting.ownerUuid.equals(uuid)) { + entry.waitingList.remove(waiting); + sendUpdate(key); + break; + } + } + } + } + } + + /** + * Notify all features that an update has occurred on this GlobalLock. + * + * @param key the key associated with the change + * (used to locate the bucket) + */ + private void sendUpdate(String key) { + Bucket bucket = Bucket.getBucket(key); + for (ServerPoolApi feature : ServerPoolApi.impl.getList()) { + feature.lockUpdate(bucket, this); + } + } + + /*===============*/ + /* Serialization */ + /*===============*/ + + private void writeObject(ObjectOutputStream out) throws IOException { + synchronized (this) { + out.defaultWriteObject(); + } + } + } + + /* ============================================================ */ + + /** + * Each instance of this object corresponds to a single key in the lock + * table. It includes the current holder of the lock, as well as + * any that are waiting. 
+ */ + private static class LockEntry implements Serializable { + // string key identifying the lock + String key; + + // string key identifying the owner + String currentOwnerKey; + + // UUID identifying the original 'TargetLock + UUID currentOwnerUuid; + + // list of pending lock requests for this key + Queue<Waiting> waitingList = new LinkedList<>(); + + /** + * Constructor - initialize the 'LockEntry'. + * + * @param key string key identifying the lock, which must hash to a bucket + * owned by the current host + * @param ownerKey string key identifying the owner + * @param uuid the UUID that uniquely identifies the original 'TargetLock' + */ + private LockEntry(String key, String ownerKey, UUID uuid) { + this.key = key; + this.currentOwnerKey = ownerKey; + this.currentOwnerUuid = uuid; + } + + /** + * This method is called after the 'currentOwnerKey' and + * 'currentOwnerUuid' fields have been updated, and it notifies the new + * owner that they now have the lock. + * + * @param globalLocks the 'GlobalLocks' instance containing this entry + */ + private void notifyNewOwner(final GlobalLocks globalLocks) { + Bucket.forwardAndProcess(currentOwnerKey, new Bucket.Message() { + /** + * {@inheritDoc} + */ + @Override + public void process() { + // the new owner is on this host + incomingLocked(key, currentOwnerKey, currentOwnerUuid, 1); + } + + /** + * {@inheritDoc} + */ + @Override + public void sendToServer(Server server, int bucketNumber) { + // the new owner is on a remote host + logger.info("Sending locked notification to {}: key={}, owner={}, uuid={}", + server, key, currentOwnerKey, currentOwnerUuid); + server.post("lock/locked", null, new Server.PostResponse() { + @Override + public WebTarget webTarget(WebTarget webTarget) { + return webTarget + .queryParam("key", key) + .queryParam("owner", currentOwnerKey) + .queryParam("uuid", currentOwnerUuid.toString()) + .queryParam("ttl", timeToLive); + } + + @Override + public void response(Response response) { + 
logger.info("Locked response={} (code={})", + response, response.getStatus()); + switch (response.getStatus()) { + case NO_CONTENT: { + // successful -- we are done + break; + } + + default: { + // notification failed -- free this one + globalLocks.unlock(key, currentOwnerUuid); + break; + } + } + } + }); + } + }); + + } + } + + /* ============================================================ */ + + /** + * This corresponds to a member of 'LockEntry.waitingList' + */ + private static class Waiting implements Serializable { + // string key identifying the owner + String ownerKey; + + // uniquely identifies the new owner 'TargetLock' + UUID ownerUuid; + + /** + * Constructor. + * + * @param ownerKey string key identifying the owner + * @param ownerUuid uniquely identifies the new owner 'TargetLock' + */ + private Waiting(String ownerKey, UUID ownerUuid) { + this.ownerKey = ownerKey; + this.ownerUuid = ownerUuid; + } + } + + /* ============================================================ */ + + /** + * Backup data associated with a 'GlobalLocks' instance. + */ + static class LockBackup implements Bucket.Backup { + /** + * {@inheritDoc} + */ + @Override + public Bucket.Restore generate(int bucketNumber) { + Bucket bucket = Bucket.getBucket(bucketNumber); + + // just remove 'LocalLocks' -- it will need to be rebuilt from + // 'TargetLock' instances + bucket.removeAdjunct(LocalLocks.class); + + // global locks need to be transferred + GlobalLocks globalLocks = bucket.removeAdjunct(GlobalLocks.class); + return globalLocks == null ? null : new LockRestore(globalLocks); + } + } + + /* ============================================================ */ + + /** + * This class is used to restore a 'GlobalLocks' instance from a backup. + */ + static class LockRestore implements Bucket.Restore, Serializable { + GlobalLocks globalLocks; + + /** + * Constructor - runs as part of backup (deserialization bypasses this constructor). 
+ * + * @param globalLocks GlobalLocks instance extracted as part of backup + */ + LockRestore(GlobalLocks globalLocks) { + this.globalLocks = globalLocks; + } + + /** + * {@inheritDoc} + */ + @Override + public void restore(int bucketNumber) { + // fetch bucket + Bucket bucket = Bucket.getBucket(bucketNumber); + + // update the adjunct + if (bucket.putAdjunct(globalLocks) != null) { + logger.error("LockRestore({}): GlobalLocks adjunct already existed", + bucketNumber); + } + + // notify features of the 'globalLocks' update + for (ServerPoolApi feature : ServerPoolApi.impl.getList()) { + feature.lockUpdate(bucket, globalLocks); + } + } + } + + /* ============================================================ */ + + /** + * This class is a deamon that monitors the 'abandoned' queue. If an + * ACTIVE 'TargetLock' is abandoned, the GC will eventually place the + * now-empty 'WeakReference' in this queue. + */ + private static class AbandonedHandler extends Thread { + AbandonedHandler() { + super("TargetLock.AbandonedHandler"); + } + + /** + * This method camps on the 'abandoned' queue, processing entries as + * they are received. + */ + @Override + public void run() { + while (abandonedHandler != null) { + try { + Reference<? extends TargetLock> wr = abandoned.remove(); + TargetLock notify = null; + + // At this point, we know that 'ref' is a + // 'WeakReference<TargetLock>' instance that has been abandoned, + // but we don't know what the associated 'Identity' instance + // is. Here, we search through every bucket looking for a + // matching entry. The assumption is that this is rare enough, + // and due to a bug, so it doesn't hurt to spend extra CPU time + // here. The alternative is to add some additional information + // to make this mapping quick, at the expense of a slight + // slow down of normal lock operations. 
+ for (int i = 0 ; i < Bucket.BUCKETCOUNT ; i += 1) { + LocalLocks localLocks = + Bucket.getBucket(i).getAdjunctDontCreate(LocalLocks.class); + if (localLocks != null) { + // the adjunct does exist -- see if the WeakReference + // instance is known to this bucket + synchronized (localLocks) { + Identity identity = + localLocks.weakReferenceToIdentity.get(wr); + if (identity != null) { + // found it + logger.error("Abandoned TargetLock: bucket={}, " + + "key={}, ownerKey={}, uuid={}", + i, identity.key, identity.ownerKey, + identity.uuid); + identity.free(); + break; + } + } + } + } + } catch (Exception e) { + logger.error("TargetLock.AbandonedHandler exception", e); + } + } + } + } + + // create a single instance of 'AbandonedHandler', and start it + private static AbandonedHandler abandonedHandler = new AbandonedHandler(); + + static { + abandonedHandler.start(); + } + + /* ============================================================ */ + + /** + * This class handles the '/cmd/dumpLocks' REST command. 
+ */ + static class DumpLocks { + // indicates whether a more detailed dump should be done + private boolean detail; + + // this table maps the 'TargetLock' UUID into an object containing + // both client (LocalLocks) and server (GlobalLocks) information + private Map<UUID, MergedData> mergedDataMap = + new TreeMap<>(Util.uuidComparator); + + // this table maps the 'TargetLock' key into the associated 'LockEntry' + // (server end) + private Map<String, LockEntry> lockEntries = new TreeMap<>(); + + // this table maps the 'TargetLock' key into entries that only exist + // on the client end + private Map<String, MergedData> clientOnlyEntries = new TreeMap<>(); + + // display format (although it is now dynamically adjusted) + private String format = "%-14s %-14s %-36s %-10s %s\n"; + + // calculation of maximum key length for display + private int keyLength = 10; + + // calculation of maximum owner key length for display + private int ownerKeyLength = 10; + + // 'true' if any comments need to be displayed (affects format) + private boolean commentsIncluded = false; + + /** + * Entry point for the '/cmd/dumpLocks' REST command. + * + * @param out where the output should be displayed + * @param detail 'true' provides additional bucket and host information + * (but abbreviates all UUIDs in order to avoid excessive + * line length) + */ + static void dumpLocks(PrintStream out, boolean detail) + throws InterruptedException, IOException, ClassNotFoundException { + + // the actual work is done in the constructor + new DumpLocks(out, detail); + } + + /** + * Entry point for the '/lock/dumpLocksData' REST command, which generates + * a byte stream for this particular host. 
+ * + * @param serverUuid the UUID of the intended destination server + * @param ttl similar to IP time-to-live -- it controls the number of hops + * the message may take + * @return a base64-encoded byte stream containing serialized 'HostData' + */ + static byte[] dumpLocksData(UUID serverUuid, int ttl) throws IOException { + if (!Server.getThisServer().getUuid().equals(serverUuid)) { + if ((ttl -= 1) > 0) { + Server server = Server.getServer(serverUuid); + if (server != null) { + WebTarget webTarget = + server.getWebTarget("lock/dumpLocksData"); + if (webTarget != null) { + logger.info("Forwarding 'lock/dumpLocksData' to uuid {}", + serverUuid); + return webTarget + .queryParam("server", serverUuid.toString()) + .queryParam("ttl", String.valueOf(ttl)) + .request().get(byte[].class); + } + } + } + + // if we reach this point, we didn't forward for some reason + + logger.error("Couldn't forward 'lock/dumpLocksData to uuid {}", + serverUuid); + return null; + } + + return Base64.getEncoder().encode(Util.serialize(new HostData())); + } + + /** + * Constructor - does the '/cmd/dumpLocks' REST command. 
+ * + * @param out where the output should be displayed + */ + DumpLocks(PrintStream out, boolean detail) + throws IOException, InterruptedException, ClassNotFoundException { + + this.detail = detail; + + // receives responses from '/lock/dumpLocksData' + final LinkedTransferQueue<Response> responseQueue = + new LinkedTransferQueue<>(); + + // generate a count of the number of external servers that should respond + int pendingResponseCount = 0; + + // iterate over all of the servers + for (final Server server : Server.getServers()) { + if (server == Server.getThisServer()) { + // skip this server -- we will handle it differently + continue; + } + + // keep a running count + pendingResponseCount += 1; + server.post("lock/dumpLocksData", null, new Server.PostResponse() { + @Override + public WebTarget webTarget(WebTarget webTarget) { + return webTarget + .queryParam("server", server.getUuid().toString()) + .queryParam("ttl", timeToLive); + } + + @Override + public void response(Response response) { + // responses are queued, and the main thread will collect them + responseQueue.put(response); + } + }); + } + + // this handles data associated with this server -- it also goes through + // serialization/deserialization, which provides a deep copy of the data + populateLockData(dumpLocksData(Server.getThisServer().getUuid(), 0)); + + // now, poll for responses from all of the the other servers + while (pendingResponseCount > 0) { + pendingResponseCount -= 1; + Response response = responseQueue.poll(60, TimeUnit.SECONDS); + if (response == null) { + // timeout -- we aren't expecting any more responses + break; + } + + // populate data associated with this server + populateLockData(response.readEntity(byte[].class)); + } + + // we have processed all of the servers that we are going to, + // now generate the output + dump(out); + } + + /** + * process base64-encoded data from a server (local or remote). 
+         *
+         * @param data base64-encoded data (class 'HostData')
+         */
+        void populateLockData(byte[] data) throws IOException, ClassNotFoundException {
+            Object decodedData = Util.deserialize(Base64.getDecoder().decode(data));
+            if (!(decodedData instanceof HostData)) {
+                // 'instanceof' is also 'false' for 'null', so 'decodedData'
+                // can't be dereferenced unconditionally here
+                logger.error("TargetLock.DumpLocks.populateLockData: "
+                             + "received data has class "
+                             + (decodedData == null
+                                ? "null" : decodedData.getClass().getName()));
+                return;
+            }
+
+            // deserialized data
+            HostData hostData = (HostData)decodedData;
+
+            // fetch 'Server' instance associated with the responding server
+            // (may be 'null' -- other callers of 'Server.getServer' check for it)
+            Server server = Server.getServer(hostData.hostUuid);
+
+            // text identifying the responding host in comments -- fall back to
+            // the UUID when there is no 'Server' instance to describe it
+            String host =
+                (server == null ? String.valueOf(hostData.hostUuid) : server.toString());
+
+            // process the client-end data
+            for (ClientData clientData : hostData.clientDataList) {
+                // 'true' if the bucket associated with this 'ClientData'
+                // doesn't belong to the remote server, as far as we can tell
+                boolean serverMismatch =
+                    Bucket.bucketToServer(clientData.bucketNumber) != server;
+
+                // each 'ClientDataRecord' instance corresponds to an
+                // active 'Identity' (TargetLock) instance
+                for (ClientDataRecord cdr : clientData.clientDataRecords) {
+                    // update maximum 'key' and 'ownerKey' lengths
+                    updateKeyLength(cdr.identity.key);
+                    updateOwnerKeyLength(cdr.identity.ownerKey);
+
+                    // fetch/generate 'MergedData' instance for this UUID
+                    MergedData md = mergedDataFor(cdr.identity.uuid);
+
+                    // update 'MergedData.clientDataRecord'
+                    if (md.clientDataRecord == null) {
+                        md.clientDataRecord = cdr;
+                    } else {
+                        md.comment("Duplicate client entry for UUID");
+                    }
+
+                    if (serverMismatch) {
+                        // need to generate an additional error
+                        md.comment(host
+                                   + "(client) does not own bucket "
+                                   + clientData.bucketNumber);
+                    }
+                }
+            }
+
+            // process the server-end data
+            for (ServerData serverData : hostData.serverDataList) {
+                // 'true' if the bucket associated with this 'ServerData'
+                // doesn't belong to the remote server, as far as we can tell
+                boolean serverMismatch =
+                    Bucket.bucketToServer(serverData.bucketNumber) != server;
+
+                // each 'LockEntry' instance corresponds to the current holder
+                // of a lock, and all requestors waiting for it to be freed
+                for (LockEntry le : serverData.globalLocks.keyToEntry.values()) {
+                    // update maximum 'key' and 'ownerKey' lengths
+                    updateKeyLength(le.key);
+                    updateOwnerKeyLength(le.currentOwnerKey);
+
+                    // fetch/generate 'MergedData' instance for the owner's UUID
+                    MergedData md = mergedDataFor(le.currentOwnerUuid);
+
+                    // update 'lockEntries' table entry
+                    if (lockEntries.get(le.key) != null) {
+                        md.comment("Duplicate server entry for key " + le.key);
+                    } else {
+                        lockEntries.put(le.key, le);
+                    }
+
+                    // update 'MergedData.serverLockEntry'
+                    // (leave 'MergedData.serverWaiting' as 'null', because
+                    // this field is only used for waiting entries)
+                    if (md.serverLockEntry == null) {
+                        md.serverLockEntry = le;
+                    } else {
+                        md.comment("Duplicate server entry for UUID");
+                    }
+
+                    if (serverMismatch) {
+                        // need to generate an additional error
+                        md.comment(host
+                                   + "(server) does not own bucket "
+                                   + serverData.bucketNumber);
+                    }
+
+                    // we need 'MergedData' entries for all waiting requests
+                    for (Waiting waiting : le.waitingList) {
+                        // update maximum 'ownerKey' length
+                        updateOwnerKeyLength(waiting.ownerKey);
+
+                        // fetch/generate 'MergedData' instance for this UUID
+                        md = mergedDataFor(waiting.ownerUuid);
+
+                        // update 'MergedData.serverLockEntry' and
+                        // 'MergedData.serverWaiting'
+                        if (md.serverLockEntry == null) {
+                            md.serverLockEntry = le;
+                            md.serverWaiting = waiting;
+                        } else {
+                            md.comment("Duplicate server entry for UUID");
+                        }
+
+                        if (serverMismatch) {
+                            // need to generate an additional error
+                            md.comment(host
+                                       + "(server) does not own bucket "
+                                       + serverData.bucketNumber);
+                        }
+                    }
+                }
+            }
+        }
+
+        /**
+         * Fetch the 'MergedData' instance registered in 'mergedDataMap' for a
+         * UUID, creating and registering a new one if there is none yet.
+         *
+         * @param uuid the UUID identifying the lock request
+         * @return the existing or newly-created 'MergedData' instance
+         */
+        private MergedData mergedDataFor(UUID uuid) {
+            MergedData md = mergedDataMap.get(uuid);
+            if (md == null) {
+                md = new MergedData(uuid);
+                mergedDataMap.put(uuid, md);
+            }
+            return md;
+        }
+
+        /**
+         * Do some additional sanity checks on the 'MergedData', and then
+         * display all of the results.
+         *
+         * @param out where the output should be displayed
+         */
+        void dump(PrintStream out) {
+            // iterate over the 'MergedData' instances looking for problems
+            for (MergedData md : mergedDataMap.values()) {
+                if (md.clientDataRecord == null) {
+                    md.comment("Client data missing");
+                } else if (md.serverLockEntry == null) {
+                    md.comment("Server data missing");
+                    clientOnlyEntries.put(md.clientDataRecord.identity.key, md);
+                } else if (!md.clientDataRecord.identity.key.equals(md.serverLockEntry.key)) {
+                    md.comment("Client key(" + md.clientDataRecord.identity.key
+                               + ") server key(" + md.serverLockEntry.key
+                               + ") mismatch");
+                } else {
+                    // the server-side owner key comes from the waiting entry, if
+                    // this UUID is waiting, and from the lock holder otherwise
+                    String serverOwnerKey = (md.serverWaiting == null
+                        ? md.serverLockEntry.currentOwnerKey : md.serverWaiting.ownerKey);
+                    if (!md.clientDataRecord.identity.ownerKey.equals(serverOwnerKey)) {
+                        md.comment("Client owner key("
+                                   + md.clientDataRecord.identity.ownerKey
+                                   + ") server owner key(" + serverOwnerKey
+                                   + ") mismatch");
+                    }
+                    // TBD: test for state mismatch
+                }
+            }
+
+            if (detail) {
+                // generate format based upon the maximum key length, maximum
+                // owner key length, and whether comments are included anywhere
+                format = "%-" + keyLength + "s %6s %-9s %-" + ownerKeyLength
+                         + "s %6s %-9s %-9s %-10s" + (commentsIncluded ? " %s\n" : "\n");
+
+                // dump out the header
+                out.printf(format, "Key", "Bucket", "Host UUID",
+                           "Owner Key", "Bucket", "Host UUID",
+                           "Lock UUID", "State", "Comments");
+                out.printf(format, "---", "------", "---------",
+                           "---------", "------", "---------",
+                           "---------", "-----", "--------");
+            } else {
+                // generate format based upon the maximum key length, maximum
+                // owner key length, and whether comments are included anywhere
+                format = "%-" + keyLength + "s %-" + ownerKeyLength
+                         + "s %-36s %-10s" + (commentsIncluded ? " %s\n" : "\n");
+
+                // dump out the header
+                out.printf(format, "Key", "Owner Key", "UUID", "State", "Comments");
+                out.printf(format, "---", "---------", "----", "-----", "--------");
+            }
+
+            // iterate over the server table
+            for (LockEntry le : lockEntries.values()) {
+                // fetch merged data
+                MergedData md = mergedDataMap.get(le.currentOwnerUuid);
+
+                // dump out record associated with lock owner
+                if (detail) {
+                    out.printf(format,
+                               le.key, getBucket(le.key), bucketOwnerUuid(le.key),
+                               le.currentOwnerKey, getBucket(le.currentOwnerKey),
+                               bucketOwnerUuid(le.currentOwnerKey),
+                               abbrevUuid(le.currentOwnerUuid),
+                               md.getState(), md.firstComment());
+                } else {
+                    out.printf(format,
+                               le.key, le.currentOwnerKey, le.currentOwnerUuid,
+                               md.getState(), md.firstComment());
+                }
+                dumpMoreComments(out, md);
+
+                // iterate over all requests waiting for this lock
+                for (Waiting waiting: le.waitingList) {
+                    // fetch merged data
+                    md = mergedDataMap.get(waiting.ownerUuid);
+
+                    // dump out record associated with waiting request
+                    // (the key columns are left blank -- they match the owner's)
+                    if (detail) {
+                        out.printf(format,
+                                   "", "", "",
+                                   waiting.ownerKey, getBucket(waiting.ownerKey),
+                                   bucketOwnerUuid(waiting.ownerKey),
+                                   abbrevUuid(waiting.ownerUuid),
+                                   md.getState(), md.firstComment());
+                    } else {
+                        out.printf(format, "", waiting.ownerKey, waiting.ownerUuid,
+                                   md.getState(), md.firstComment());
+                    }
+                    dumpMoreComments(out, md);
+                }
+            }
+
+            // client records that don't have matching server entries
+            for (MergedData md : clientOnlyEntries.values()) {
+                ClientDataRecord cdr = md.clientDataRecord;
+                if (detail) {
+                    out.printf(format,
+                               cdr.identity.key, getBucket(cdr.identity.key),
+                               bucketOwnerUuid(cdr.identity.key),
+                               cdr.identity.ownerKey,
+                               getBucket(cdr.identity.ownerKey),
+                               bucketOwnerUuid(cdr.identity.ownerKey),
+                               abbrevUuid(cdr.identity.uuid),
+                               md.getState(), md.firstComment());
+                } else {
+                    out.printf(format, cdr.identity.key, cdr.identity.ownerKey,
+                               cdr.identity.uuid, md.getState(), md.firstComment());
+                }
+                dumpMoreComments(out, md);
+            }
+        }
+
+        /**
+         * This method converts a String keyword into the corresponding bucket
+         * number.
+         *
+         * @param key the keyword to be converted
+         * @return the bucket number
+         */
+        private static int getBucket(String key) {
+            return Bucket.bucketNumber(key);
+        }
+
+        /**
+         * Determine the abbreviated UUID associated with a key.
+         *
+         * @param key the keyword to be converted
+         * @return the abbreviated UUID of the bucket owner
+         *     ("NONE" if the bucket has no owner)
+         */
+        private static String bucketOwnerUuid(String key) {
+            // fetch the bucket
+            Bucket bucket = Bucket.getBucket(Bucket.bucketNumber(key));
+
+            // fetch the bucket owner (may be 'null' if unassigned)
+            Server owner = bucket.getOwner();
+
+            return owner == null ? "NONE" : abbrevUuid(owner.getUuid());
+        }
+
+        /**
+         * Convert a UUID to an abbreviated form, which is the
+         * first 8 hex digits of the UUID, followed by the character '*'.
+         *
+         * @param uuid the UUID to convert
+         * @return the abbreviated form
+         */
+        private static String abbrevUuid(UUID uuid) {
+            return uuid.toString().substring(0, 8) + "*";
+        }
+
+        /**
+         * If the 'MergedData' instance has more than one comment,
+         * dump out comments 2-n.
+         *
+         * @param out where the output should be displayed
+         * @param md the MergedData instance
+         */
+        void dumpMoreComments(PrintStream out, MergedData md) {
+            if (md.comments.size() > 1) {
+                // work on a copy, so 'md.comments' itself is left untouched
+                Queue<String> comments = new LinkedList<>(md.comments);
+
+                // remove the first entry, because it has already been displayed
+                // (see 'MergedData.firstComment', which doesn't remove it)
+                comments.remove();
+                for (String comment : comments) {
+                    // all columns other than the comment column are left blank
+                    if (detail) {
+                        out.printf(format, "", "", "", "", "", "", "", "", comment);
+                    } else {
+                        out.printf(format, "", "", "", "", comment);
+                    }
+                }
+            }
+        }
+
+        /**
+         * Check the length of the specified 'key', and update 'keyLength' if
+         * it exceeds the current maximum.
+         *
+         * @param key the key to be tested
+         */
+        void updateKeyLength(String key) {
+            int length = key.length();
+            if (length > keyLength) {
+                keyLength = length;
+            }
+        }
+
+        /**
+         * Check the length of the specified 'ownerKey', and update
+         * 'ownerKeyLength' if it exceeds the current maximum.
+         *
+         * @param ownerKey the owner key to be tested
+         */
+        void updateOwnerKeyLength(String ownerKey) {
+            int length = ownerKey.length();
+            if (length > ownerKeyLength) {
+                ownerKeyLength = length;
+            }
+        }
+
+        /* ============================== */
+
+        /**
+         * Each instance of this class corresponds to client and/or server
+         * data structures, and is used to check consistency between the two.
+         */
+        class MergedData {
+            // the client/server UUID
+            UUID uuid;
+
+            // client-side data (from LocalLocks)
+            ClientDataRecord clientDataRecord = null;
+
+            // server-side data (from GlobalLocks)
+            LockEntry serverLockEntry = null;
+            Waiting serverWaiting = null;
+
+            // detected problems, such as server/client mismatches
+            Queue<String> comments = new LinkedList<String>();
+
+            /**
+             * Constructor - initialize the 'uuid'.
+             *
+             * @param uuid the UUID that identifies the original 'TargetLock'
+             */
+            MergedData(UUID uuid) {
+                this.uuid = uuid;
+            }
+
+            /**
+             * add a comment to the list, and indicate that there are now
+             * comments present.
+             *
+             * @param co the comment to add
+             */
+            void comment(String co) {
+                comments.add(co);
+                commentsIncluded = true;
+            }
+
+            /**
+             * Return the first comment, or an empty string if there are no
+             * comments. The comment is NOT removed from the list:
+             * 'dumpMoreComments' skips the first entry on its own, so removing
+             * it here would cause the second comment to never be displayed.
+             *
+             * @return the first comment, or an empty string if there are no
+             *     comments (useful for formatting output).
+             */
+            String firstComment() {
+                return comments.isEmpty() ? "" : comments.peek();
+            }
+
+            /**
+             * Return a string description of the state.
+             *
+             * @return "MISSING" if there is no client record, "UNKNOWN" if the
+             *     client record has no state ('ClientDataRecord.state' is 'null'
+             *     when the 'TargetLock' no longer exists), and the state name
+             *     otherwise
+             */
+            String getState() {
+                if (clientDataRecord == null) {
+                    return "MISSING";
+                }
+                return clientDataRecord.state == null
+                    ? "UNKNOWN" : clientDataRecord.state.toString();
+            }
+        }
+
+        /**
+         * This class contains all of the data sent from each host to the
+         * host that is consolidating the information for display.
+         */
+        static class HostData implements Serializable {
+            // the UUID of the host sending the data
+            UUID hostUuid;
+
+            // all of the information derived from the 'LocalLocks' data
+            List<ClientData> clientDataList;
+
+            // all of the information derived from the 'GlobalLocks' data
+            List<ServerData> serverDataList;
+
+            /**
+             * Constructor - this goes through all of the lock tables,
+             * and populates 'clientDataList' and 'serverDataList'.
+             */
+            HostData() {
+                // the data is tagged with the UUID of this host
+                hostUuid = Server.getThisServer().getUuid();
+
+                // both lists start out empty
+                clientDataList = new ArrayList<>();
+                serverDataList = new ArrayList<>();
+
+                // visit every bucket, picking up whatever lock adjuncts exist
+                for (int bucketNumber = 0 ; bucketNumber < Bucket.BUCKETCOUNT ;
+                        bucketNumber += 1) {
+                    Bucket bucket = Bucket.getBucket(bucketNumber);
+
+                    // client data ('LocalLocks' adjunct, if the bucket has one)
+                    LocalLocks localLocks =
+                        bucket.getAdjunctDontCreate(LocalLocks.class);
+                    if (localLocks != null) {
+                        ClientData clientData = new ClientData(bucketNumber);
+                        clientDataList.add(clientData);
+
+                        synchronized (localLocks) {
+                            for (WeakReference<TargetLock> wr :
+                                    localLocks.weakReferenceToIdentity.keySet()) {
+                                // the referent is 'null' if the 'TargetLock' has
+                                // been abandoned, and garbage collected -- in
+                                // that case, there is no state to report
+                                TargetLock targetLock = wr.get();
+                                State state = null;
+                                if (targetLock != null) {
+                                    state = targetLock.getState();
+                                }
+
+                                // one 'ClientDataRecord' for each 'Identity'
+                                Identity identity =
+                                    localLocks.weakReferenceToIdentity.get(wr);
+                                if (identity != null) {
+                                    clientData.clientDataRecords.add(
+                                        new ClientDataRecord(identity, state));
+                                }
+                            }
+                        }
+                    }
+
+                    // server data ('GlobalLocks' adjunct, if the bucket has one)
+                    GlobalLocks globalLocks =
+                        bucket.getAdjunctDontCreate(GlobalLocks.class);
+                    if (globalLocks != null) {
+                        // server data is already in serializable form
+                        serverDataList.add(new ServerData(bucketNumber, globalLocks));
+                    }
+                }
+            }
+        }
+
+        /**
+         * Client-side lock information for one bucket, taken from the
+         * 'LocalLocks' adjunct of that bucket.
+         */
+        static class ClientData implements Serializable {
+            // number of the bucket
+            int bucketNumber;
+
+            // one record for each client lock found within this bucket
+            List<ClientDataRecord> clientDataRecords;
+
+            /**
+             * Constructor - the list of 'clientDataRecords' starts out empty.
+             *
+             * @param bucketNumber the bucket these records are associated with
+             */
+            ClientData(int bucketNumber) {
+                this.bucketNumber = bucketNumber;
+                this.clientDataRecords = new ArrayList<>();
+            }
+        }
+
+        /**
+         * The information describing a single 'TargetLock', as seen from
+         * the client side.
+         */
+        static class ClientDataRecord implements Serializable {
+            // contains key, ownerKey, uuid
+            Identity identity;
+
+            // state field of 'TargetLock'
+            // (may be 'null' if there is no 'TargetLock')
+            State state;
+
+            /**
+             * Constructor - store the values passed.
+             *
+             * @param identity contains key, ownerKey, uuid
+             * @param state the state if the 'TargetLock' exists, and 'null' if it
+             *     has been garbage collected
+             */
+            ClientDataRecord(Identity identity, State state) {
+                this.identity = identity;
+                this.state = state;
+            }
+        }
+
+        /**
+         * Server-side lock information for one bucket, taken from the
+         * 'GlobalLocks' adjunct of that bucket.
+         */
+        static class ServerData implements Serializable {
+            // number of the bucket
+            int bucketNumber;
+
+            // server-side data associated with a single bucket
+            GlobalLocks globalLocks;
+
+            /**
+             * Constructor - store the values passed.
+             *
+             * @param bucketNumber the bucket these records are associated with
+             * @param globalLocks GlobalLocks instance associated with 'bucketNumber'
+             */
+            ServerData(int bucketNumber, GlobalLocks globalLocks) {
+                this.bucketNumber = bucketNumber;
+                this.globalLocks = globalLocks;
+            }
+        }
+    }
+
+    /* ============================================================ */
+
+    /**
+     * Instances of 'AuditData' are passed between servers as part of the
+     * 'TargetLock' audit.
+     */
+    static class AuditData implements Serializable {
+        // sending UUID
+        UUID hostUuid;
+
+        // client records that currently exist, or records to be cleared
+        // (depending upon message) -- client/server is from the senders side
+        List<Identity> clientData;
+
+        // server records that currently exist, or records to be cleared
+        // (depending upon message) -- client/server is from the senders side
+        List<Identity> serverData;
+
+        /**
+         * Constructor - the instance is tagged with the UUID of the current
+         * host, and both lists start out empty.
+         */
+        AuditData() {
+            hostUuid = Server.getThisServer().getUuid();
+            clientData = new ArrayList<>();
+            serverData = new ArrayList<>();
+        }
+
+        /**
+         * Check the records in this 'AuditData' instance (typically received
+         * from a remote host) against the lock tables on this host.
+         *
+         * @param includeWarnings if 'true', generate warning messages
+         *     for mismatches
+         * @return an 'AuditData' instance that only contains records we
+         *     can't confirm
+         */
+        AuditData generateResponse(boolean includeWarnings) {
+            AuditData unconfirmed = new AuditData();
+
+            // compare remote servers client data with our server data
+            for (Identity candidate : clientData) {
+                if (confirmedByGlobalLocks(candidate)) {
+                    continue;
+                }
+
+                // If we reach this point, a match was not confirmed. Note that it
+                // is possible that we got caught in a transient state, so we need
+                // to somehow make sure that we don't "correct" a problem that
+                // isn't real.
+
+                if (includeWarnings) {
+                    logger.warn("TargetLock audit issue: server match not found "
+                                + "(key={},ownerKey={},uuid={})",
+                                candidate.key, candidate.ownerKey, candidate.uuid);
+                }
+
+                // it was 'clientData' to the sender, but 'serverData' to us
+                unconfirmed.serverData.add(candidate);
+            }
+
+            // compare remote servers server data with our client data
+            for (Identity candidate : serverData) {
+                if (confirmedByLocalLocks(candidate)) {
+                    continue;
+                }
+
+                // Again, the mismatch may be the result of a transient state.
+                if (includeWarnings) {
+                    logger.warn("TargetLock audit issue: client match not found "
+                                + "(key={},ownerKey={},uuid={})",
+                                candidate.key, candidate.ownerKey, candidate.uuid);
+                }
+                unconfirmed.clientData.add(candidate);
+            }
+
+            return unconfirmed;
+        }
+
+        /**
+         * Look for 'identity' in the 'GlobalLocks' data of the bucket derived
+         * from its 'key' (we are the server in this case) -- it may either be
+         * the current owner of the lock, or be on the waiting list.
+         *
+         * @param identity the record to look for
+         * @return 'true' if a matching owner or waiting entry was found
+         */
+        private static boolean confirmedByGlobalLocks(Identity identity) {
+            GlobalLocks globalLocks = Bucket.getBucket(identity.key)
+                                      .getAdjunctDontCreate(GlobalLocks.class);
+            if (globalLocks == null) {
+                return false;
+            }
+
+            Map<String, LockEntry> keyToEntry = globalLocks.keyToEntry;
+            synchronized (keyToEntry) {
+                LockEntry le = keyToEntry.get(identity.key);
+                if (le == null) {
+                    return false;
+                }
+                if (identity.uuid.equals(le.currentOwnerUuid)
+                        && identity.ownerKey.equals(le.currentOwnerKey)) {
+                    // matches the current owner
+                    return true;
+                }
+
+                // check the waiting list
+                for (Waiting waiting : le.waitingList) {
+                    if (identity.uuid.equals(waiting.ownerUuid)
+                            && identity.ownerKey.equals(waiting.ownerKey)) {
+                        return true;
+                    }
+                }
+                return false;
+            }
+        }
+
+        /**
+         * Look for 'identity' in the 'LocalLocks' data of the bucket derived
+         * from its 'ownerKey' (we are the client in this case).
+         *
+         * @param identity the record to look for
+         * @return 'true' if an entry with the same UUID, key and owner key
+         *     was found
+         */
+        private static boolean confirmedByLocalLocks(Identity identity) {
+            LocalLocks localLocks = Bucket.getBucket(identity.ownerKey)
+                                    .getAdjunctDontCreate(LocalLocks.class);
+            if (localLocks == null) {
+                return false;
+            }
+
+            synchronized (localLocks) {
+                WeakReference<TargetLock> wr =
+                    localLocks.uuidToWeakReference.get(identity.uuid);
+                if (wr == null) {
+                    return false;
+                }
+                Identity identity2 = localLocks.weakReferenceToIdentity.get(wr);
+                return identity2 != null
+                       && identity.key.equals(identity2.key)
+                       && identity.ownerKey.equals(identity2.ownerKey);
+            }
+        }
+
+        /**
+         * The response messages contain 'Identity' objects that match those
+         * in our outgoing '/lock/audit' message, but that the remote end could
+         * not confirm. Again, the definition of 'client' and 'server' are
+         * the remote ends' version.
+         *
+         * @param server the server we sent the request to
+         */
+        void processResponse(Server server) {
+            final int mismatchCount = clientData.size() + serverData.size();
+            if (mismatchCount == 0) {
+                // both lists are empty -- nothing to clean up
+                logger.info("TargetLock audit with {} completed -- no mismatches",
+                            server);
+                return;
+            }
+
+            // The remaining entries are real mismatches. Keep in mind that
+            // 'clientData' and 'serverData' are named from the remote end's
+            // point of view, which is the reverse of ours.
+
+            for (Identity stale : clientData) {
+                // our server end shows a lock for which the other end
+                // has no client
+                logger.error("Audit mismatch (GlobalLocks): (key={},owner={},uuid={})",
+                             stale.key, stale.ownerKey, stale.uuid);
+
+                // free the lock
+                GlobalLocks.get(stale.key).unlock(stale.key, stale.uuid);
+            }
+
+            for (Identity stale : serverData) {
+                // these are on our client end
+                logger.error("Audit mismatch (LocalLocks): (key={},owner={},uuid={})",
+                             stale.key, stale.ownerKey, stale.uuid);
+
+                // clean up 'LocalLocks' tables
+                TargetLock targetLock = removeLocalRecord(stale);
+                if (targetLock == null) {
+                    // no 'TargetLock' instance that needs a state change
+                    continue;
+                }
+
+                // may need to update state
+                synchronized (targetLock) {
+                    if (targetLock.state != State.FREE) {
+                        targetLock.state = State.LOST;
+                    }
+                }
+            }
+            logger.info("TargetLock audit with {} completed -- {} mismatches",
+                        server, mismatchCount);
+        }
+
+        /**
+         * Remove the 'LocalLocks' table entries associated with an 'Identity',
+         * and clear the associated weak reference.
+         *
+         * @param identity identifies the entries to remove
+         * @return the 'TargetLock' the weak reference was referring to, which
+         *     is 'null' if there was no table entry or no referent
+         */
+        private static TargetLock removeLocalRecord(Identity identity) {
+            LocalLocks localLocks = LocalLocks.get(identity.ownerKey);
+            synchronized (localLocks) {
+                WeakReference<TargetLock> wr =
+                    localLocks.uuidToWeakReference.get(identity.uuid);
+                if (wr == null) {
+                    return null;
+                }
+
+                // fetch the referent before the reference is cleared
+                TargetLock targetLock = wr.get();
+                localLocks.weakReferenceToIdentity.remove(wr);
+                localLocks.uuidToWeakReference.remove(identity.uuid);
+                wr.clear();
+                return targetLock;
+            }
+        }
+
+        /**
+         * Serialize and base64-encode this 'AuditData' instance, so it can
+         * be sent in a message.
+         *
+         * @return a byte array, which can be decoded and deserialized at
+         *     the other end ('null' is returned if there were any problems)
+         */
+        byte[] encode() {
+            try {
+                return Base64.getEncoder().encode(Util.serialize(this));
+            } catch (IOException e) {
+                logger.error("TargetLock.AuditData.encode Exception", e);
+                return null;
+            }
+        }
+
+        /**
+         * Base64-decode and deserialize a byte array.
+         *
+         * @param encodedData a byte array encoded via 'AuditData.encode'
+         *     (typically on the remote end of a connection)
+         * @return an 'AuditData' instance if decoding was successful,
+         *     and 'null' if not (this includes a 'null' array, data that
+         *     isn't valid base64, and data that deserializes to something
+         *     other than an 'AuditData' instance)
+         */
+        static AuditData decode(byte[] encodedData) {
+            if (encodedData == null) {
+                // nothing was received -- 'Base64.Decoder.decode' would throw
+                // a 'NullPointerException'
+                logger.error("TargetLock.AuditData.decode: no data to decode");
+                return null;
+            }
+            try {
+                Object decodedData =
+                    Util.deserialize(Base64.getDecoder().decode(encodedData));
+                if (decodedData instanceof AuditData) {
+                    return (AuditData)decodedData;
+                } else {
+                    // 'decodedData' may be 'null' here, so it can't be
+                    // dereferenced unconditionally
+                    logger.error(
+                        "TargetLock.AuditData.decode returned instance of class {}",
+                        decodedData == null
+                            ? "null" : decodedData.getClass().getName());
+                    return null;
+                }
+            } catch (IOException | ClassNotFoundException | IllegalArgumentException e) {
+                // 'IllegalArgumentException' is how 'Base64.Decoder.decode'
+                // reports input that is not valid base64
+                logger.error("TargetLock.AuditData.decode Exception", e);
+                return null;
+            }
+        }
+    }
+
+    /**
+     * This class contains methods that control the audit. Also, an instance of
+     * 'Audit' is created for each audit that is in progress.
+     */
+    static class Audit {
+        // if non-null, it means that we have a timer set that periodically
+        // triggers the audit
+        static TimerTask timerTask = null;
+
+        // maps 'Server' to audit data associated with that server
+        // (keys are compared by reference -- this is an 'IdentityHashMap')
+        Map<Server,AuditData> auditMap = new IdentityHashMap<>();
+
+        /**
+         * Run a single audit cycle.
+         */
+        static void runAudit() {
+            try {
+                logger.info("Starting TargetLock audit");
+                Audit audit = new Audit();
+
+                // populate 'auditMap' table
+                audit.build();
+
+                // send to all of the servers in 'auditMap' (may include this server)
+                audit.send();
+            } catch (InterruptedException e) {
+                logger.error("TargetLock audit interrupted", e);
+                Thread.currentThread().interrupt();
+            }
+        }
+
+        /**
+         * Schedule the audit to run periodically based upon defined properties.
+         */
+        static void scheduleAudit() {
+            scheduleAudit(auditPeriod, auditGracePeriod);
+        }
+
+        /**
+         * Schedule the audit to run periodically -- all of the hosts arrange to
+         * run their audit at a different time, evenly spaced across the audit
+         * period.
+         *
+         * @param period how frequently to run the audit, in milliseconds
+         * @param gracePeriod ensure that the audit doesn't run until at least
+         *     'gracePeriod' milliseconds have passed from the current time
+         */
+        static synchronized void scheduleAudit(final long period, final long gracePeriod) {
+
+            if (timerTask != null) {
+                // cancel current timer
+                timerTask.cancel();
+                timerTask = null;
+            }
+
+            // this needs to run in the 'MainLoop' thread, because it is dependent
+            // upon the list of servers, and our position in this list
+            MainLoop.queueWork(new Runnable() {
+                /**
+                 * {@inheritDoc}
+                 */
+                @Override
+                public void run() {
+                    // current list of servers
+                    Collection<Server> servers = Server.getServers();
+
+                    // count of the number of servers
+                    int count = servers.size();
+
+                    // will contain our position in this list
+                    int index = 0;
+
+                    // current server
+                    Server thisServer = Server.getThisServer();
+
+                    for (Server server : servers) {
+                        if (server == thisServer) {
+                            break;
+                        }
+                        index += 1;
+                    }
+
+                    // if index == count, we didn't find this server
+                    // (which shouldn't happen)
+
+                    if (index < count) {
+                        // The servers are ordered by UUID, and 'index' is this
+                        // server's position within the list. Suppose the period is
+                        // 60000 (60 seconds), and there are 5 servers -- the first one
+                        // will run the audit at 0 seconds after the minute, the next
+                        // at 12 seconds after the minute, then 24, 36, 48.
+                        long offset = (period * index) / count;
+
+                        // the earliest time we want the audit to run
+                        long time = System.currentTimeMillis() + gracePeriod;
+                        long startTime = time - (time % period) + offset;
+                        if (startTime <= time) {
+                            startTime += period;
+                        }
+                        synchronized (Audit.class) {
+                            if (timerTask != null) {
+                                timerTask.cancel();
+                            }
+                            timerTask = new TimerTask() {
+                                @Override
+                                public void run() {
+                                    try {
+                                        runAudit();
+                                    } catch (RuntimeException e) {
+                                        // an exception escaping from a task
+                                        // terminates the 'Timer' thread, and
+                                        // 'Util.timer' is a shared instance
+                                        logger.error("TargetLock audit failed", e);
+                                    }
+                                }
+                            };
+
+                            // now, schedule the timer
+                            Util.timer.scheduleAtFixedRate(
+                                timerTask, new Date(startTime), period);
+                        }
+                    }
+                }
+            });
+        }
+
+        /**
+         * Handle an incoming '/lock/audit' message.
+         *
+         * @param serverUuid the UUID of the intended destination server
+         * @param ttl similar to IP time-to-live -- it controls the number of hops
+         * @param encodedData base64-encoded data, containing a serialized
+         *     'AuditData' instance
+         * @return a serialized and base64-encoded 'AuditData' response
+         *     ('null' if the message could not be forwarded or decoded)
+         */
+        static byte[] incomingAudit(UUID serverUuid, int ttl, byte[] encodedData) {
+            if (!Server.getThisServer().getUuid().equals(serverUuid)) {
+                // not for us -- forward it, as long as 'ttl' permits another hop
+                if ((ttl -= 1) > 0) {
+                    Server server = Server.getServer(serverUuid);
+                    if (server != null) {
+                        WebTarget webTarget = server.getWebTarget("lock/audit");
+                        if (webTarget != null) {
+                            logger.info("Forwarding 'lock/audit' to uuid {}",
+                                        serverUuid);
+                            return webTarget
+                                   .queryParam("server", serverUuid.toString())
+                                   .queryParam("ttl", String.valueOf(ttl))
+                                   .request().post(toEntity(encodedData), byte[].class);
+                        }
+                    }
+                }
+
+                // if we reach this point, we didn't forward for some reason
+
+                logger.error("Couldn't forward 'lock/audit' to uuid {}", serverUuid);
+                return null;
+            }
+
+            AuditData auditData = AuditData.decode(encodedData);
+            if (auditData != null) {
+                AuditData auditResp = auditData.generateResponse(true);
+                return auditResp.encode();
+            }
+            return null;
+        }
+
+        /**
+         * Wrap encoded audit data in the entity used for 'lock/audit' messages.
+         *
+         * @param encodedData base64-encoded data (see 'AuditData.encode')
+         * @return an entity containing the data as a String
+         */
+        private static Entity<String> toEntity(byte[] encodedData) {
+            // base64 output is plain ASCII, so naming the character set doesn't
+            // change the result -- it just removes the dependency on the
+            // platform default
+            return Entity.entity(
+                new String(encodedData, java.nio.charset.StandardCharsets.UTF_8),
+                MediaType.APPLICATION_OCTET_STREAM_TYPE);
+        }
+
+        /**
+         * This method populates the 'auditMap' table by going through all of
+         * the client and server lock data, and sorting it according to the
+         * remote server.
+         */
+        void build() {
+            for (int i = 0 ; i < Bucket.BUCKETCOUNT ; i += 1) {
+                Bucket bucket = Bucket.getBucket(i);
+
+                // client data
+                LocalLocks localLocks =
+                    bucket.getAdjunctDontCreate(LocalLocks.class);
+                if (localLocks != null) {
+                    synchronized (localLocks) {
+                        // we have client data for this bucket
+                        for (Identity identity :
+                                localLocks.weakReferenceToIdentity.values()) {
+                            // find or create the 'AuditData' instance associated
+                            // with the server owning the 'key'
+                            AuditData auditData = getAuditData(identity.key);
+                            if (auditData != null) {
+                                auditData.clientData.add(identity);
+                            }
+                        }
+                    }
+                }
+
+                // server data
+                GlobalLocks globalLocks =
+                    bucket.getAdjunctDontCreate(GlobalLocks.class);
+                if (globalLocks != null) {
+                    // we have server data for this bucket
+                    Map<String, LockEntry> keyToEntry = globalLocks.keyToEntry;
+                    synchronized (keyToEntry) {
+                        for (LockEntry le : keyToEntry.values()) {
+                            // find or create the 'AuditData' instance associated
+                            // with the current 'ownerKey'
+                            AuditData auditData = getAuditData(le.currentOwnerKey);
+                            if (auditData != null) {
+                                // create an 'Identity' entry, and add it to
+                                // the list associated with the remote server
+                                auditData.serverData.add(
+                                    new Identity(le.key, le.currentOwnerKey,
+                                                 le.currentOwnerUuid));
+                            }
+
+                            for (Waiting waiting : le.waitingList) {
+                                // find or create the 'AuditData' instance associated
+                                // with the waiting entry 'ownerKey'
+                                auditData = getAuditData(waiting.ownerKey);
+                                if (auditData != null) {
+                                    // create an 'Identity' entry, and add it to
+                                    // the list associated with the remote server
+                                    auditData.serverData.add(
+                                        new Identity(le.key, waiting.ownerKey,
+                                                     waiting.ownerUuid));
+                                }
+                            }
+                        }
+                    }
+                }
+            }
+        }
+
+        /**
+         * Find or create the 'AuditData' structure associated with a particular
+         * key.
+         *
+         * @param key the key, which is mapped to a bucket, and then to the
+         *     server owning that bucket
+         * @return the 'AuditData' instance associated with that server, or
+         *     'null' if the bucket has no server
+         */
+        AuditData getAuditData(String key) {
+            // map 'key -> bucket number', and then 'bucket number' -> 'server'
+            Server server = Bucket.bucketToServer(Bucket.bucketNumber(key));
+            if (server != null) {
+                AuditData auditData = auditMap.get(server);
+                if (auditData == null) {
+                    // doesn't exist yet -- create it
+                    auditData = new AuditData();
+                    auditMap.put(server, auditData);
+                }
+                return auditData;
+            }
+
+            // this happens when the bucket has not been assigned to a server yet
+            return null;
+        }
+
+        /**
+         * Using the collected 'auditMap', send out the messages to all of the
+         * servers.
+         */
+        void send() throws InterruptedException {
+            if (auditMap.isEmpty()) {
+                logger.info("TargetLock audit: no locks on this server");
+            } else {
+                logger.info("TargetLock audit: sending audit information to {}",
+                            auditMap.keySet());
+            }
+
+            for (Map.Entry<Server, AuditData> entry : auditMap.entrySet()) {
+                // fetch server and audit data
+                final Server server = entry.getKey();
+                AuditData auditData = entry.getValue();
+
+                if (server == Server.getThisServer()) {
+                    // process this locally
+                    final AuditData respData = auditData.generateResponse(true);
+                    if (respData.clientData.isEmpty()
+                            && respData.serverData.isEmpty()) {
+                        // no mismatches
+                        logger.info("TargetLock.Audit.send: "
+                                    + "no errors from self ({})", server);
+                        continue;
+                    }
+
+                    // do the rest in a separate thread
+                    server.getThreadPool().execute(new Runnable() {
+                        @Override
+                        public void run() {
+                            // wait a few seconds, and see if we still know of these
+                            // errors
+                            logger.info("TargetLock.Audit.send: "
+                                        + "mismatches from self ({})", server);
+                            try {
+                                Thread.sleep(auditRetryDelay);
+                            } catch (InterruptedException e) {
+                                logger.error("TargetLock.Audit.send: Interrupted "
+                                             + "handling audit response from self ({})",
+                                             server);
+                                // just abort
+                                Thread.currentThread().interrupt();
+                                return;
+                            }
+
+                            // This will check against our own data -- any mismatches
+                            // mean that things have changed since we sent out the
+                            // first message. We will remove any mismatches from
+                            // 'respData', and see if there are any left.
+                            AuditData mismatches = respData.generateResponse(false);
+
+                            respData.serverData.removeAll(mismatches.clientData);
+                            respData.clientData.removeAll(mismatches.serverData);
+
+                            if (respData.clientData.isEmpty()
+                                    && respData.serverData.isEmpty()) {
+                                // no mismatches --
+                                // there must have been transient issues on our side
+                                logger.info("TargetLock.Audit.send: "
+                                            + "no mismatches from self "
+                                            + "({}) after retry", server);
+                                return;
+                            }
+
+                            // any mismatches left in 'respData' are still issues
+                            respData.processResponse(server);
+                        }
+                    });
+                    continue;
+                }
+
+                // serialize
+                byte[] encodedData = auditData.encode();
+                if (encodedData == null) {
+                    // error has already been displayed
+                    continue;
+                }
+
+                // generate entity
+                Entity<String> entity = toEntity(encodedData);
+
+                server.post("lock/audit", entity, new Server.PostResponse() {
+                    @Override
+                    public WebTarget webTarget(WebTarget webTarget) {
+                        // include the 'uuid' keyword
+                        return webTarget
+                               .queryParam("server", server.getUuid().toString())
+                               .queryParam("ttl", timeToLive);
+                    }
+
+                    @Override
+                    public void response(Response response) {
+                        // process the response here
+                        AuditData respData =
+                            AuditData.decode(response.readEntity(byte[].class));
+                        if (respData == null) {
+                            logger.error("TargetLock.Audit.send: "
+                                         + "couldn't process response from {}",
+                                         server);
+                            return;
+                        }
+
+                        // if we reach this point, we got a response
+                        if (respData.clientData.isEmpty()
+                                && respData.serverData.isEmpty()) {
+                            // no mismatches
+                            logger.info("TargetLock.Audit.send: "
+                                        + "no errors from {}", server);
+                            return;
+                        }
+
+                        // wait a few seconds, and see if we still know of these
+                        // errors
+                        logger.info("TargetLock.Audit.send: mismatches from {}",
+                                    server);
+                        try {
+                            Thread.sleep(auditRetryDelay);
+                        } catch (InterruptedException e) {
+                            logger.error("TargetLock.Audit.send: Interrupted "
+                                         + "handling audit response from {}",
+                                         server);
+                            // just abort
+                            Thread.currentThread().interrupt();
+                            return;
+                        }
+
+                        // This will check against our own data -- any mismatches
+                        // mean that things have changed since we sent out the
+                        // first message. We will remove any mismatches from
+                        // 'respData', and see if there are any left.
+                        AuditData mismatches = respData.generateResponse(false);
+
+                        respData.serverData.removeAll(mismatches.clientData);
+                        respData.clientData.removeAll(mismatches.serverData);
+
+                        if (respData.clientData.isEmpty()
+                                && respData.serverData.isEmpty()) {
+                            // no mismatches --
+                            // there must have been transient issues on our side
+                            logger.info("TargetLock.Audit.send: no mismatches from "
+                                        + "{} after retry", server);
+                            return;
+                        }
+
+                        // any mismatches left in 'respData' are still there --
+                        // hopefully, they are transient issues on the other side
+                        AuditData auditData = new AuditData();
+                        auditData.clientData = respData.serverData;
+                        auditData.serverData = respData.clientData;
+
+                        // serialize
+                        byte[] encodedData = auditData.encode();
+                        if (encodedData == null) {
+                            // error has already been displayed
+                            return;
+                        }
+
+                        // 'getWebTarget' may return 'null' (see 'incomingAudit')
+                        WebTarget retryTarget = server.getWebTarget("lock/audit");
+                        if (retryTarget == null) {
+                            logger.error("TargetLock.Audit.send: "
+                                         + "no web target available for {}",
+                                         server);
+                            return;
+                        }
+
+                        // send new list to other end
+                        response = retryTarget
+                                   .queryParam("server", server.getUuid().toString())
+                                   .queryParam("ttl", timeToLive)
+                                   .request().post(toEntity(encodedData));
+
+                        respData = AuditData.decode(response.readEntity(byte[].class));
+                        if (respData == null) {
+                            logger.error("TargetLock.auditDataBuilder.send: "
+                                         + "couldn't process response from {}",
+                                         server);
+                            return;
+                        }
+
+                        // if there are mismatches left, they are presumably real
+                        respData.processResponse(server);
+                    }
+                });
+            }
+        }
+    }
+}
diff --git a/feature-server-pool/src/main/java/org/onap/policy/drools/serverpool/Util.java
b/feature-server-pool/src/main/java/org/onap/policy/drools/serverpool/Util.java new file mode 100644 index 00000000..2ad0a401 --- /dev/null +++ b/feature-server-pool/src/main/java/org/onap/policy/drools/serverpool/Util.java @@ -0,0 +1,181 @@ +/* + * ============LICENSE_START======================================================= + * feature-server-pool + * ================================================================================ + * Copyright (C) 2020 AT&T Intellectual Property. All rights reserved. + * ================================================================================ + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * ============LICENSE_END========================================================= + */ + +package org.onap.policy.drools.serverpool; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.DataInputStream; +import java.io.DataOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.ObjectInputStream; +import java.io.ObjectOutputStream; +import java.nio.charset.StandardCharsets; +import java.util.Comparator; +import java.util.Timer; +import java.util.UUID; + +import org.apache.commons.io.IOUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class Util { + private static Logger logger = LoggerFactory.getLogger(Util.class); + // create a shared 'Timer' instance + public static final Timer timer = new Timer("Server Pool Timer", true); + + /** + * Internally, UUID objects use two 'long' variables, and the default + * comparison is signed, which means the order for the first and 16th digit + * is: '89abcdef01234567', while the order for the rest is + * '0123456789abcdef'. + * The following comparator uses the ordering '0123456789abcdef' for all + * digits. + */ + public static final Comparator<UUID> uuidComparator = + new Comparator<UUID>() { + public int compare(UUID u1, UUID u2) { + // compare most significant portion + int rval = Long.compareUnsigned(u1.getMostSignificantBits(), + u2.getMostSignificantBits()); + if (rval == 0) { + // most significant portion matches -- + // compare least significant portion + rval = Long.compareUnsigned(u1.getLeastSignificantBits(), + u2.getLeastSignificantBits()); + } + return rval; + } + }; + + /* ============================================================ */ + + /** + * write a UUID to an output stream. 
+ * + * @param ds the output stream + * @param uuid the uuid to write + */ + public static void writeUuid(DataOutputStream ds, UUID uuid) throws IOException { + // write out 16 byte UUID + ds.writeLong(uuid.getMostSignificantBits()); + ds.writeLong(uuid.getLeastSignificantBits()); + } + + /** + * read a UUID from an input stream. + * + * @param ds the input stream + */ + public static UUID readUuid(DataInputStream ds) throws IOException { + long mostSigBits = ds.readLong(); + long leastSigBits = ds.readLong(); + return new UUID(mostSigBits, leastSigBits); + } + + /* ============================================================ */ + + /** + * Read from an 'InputStream' until EOF or until it is closed. This method + * may block, depending on the type of 'InputStream'. + * + * @param input This is the input stream + * @return A 'String' containing the contents of the input stream + */ + public static String inputStreamToString(InputStream input) { + try { + return IOUtils.toString(input, StandardCharsets.UTF_8); + } catch (IOException e) { + // TODO Auto-generated catch block + logger.error("Util.inputStreamToString error", e); + return ""; + } + } + + /* ============================================================ */ + + /** + * Serialize an object into a byte array. + * + * @param object the object to serialize + * @return a byte array containing the serialized object + * @throws IOException this may be an exception thrown by the output stream, + * a NotSerializableException if an object can't be serialized, or an + * InvalidClassException + */ + public static byte[] serialize(Object object) throws IOException { + try (ByteArrayOutputStream bos = new ByteArrayOutputStream(); + ObjectOutputStream oos = new ObjectOutputStream(bos)) { + oos.writeObject(object); + oos.flush(); + return bos.toByteArray(); + } + } + + /** + * Deserialize a byte array into an object. 
+ * + * @param data a byte array containing the serialized object + * @return the deserialized object + * @throws IOException this may be an exception thrown by the input stream, + * a StreamCorrupted Exception if the information in the stream is not + * consistent, an OptionalDataException if the input data primitive data, + * rather than an object, or InvalidClassException + * @throws ClassNotFoundException if the class of a serialized object can't + * be found + */ + public static Object deserialize(byte[] data) throws IOException, ClassNotFoundException { + try (ByteArrayInputStream bis = new ByteArrayInputStream(data); + ObjectInputStream ois = new ObjectInputStream(bis)) { + return ois.readObject(); + } + } + + /** + * Deserialize a byte array into an object. + * + * @param data a byte array containing the serialized object + * @param classLoader the class loader to use when locating classes + * @return the deserialized object + * @throws IOException this may be an exception thrown by the input stream, + * a StreamCorrupted Exception if the information in the stream is not + * consistent, an OptionalDataException if the input data primitive data, + * rather than an object, or InvalidClassException + * @throws ClassNotFoundException if the class of a serialized object can't + * be found + */ + public static Object deserialize(byte[] data, ClassLoader classLoader) + throws IOException, ClassNotFoundException { + + try (ByteArrayInputStream bis = new ByteArrayInputStream(data); + ExtendedObjectInputStream ois = + new ExtendedObjectInputStream(bis, classLoader)) { + return ois.readObject(); + } + } + + /** + * Shutdown the timer thread. 
+ */ + public static void shutdown() { + timer.cancel(); + } +} diff --git a/feature-server-pool/src/main/java/org/onap/policy/drools/serverpool/persistence/Persistence.java b/feature-server-pool/src/main/java/org/onap/policy/drools/serverpool/persistence/Persistence.java new file mode 100644 index 00000000..295194d2 --- /dev/null +++ b/feature-server-pool/src/main/java/org/onap/policy/drools/serverpool/persistence/Persistence.java @@ -0,0 +1,875 @@ +/* + * ============LICENSE_START======================================================= + * feature-server-pool + * ================================================================================ + * Copyright (C) 2020 AT&T Intellectual Property. All rights reserved. + * ================================================================================ + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * ============LICENSE_END========================================================= + */ + +package org.onap.policy.drools.serverpool.persistence; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.ObjectOutputStream; +import java.util.Base64; +import java.util.Collection; +import java.util.HashMap; +import java.util.HashSet; +import java.util.IdentityHashMap; +import java.util.LinkedList; +import java.util.List; +import java.util.Map; +import java.util.Properties; +import java.util.Set; +import java.util.UUID; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.TimeUnit; + +import javax.ws.rs.Consumes; +import javax.ws.rs.POST; +import javax.ws.rs.Path; +import javax.ws.rs.QueryParam; +import javax.ws.rs.client.Entity; +import javax.ws.rs.client.WebTarget; +import javax.ws.rs.core.MediaType; + +import org.kie.api.event.rule.ObjectDeletedEvent; +import org.kie.api.event.rule.ObjectInsertedEvent; +import org.kie.api.event.rule.ObjectUpdatedEvent; +import org.kie.api.event.rule.RuleRuntimeEventListener; +import org.kie.api.runtime.KieSession; +import org.onap.policy.drools.core.DroolsRunnable; +import org.onap.policy.drools.core.PolicyContainer; +import org.onap.policy.drools.core.PolicySession; +import org.onap.policy.drools.core.PolicySessionFeatureApi; +import org.onap.policy.drools.serverpool.Bucket; +import org.onap.policy.drools.serverpool.Keyword; +import org.onap.policy.drools.serverpool.Server; +import org.onap.policy.drools.serverpool.ServerPoolApi; +import org.onap.policy.drools.serverpool.TargetLock.GlobalLocks; +import org.onap.policy.drools.serverpool.Util; +import org.onap.policy.drools.system.PolicyControllerConstants; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * This class provides a persistence implementation for 'feature-server-pool', + * backing up the data of selected Drools sessions and server-side 'TargetLock' + * data on 
separate hosts. + */ +public class Persistence implements PolicySessionFeatureApi, ServerPoolApi { + private static Logger logger = LoggerFactory.getLogger(Persistence.class); + + /***************************************/ + /* 'PolicySessionFeatureApi' interface */ + /***************************************/ + + /** + * {@inheritDoc} + */ + @Override + public int getSequenceNumber() { + return 1; + } + + /** + * {@inheritDoc} + */ + @Override + public void newPolicySession(PolicySession policySession) { + // a new Drools session is being created -- look at the properties + // 'persistence.<session-name>.type' and 'persistence.type' to determine + // whether persistence is enabled for this session + + // fetch properties file + PolicyContainer container = policySession.getPolicyContainer(); + Properties properties = PolicyControllerConstants.getFactory().get( + container.getGroupId(), container.getArtifactId()).getProperties(); + + // look at 'persistence.<session-name>.type', and 'persistence.type' + String type = properties.getProperty("persistence." 
+ policySession.getName() + ".type"); + if (type == null) { + type = properties.getProperty("persistence.type"); + } + + if ("auto".equals(type) || "native".equals(type)) { + // persistence is enabled this session + policySession.setAdjunct(PersistenceRunnable.class, + new PersistenceRunnable(policySession)); + } + } + + /*****************************/ + /* 'ServerPoolApi' interface */ + /*****************************/ + + /** + * {@inheritDoc} + */ + @Override + public Collection<Class<?>> servletClasses() { + // the nested class 'Rest' contains additional REST calls + List<Class<?>> classes = new LinkedList<>(); + classes.add(Rest.class); + return classes; + } + + /** + * {@inheritDoc} + */ + @Override + public void restoreBucket(Bucket bucket) { + // if we reach this point, no data was received from the old server, which + // means we just initialized, or we did not have a clean bucket migration + + ReceiverBucketData rbd = bucket.removeAdjunct(ReceiverBucketData.class); + if (rbd != null) { + // there is backup data -- do a restore + rbd.restoreBucket(bucket); + } + } + + /** + * {@inheritDoc} + */ + @Override + public void lockUpdate(Bucket bucket, GlobalLocks globalLocks) { + // we received a notification from 'TargetLock' that 'GlobalLocks' data + // has changed (TBD: should any attempt be made to group updates that + // occur in close succession?) 
+ + sendLockDataToBackups(bucket, globalLocks); + } + + /** + * {@inheritDoc} + */ + @Override + public void auditBucket(Bucket bucket, boolean isOwner, boolean isBackup) { + if (isOwner) { + // it may be that backup hosts have changed -- + // send out lock and session data + + // starting with lock data + GlobalLocks globalLocks = + bucket.getAdjunctDontCreate(GlobalLocks.class); + if (globalLocks != null) { + sendLockDataToBackups(bucket, globalLocks); + } + + // now, session data + SenderBucketData sbd = + bucket.getAdjunctDontCreate(SenderBucketData.class); + if (sbd != null) { + synchronized (sbd) { + // go through all of the sessions where we have persistent data + for (PolicySession session : sbd.sessionData.keySet()) { + Object obj = session.getAdjunct(PersistenceRunnable.class); + if (obj instanceof PersistenceRunnable) { + PersistenceRunnable pr = (PersistenceRunnable)obj; + synchronized (pr.modifiedBuckets) { + // mark bucket associated with this session + // as modified + pr.modifiedBuckets.add(bucket); + } + } + } + } + } + } else if (bucket.removeAdjunct(SenderBucketData.class) != null) { + logger.warn("Bucket {}: Removed superfluous " + + "'SenderBucketData' adjunct", + bucket.getIndex()); + } + if (!isBackup && bucket.removeAdjunct(ReceiverBucketData.class) != null) { + logger.warn("Bucket {}: Removed superfluous " + + "'ReceiverBucketData' adjunct", + bucket.getIndex()); + } + } + + /** + * This method supports 'lockUpdate' -- it has been moved to a separate + * 'static' method, so it can also be called after restoring 'GlobalLocks', + * so it can be backed up on its new servers. 
+ * + * @param bucket the bucket containing the 'GlobalLocks' adjunct + * @param globalLocks the 'GlobalLocks' adjunct + */ + private static void sendLockDataToBackups(Bucket bucket, GlobalLocks globalLocks) { + final int bucketNumber = bucket.getIndex(); + SenderBucketData sbd = bucket.getAdjunct(SenderBucketData.class); + int lockCount = 0; + + // serialize the 'globalLocks' instance + ByteArrayOutputStream bos = new ByteArrayOutputStream(); + try { + ObjectOutputStream oos = new ObjectOutputStream(bos); + synchronized (globalLocks) { + // the 'GlobalLocks' instance and counter are tied together + oos.writeObject(globalLocks); + lockCount = sbd.getLockCountAndIncrement(); + } + oos.close(); + } catch (IOException e) { + logger.error("Persistence.LockUpdate({})", bucketNumber, e); + return; + } + + // convert to Base64, and generate an 'Entity' for the REST call + byte[] serializedData = Base64.getEncoder().encode(bos.toByteArray()); + final Entity<String> entity = + Entity.entity(new String(serializedData), + MediaType.APPLICATION_OCTET_STREAM_TYPE); + final int count = lockCount; + + // build list of backup servers + Set<Server> servers = new HashSet<>(); + synchronized (bucket) { + servers.add(bucket.getPrimaryBackup()); + servers.add(bucket.getSecondaryBackup()); + } + for (final Server server : servers) { + if (server != null) { + // send out REST command + server.getThreadPool().execute(new Runnable() { + @Override + public void run() { + WebTarget webTarget = + server.getWebTarget("persistence/lock"); + if (webTarget != null) { + webTarget + .queryParam("bucket", bucketNumber) + .queryParam("count", count) + .queryParam("dest", server.getUuid()) + .request().post(entity); + } + } + }); + } + } + } + + /* ============================================================ */ + + /** + * One instance of this class exists for every Drools session that is + * being backed up. 
It implements the 'RuleRuntimeEventListener' interface, + * so it receives notifications of Drools object changes, and also implements + * the 'DroolsRunnable' interface, so it can run within the Drools session + * thread, which should reduce the chance of catching an object in a + * transient state. + */ + static class PersistenceRunnable implements DroolsRunnable, + RuleRuntimeEventListener { + // this is the Drools session associated with this instance + private PolicySession session; + + // this is the string "<groupId>:<artifactId>:<sessionName>" + private String encodedSessionName; + + // the buckets in this session which have modifications that still + // need to be backed up + private Set<Bucket> modifiedBuckets = new HashSet<>(); + + /** + * Constructor - save the session information, and start listing for + * updates. + */ + PersistenceRunnable(PolicySession session) { + PolicyContainer pc = session.getPolicyContainer(); + + this.session = session; + this.encodedSessionName = + pc.getGroupId() + ":" + pc.getArtifactId() + ":" + session.getName(); + session.getKieSession().addEventListener(this); + } + + /******************************/ + /* 'DroolsRunnable' interface */ + /******************************/ + + /** + * {@inheritDoc} + */ + @Override + public void run() { + try { + // save a snapshot of 'modifiedBuckets' + Set<Bucket> saveModifiedBuckets; + synchronized (modifiedBuckets) { + saveModifiedBuckets = new HashSet<>(modifiedBuckets); + modifiedBuckets.clear(); + } + + // iterate over all of the modified buckets, sending update data + // to all of the backup servers + for (Bucket bucket : saveModifiedBuckets) { + SenderBucketData sbd = + bucket.getAdjunctDontCreate(SenderBucketData.class); + if (sbd != null) { + // serialization occurs within the Drools session thread + SenderSessionBucketData ssbd = sbd.getSessionData(session); + byte[] serializedData = + ssbd.getLatestEncodedSerializedData(); + final int count = ssbd.getCount(); + final 
Entity<String> entity = + Entity.entity(new String(serializedData), + MediaType.APPLICATION_OCTET_STREAM_TYPE); + + // build list of backup servers + Set<Server> servers = new HashSet<>(); + synchronized (bucket) { + servers.add(bucket.getPrimaryBackup()); + servers.add(bucket.getSecondaryBackup()); + } + for (final Server server : servers) { + if (server != null) { + // send out REST command + server.getThreadPool().execute(new Runnable() { + @Override + public void run() { + WebTarget webTarget = + server.getWebTarget("persistence/session"); + if (webTarget != null) { + webTarget + .queryParam("bucket", + bucket.getIndex()) + .queryParam("session", + encodedSessionName) + .queryParam("count", count) + .queryParam("dest", server.getUuid()) + .request().post(entity); + } + } + }); + } + } + } + } + } catch (Exception e) { + logger.error("Persistence.PersistenceRunnable.run:", e); + } + } + + /****************************************/ + /* 'RuleRuntimeEventListener' interface */ + /****************************************/ + + /** + * {@inheritDoc} + */ + @Override + public void objectDeleted(ObjectDeletedEvent event) { + // determine Drools object that was deleted + Object object = event.getOldObject(); + + // determine keyword, if any + String keyword = Keyword.lookupKeyword(object); + if (keyword == null) { + // no keyword, so there is no associated bucket + return; + } + + // locate bucket and associated data + // (don't create adjunct if it isn't there -- there's nothing to delete) + Bucket bucket = Bucket.getBucket(keyword); + SenderBucketData sbd = + bucket.getAdjunctDontCreate(SenderBucketData.class); + if (sbd != null) { + // add bucket to 'modified' list + synchronized (modifiedBuckets) { + modifiedBuckets.add(bucket); + } + + // update set of Drools objects in this bucket + sbd.getSessionData(session).objectDeleted(object); + + // insert this 'DroolsRunnable' to do the backup (note that it + // may already be inserted from a previous update to this + // 
DroolsSession -- eventually, the rule will fire, and the 'run' + // method will be called) + session.getKieSession().insert(this); + } + } + + /** + * {@inheritDoc} + */ + @Override + public void objectInserted(ObjectInsertedEvent event) { + objectChanged(event.getObject()); + } + + /** + * {@inheritDoc} + */ + @Override + public void objectUpdated(ObjectUpdatedEvent event) { + objectChanged(event.getObject()); + } + + /** + * A Drools session object was either inserted or updated + * (both are treated the same way). + * + * @param object the object being inserted or updated + */ + private void objectChanged(Object object) { + // determine keyword, if any + String keyword = Keyword.lookupKeyword(object); + if (keyword == null) { + // no keyword, so there is no associated bucket + return; + } + + // locate bucket and associated data + Bucket bucket = Bucket.getBucket(keyword); + SenderBucketData sbd = bucket.getAdjunct(SenderBucketData.class); + + // add bucket to 'modified' list + synchronized (modifiedBuckets) { + modifiedBuckets.add(bucket); + } + + // update set of Drools objects in this bucket + sbd.getSessionData(session).objectChanged(object); + + // insert this 'DroolsRunnable' to do the backup (note that it + // may already be inserted from a previous update to this + // DroolsSession -- eventually, the rule will fire, and the 'run' + // method will be called) + session.getKieSession().insert(this); + } + } + + /* ============================================================ */ + + /** + * Per-session data for a single bucket on the sender's side. 
+ */ + static class SenderSessionBucketData { + // the set of all objects in the session associated with this bucket + Map<Object,Object> droolsObjects = new IdentityHashMap<>(); + + // used by the receiver to determine whether an update is really newer + int count = 0; + + // serialized base64 form of 'droolsObjects' + // (TBD: determine if we are getting any benefit from caching this) + byte[] encodedSerializedData = null; + + // 'true' means that 'encodedSerializedData' is out-of-date + boolean rebuildNeeded = true; + + /** + * Notification that a Drools object associated with this bucket + * was deleted. + * + * @param object the object that was deleted + */ + synchronized void objectDeleted(Object object) { + if (droolsObjects.remove(object) != null) { + rebuildNeeded = true; + } + } + + /** + * Notification that a Drools object associated with this bucket + * was inserted or updated. + * + * @param object the object that was updated + */ + synchronized void objectChanged(Object object) { + droolsObjects.put(object, object); + rebuildNeeded = true; + } + + /** + * Serialize and base64-encode the objects in this Drools session. + * + * @return a byte array containing the encoded serialized objects + */ + synchronized byte[] getLatestEncodedSerializedData() { + if (rebuildNeeded) { + try { + // this should be run in the Drools session thread in order + // to avoid transient data + encodedSerializedData = + Base64.getEncoder().encode(Util.serialize(droolsObjects)); + count += 1; + } catch (IOException e) { + logger.error("Persistence.SenderSessionBucketData." + + "getLatestEncodedSerializedData: ", e); + } + rebuildNeeded = false; + } + return encodedSerializedData; + } + + /** + * Return a counter that will be used for update comparison. 
+ * + * @return the value of a counter that can be used to determine whether + * an update is really newer than the previous update + */ + synchronized int getCount() { + return count; + } + } + + /* ============================================================ */ + + /** + * Data for a single bucket on the sender's side. + */ + public static class SenderBucketData { + // maps session name into SenderSessionBucketData + Map<PolicySession, SenderSessionBucketData> sessionData = + new IdentityHashMap<>(); + + // used by the receiver to determine whether an update is really newer + int lockCount = 0; + + /** + * Create or fetch the 'SenderSessionBucketData' instance associated + * with the specified session. + * + * @param session the 'PolicySession' object + * @return the associated 'SenderSessionBucketData' instance + */ + synchronized SenderSessionBucketData getSessionData(PolicySession session) { + // try to fetch the associated instance + SenderSessionBucketData rval = sessionData.get(session); + if (rval == null) { + // it doesn't exist, so create one + rval = new SenderSessionBucketData(); + sessionData.put(session, rval); + } + return rval; + } + + /** + * Return a counter that will be used for update comparison. + * + * @return the value of a counter that can be used to determine whether + * an update is really newer than the previous update + */ + int getLockCountAndIncrement() { + // note that this is synchronized using the 'GlobalLocks' instance + // within the same bucket + return lockCount++; + } + } + + /* ============================================================ */ + + /** + * Data for a single bucket and session on the receiver's side. 
+ */ + static class ReceiverSessionBucketData { + // used to determine whether an update is really newer + int count = -1; + + // serialized base64 form of 'droolsObjects' + byte[] encodedSerializedData = null; + } + + /* ============================================================ */ + + /** + * Data for a single bucket on the receiver's side -- this adjunct is used + * to store encoded data on a backup host. It will only be needed if the + * bucket owner fails. + */ + public static class ReceiverBucketData { + // maps session name into encoded data + Map<String, ReceiverSessionBucketData> sessionData = new HashMap<>(); + + // used by the receiver to determine whether an update is really newer + int lockCount = -1; + + // encoded lock data + byte[] lockData = null; + + /** + * This method is called in response to the '/persistence/session' + * REST message. It stores the base64-encoded and serialized data + * for a particular bucket and session. + * + * @param bucketNumber identifies the bucket + * @param sessionName identifies the Drools session + * @param count counter used to determine whether data is really newer + * @param data base64-encoded serialized data for this bucket and session + */ + static void receiveSession(int bucketNumber, String sessionName, int count, byte[] data) { + // fetch the bucket + Bucket bucket = Bucket.getBucket(bucketNumber); + + // create/fetch the 'ReceiverBucketData' adjunct + ReceiverBucketData rbd = bucket.getAdjunct(ReceiverBucketData.class); + synchronized (rbd) { + // update the session data + ReceiverSessionBucketData rsbd = rbd.sessionData.get(sessionName); + if (rsbd == null) { + rsbd = new ReceiverSessionBucketData(); + rbd.sessionData.put(sessionName, rsbd); + } + + if ((count - rsbd.count) > 0 || count == 0) { + // this is new data + rsbd.count = count; + rsbd.encodedSerializedData = data; + } + } + } + + /** + * This method is called in response to the '/persistence/lock' + * REST message. 
It stores the base64-encoded and serialized + * server-side lock data associated with this bucket. + * + * @param bucketNumber identifies the bucket + * @param count counter used to determine whether data is really newer + * @param data base64-encoded serialized lock data for this bucket + */ + static void receiveLockData(int bucketNumber, int count, byte[] data) { + // fetch the bucket + Bucket bucket = Bucket.getBucket(bucketNumber); + + // create/fetch the 'ReceiverBucketData' adjunct + ReceiverBucketData rbd = bucket.getAdjunct(ReceiverBucketData.class); + synchronized (rbd) { + // update the lock data + if ((count - rbd.lockCount) > 0 || count == 0) { + rbd.lockCount = count; + rbd.lockData = data; + } + } + } + + /** + * This method is called when a bucket is being restored from persistent + * data, which indicates that a clean migration didn't occur. + * Drools session and/or lock data is restored. + * + * @param bucket the bucket being restored + */ + synchronized void restoreBucket(Bucket bucket) { + // one entry for each Drools session being restored -- + // indicates when the restore is complete (restore runs within + // the Drools session thread) + List<CountDownLatch> sessionLatches = new LinkedList<>(); + for (String sessionName : sessionData.keySet()) { + // [0]="<groupId>" [1]="<artifactId>", [2]="<sessionName>" + String[] nameSegments = sessionName.split(":"); + PolicySession policySession = null; + + // locate the 'PolicyContainer' and 'PolicySession' + if (nameSegments.length == 3) { + // step through all 'PolicyContainer' instances looking + // for a matching 'artifactId' & 'groupId' + for (PolicyContainer pc : PolicyContainer.getPolicyContainers()) { + if (nameSegments[1].equals(pc.getArtifactId()) + && nameSegments[0].equals(pc.getGroupId())) { + // 'PolicyContainer' matches -- try to fetch the session + policySession = pc.getPolicySession(nameSegments[2]); + break; + } + } + } + + if (policySession == null) { + 
logger.error("Persistence.ReceiverBucketData.restoreBucket: " + + "Can't find PolicySession{}", sessionName); + continue; + } + + Object obj = null; + try { + // deserialization needs to use the correct 'ClassLoader' + ReceiverSessionBucketData rsbd = sessionData.get(sessionName); + obj = Util.deserialize(Base64.getDecoder().decode(rsbd.encodedSerializedData), + policySession.getPolicyContainer().getClassLoader()); + } catch (IOException | ClassNotFoundException | IllegalArgumentException e) { + logger.error("Persistence.ReceiverBucketData.restoreBucket: " + + "Failed to read data for session '{}'", + sessionName, e); + + // can't decode -- skip this session + continue; + } + + if (!(obj instanceof Map)) { + logger.error("Persistence.ReceiverBucketData.restoreBucket: " + + "Session '{}' data has class {}, expected 'Map'", + sessionName, obj.getClass().getName()); + + // wrong object type decoded -- skip this session + continue; + } + + // if we reach this point, we have decoded the persistent data + + final Map<?,?> droolsObjects = (Map<?,?>) obj; + + // signal when restore is complete + final CountDownLatch sessionLatch = new CountDownLatch(1); + + // 'KieSession' object + final KieSession kieSession = policySession.getKieSession(); + + // run the following within the Drools session thread + kieSession.insert(new DroolsRunnable() { + /** + * {@inheritDoc} + */ + @Override + public void run() { + try { + // insert all of the Drools objects into the session + for (Object obj : droolsObjects.keySet()) { + kieSession.insert(obj); + } + } finally { + // signal completion + sessionLatch.countDown(); + } + } + }); + + // add this to the set of 'CountDownLatch's we are waiting for + sessionLatches.add(sessionLatch); + } + + // restore lock data + if (lockData != null) { + Object obj = null; + try { + // decode lock data + obj = Util.deserialize(Base64.getDecoder().decode(lockData)); + if (obj instanceof GlobalLocks) { + bucket.putAdjunct(obj); + + // send out updated date 
+ sendLockDataToBackups(bucket, (GlobalLocks)obj); + } else { + logger.error("Persistence.ReceiverBucketData.restoreBucket: " + + "Expected 'GlobalLocks', got '{}'", + obj.getClass().getName()); + } + } catch (IOException | ClassNotFoundException | IllegalArgumentException e) { + logger.error("Persistence.ReceiverBucketData.restoreBucket: " + + "Failed to read lock data", e); + // skip the lock data + } + + } + + // wait for all of the sessions to update + try { + for (CountDownLatch sessionLatch : sessionLatches) { + if (!sessionLatch.await(10000L, TimeUnit.MILLISECONDS)) { + logger.error("{}: timed out waiting for session latch", + this); + } + } + } catch (InterruptedException e) { + logger.error("Exception in {}", this, e); + Thread.currentThread().interrupt(); + } + } + } + + /* ============================================================ */ + + @Path("/") + public static class Rest { + /** + * Handle the '/persistence/session' REST call. + */ + @POST + @Path("/persistence/session") + @Consumes(MediaType.APPLICATION_OCTET_STREAM) + public void receiveSession(@QueryParam("bucket") int bucket, + @QueryParam("session") String sessionName, + @QueryParam("count") int count, + @QueryParam("dest") UUID dest, + byte[] data) { + logger.debug("/persistence/session: (bucket={},session={},count={}) " + + "got {} bytes of data", + bucket, sessionName, count, data.length); + if (dest == null || dest.equals(Server.getThisServer().getUuid())) { + ReceiverBucketData.receiveSession(bucket, sessionName, count, data); + } else { + // This host is not the intended destination -- this could happen + // if it was sent from another site. Leave off the 'dest' param + // when forwarding the message, to ensure that we don't have + // an infinite forwarding loop, if the site data happens to be bad. 
+ Server server; + WebTarget webTarget; + + if ((server = Server.getServer(dest)) != null + && (webTarget = + server.getWebTarget("persistence/session")) != null) { + Entity<String> entity = + Entity.entity(new String(data), + MediaType.APPLICATION_OCTET_STREAM_TYPE); + webTarget + .queryParam("bucket", bucket) + .queryParam("session", sessionName) + .queryParam("count", count) + .request().post(entity); + } + } + } + + /** + * Handle the '/persistence/lock' REST call. + */ + @POST + @Path("/persistence/lock") + @Consumes(MediaType.APPLICATION_OCTET_STREAM) + public void receiveLockData(@QueryParam("bucket") int bucket, + @QueryParam("count") int count, + @QueryParam("dest") UUID dest, + byte[] data) { + logger.debug("/persistence/lock: (bucket={},count={}) " + + "got {} bytes of data", bucket, count, data.length); + if (dest == null || dest.equals(Server.getThisServer().getUuid())) { + ReceiverBucketData.receiveLockData(bucket, count, data); + } else { + // This host is not the intended destination -- this could happen + // if it was sent from another site. Leave off the 'dest' param + // when forwarding the message, to ensure that we don't have + // an infinite forwarding loop, if the site data happens to be bad. + Server server; + WebTarget webTarget; + + if ((server = Server.getServer(dest)) != null + && (webTarget = server.getWebTarget("persistence/lock")) != null) { + Entity<String> entity = + Entity.entity(new String(data), + MediaType.APPLICATION_OCTET_STREAM_TYPE); + webTarget + .queryParam("bucket", bucket) + .queryParam("count", count) + .request().post(entity); + } + } + } + } +} |