import java.io.IOException; import java.util.ArrayList; import java.util.HashMap; import java.util.LinkedHashSet; import java.util.List; import java.util.Random; import java.util.Set; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hbase.HBaseConfiguration; import org.apache.hadoop.hbase.HColumnDescriptor; import org.apache.hadoop.hbase.HTableDescriptor; import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.client.HBaseAdmin; import org.apache.hadoop.hbase.client.HTable; import org.apache.hadoop.hbase.client.Put; import org.apache.hadoop.hbase.client.Result; import org.apache.hadoop.hbase.client.ResultScanner; import org.apache.hadoop.hbase.client.Scan; import org.apache.hadoop.hbase.util.Bytes; public class HBaseHandler { /* Creating a table */ public static void createOrOverwrite(HBaseAdmin admin, HTableDescriptor table) throws IOException { // If table with same name exists, delete it if (admin.tableExists(table.getName())) { // First disable the table admin.disableTable(table.getName()); // Then drop it admin.deleteTable(table.getName()); } // Create the table admin.createTable(table); } /* Return a random int in the range [min,max] */ public static int randInt(int min, int max) { Random rand = new Random(); // nextInt is normally exclusive of the top value, // so add 1 to make it inclusive int randomNum = rand.nextInt((max - min) + 1) + min; return randomNum; } /* Creating cf number of column families for a table */ public static void createColumnFamilies(HTableDescriptor table, int cf){ } /* Insert random values to a table */ public static void randomizeTable(Configuration config, HTableDescriptor table, int cf, int rows) throws IOException{ } /* Build hash table on table * where key is the value of the row * and value is the row name */ public static void buildHashTable(HashMap> hashT, HTableDescriptor table, int rows) throws IOException{ HTable htable = new HTable(HBaseConfiguration.create(), table.getName()); } /* Perform single-pass hash join */ public static void HashJoin(HashMap> hashT, HTableDescriptor table1, HTableDescriptor table2, int rows, HTableDescriptor resultTable) throws IOException{ } public static void main(String[] args) throws IOException { Configuration config = HBaseConfiguration.create(); // Define the minimum and maximum column families of each table int cf_max=5; int cf_min=2; try { final HBaseAdmin admin = new HBaseAdmin(config); System.out.println("Defining tables\n."); // Define two tables HTableDescriptor table1 = new HTableDescriptor( TableName.valueOf("mytable1")); HTableDescriptor table2 = new HTableDescriptor( TableName.valueOf("mytable2")); // Define results table HTableDescriptor table3 = new HTableDescriptor( TableName.valueOf("results")); System.out.println("Tables were successfully defined\n"); // Get number of column families the tables will have int cf=randInt(cf_min,cf_max); System.out.print("Creating column families\n"); // Creating column families for the two tables createColumnFamilies(table1,cf); createColumnFamilies(table2,cf); /* Creating column families for the results table * Results table must have only 1 column family */ createColumnFamilies(table3,1); System.out.println("Column families were successfully created\n"); System.out.print("Creating tables\n"); // Creating two tables createOrOverwrite(admin, table1); createOrOverwrite(admin, table2); // Creating results table createOrOverwrite(admin, table3); System.out.println("Tables were succesfully created\n"); // Get number of rows the table will have int rows1=randInt(20,40); int rows2=randInt(20,40); System.out.print("Inserting random values in tables\n"); // Insert random values in the two tables randomizeTable(config, table1, cf, rows1); randomizeTable(config, table2, cf, rows2); System.out.println("Random values were successfully inserted"); // Build hash table on table with the smaller number of rows and perform single-pass hash join HashMap> hashT1 = new HashMap>(); if(rows1