Search This Blog

Tuesday 10 September 2013

Greenplum : Command-based Web External Tables accessing Oracle

Command based external tables allow you to execute a shell command or script to return data. These make then ideal for consuming data from external systems. In this example we create an external table that connects to Oracle using JDBC to query EMP data.

The example below is based on a Java Class which allows you to connect to any RDBMS and run any SQL query with a list of comma separated columns defined as parameters.

1. Create a script which simply runs a Java Class which return data from a query to any RDBMS.

run.sh

java -cp $CLASSPATH /Users/gpadmin/demos/jdbc-externaltable/ojdbc6.jar:/Users/gpadmin/demos/jdbc-externaltable/jdbc_external.jar:. pivotal.au.greenplum.externaltable.InvokeQuery "oracle.jdbc.OracleDriver" "jdbc:oracle:thin:@10.32.243.125:1521/ora11gr2" "scott" "tiger" "|" "empno,ename,deptno" "select empno,ename,deptno from emp"

Usage as follows

Usage:

java -cp $CLASSPATH pivotal.au.greenplum.externaltable.InvokeQuery {driver-class} {url} {username} {passwd} {delimiter} {column-list} {query}

driver-class: JDBC driver class to load at runtime

url: JDBC connection URL

username: database username

passwd: database password

delimiter: What delimiter to use to separate fields

column-list: List of columns to output to console

query: SQL query to run against external RDBMS

2. Create an external table as shown below.

CREATE EXTERNAL WEB TABLE oracle_emps 
(empno int, ename character varying(20), deptno int)
EXECUTE '/Users/gpadmin/demos/jdbc-externaltable/run.sh' on host
FORMAT 'TEXT' (DELIMITER '|');

3. Load into Greenplum ensuring to use the fully qualified path to run.sh on the file system.
  
gpadmin=# \i exttab.sql;
DROP EXTERNAL TABLE
Time: 13.052 ms
CREATE EXTERNAL TABLE
Time: 5.190 ms  

4. Access as follows
  
gpadmin=# select * from oracle_emps;
 empno | ename  | deptno 
-------+--------+--------
  7369 | SMITH  |     20
  7499 | ALLEN  |     30
  7521 | WARD   |     30
  7566 | JONES  |     20
  7654 | MARTIN |     30
  7698 | BLAKE  |     30
  7782 | CLARK  |     10
  7788 | SCOTT  |     20
  7839 | KING   |     10
  7844 | TURNER |     30
  7876 | ADAMS  |     20
  7900 | JAMES  |     30
  7902 | FORD   |     20
  7934 | MILLER |     10
(14 rows)

Time: 557.705 ms
gpadmin=# select * from oracle_emps where deptno = 20;
 empno | ename | deptno 
-------+-------+--------
  7369 | SMITH |     20
  7566 | JONES |     20
  7788 | SCOTT |     20
  7876 | ADAMS |     20
  7902 | FORD  |     20
(5 rows)

Time: 543.120 ms  

Finally the java class is defined as follows
  
package pivotal.au.greenplum.externaltable;

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;

public class InvokeQuery 
{
    private String url;
    private String username;
    private String passwd;
    private String columnList;
    private String delimeter;
    private String driverClassName;
    private String query;
    private Connection conn = null;
    
 public InvokeQuery() 
 {
 }
 
 private void run (String[] args) throws SQLException
 {
  driverClassName = args[0];
  url = args[1];
  username = args[2];
  passwd = args[3];
  delimeter = args[4];
  columnList = args[5];
  query = args[6];
  
  Statement stmt = null;
  ResultSet rset = null;
  
  String[] columns = splitColumns(columnList);
  
  try 
  {
   conn = getConnection(driverClassName);
   stmt = conn.createStatement();
   rset = stmt.executeQuery(query);
   int size = columns.length;
   while (rset.next())
   { 
    int i = 0;
    // go through columns and output data
    for (String column: columns)
    {
     i++;
     if (i < size)
     {
      System.out.print(rset.getString(column) + "" + delimeter);
     }
     else
     {
      System.out.print(rset.getString(column) + "\n");
     }
    }
   }
   
  } 
  catch (Exception e) 
  {
   e.printStackTrace();
  } 
  finally
  {
   if (rset != null)
   {
    rset.close();
   }
   
   if (stmt != null)
   {
    stmt.close();
   }
   
   if (conn != null)
   {
    conn.close();
   }
  }
 }
 
 private String[] splitColumns (String columArray)
 {
  return columArray.split(",");
 }
  
 private Connection getConnection(String driverClass) throws SQLException, ClassNotFoundException
 {
  Class.forName(driverClassName); 
  conn = DriverManager.getConnection(url, username, passwd);
  return conn; 
 }
 
 public static void main(String[] args) throws Exception 
 {
  InvokeQuery invokeQuery = new InvokeQuery();
  
  if (args.length != 7)
  {
    System.out.println("Less then 7 arguments provided, usage as follows");
    System.out.println("\nUsage: \n\n\tjava pivotal.au.greenplum.externaltable.InvokeQuery <driver-class> <url> <username> <passwd> <delimeter> <colum-list> <query> \n");
    System.exit(-1);
  }
  
  invokeQuery.run(args);
  
 }

}  

No comments: