Academic Portfolio

of

Computing

Please consider these are intended to be small samples of my style, training, and work. In most cases, full solutions are intentionally left out to retain the integrity of the courses.

Area Description
Parallel Computing Undergrad level matrix multiply in CUDA.
Parallel Computing Merge Sort optimized for large tasks on OMP.
High Performance Computing 2016 paper review of Atmospheric Science-based PDE/HPC implementation which I really enjoyed.
Algorithms/Data Structures. Binary Search Tree in Java.
Assembler Undergrad homework: Assembler Parsing

Matrix Multiply in C++/CUDA

This assignment explored improving performance of vector addition and matrix multiplication by coalescing memory access and optimizing parallelization.


  // Loop over all sub matrices in block_row of A and block_col of B
  // required to compute Csub. Block multiply each pair of sub matrices
  // and accumulate results
  for (int m = 0;  m < (A.width / FOOTPRINT_SIZE); ++m){
    // Get Asub and Bsub descriptors
    Asub = &A.elements[A.stride * FOOTPRINT_SIZE * block_row + FOOTPRINT_SIZE * m];
    Bsub = &B.elements[B.stride * FOOTPRINT_SIZE * m + FOOTPRINT_SIZE * block_col];

    // Copy ELEMENTS OF  ASub and Bsub into shared memory
    // EACH THREAD loads ONE ELEMENT of ASub and ONE of Bsub
    // Notice: it does not need to be the element it requires to
    //         compute its Cvalue, as long as all elements are
    //         collaboratively read.

    // Notice: every thread declares shared_A and shared_B in shared memory
    //         even though a thread block has only one shared_A and one shared_B
    __shared__ float shared_A[FOOTPRINT_SIZE][FOOTPRINT_SIZE];
    __shared__ float shared_B[FOOTPRINT_SIZE][FOOTPRINT_SIZE];

    // Each thread copies a column of 4 elements from shared_A and the same from shared_B
    for (
    shared_A[thread_row*2][thread_col*2] = Asub[2*thread_row * A.stride + 2*thread_col];
    shared_B[thread_row*2][thread_col*2] = Bsub[2*thread_row * B.stride + 2*thread_col];

    shared_A[thread_row*2+1][thread_col*2] = Asub[(2*thread_row+1) * A.stride + 2*thread_col];
    shared_B[thread_row*2+1][thread_col*2] = Bsub[(2*thread_row+1) * B.stride + 2*thread_col];

    shared_A[thread_row*2][thread_col*2+1] = Asub[2*thread_row * A.stride + 2*thread_col+1];
    shared_B[thread_row*2][thread_col*2+1] = Bsub[2*thread_row * B.stride + 2*thread_col+1];

    shared_A[thread_row*2+1][thread_col*2+1] = Asub[(2*thread_row+1) * A.stride + 2*thread_col+1];
    shared_B[thread_row*2+1][thread_col*2+1] = Bsub[(2*thread_row+1) * B.stride + 2*thread_col+1];

    // Synchronize to ensure all elements are read
    __syncthreads();

    // Do an inproduct of one row of shared_A and one col of shared_B
    // computing one Cvalue by accumulation
#pragma unroll
    for(int e=0; e

Top

Merge Sort in C/OMP


#include 
#include 
#include 
#include "timer.h"

mergesort(int a[],int temp[], int low, int high) {
 int mid;
 if(low1000000)
  mergesort(a,temp,low,mid);   //sort the first half
  #pragma omp task if (high-low>1000000)
  mergesort(a,temp,mid+1,high); //sort the second half
  #pragma omp taskwait
  merge(a,temp,low,high,mid);  //merge them togeather into one sorted list

 }
}

merge(int a[],int temp[], int low, int high, int mid){
 int i, j, k;
 i=low; j=mid+1; k=low;
 while((i<=mid)&&(j<=high)){
   if(a[i] 1 ) LEN  = atoi(argv[1]);
  int i, *x,*temp;
  x=(int *)malloc(sizeof(int)*LEN);
  temp=(int *)malloc(sizeof(int)*LEN);
  if(x==NULL || temp == NULL){
  printf("Out of memory"); exit(0);
  }

  //Fill the array to be sorted with random numbers
  for (i = 0; i < LEN; i++)
    x[i] = rand() % LEN;

#ifdef DEBUG
  printf("before sort:\n");
  for (i = 0; i < LEN; i++) printf("%d ", x[i]);
  printf("\n");
#endif

  initialize_timer ();  //We are just timing the sort
  start_timer();
  #pragma omp parallel
  {
    #pragma omp single
    mergesort(x,temp,0, (LEN-1));
  }
  stop_timer();
  time=elapsed_time();

#ifdef DEBUG
  printf("after sort:\n");
  for (i = 0; i < LEN; i++) printf("%d ", x[i]);
  printf("\n");
#endif

  printf("elapsed time = %lf (sec)\n", time);
  return 0;

                                                       

Top

Binary Search Tree in Java


        public Boolean postorderEval(TreeNode node, BST symTable) throws BSTException{
        // evaluate left tree
        // evaluate right tree (if not null)
        // evaluate operator in node and return Boolean result
                Boolean leftResult = null;
                Boolean rightResult = null;
                Boolean nodeResult = null;

                if (node.getLeft() != null)
                        leftResult = postorderEval(node.getLeft(), symTable);
                if (node.getRight() != null)
                        rightResult = postorderEval(node.getRight(), symTable);

                switch (node.getItem()) {
                        case "true":    nodeResult = true;
                                        break;
                        case "false":   nodeResult = false;
                                        break;
                        case "not":     nodeResult = !leftResult;
                                                        break;
                        case "and":     nodeResult = (leftResult && rightResult);
                                                        break;
                        case "or":              nodeResult = (leftResult || rightResult);
                                                        break;
                        default:                String key = node.getItem();
                                                        nodeResult = symTable.retrieveItem(key).value();
                                                        break;
                }
                return nodeResult;
        }


        public static void main(String[] args){
                System.out.println("Stepwise build infix expression: not not (true or false) and true");

                Tree t = new Tree();
                //BST symTab = new BST();
                System.out.println("tree: "); t.preorderTraverse();
                //System.out.println("result:\n" + t.postorderEval(symTab)+"\n");

                TreeNode a = new TreeNode("true");
                t = new Tree(a);
                System.out.println("tree: "); t.preorderTraverse();
                //System.out.println("result:\n" + t.postorderEval(symTab)+"\n");

                TreeNode b = new TreeNode("false");
                TreeNode or = new TreeNode("or", a, b);
                t = new Tree(or);
                System.out.println("tree: "); t.preorderTraverse();
                //System.out.println("result:\n" + t.postorderEval(symTab)+"\n");

                TreeNode not2 = new TreeNode("not", or);
                t = new Tree(not2);
                System.out.println("tree: "); t.preorderTraverse();
                //System.out.println("result:\n" + t.postorderEval(symTab)+"\n");

                TreeNode not = new TreeNode("not", not2);
                TreeNode and = new TreeNode("and", not, new TreeNode("true"));
                t = new Tree(and);
                System.out.println("tree: "); t.preorderTraverse();
                //System.out.println("result:\n" + t.postorderEval(symTab)+"\n");
        }
                                                       

Top

Parsing in Assembler


line_info_t* asm_pass_one (const char* asm_file_name,
                           const char* sym_file_name) {

  int line_no = 0;
  char line[MAX_LINE_LENGTH];
  const char* tok;
  line_info_t* headInfo = NULL;
  line_info_t* curInfo = NULL;
  line_info_t* newInfo = NULL;
  FILE* psym = NULL;
  bool orig_found = false; // flag for .ORIG
  bool end_found = false;  // flag for .END


  // open the source file and report any error
  FILE *pasm = fopen(asm_file_name,"r");
  if (pasm == NULL)
  { asm_error(ERR_OPEN_READ, asm_file_name);
    exit(EXIT_FAILURE);
  }
 
  // read the lines one at a time using fgets()
  while (fgets(line,sizeof(line),pasm) != NULL)
  { //printf("line is: %s",line);
    line_no++;
    if (sizeof(line) > MAX_LINE_LENGTH)
    { asm_error(ERR_LINE_TOO_LONG);
      exit(EXIT_FAILURE);
    }
    // convert the line to a list of tokens
    tok = tokenize_lc3_line(line);
    //printf("  First token is: %s\n",tok);
   
    // Skip comment-only lines, blank lines
    if (tok)
    { // Some initial error checking for .ORIG and .END
      if ((orig_found) && (util_get_opcode(tok) == OP_ORIG))
         asm_error(ERR_MULTIPLE_ORIG);

      if (!(orig_found))
      { if (util_get_opcode(tok) == OP_ORIG)
          orig_found = true;
        else
          asm_error(ERR_ORIG_NOT_1ST);
      }

      if ((end_found) && (tok))
        asm_error(ERR_END_NOT_LAST);

      if ((!(end_found)) && (util_get_opcode(tok) == OP_END))
        end_found = true;

      // if there is an opcode (e.g. ADD) on the line, then
      opcode_t opcode = util_get_opcode(tok);
      if (opcode == OP_INVALID)
      { // check to see if it is a label
        if (process_label(tok))
        { tok = next_token();
          if (tok)
             opcode = util_get_opcode(tok);
            opcode = util_get_opcode(tok);
        }
      }
      if (tok && opcode == OP_INVALID) // don't make this an else
         asm_error(ERR_MISSING_OP,tok);
      else if (tok)  //found a valid operation, either as the first tok or after a label
      { //printf("  ...and it is a a valid opcode.\n");
        // allocate/initialize a new line_info_t structure
        newInfo = asm_init_line_info(NULL);
        newInfo->opcode = opcode;
        newInfo->lineNum = line_no;

                                                       

Top