BLAS
Last updated on 2024-02-06 | Edit this page
Overview
Questions
- How much can parallel libraries improve time to solution for your program?
Objectives
- Introduce the Basic Linear Algebra Subroutines (BLAS)
- Show that BLAS routines are used from R for statistical calculations
- Demonstrate that parallelisation can improve time to solution
Introduction
R
# Attach the flexiblas package; the flexiblas_* functions used throughout this
# file are expected to come from it.
library(flexiblas)
# Bare top-level queries of the FlexiBLAS state (presumably: availability,
# version, current backend, known backends, loaded backends -- inferred from
# the function names; confirm against the flexiblas package documentation).
# NOTE(review): the results are not assigned or printed explicitly. They are
# auto-printed interactively or under Rscript, but if this file is loaded via
# source() with default arguments the values are silently discarded -- confirm
# whether explicit print() calls are wanted.
flexiblas_avail()
flexiblas_version()
flexiblas_current_backend()
flexiblas_list()
flexiblas_list_loaded()
# Thin wrapper: hand back whatever flexiblas_get_num_threads() reports.
getthreads <- function() flexiblas_get_num_threads()
# Announce a thread-count change on stdout, then apply it through FlexiBLAS.
#   thr   - thread count passed on to flexiblas_set_num_threads()
#   label - optional prefix for the announcement line (default "")
# The value of flexiblas_set_num_threads(thr) is the function's result.
setthreads <- function(thr, label = "") {
  cat(label, "Setting", thr, "threads\n", sep = " ")
  flexiblas_set_num_threads(thr)
}
# Announce a backend change on stdout, then load the named backend and make it
# the active one.
#   backend - backend name passed on to flexiblas_load_backend()
#   label   - optional prefix for the announcement line (default "")
# The value of flexiblas_switch() is the function's result.
setback <- function(backend, label = "") {
  cat(label, "Setting", backend, "backend\n", sep = " ")
  loaded <- flexiblas_load_backend(backend)
  flexiblas_switch(loaded)
}
#' Time R expressions under a given BLAS thread count
#'
#' Calls `setthreads(threads, label = text)`, evaluates `expr` inside
#' `system.time()`, prints the resulting timing, and returns the value of
#' `expr`.
#'
#' WARNING: do not use PT recursively (do not call PT inside `expr`).
#'
#' Usage:
#'   variable-for-result = PT(your-num-threads, a-quoted-text-comment, {
#'     expression
#'     expression
#'     ...
#'     expression-to-assign
#'   })
#'
#' @param threads Thread count handed to `setthreads()`.
#' @param text Label printed by `setthreads()` alongside the thread count.
#' @param expr One expression, or several wrapped in `{ }`; the value of the
#'   last one is returned.
#' @return The value of `expr`.
PT <- function(threads, text = "", expr) {
  setthreads(threads, label = text)
  # `expr` is a promise: it is forced here, inside the timed region, and its
  # value is kept in this function's frame.
  timing <- system.time(result <- expr)
  print(timing)
  result
}
R
# Pull in the helper file; setthreads() and setback() used below are expected
# to be defined there.
source("flexiblas_setup.R")

# Report the footprint of a 5e4 x 2e3 matrix and the core count of this machine.
memuse::howbig(5e4, 2e3)
parallel::detectCores()

# Simulated data: a 5e4 x 2e3 standard-normal matrix, unit coefficients, and
# standard-normal noise. The order of the two rnorm() calls is kept so the
# random number stream is consumed exactly as before.
x <- matrix(rnorm(1e8), nrow = 5e4, ncol = 2e3)
beta <- rep(1, ncol(x))
err <- rnorm(nrow(x))
y <- x %*% beta + err

# Response and predictors combined into one data frame with columns
# y, x1, x2, ...
# NOTE(review): `data` is not referenced by the benchmarks that follow in this
# listing -- confirm whether it is still needed.
data <- as.data.frame(cbind(y, x))
names(data) <- c("y", paste0("x", seq_len(ncol(x))))

# Select the backend used by every benchmark below.
setback("OPENBLAS")
# Run `task()` once for each thread count 2^0, 2^1, ..., 2^max_power, calling
# setthreads() with `label` before each run and printing the system.time()
# result. `task` is a zero-argument function rather than an expression
# argument so the work is redone on every iteration (a lazily evaluated
# argument would be computed only once).
time_over_threads <- function(label, max_power, task) {
  for (threads in 2^(0:max_power)) {
    setthreads(threads, label)
    print(system.time(task()))
  }
  invisible(NULL)
}

# qr --------------------------------------
time_over_threads("qr", 4, function() qr(x, LAPACK = TRUE))

# prcomp --------------------------------------
time_over_threads("prcomp", 4, function() prcomp(x))

# princomp --------------------------------------
time_over_threads("princomp", 4, function() princomp(x))

# crossprod --------------------------------------
time_over_threads("crossprod", 5, function() crossprod(x))

# %*% --------------------------------------------
time_over_threads("%*%", 5, function() t(x) %*% x)
BASH
#!/bin/bash
#SBATCH -J flexiblas
#SBATCH -A CSC489
#SBATCH -p batch
#SBATCH --nodes=1
#SBATCH --mem=0
#SBATCH -t 00:15:00
#SBATCH -e ./flexiblas.e
#SBATCH -o ./flexiblas.o
#SBATCH --open-mode=truncate
## Batch job that runs flexiblas_bench.R with FlexiBLAS preloaded.
## assumes this repository was cloned in your home area
## Stop here if the directory is missing; otherwise the job would carry on
## and try to run the benchmark from the wrong working directory.
cd ~/R4HPC/code_3 || exit 1
pwd
## modules are specific to andes.olcf.ornl.gov
module load openblas/0.3.17-omp
module load flexiblas
## Register the OpenBLAS shared library with FlexiBLAS under the name
## "OpenBLAS". Paths are quoted so an unusual install prefix cannot be
## word-split.
flexiblas add OpenBLAS "$OLCF_OPENBLAS_ROOT/lib/libopenblas.so"
## Preload the FlexiBLAS library for every process started from here on
## (including R, loaded next).
export LD_PRELOAD="$OLCF_FLEXIBLAS_ROOT/lib64/libflexiblas.so"
module load r
echo -e "loaded R with FlexiBLAS"
module list
Rscript flexiblas_bench.R
BASH
#!/bin/bash
#PBS -N fx
#PBS -l select=1:ncpus=128,walltime=00:50:00
#PBS -q qexp
#PBS -e fx.e
#PBS -o fx.o
## Batch job that runs and times flexiblas_bench2.R.
## Stop here if the directory is missing; otherwise the job would carry on
## and try to run the benchmark from the wrong working directory.
cd ~/R4HPC/code_3 || exit 1
pwd
module load R
echo "loaded R"
time Rscript flexiblas_bench2.R