Benchmark with MariaDB 10.3

This section compares the performance between using SparkSQL versus MariaDB (a MySQL fork) for processing Thailand Government Spending queries.

Hardware and Software Setup

Result

Without specific optimization, all queries in SparkSQL finish faster than plain MariaDB.

All queries are listed below. Note that for MariaDB, we create indices on contract.subdep_name and department.subdep_name; otherwise the queries that join contract and deparment tables never finish in 30 min.

Q1) select count(distinct(proj_no)) as count from contract;

Q2) select sum(max)/1E6 as total_budget from (select max(proj_mny) as max from contract group by proj_no) tbl;


Q3) select last_day(contrct_date) as month, sum(budget)/1E6 as total_budget, sum(price)/1E6 as total_price
from (select proj_no, contrct_date, max(proj_mny) as budget, sum(contrct_price) as price
      from contract group by proj_no, contrct_date
) tbl
group by last_day(contrct_date)
order by month

Q4) select typ_name, count(*) as count
from (select proj_no, typ_name from contract group by proj_no, typ_name) tbl
group by typ_name;

Q5) select proj_name, sum(contrct_price)/1E6 price from contract group by proj_name order by price desc limit 10;

Q6) select provnc, sum(contrct_price)/1E6 as price
from contract, department
where contract.subdep_name = department.subdep_name
group by provnc
order by price desc
limit 10;

Q7) select org_name, sum(contrct_price)/1E6 as price
from contract, department
where contract.subdep_name = department.subdep_name
group by org_name
order by price desc
limit 10;

Q8) select dep_name, typ_name, sum(contrct_price)/1E6 as price
from contract, department
where contract.subdep_name = department.subdep_name
and department.dep_name in (select * from
   (select dep_name
   from contract, department
   where contract.subdep_name = department.subdep_name
   group by dep_name
   order by sum(contrct_price) desc
   limit 10) tbl)
)
group by dep_name, typ_name;

Q9) select dep_name, mthd_name, sum(contrct_price)/1E6 as price
from contract, department
where contract.subdep_name = department.subdep_name
and department.dep_name in (select * from
   (select dep_name
   from contract, department
   where contract.subdep_name = department.subdep_name
   group by dep_name
   order by sum(contrct_price) desc
   limit 10) tbl)
)
group by dep_name, mthd_name;

Q10) SELECT provnc, sum(contrct_price)/1E6 as price
FROM contract, (select dep_no, subdep_name,provnc FROM department GROUP BY dep_no, subdep_name,provnc) dep
WHERE contract.subdep_name = dep.subdep_name
GROUP BY provnc;