An Overview of Result Sets in the Node.js Driver
I highly recommend using result sets as the default means through which you execute queries with the Node.js driver. Read on to see why.
Result sets are great for huge queries or when you don’t know how many rows will be returned. And how often do you really know how many rows will be returned from a query, anyway? Probably not very often, which is why I highly recommend using result sets as the default means through which you execute queries with the Node.js driver.
The exception to this rule is when you’re after a specific number of rows, such as single-row fetches and pagination queries, where the number of rows is relatively low. In these cases, the default query result method will perform faster.
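To make this concrete, here’s a minimal sketch of a pagination query run with the default execute, using the test table created in the next section. The bind names, page size, and the 12c row-limiting clause are my own assumptions, not part of the original test setup; the point is simply that when the row count is known and small, maxRows and the default fetch are a good fit.
//assumes `connection` is an open node-oracledb connection and the database
//supports the 12c row-limiting clause
connection.execute(
  'select id, name, email ' +
  'from test_user_table ' +
  'order by id ' +
  'offset :offset rows fetch next :pagesize rows only',
  {offset: 0, pagesize: 25}, //hypothetical first page of 25 rows
  {maxRows: 25},             //we know the maximum row count up front
  function(err, results) {
    if (err) throw err;

    console.log('Rows on this page: ', results.rows.length);
  }
);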
Test Setup
Let’s take a look at an example. We’ll start by creating a new table and populating it with a lot of random rows. You can adjust the following script as needed, but I populate the test table with about 1.1 gigs of random data. Why 1.1 gigs? Because my Node.js test server only has 1 gig of memory. Hmm, that can’t be good! Check out SQL Developer if you need a tool to execute this script. It took a few minutes to run on my test database (a 1 gig XE server), so be prepared for that.
create table test_user_table(
  id       number,
  name     varchar2(100),
  email    varchar2(100),
  location varchar2(100),
  dob      date
);
declare
  l_target_size_gb  number := 1.1;
  l_current_size_gb number;
  l_idx             number := 0;

  function get_table_size
    return number
  is
    l_size_gb number;
  begin
    select bytes/1024/1024/1024 gb
    into l_size_gb
    from user_segments
    where segment_type = 'TABLE'
      and segment_name = 'TEST_USER_TABLE';

    return l_size_gb;
  end;
begin
  insert into test_user_table
  with test_data as (
    select rownum as rn,
      initcap(dbms_random.string('l', dbms_random.value(10, 25))) || ' ' ||
        initcap(dbms_random.string('l', dbms_random.value(10, 30))) as name,
      initcap(dbms_random.string('l', dbms_random.value(10, 50))) as loc,
      to_date('01-01-2015', 'DD-MM-YYYY') + dbms_random.value(-36500, 0) dob
    from dual
    connect by level <= 100000
  )
  select rn,
    name,
    replace(lower(name), ' ') || '@company.com',
    loc,
    dob
  from test_data;

  l_current_size_gb := get_table_size();

  while l_current_size_gb < l_target_size_gb
  loop
    l_idx := l_idx + 1;

    insert into test_user_table
    select id + (l_idx * 100000),
      name,
      email,
      location,
      dob
    from test_user_table
    where id between 1 and 100000;

    l_current_size_gb := get_table_size();
  end loop;

  commit;
end;
/
In my case, I ended up with 7.2 million rows — fun! Let’s see what happens when we try to ingest all that data with the default execute method. We’ll start by putting the database configuration info in a file named dbconfig.js so it can be reused.
module.exports = {
  user: "hr",
  password: "welcome",
  connectString: "db-server/XE"
};
Using the Default Execute
Next, we’ll create a file named standard-fetch.js to get the data.
var oracledb = require('oracledb');
var dbConfig = require(__dirname + '/dbconfig.js');

oracledb.getConnection(
  dbConfig,
  function(err, connection) {
    if (err) throw err;

    connection.execute(
      'select * from test_user_table',
      function(err, results) {
        if (err) throw err;

        console.log('Rows retrieved: ', results.rows.length);

        //do work on the rows here
      }
    );
  }
);
Then, we can test standard-fetch.js in Node.js.
$ node standard-fetch.js
Rows retrieved: 100
Woohoo, it worked! Wait, only 100 rows? What happened? Oh yeah, there’s a maxRows setting that defaults to 100. Hopefully, this doesn’t come as any surprise; this is the default behavior of the driver.
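As an aside, if 100 rows is too low as a general default, maxRows can also be raised once on the base driver class instead of per execute call. Here’s a minimal sketch of that option, assuming node-oracledb 1.x behavior (the value of 500 is arbitrary):
var oracledb = require('oracledb');

//raise the driver-wide default; individual execute calls can still override it
oracledb.maxRows = 500;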
Let’s crank maxRows up a bit in a new file named standard-fetch2.js. I’ll try eight million just in case our table grows a little.
var oracledb = require('oracledb');
var dbConfig = require(__dirname + '/dbconfig.js');

oracledb.getConnection(
  dbConfig,
  function(err, connection) {
    if (err) throw err;

    connection.execute(
      'select * from test_user_table',
      {}, //no binds
      {
        maxRows: 8000000
      },
      function(err, results) {
        if (err) throw err;

        console.log('Rows retrieved: ', results.rows.length);

        //do work on the rows here
      }
    );
  }
);
And when we test standard-fetch2.js in Node.js...
$ node standard-fetch2.js
Segmentation fault
Segmentation fault? That’s not good! Depending on your configuration, you might just get killed, or it’s also possible that the execute completes and you get all 7.2 million records. It all depends on the query, the system’s resources, load on the system, etc.
Using Result Sets
Clearly, moving this amount of data around at once isn’t reliable. Let’s see how we could do this with result sets instead! Here’s a solution I’ll save in a file named result-set.js. I set the resultSet property of the options object passed to the execute method to true and then set up a recursive function to process the result set. Note that the maxRows property is ignored when using result sets.
var oracledb = require('oracledb');
var dbConfig = require(__dirname + '/dbconfig.js');

oracledb.getConnection(
  dbConfig,
  function(err, connection) {
    if (err) throw err;

    connection.execute(
      'select * from test_user_table',
      {}, //no binds
      {
        resultSet: true
      },
      function(err, results) {
        var rowsProcessed = 0;
        var startTime;

        if (err) throw err;

        startTime = Date.now();

        function processResultSet() {
          results.resultSet.getRow(function(err, row) {
            if (err) throw err;

            if (row) {
              rowsProcessed += 1;

              //do work on the row here

              processResultSet(); //try to get another row from the result set
              return; //exit recursive function prior to closing result set
            }

            console.log('Finish processing ' + rowsProcessed + ' rows');
            console.log('Total time (in seconds):', ((Date.now() - startTime)/1000));

            results.resultSet.close(function(err) {
              if (err) console.error(err.message);

              connection.release(function(err) {
                if (err) console.error(err.message);
              });
            });
          });
        }

        processResultSet();
      }
    );
  }
);
Then we can run result-set.js in Node.js.
$ node result-set.js
Finish processing 7200000 rows
Total time (in seconds): 735.622
Woot, it finished without errors! But 12 minutes... can’t we do better than that? Of course!
Tuning Result Sets
There are two things we can do to tune result sets:
- Adjust the row prefetching
- Use getRows instead of getRow
Row prefetching allows multiple rows to be returned from Oracle Database to Node.js in each network round-trip. We can control how many rows are prefetched via the prefetchRows property of either the base driver class or the options object passed to the execute method of a connection. The default value is 100.
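For completeness, here’s what setting the prefetch default on the base driver class looks like, as opposed to passing prefetchRows per execute as in the example below. This is a sketch assuming node-oracledb 1.x, where prefetchRows is a settable driver property:
var oracledb = require('oracledb');

//every subsequent execute will prefetch 1,000 rows per round-trip,
//unless an individual call overrides it in its options object
oracledb.prefetchRows = 1000;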
When rows are returned from the database, they are queued in an internal buffer. We can retrieve those rows by invoking either the getRow or getRows methods of the ResultSet class. The context switch from the JavaScript event loop to the driver’s C code is generally very fast, but it’s not free. The getRows method can be used to retrieve many rows from the internal buffer at once, greatly reducing the number of context switches needed to get all the rows.
Let’s see what happens when we set prefetchRows to 1,000 and use getRows with a matching number (though they don’t have to match). I’ll put the following code in a file named result-set2.js.
var oracledb = require('oracledb');
var dbConfig = require(__dirname + '/dbconfig.js');

oracledb.getConnection(
  dbConfig,
  function(err, connection) {
    if (err) throw err;

    connection.execute(
      'select * from test_user_table',
      {}, //no binds
      {
        resultSet: true,
        prefetchRows: 1000
      },
      function(err, results) {
        var rowsProcessed = 0;
        var startTime;

        if (err) throw err;

        startTime = Date.now();

        function processResultSet() {
          results.resultSet.getRows(1000, function(err, rows) {
            if (err) throw err;

            if (rows.length) {
              rows.forEach(function(row) {
                rowsProcessed += 1;

                //do work on the row here
              });

              processResultSet(); //try to get more rows from the result set
              return; //exit recursive function prior to closing result set
            }

            console.log('Finish processing ' + rowsProcessed + ' rows');
            console.log('Total time (in seconds):', ((Date.now() - startTime)/1000));

            results.resultSet.close(function(err) {
              if (err) console.error(err.message);

              connection.release(function(err) {
                if (err) console.error(err.message);
              });
            });
          });
        }

        processResultSet();
      }
    );
  }
);
And when we run it in Node.js…
$ node result-set2.js
Finish processing 7200000 rows
Total time (in seconds): 99.069
Nice! With relatively minor changes to our code, we got the execution time down from 12.26 minutes to 1.65 minutes. That’s over 7x faster. Not too shabby! Of course, you’ll need to adjust these numbers in your environment to get the best balance between performance and resource utilization.
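If you want to experiment with that balance, one small refactor (my suggestion, not part of the original code) is to pull the batch size into a single constant so the prefetch size and the getRows count stay in sync while you test different values:
var BATCH_SIZE = 1000; //single knob to tune; try 100, 500, 5000, etc.

//...in the execute options:
//{ resultSet: true, prefetchRows: BATCH_SIZE }

//...and when draining the result set:
//results.resultSet.getRows(BATCH_SIZE, function(err, rows) { ... });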