Comparación del tiempo de lectura de RocksDB, Speedb y LMDB
Preparación
Utilizaré el conjunto de datos y la base de datos de mi artículo anterior Comparación de almacenamiento de clave-valor embebido para python
python
import os
import json
import random
import string
from datetime import datetime
# Directories where the generated dataset will live.
workspace_dir = '/tmp/data'
output_dir = os.path.join(workspace_dir, 'jsons')
os.makedirs(output_dir, exist_ok=True)
def generate_random_string(length):
    """Return a random string of *length* characters drawn from
    letters, digits, punctuation and the space character."""
    alphabet = string.ascii_letters + string.digits + string.punctuation + ' '
    return ''.join(random.choice(alphabet) for _ in range(length))
# Materialize 1,000,000 JSON documents, one file per record, each with a
# random text payload of 0-2000 characters and a creation timestamp.
for doc_id in range(1000000):
    record = {
        'text': generate_random_string(random.randint(0, 2000)),
        'created_at': int(datetime.now().timestamp()),
    }
    path = os.path.join(output_dir, f'{doc_id}.json')
    with open(path, 'w') as fh:
        json.dump(record, fh)
Iterador para archivos:
python
import os
import json
def json_file_reader(directory):
    """Yield ``(id, raw_json)`` pairs for every ``<int>.json`` file in
    *directory*, in ascending numeric order of the file-name stem."""
    stem = lambda name: int(os.path.splitext(name)[0])
    candidates = (n for n in os.listdir(directory) if n.endswith('.json'))
    for name in sorted(candidates, key=stem):
        path = os.path.join(directory, name)
        # Skip anything that is not a regular file (e.g. a directory named *.json).
        if os.path.isfile(path):
            with open(path, 'r', encoding='utf-8') as fh:
                yield stem(name), fh.read()
Medición decorador:
python
import time
from functools import wraps
def measure(attempts=1000):
    """Decorator factory: benchmark the wrapped function over *attempts* calls.

    Prints the minimum, maximum and average execution time in milliseconds
    and returns the result of the last call.
    """
    def decorator(func):
        @wraps(func)
        def wrapper(*args, **kwargs):
            execution_times = []
            for _ in range(attempts):
                # perf_counter() is monotonic and high-resolution, unlike
                # time.time(), so it is the correct clock for benchmarking.
                start_time = time.perf_counter()
                result = func(*args, **kwargs)  # Call the original function
                end_time = time.perf_counter()
                # Convert seconds to milliseconds before aggregating.
                execution_times.append((end_time - start_time) * 1000)
            min_time = min(execution_times)
            max_time = max(execution_times)
            avg_time = sum(execution_times) / attempts
            print(f"Tiempo de ejecución de {func.__name__} sobre {attempts} intentos:")
            print(f"Mínimo: {min_time:.2f} ms")
            print(f"Máximo: {max_time:.2f} ms")
            print(f"Promedio: {avg_time:.2f} ms")
            return result
        return wrapper
    return decorator
Lmdb
python
import lmdb
# creando la base de datos
lmdb_dir = workspace_dir + '/lmdb'
# Reserve a 100 GB memory map so the environment never hits its size limit.
with lmdb.open(lmdb_dir, 10 ** 11) as env, env.begin(write=True) as txn:
    # Keys are 4-byte big-endian document ids; values are the raw JSON bytes.
    for key, payload in json_file_reader(output_dir):
        txn.put(key.to_bytes(4, 'big'), payload.encode())
# lectura
@measure(attempts=1000)
def read_lmdb():
    """Open the LMDB environment, fetch key 123456 and decode the value."""
    with lmdb.open(lmdb_dir) as env, env.begin() as txn:
        key = (123456).to_bytes(4, 'big')
        return txn.get(key).decode()
read_lmdb()
RocksDb
python
from rocksdict import Rdict
# creando la base de datos
rocksdict_dir = workspace_dir + '/rocksdb'
# Load every JSON document; rocksdict serializes int keys / str values itself.
with Rdict(rocksdict_dir) as db:
    for key, payload in json_file_reader(output_dir):
        db[key] = payload
# lectura
@measure(attempts=1000)
def read_rockdb():
    """Open the RocksDB store and read the value stored under key 123456."""
    with Rdict(rocksdict_dir) as db:
        return db[123456]
read_rockdb()
Vamos a probar un poco con diferentes parámetros:
RocksDb en modo RAW:
python
from rocksdict import Rdict, Options
# creando la base de datos
rocksdict_raw_dir = workspace_dir + '/rocksdb_raw'
# Raw mode bypasses rocksdict's serialization: keys and values must be bytes.
with Rdict(rocksdict_raw_dir, options=Options(raw_mode=True)) as db:
    for key, payload in json_file_reader(output_dir):
        db[key.to_bytes(4, 'big')] = payload.encode()
# prueba
@measure(attempts=1000)
def read_rockdb_raw():
    """Read key 123456 from the raw-mode RocksDB store (bytes in, bytes out)."""
    with Rdict(rocksdict_raw_dir, options=Options(raw_mode=True)) as db:
        return db[(123456).to_bytes(4, 'big')].decode()
read_rockdb_raw()
RocksDb con compresión:
python
from rocksdict import Rdict, Options, DBCompressionType
# creando la base de datos
def db_options():
    """Build RocksDB options with zstd compression enabled."""
    options = Options()
    options.set_compression_type(DBCompressionType.zstd())
    return options
rocksdict_zstd_dir = workspace_dir + '/rocksdb_zstd'
# Same dataset, but stored with zstd compression enabled.
with Rdict(rocksdict_zstd_dir, options=db_options()) as db:
    for key, payload in json_file_reader(output_dir):
        db[key] = payload
# prueba
@measure(attempts=1000)
def read_rockdb_zstd():
    """Read key 123456 from the zstd-compressed RocksDB store."""
    with Rdict(rocksdict_zstd_dir) as db:
        return db[123456]
read_rockdb_zstd()
RocksDb en modo de solo lectura:
python
from rocksdict import Rdict, AccessType
@measure(attempts=1000)
def read_rockdb():
    """Read key 123456 with the database opened in read-only mode."""
    with Rdict(rocksdict_dir, access_type=AccessType.read_only()) as db:
        return db[123456]
read_rockdb()
rocksdict_dir_secondary = workspace_dir + '/rocksdb_secondary'
@measure(attempts=1000)
def read_rockdb():
    """Read key 123456 through a secondary instance of the primary database."""
    with Rdict(rocksdict_dir, access_type=AccessType.secondary(rocksdict_dir_secondary)) as db:
        return db[123456]
read_rockdb()
Speedb
Para probar Speedb solo necesitamos cambiar la importación de rocksdict a speedict.
Resultado
nombre | espacio ocupado | tiempo de escritura | lectura mín | lectura prom | lectura máx |
---|---|---|---|---|---|
lmdb | 1.1G | 1m 26s | 0.17ms | 0.31ms | 3.68ms |
rocksdb | 1.0G | 1m 29s | 2.85ms | 6.32ms | 30.85ms |
rocksdb (raw) | 1.0G | 1m 36s | 4.04ms | 10.03ms | 37.07ms |
rocksdb (zstd) | 878M | 1m 32s | 3.52ms | 6.85ms | 18.18ms |
rocksdb (solo lectura) | 1.0G | - | 2.41ms | 3.65ms | 10.68ms |
rocksdb (secundario) | 1.0G | - | 4.71ms | 5.47ms | 31.92ms |
speedb | 1.1G | 1m 31s | 4.23ms | 10.44ms | 101.32ms |
speedb (raw) | 1.1G | 1m 36s | 4.45ms | 15.70ms | 55.91ms |
speedb (zstd) | 876M | 1m 36s | 4.22ms | 13.49ms | 87.59ms |
speedb (solo lectura) | 1.1G | - | 3.95ms | 5.19ms | 16.68ms |
speedb (secundario) | 1.1G | - | - | - | - |
- Speedb no soporta modo secundario
- Estoy sorprendido de que speedb tenga una latencia peor que rocksdb, especialmente en la columna máxima
- Es extraño que rocksdb en modo raw funcione con peor latencia
Resumen
Lmdb es mucho más rápido según mis resultados: ¡más de 10x!