From a169ac523d166c6cbba918b152a76782176c3e88 Mon Sep 17 00:00:00 2001 From: Joshua Watt Date: Fri, 6 Oct 2023 09:36:44 -0600 Subject: hashserv: Add API to clean unused entries Adds an API to remove unused entries in the outhash database based on age and if they are referenced by any unihash Signed-off-by: Joshua Watt Signed-off-by: Richard Purdie --- lib/hashserv/client.py | 5 +++++ lib/hashserv/server.py | 20 +++++++++++++++++++- lib/hashserv/tests.py | 19 +++++++++++++++++++ 3 files changed, 43 insertions(+), 1 deletion(-) diff --git a/lib/hashserv/client.py b/lib/hashserv/client.py index eeafeabda..f676d267f 100644 --- a/lib/hashserv/client.py +++ b/lib/hashserv/client.py @@ -105,6 +105,10 @@ class AsyncClient(bb.asyncrpc.AsyncClient): await self._set_mode(self.MODE_NORMAL) return await self.send_message({"remove": {"where": where}}) + async def clean_unused(self, max_age): + await self._set_mode(self.MODE_NORMAL) + return await self.send_message({"clean-unused": {"max_age_seconds": max_age}}) + class Client(bb.asyncrpc.Client): def __init__(self): @@ -120,6 +124,7 @@ class Client(bb.asyncrpc.Client): "reset_stats", "backfill_wait", "remove", + "clean_unused", ) def _get_async_client(self): diff --git a/lib/hashserv/server.py b/lib/hashserv/server.py index d52e1d46d..45bf476bf 100644 --- a/lib/hashserv/server.py +++ b/lib/hashserv/server.py @@ -4,7 +4,7 @@ # from contextlib import closing, contextmanager -from datetime import datetime +from datetime import datetime, timedelta import enum import asyncio import logging @@ -187,6 +187,7 @@ class ServerClient(bb.asyncrpc.AsyncServerConnection): 'reset-stats': self.handle_reset_stats, 'backfill-wait': self.handle_backfill_wait, 'remove': self.handle_remove, + 'clean-unused': self.handle_clean_unused, }) def validate_proto_version(self): @@ -542,6 +543,23 @@ class ServerClient(bb.asyncrpc.AsyncServerConnection): self.write_message({"count": count}) + async def handle_clean_unused(self, request): + max_age = 
request["max_age_seconds"] + with closing(self.db.cursor()) as cursor: + cursor.execute( + """ + DELETE FROM outhashes_v2 WHERE created<:oldest AND NOT EXISTS ( + SELECT unihashes_v2.id FROM unihashes_v2 WHERE unihashes_v2.method=outhashes_v2.method AND unihashes_v2.taskhash=outhashes_v2.taskhash LIMIT 1 + ) + """, + { + "oldest": datetime.now() - timedelta(seconds=max_age) + } + ) + count = cursor.rowcount + + self.write_message({"count": count}) + def query_equivalent(self, cursor, method, taskhash): # This is part of the inner loop and must be as fast as possible cursor.execute( diff --git a/lib/hashserv/tests.py b/lib/hashserv/tests.py index a3e066406..f343c586b 100644 --- a/lib/hashserv/tests.py +++ b/lib/hashserv/tests.py @@ -158,6 +158,25 @@ class HashEquivalenceCommonTests(object): result_outhash = self.client.get_outhash(self.METHOD, outhash, taskhash) self.assertIsNone(result_outhash) + def test_clean_unused(self): + taskhash, outhash, unihash = self.test_create_hash() + + # Clean the database, which should not remove anything because all hashes are in-use + result = self.client.clean_unused(0) + self.assertEqual(result["count"], 0) + self.assertClientGetHash(self.client, taskhash, unihash) + + # Remove the unihash. The row in the outhash table should still be present + self.client.remove({"unihash": unihash}) + result_outhash = self.client.get_outhash(self.METHOD, outhash, taskhash, False) + self.assertIsNotNone(result_outhash) + + # Now clean with no minimum age which will remove the outhash + result = self.client.clean_unused(0) + self.assertEqual(result["count"], 1) + result_outhash = self.client.get_outhash(self.METHOD, outhash, taskhash, False) + self.assertIsNone(result_outhash) + def test_huge_message(self): # Simple test that hashes can be created taskhash = 'c665584ee6817aa99edfc77a44dd853828279370' -- cgit 1.2.3-korg