about summary refs log tree commit diff
diff options
context:
space:
mode:
author Munyoki Kilyungi 2024-07-03 15:15:37 +0300
committer BonfaceKilz 2024-07-03 16:53:31 +0300
commit 4fd5710dc04b51c0953a2063d9e934dddc13ad6c (patch)
tree 914a30a1f58c2da07eec24b6b0571faff7e5fc62
parent 1dc6d62ec211b7ce1373bc4a0521037886523058 (diff)
download genenetwork3-4fd5710dc04b51c0953a2063d9e934dddc13ad6c.tar.gz
Generate a checksum for all the ttl files.

* scripts/index-genenetwork (hash_generif_graph): Rename to hash_rdf_graph. Generate a checksum of all the turtle files inside the ttl directory that's the basis for the GN virtuoso graph.
(create_xapian_index): Rename hash_generif_graph -> hash_rdf_graph.

Signed-off-by: Munyoki Kilyungi <me@bonfacemunyoki.com>
-rwxr-xr-x scripts/index-genenetwork 14
1 files changed, 8 insertions, 6 deletions
diff --git a/scripts/index-genenetwork b/scripts/index-genenetwork
index 219858c..de3edb0 100755
--- a/scripts/index-genenetwork
+++ b/scripts/index-genenetwork
@@ -240,10 +240,12 @@ def build_rdf_cache(sparql_uri: str, query: str, remove_common_words: bool = Fal
return smaller_cache
-def hash_generif_graph(generif_file: pathlib.Path) -> str:
- with open(generif_file, encoding="utf-8") as f_:
- data = f_.read()
- return hashlib.md5(data.encode()).hexdigest()
+def hash_rdf_graph(ttl_dir: pathlib.Path) -> str:
+ ttl_hash = hashlib.new("md5")
+ for ttl_file in ttl_dir.glob("*.ttl"):
+ with open(ttl_file, encoding="utf-8") as f_:
+ ttl_hash.update(f_.read().encode())
+ return ttl_hash.hexdigest()
# pylint: disable=invalid-name
@@ -528,7 +530,7 @@ def is_data_modified(xapian_directory: str,
generif = pathlib.Path("/var/lib/data/generif-metadata.ttl")
generif_checksum = "-1"
if generif.exists():
- generif_checksum = hash_generif_graph(generif)
+ generif_checksum = hash_rdf_graph(generif)
if (db.get_metadata("generif-checksum").decode() == generif_checksum and
db.get_metadata("checksums").decode() == checksums):
sys.exit(1)
@@ -586,7 +588,7 @@ def create_xapian_index(xapian_directory: str, sql_uri: str,
logging.info("Writing generif checksums into index")
generif = pathlib.Path("/var/lib/data/generif-metadata.ttl")
if generif.exists():
- db.set_metadata("generif-checksum", hash_generif_graph(generif).encode())
+ db.set_metadata("generif-checksum", hash_rdf_graph(generif).encode())
for child in combined_index.iterdir():
shutil.move(child, xapian_directory)
logging.info("Index built")