From 8e8263c374f84fb9cbd6c3a0da7b7f17e6e06c72 Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Thu, 9 May 2024 10:33:53 +0200 Subject: Full test for whether a data-id belongs to the BXD --- gn/data/strains.scm | 54 +++++++++++++++++++-------- scripts/precompute/list-traits-to-compute.scm | 27 ++++---------- 2 files changed, 46 insertions(+), 35 deletions(-) diff --git a/gn/data/strains.scm b/gn/data/strains.scm index 4a251d4..7854cfe 100644 --- a/gn/data/strains.scm +++ b/gn/data/strains.scm @@ -1,20 +1,23 @@ (define-module (gn data strains) - #:use-module (json) - #:use-module (ice-9 match) - #:use-module (ice-9 format) - #:use-module (ice-9 iconv) - #:use-module (ice-9 receive) - #:use-module (ice-9 string-fun) - ;; #:use-module (gn db sparql) #:use-module (dbi dbi) + #:use-module (gn cache memoize) #:use-module (gn data group) #:use-module (gn db mysql) #:use-module (gn util convert) + #:use-module (ice-9 format) + #:use-module (ice-9 iconv) + #:use-module (ice-9 match) + #:use-module (ice-9 receive) + #:use-module (ice-9 string-fun) + #:use-module (json) #:use-module (web gn-uri) #:export ( strain-id-names + is-a-bxd? + has-bxd? bxd-strain-id-names + memo-bxd-strain-id-names )) (define* (strain-id-names inbred-set #:key (used-for-mapping? #t)) @@ -27,20 +30,41 @@ used-for-mapping? will say whether the strains/individuals are used for mapping. (lambda (db) (dbi-query db (string-append "SELECT StrainId,Strain.Name FROM Strain, StrainXRef WHERE StrainXRef.StrainId = Strain.Id AND StrainXRef.InbredSetId = " (int-to-string inbred-set) (if used-for-mapping? - ;; " AND Used_for_mapping='Y'" + " AND Used_for_mapping='Y'" "") " ORDER BY StrainId;")) (get-rows-apply db (lambda (r) `(,(assoc-ref r "StrainId") . ,(assoc-ref r "Name"))) '())))) + + (define* (bxd-strain-id-names #:key (used-for-mapping? #f)) "Return assoc list of tuples of strain id + names. Same as strain-id-names, but just for the BXD used-for-mapping? will say whether the strains/individuals are used for mapping. Always True, FIXME" (filter (lambda (l) l) - (map (lambda (l) - (let [(id (car l)) - (name (cdr l))] - (if (or (< id 42) (string-contains name "BXD")) - l - #f)) - ) (strain-id-names 1 #:used-for-mapping? used-for-mapping?)))) + (map (lambda (l) + (let [(id (car l)) + (name (cdr l))] + (if (or (< id 42) (string-contains name "BXD")) + l + #f)) + ) (strain-id-names 1 #:used-for-mapping? used-for-mapping?)))) + +(define memo-bxd-strain-id-names + (memoize bxd-strain-id-names)) + +(define (is-a-bxd? strain-id) + "Is a strain a member of the BXD? For speed we memoize tuples from the DB. Note that we check the DB list, as +well as the name. This won't work for the parents. We do it this way because the GN table is wrong." + (match (assoc strain-id (memo-bxd-strain-id-names)) + [(id . name) (string-contains name "BXD")] + )) + +(define (has-bxd? trait-values) + "Walk tuples of trait strain-id and value. Check we have at least one BXD. Stops at the first match" + (match trait-values + [(trait . rest) (match trait + [(strain-id . value) (if (is-a-bxd? strain-id) + #t + (has-bxd? rest))])] + [() #f])) diff --git a/scripts/precompute/list-traits-to-compute.scm b/scripts/precompute/list-traits-to-compute.scm index 1fadb4a..e954b14 100755 --- a/scripts/precompute/list-traits-to-compute.scm +++ b/scripts/precompute/list-traits-to-compute.scm @@ -70,25 +70,11 @@ When that is the case we might as well write the phenotype file because we have (srfi srfi-1) ) -(define (is-bxd? trait-values) - (display "HEY") - (display trait-values) - (newline) - (match trait-values - [(trait . rest) (begin - (display "PARSE") - (display trait) - (match trait - [(strain . value) (if (= strain 5) - #f - (is-bxd? rest))]) - (newline))] - [() #f])) (call-with-db (lambda (db) (begin - (let [(bxd-strains (bxd-strain-id-names #:used-for-mapping? #t))] + (let [(bxd-strains (memo-bxd-strain-id-names #:used-for-mapping? #t))] (define (run-list-traits-to-compute db prev-id count) (let* [(hits (get-precompute-hits db prev-id count)) (data-ids (map (lambda (hit) @@ -101,7 +87,7 @@ When that is the case we might as well write the phenotype file because we have (data-ids-query (string-join data-str-ids " OR ")) (query (string-append "SELECT Id,StrainId,value FROM ProbeSetData WHERE " data-ids-query)) ] - (display query) + ;; (display query) (dbi-query db query) (let [(id-traits (get-rows db '())) (nrecs '())] @@ -114,15 +100,16 @@ When that is the case we might as well write the phenotype file because we have (acons strain-id value has-lst) '()) )] - ;; (display lst) (set! nrecs (assoc-set! nrecs data-id lst)))) id-traits) (for-each (lambda (r) - (if (is-bxd? (cdr r)) - (display r) + (if (has-bxd? (cdr r)) + (begin + (display (car r)) + (newline)) )) nrecs) ; (display nrecs) ))) - (run-list-traits-to-compute db 0 5) ;; start precompute + (run-list-traits-to-compute db 0 1000) ;; start precompute ;; (write bxd-strains) )))) -- cgit v1.2.3