From 8e8263c374f84fb9cbd6c3a0da7b7f17e6e06c72 Mon Sep 17 00:00:00 2001
From: Pjotr Prins
Date: Thu, 9 May 2024 10:33:53 +0200
Subject: Full test for whether a data-id belongs to the BXD

---
 gn/data/strains.scm                           | 54 +++++++++++++++++++--------
 scripts/precompute/list-traits-to-compute.scm | 27 ++++----------
 2 files changed, 46 insertions(+), 35 deletions(-)

diff --git a/gn/data/strains.scm b/gn/data/strains.scm
index 4a251d4..7854cfe 100644
--- a/gn/data/strains.scm
+++ b/gn/data/strains.scm
@@ -1,20 +1,23 @@
 (define-module (gn data strains)
-  #:use-module (json)
-  #:use-module (ice-9 match)
-  #:use-module (ice-9 format)
-  #:use-module (ice-9 iconv)
-  #:use-module (ice-9 receive)
-  #:use-module (ice-9 string-fun)
-  ;; #:use-module (gn db sparql)
   #:use-module (dbi dbi)
+  #:use-module (gn cache memoize)
   #:use-module (gn data group)
   #:use-module (gn db mysql)
   #:use-module (gn util convert)
+  #:use-module (ice-9 format)
+  #:use-module (ice-9 iconv)
+  #:use-module (ice-9 match)
+  #:use-module (ice-9 receive)
+  #:use-module (ice-9 string-fun)
+  #:use-module (json)
   #:use-module (web gn-uri)
 
   #:export (
             strain-id-names
+            is-a-bxd?
+            has-bxd?
             bxd-strain-id-names
+            memo-bxd-strain-id-names
             ))
 
 (define* (strain-id-names inbred-set #:key (used-for-mapping? #t))
@@ -27,20 +30,41 @@ used-for-mapping? will say whether the strains/individuals are used for mapping.
    (lambda (db)
      (dbi-query db (string-append "SELECT StrainId,Strain.Name FROM Strain, StrainXRef WHERE StrainXRef.StrainId = Strain.Id AND StrainXRef.InbredSetId = " (int-to-string inbred-set)
                                   (if used-for-mapping?
-                                      ;; " AND Used_for_mapping='Y'"
+                                      " AND Used_for_mapping='Y'"
                                       "")
                                   " ORDER BY StrainId;"))
       (get-rows-apply db (lambda (r) `(,(assoc-ref r "StrainId") . ,(assoc-ref r "Name"))) '()))))
 
+
+
 (define* (bxd-strain-id-names #:key (used-for-mapping? #f))
   "Return assoc list of tuples of strain id + names. Same as strain-id-names, but just for the BXD
 
 used-for-mapping? will say whether the strains/individuals are used for mapping. Always True, FIXME"
   (filter (lambda (l) l)
-  (map (lambda (l)
-         (let [(id (car l))
-               (name (cdr l))]
-           (if (or (< id 42) (string-contains name "BXD"))
-               l
-               #f))
-         ) (strain-id-names 1 #:used-for-mapping? used-for-mapping?))))
+            (map (lambda (l)  ; turn each (id . name) pair into itself (keep) or #f (drop)
+                   (let [(id (car l))
+                         (name (cdr l))]
+                     (if (or (< id 42) (string-contains name "BXD"))
+                         l    ; kept: id is below 42, or the name contains "BXD"
+                         #f)) ; the enclosing filter removes these #f entries
+                   ) (strain-id-names 1 #:used-for-mapping? used-for-mapping?))))
+
+(define memo-bxd-strain-id-names  ; bxd-strain-id-names wrapped by memoize; accepts the same arguments
+  (memoize bxd-strain-id-names))  ; NOTE(review): presumably cached per argument list -- confirm in (gn cache memoize)
+
+(define (is-a-bxd? strain-id)
+  "Is a strain a member of the BXD? For speed we memoize tuples from the DB. We check the DB list as well
+as the name, so this won't match the parents; done this way because the GN table is wrong. Unknown ids give #f."
+  (match (assoc strain-id (memo-bxd-strain-id-names))
+    [(id . name) (string-contains name "BXD")] ; index of the match (true) or #f
+    [_ #f]))                                   ; assoc gave #f: id not in the list, so no match-error
+
+(define (has-bxd? trait-values)
+  "Scan TRAIT-VALUES, a list of (strain-id . value) tuples, and return #t as soon as one strain-id
+passes is-a-bxd?. Return #f when the list runs out without a hit."
+  (match trait-values
+    [() #f]
+    [((strain-id . _) . remaining)
+     (cond [(is-a-bxd? strain-id) #t]
+           [else (has-bxd? remaining)])]))
diff --git a/scripts/precompute/list-traits-to-compute.scm b/scripts/precompute/list-traits-to-compute.scm
index 1fadb4a..e954b14 100755
--- a/scripts/precompute/list-traits-to-compute.scm
+++ b/scripts/precompute/list-traits-to-compute.scm
@@ -70,25 +70,11 @@ When that is the case we might as well write the phenotype file because we have
              (srfi srfi-1)
              )
 
-(define (is-bxd? trait-values)
-  (display "HEY")
-  (display trait-values)
-  (newline)
-  (match trait-values
-    [(trait . rest) (begin
-                      (display "PARSE")
-                      (display trait)
-                      (match trait
-                        [(strain . value) (if (= strain 5)
-                                              #f
-                                              (is-bxd? rest))])
-                      (newline))]
-    [() #f]))
 
 (call-with-db
  (lambda (db)
    (begin
-     (let [(bxd-strains (bxd-strain-id-names #:used-for-mapping? #t))]
+     (let [(bxd-strains (memo-bxd-strain-id-names #:used-for-mapping? #t))]
        (define (run-list-traits-to-compute db prev-id count)
          (let* [(hits (get-precompute-hits db prev-id count))
                 (data-ids (map (lambda (hit)
@@ -101,7 +87,7 @@ When that is the case we might as well write the phenotype file because we have
                 (data-ids-query (string-join data-str-ids " OR "))
                 (query (string-append "SELECT Id,StrainId,value FROM ProbeSetData WHERE " data-ids-query))
                 ]
-           (display query)
+           ;; (display query)
            (dbi-query db query)
            (let [(id-traits (get-rows db '()))
                  (nrecs '())]
@@ -114,15 +100,16 @@ When that is the case we might as well write the phenotype file because we have
                                    (acons strain-id value has-lst)
                                    '())
                                )]
-                      ;; (display lst)
                       (set! nrecs (assoc-set! nrecs data-id lst))))
                        id-traits)
              (for-each (lambda (r)
-                         (if (is-bxd? (cdr r))
-                             (display r)
+                         (if (has-bxd? (cdr r))
+                             (begin
+                               (display (car r))
+                               (newline))
                          )) nrecs)
              ; (display nrecs)
            )))
-       (run-list-traits-to-compute db 0 5) ;; start precompute
+       (run-list-traits-to-compute db 0 1000) ;; start precompute
        ;; (write bxd-strains)
        ))))
-- 
cgit v1.2.3