aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPjotr Prins2024-05-09 10:33:53 +0200
committerPjotr Prins2024-05-09 10:33:53 +0200
commit8e8263c374f84fb9cbd6c3a0da7b7f17e6e06c72 (patch)
treebe3058f7185311cc867f4a371fa41b741f967edd
parentbac78b1607851096cdf48c46bbd1e5dd9c31fe96 (diff)
downloadgn-guile-8e8263c374f84fb9cbd6c3a0da7b7f17e6e06c72.tar.gz
Full test for whether a data-id belongs to the BXD
-rw-r--r--gn/data/strains.scm54
-rwxr-xr-xscripts/precompute/list-traits-to-compute.scm27
2 files changed, 46 insertions, 35 deletions
diff --git a/gn/data/strains.scm b/gn/data/strains.scm
index 4a251d4..7854cfe 100644
--- a/gn/data/strains.scm
+++ b/gn/data/strains.scm
@@ -1,20 +1,23 @@
(define-module (gn data strains)
- #:use-module (json)
- #:use-module (ice-9 match)
- #:use-module (ice-9 format)
- #:use-module (ice-9 iconv)
- #:use-module (ice-9 receive)
- #:use-module (ice-9 string-fun)
- ;; #:use-module (gn db sparql)
#:use-module (dbi dbi)
+ #:use-module (gn cache memoize)
#:use-module (gn data group)
#:use-module (gn db mysql)
#:use-module (gn util convert)
+ #:use-module (ice-9 format)
+ #:use-module (ice-9 iconv)
+ #:use-module (ice-9 match)
+ #:use-module (ice-9 receive)
+ #:use-module (ice-9 string-fun)
+ #:use-module (json)
#:use-module (web gn-uri)
#:export (
strain-id-names
+ is-a-bxd?
+ has-bxd?
bxd-strain-id-names
+ memo-bxd-strain-id-names
))
(define* (strain-id-names inbred-set #:key (used-for-mapping? #t))
@@ -27,20 +30,41 @@ used-for-mapping? will say whether the strains/individuals are used for mapping.
(lambda (db)
(dbi-query db (string-append "SELECT StrainId,Strain.Name FROM Strain, StrainXRef WHERE StrainXRef.StrainId = Strain.Id AND StrainXRef.InbredSetId = " (int-to-string inbred-set)
(if used-for-mapping?
- ;; " AND Used_for_mapping='Y'"
+ " AND Used_for_mapping='Y'"
"")
" ORDER BY StrainId;"))
(get-rows-apply db (lambda (r) `(,(assoc-ref r "StrainId") . ,(assoc-ref r "Name"))) '()))))
+
+
(define* (bxd-strain-id-names #:key (used-for-mapping? #f))
"Return assoc list of tuples of strain id + names. Same as strain-id-names, but just for the BXD
used-for-mapping? will say whether the strains/individuals are used for mapping. Always True, FIXME"
(filter (lambda (l) l)
- (map (lambda (l)
- (let [(id (car l))
- (name (cdr l))]
- (if (or (< id 42) (string-contains name "BXD"))
- l
- #f))
- ) (strain-id-names 1 #:used-for-mapping? used-for-mapping?))))
+ (map (lambda (l)
+ (let [(id (car l))
+ (name (cdr l))]
+ (if (or (< id 42) (string-contains name "BXD"))
+ l
+ #f))
+ ) (strain-id-names 1 #:used-for-mapping? used-for-mapping?))))
+
+(define memo-bxd-strain-id-names
+ (memoize bxd-strain-id-names))
+
+(define (is-a-bxd? strain-id)
+ "Is a strain a member of the BXD? For speed we memoize tuples from the DB. Note that we check the DB list, as
+well as the name. This won't work for the parents. We do it this way because the GN table is wrong."
+ (match (assoc strain-id (memo-bxd-strain-id-names))
+ [(id . name) (string-contains name "BXD")]
+ ))
+
+(define (has-bxd? trait-values)
+ "Walk tuples of trait strain-id and value. Check we have at least one BXD. Stops at the first match"
+ (match trait-values
+ [(trait . rest) (match trait
+ [(strain-id . value) (if (is-a-bxd? strain-id)
+ #t
+ (has-bxd? rest))])]
+ [() #f]))
diff --git a/scripts/precompute/list-traits-to-compute.scm b/scripts/precompute/list-traits-to-compute.scm
index 1fadb4a..e954b14 100755
--- a/scripts/precompute/list-traits-to-compute.scm
+++ b/scripts/precompute/list-traits-to-compute.scm
@@ -70,25 +70,11 @@ When that is the case we might as well write the phenotype file because we have
(srfi srfi-1)
)
-(define (is-bxd? trait-values)
- (display "HEY")
- (display trait-values)
- (newline)
- (match trait-values
- [(trait . rest) (begin
- (display "PARSE")
- (display trait)
- (match trait
- [(strain . value) (if (= strain 5)
- #f
- (is-bxd? rest))])
- (newline))]
- [() #f]))
(call-with-db
(lambda (db)
(begin
- (let [(bxd-strains (bxd-strain-id-names #:used-for-mapping? #t))]
+ (let [(bxd-strains (memo-bxd-strain-id-names #:used-for-mapping? #t))]
(define (run-list-traits-to-compute db prev-id count)
(let* [(hits (get-precompute-hits db prev-id count))
(data-ids (map (lambda (hit)
@@ -101,7 +87,7 @@ When that is the case we might as well write the phenotype file because we have
(data-ids-query (string-join data-str-ids " OR "))
(query (string-append "SELECT Id,StrainId,value FROM ProbeSetData WHERE " data-ids-query))
]
- (display query)
+ ;; (display query)
(dbi-query db query)
(let [(id-traits (get-rows db '()))
(nrecs '())]
@@ -114,15 +100,16 @@ When that is the case we might as well write the phenotype file because we have
(acons strain-id value has-lst)
'())
)]
- ;; (display lst)
(set! nrecs (assoc-set! nrecs data-id lst))))
id-traits)
(for-each (lambda (r)
- (if (is-bxd? (cdr r))
- (display r)
+ (if (has-bxd? (cdr r))
+ (begin
+ (display (car r))
+ (newline))
)) nrecs)
; (display nrecs)
)))
- (run-list-traits-to-compute db 0 5) ;; start precompute
+ (run-list-traits-to-compute db 0 1000) ;; start precompute
;; (write bxd-strains)
))))