aboutsummaryrefslogtreecommitdiff
path: root/scripts
diff options
context:
space:
mode:
authorFrederick Muriuki Muriithi2024-07-15 10:06:00 -0500
committerFrederick Muriuki Muriithi2024-07-15 10:06:00 -0500
commit501a514e8f36c4cbb7830ceeda85c45667a9c277 (patch)
tree8ee2a7e2f578b1ba71e4fe35c77f0e36db435180 /scripts
parent3f38250b4b3803c85a4f85899accf9eca562bac6 (diff)
downloadgn-auth-501a514e8f36c4cbb7830ceeda85c45667a9c277.tar.gz
Make only non-human data public automatically
Ensure that the scripted process that automatically makes data public applies only to non-human data. Human data raises legal issues around privacy, security and the like, so it is made public only via a manual process in which one or more humans make a judgement call.
Diffstat (limited to 'scripts')
-rw-r--r--scripts/migrate_existing_data.py37
1 files changed, 20 insertions, 17 deletions
diff --git a/scripts/migrate_existing_data.py b/scripts/migrate_existing_data.py
index 1261462..198d37d 100644
--- a/scripts/migrate_existing_data.py
+++ b/scripts/migrate_existing_data.py
@@ -210,11 +210,12 @@ def __unassigned_mrna__(bioconn, assigned):
"FROM Species AS s INNER JOIN InbredSet AS iset "
"ON s.SpeciesId=iset.SpeciesId INNER JOIN ProbeFreeze AS pf "
"ON iset.InbredSetId=pf.InbredSetId INNER JOIN ProbeSetFreeze AS psf "
- "ON pf.ProbeFreezeId=psf.ProbeFreezeId ")
+ "ON pf.ProbeFreezeId=psf.ProbeFreezeId "
+ "WHERE s.Name != 'human' ")
if len(assigned) > 0:
paramstr = ", ".join(["(%s, %s, %s, %s)"] * len(assigned))
query = query + (
- "WHERE (s.SpeciesId, iset.InbredSetId, pf.ProbeFreezeId, psf.Id) "
+ "AND (s.SpeciesId, iset.InbredSetId, pf.ProbeFreezeId, psf.Id) "
f"NOT IN ({paramstr}) ")
query = query + "LIMIT 100000"
@@ -270,11 +271,12 @@ def __unassigned_geno__(bioconn, assigned):
"gf.ShortName AS dataset_shortname "
"FROM Species AS s INNER JOIN InbredSet AS iset "
"ON s.SpeciesId=iset.SpeciesId INNER JOIN GenoFreeze AS gf "
- "ON iset.InbredSetId=gf.InbredSetId ")
+ "ON iset.InbredSetId=gf.InbredSetId "
+ "WHERE s.Name != 'human' ")
if len(assigned) > 0:
paramstr = ", ".join(["(%s, %s, %s)"] * len(assigned))
query = query + (
- "WHERE (s.SpeciesId, iset.InbredSetId, gf.Id) "
+ "AND (s.SpeciesId, iset.InbredSetId, gf.Id) "
f"NOT IN ({paramstr}) ")
query = query + "LIMIT 100000"
@@ -326,22 +328,23 @@ def __assigned_pheno__(authconn):
def __unassigned_pheno__(bioconn, assigned):
"""Retrieve all unassigned Phenotype data."""
query = (
- "SELECT spc.SpeciesId, iset.InbredSetId, "
- "pf.Id AS PublishFreezeId, pf.Name AS dataset_name, "
- "pf.FullName AS dataset_fullname, "
- "pf.ShortName AS dataset_shortname, pxr.Id AS PublishXRefId "
- "FROM "
- "Species AS spc "
- "INNER JOIN InbredSet AS iset "
- "ON spc.SpeciesId=iset.SpeciesId "
- "INNER JOIN PublishFreeze AS pf "
- "ON iset.InbredSetId=pf.InbredSetId "
- "INNER JOIN PublishXRef AS pxr "
- "ON pf.InbredSetId=pxr.InbredSetId ")
+ "SELECT spc.SpeciesId, iset.InbredSetId, "
+ "pf.Id AS PublishFreezeId, pf.Name AS dataset_name, "
+ "pf.FullName AS dataset_fullname, "
+ "pf.ShortName AS dataset_shortname, pxr.Id AS PublishXRefId "
+ "FROM "
+ "Species AS spc "
+ "INNER JOIN InbredSet AS iset "
+ "ON spc.SpeciesId=iset.SpeciesId "
+ "INNER JOIN PublishFreeze AS pf "
+ "ON iset.InbredSetId=pf.InbredSetId "
+ "INNER JOIN PublishXRef AS pxr "
+ "ON pf.InbredSetId=pxr.InbredSetId "
+ "WHERE spc.Name != 'human' ")
if len(assigned) > 0:
paramstr = ", ".join(["(%s, %s, %s, %s)"] * len(assigned))
query = query + (
- "WHERE (spc.SpeciesId, iset.InbredSetId, pf.Id, pxr.Id) "
+ "AND (spc.SpeciesId, iset.InbredSetId, pf.Id, pxr.Id) "
f"NOT IN ({paramstr}) ")
query = query + "LIMIT 100000"