# Load Python3 environment with GN2 utilities:
#
# source /usr/local/guix-profiles/gn-latest-20210512/etc/profile
#
# and run
#
# python load_phenotypes.py [args...]
import sys
import csv
import utilities
import datastructure
def main(argv):
# config
config = utilities.get_config(argv[1])
print("config:")
for item in config.items('config'):
print("\t%s" % (str(item)))
# var
inbredsetid = config.get('config', 'inbredsetid')
print("inbredsetid: %s" % inbredsetid)
species = datastructure.get_species(inbredsetid)
speciesid = species[0]
print("speciesid: %s" % speciesid)
dataid = datastructure.get_nextdataid_phenotype()
print("next data id: %s" % dataid)
cursor, con = utilities.get_cursor()
# datafile
datafile = open(config.get('config', 'datafile'), 'r')
phenotypedata = csv.reader(datafile, delimiter='\t', quotechar='"')
phenotypedata_head = phenotypedata.next()
print("phenotypedata head:\n\t%s" % phenotypedata_head)
strainnames = phenotypedata_head[1:]
strains = datastructure.get_strains_bynames(inbredsetid=inbredsetid, strainnames=strainnames, updatestrainxref="yes")
# metafile
metafile = open(config.get('config', 'metafile'), 'r')
phenotypemeta = csv.reader(metafile, delimiter='\t', quotechar='"')
phenotypemeta_head = phenotypemeta.next()
print("phenotypemeta head:\n\t%s" % phenotypemeta_head)
# load
for metarow in phenotypemeta:
#
datarow_value = phenotypedata.next()
datarow_se = phenotypedata.next()
datarow_n = phenotypedata.next()
# Phenotype
sql = """
INSERT INTO Phenotype
SET
Phenotype.`Pre_publication_description`=%s,
Phenotype.`Post_publication_description`=%s,
Phenotype.`Original_description`=%s,
Phenotype.`Pre_publication_abbreviation`=%s,
Phenotype.`Post_publication_abbreviation`=%s,
Phenotype.`Lab_code`=%s,
Phenotype.`Submitter`=%s,
Phenotype.`Owner`=%s,
Phenotype.`Authorized_Users`=%s,
Phenotype.`Units`=%s
"""
cursor.execute(sql, (
utilities.to_db_string(metarow[1], None),
utilities.to_db_string(metarow[2], None),
utilities.to_db_string(metarow[3], None),
utilities.to_db_string(metarow[4], None),
utilities.to_db_string(metarow[5], None),
utilities.to_db_string(metarow[6], None),
utilities.to_db_string(metarow[7], None),
utilities.to_db_string(metarow[8], None),
utilities.to_db_string(metarow[9], ""),
utilities.to_db_string(metarow[18], ""),
))
rowcount = cursor.rowcount
phenotypeid = con.insert_id()
print("INSERT INTO Phenotype: %d record: %d" % (rowcount, phenotypeid))
# Publication
publicationid = None # reset
pubmed_id = utilities.to_db_string(metarow[0], None)
if pubmed_id:
sql = """
SELECT Publication.`Id`
FROM Publication
WHERE Publication.`PubMed_ID`=%s
"""
cursor.execute(sql, (pubmed_id))
re = cursor.fetchone()
if re:
publicationid = re[0]
print("get Publication record: %d" % publicationid)
if not publicationid:
sql = """
INSERT INTO Publication
SET
Publication.`PubMed_ID`=%s,
Publication.`Abstract`=%s,
Publication.`Authors`=%s,
Publication.`Title`=%s,
Publication.`Journal`=%s,
Publication.`Volume`=%s,
Publication.`Pages`=%s,
Publication.`Month`=%s,
Publication.`Year`=%s
"""
cursor.execute(sql, (
utilities.to_db_string(metarow[0], None),
utilities.to_db_string(metarow[12], None),
utilities.to_db_string(metarow[10], ""),
utilities.to_db_string(metarow[11], None),
utilities.to_db_string(metarow[13], None),
utilities.to_db_string(metarow[14], None),
utilities.to_db_string(metarow[15], None),
utilities.to_db_string(metarow[16], None),
utilities.to_db_string(metarow[17], ""),
))
rowcount = cursor.rowcount
publicationid = con.insert_id()
print("INSERT INTO Publication: %d record: %d" % (rowcount, publicationid))
# data
for index, strain in enumerate(strains):
#
strainid = strain[0]
value = utilities.to_db_float(datarow_value[index+1], None)
se = utilities.to_db_float(datarow_se[index+1], None)
n = utilities.to_db_int(datarow_n[index+1], None)
#
if value is not None:
sql = """
INSERT INTO PublishData
SET
PublishData.`Id`=%s,
PublishData.`StrainId`=%s,
PublishData.`value`=%s
"""
cursor.execute(sql, (dataid, strainid, value))
if se is not None:
sql = """
INSERT INTO PublishSE
SET
PublishSE.`DataId`=%s,
PublishSE.`StrainId`=%s,
PublishSE.`error`=%s
"""
cursor.execute(sql, (dataid, strainid, se))
if n is not None:
sql = """
INSERT INTO NStrain
SET
NStrain.`DataId`=%s,
NStrain.`StrainId`=%s,
NStrain.`count`=%s
"""
cursor.execute(sql, (dataid, strainid, n))
# PublishXRef
sql = """
INSERT INTO PublishXRef
SET
PublishXRef.`InbredSetId`=%s,
PublishXRef.`PhenotypeId`=%s,
PublishXRef.`PublicationId`=%s,
PublishXRef.`DataId`=%s,
PublishXRef.`comments`=%s
"""
cursor.execute(sql, (inbredsetid, phenotypeid, publicationid, dataid, ""))
rowcount = cursor.rowcount
publishxrefid = con.insert_id()
print("INSERT INTO PublishXRef: %d record: %d" % (rowcount, publishxrefid))
# for loop next
dataid += 1
print
# release
con.close()
if __name__ == "__main__":
print("command line arguments:\n\t%s" % sys.argv)
main(sys.argv)
print("exit successfully")