diff options
Diffstat (limited to 'scripts')
-rw-r--r-- | scripts/insert_data.py | 20 |
1 files changed, 13 insertions, 7 deletions
diff --git a/scripts/insert_data.py b/scripts/insert_data.py
index 7e27f93..f8b73f6 100644
--- a/scripts/insert_data.py
+++ b/scripts/insert_data.py
@@ -131,27 +131,33 @@ def insert_means(
     "Insert the means/averages data into the database"
     headings = read_file_headings(filepath)
     strains = strains_info(dbconn, headings[1:], speciesid)
-    check_strains(headings[1:], strains)
+    check_strains(headings[1:], strains),
+    probeset_query = (
+        "INSERT IGNORE INTO ProbeSet(ChipId, Name) "
+        "VALUES (%(ChipId)s, %(ProbeSetId)s) ")
     means_query = (
         "INSERT INTO ProbeSetData "
         "VALUES(%(ProbeSetDataId)s, %(StrainId)s, %(DataValue)s)")
     xref_query = (
         "INSERT INTO ProbeSetXRef(ProbeSetFreezeId, ProbeSetId, DataId) "
         "VALUES(%(ProbeSetFreezeId)s, %(ProbeSetId)s, %(ProbeSetDataId)s)")
-    the_means = (
-        {"ProbeSetFreezeId": datasetid, "ProbeSetDataId": data_id, **mean}
-        for data_id, mean in
-        enumerate(
-            read_datavalues(filepath, headings, strains),
-            start=(last_data_id(dbconn)+1)))
+    the_means = ({
+        "ProbeSetFreezeId": datasetid, "ProbeSetDataId": data_id,
+        "ChipId": platform_id, **mean
+    } for data_id, mean in enumerate(
+        read_datavalues(filepath, headings, strains),
+        start=(last_data_id(dbconn)+1)))
     with dbconn.cursor(cursorclass=DictCursor) as cursor:
         while True:
             means = tuple(take(the_means, 1000))
             if not bool(means):
                 break
+            print(__format_query__(probeset_query, means))
+            print()
             print(__format_query__(means_query, means))
             print()
             print(__format_query__(xref_query, means))
+            cursor.executemany(probeset_query, means)
             cursor.executemany(means_query, means)
             cursor.executemany(xref_query, means)
     return 0