From 93eea6803c4206a1cdc7956413df746de60583ee Mon Sep 17 00:00:00 2001 From: Frederick Muriuki Muriithi Date: Wed, 17 Aug 2022 14:29:12 +0300 Subject: Topic: Queries (On preventing SQL Injection). --- .../queries-and-prepared-statements-in-python.gmi | 87 ++++++++++++++++++++++ 1 file changed, 87 insertions(+) create mode 100644 topics/queries-and-prepared-statements-in-python.gmi (limited to 'topics/queries-and-prepared-statements-in-python.gmi') diff --git a/topics/queries-and-prepared-statements-in-python.gmi b/topics/queries-and-prepared-statements-in-python.gmi new file mode 100644 index 0000000..ca6510e --- /dev/null +++ b/topics/queries-and-prepared-statements-in-python.gmi @@ -0,0 +1,87 @@ +# Queries and Prepared Statements in Python + +String interpolation when writing queries is a really bad idea; it leads to exposure to SQL Injection attacks. To mitigate against this, we need to write queries using placeholders for values, then pass in the values as arguments to the **execute** function. + +As a demonstration, using some existing code, do not write a query like this: + +> curr.execute( +> """ +> SELECT Strain.Name, Strain.Id FROM Strain, Species +> WHERE Strain.Name IN {} +> and Strain.SpeciesId=Species.Id +> and Species.name = '{}' +> """.format( +> create_in_clause(list(sample_data.keys())), +> *mescape(dataset.group.species))) + +In the query above, we interpolate the values of the 'sample_data.keys()' values and that of the 'dataset.group.species' values. + +The code above can be rewritten to something like: + +> sample_data_keys = tuple(key for key in sample_data.keys()) +> +> curr.execute( +> """ +> SELECT Strain.Name, Strain.Id FROM Strain, Species +> WHERE Strain.Name IN ({}) +> and Strain.SpeciesId=Species.Id +> and Species.name = %s +> """.format(", ".join(sample_data_keys)), +> (sample_data_keys + (dataset.group.species,))) + +In this new query, the IN clause ends up being a string of the form + +> %s, %s, %s, ... 
+ +for the total number of items in the 'sample_data_keys' tuple. + +There is one more '%s' placeholder for the 'Species.name' value, so, the final tuple we provide as an argument to execute needs to add the 'dataset.group.species' value. + +**IMPORTANT 01**: the total number of placeholders (%s) must be the same as the total number of arguments passed into the 'execute' function. + +**IMPORTANT 02**: the order of the values must correspond to the order of the placeholders. + +### Aside + +The functions 'create_in_clause' and 'mescape' are defined as below: + +> from MySQLdb import escape_string as escape_ +> +> def create_in_clause(items): +> """Create an in clause for mysql""" +> in_clause = ', '.join("'{}'".format(x) for x in mescape(*items)) +> in_clause = '( {} )'.format(in_clause) +> return in_clause +> +> def mescape(*items): +> """Multiple escape""" +> return [escape_(str(item)).decode('utf8') for item in items] +> +> def escape(string_): +> return escape_(string_).decode('utf8') + + +## Parameter Style + +In the section above, we show the most common parameter style used in most cases. + +If you want to use a mapping object (dict), you have the option of using the '%()s' format for the query. In that case, we could rewrite the query above into something like: + +> sample_data_dict = {f"sample_{idx}: key for idx,key in enumerate(sample_data.keys())} +> +> curr.execute( +> """ +> SELECT Strain.Name, Strain.Id FROM Strain, Species +> WHERE Strain.Name IN ({}) +> and Strain.SpeciesId=Species.Id +> and Species.name = %(species_name)s +> """.format(", ".join([f"%({key})s" for key in sample_data_dict.keys()])), +> {**sample_data_dict, "species_name": dataset.group.species}) + +## Final Note + +While this has dealt mostly with the MySQLdb driver for Python3, the idea is the same for the psycopg2 (PostgreSQL) driver and others (with some minor variation in the details). + +The concept is also similar in many other languages. 
+ +The main takeaway is that you really should not be manually escaping the values - instead, you should let the driver do that for you, by providing placeholders in the query, and the values to use separately. -- cgit v1.2.3 From 5cca46a2eed70cb440aa88bc29b0e321794f70c8 Mon Sep 17 00:00:00 2001 From: Frederick Muriuki Muriithi Date: Wed, 17 Aug 2022 15:55:35 +0300 Subject: Use pre-formatted text blocks for source code --- .../queries-and-prepared-statements-in-python.gmi | 94 ++++++++++++---------- 1 file changed, 51 insertions(+), 43 deletions(-) (limited to 'topics/queries-and-prepared-statements-in-python.gmi') diff --git a/topics/queries-and-prepared-statements-in-python.gmi b/topics/queries-and-prepared-statements-in-python.gmi index ca6510e..642ed96 100644 --- a/topics/queries-and-prepared-statements-in-python.gmi +++ b/topics/queries-and-prepared-statements-in-python.gmi @@ -4,30 +4,34 @@ String interpolation when writing queries is a really bad idea; it leads to expo As a demonstration, using some existing code, do not write a query like this: -> curr.execute( -> """ -> SELECT Strain.Name, Strain.Id FROM Strain, Species -> WHERE Strain.Name IN {} -> and Strain.SpeciesId=Species.Id -> and Species.name = '{}' -> """.format( -> create_in_clause(list(sample_data.keys())), -> *mescape(dataset.group.species))) +``` +curr.execute( + """ + SELECT Strain.Name, Strain.Id FROM Strain, Species + WHERE Strain.Name IN {} + and Strain.SpeciesId=Species.Id + and Species.name = '{}' + """.format( + create_in_clause(list(sample_data.keys())), + *mescape(dataset.group.species))) +``` In the query above, we interpolate the values of the 'sample_data.keys()' values and that of the 'dataset.group.species' values. 
The code above can be rewritten to something like: -> sample_data_keys = tuple(key for key in sample_data.keys()) -> -> curr.execute( -> """ -> SELECT Strain.Name, Strain.Id FROM Strain, Species -> WHERE Strain.Name IN ({}) -> and Strain.SpeciesId=Species.Id -> and Species.name = %s -> """.format(", ".join(sample_data_keys)), -> (sample_data_keys + (dataset.group.species,))) +``` +sample_data_keys = tuple(key for key in sample_data.keys()) + +curr.execute( + """ + SELECT Strain.Name, Strain.Id FROM Strain, Species + WHERE Strain.Name IN ({}) + and Strain.SpeciesId=Species.Id + and Species.name = %s + """.format(", ".join(["%s"] * len(sample_data_keys))), + (sample_data_keys + (dataset.group.species,))) +``` In this new query, the IN clause ends up being a string of the form @@ -45,20 +49,22 @@ There is one more '%s' placeholder for the 'Species.name' value, so, the final t The functions 'create_in_clause' and 'mescape' are defined as below: -> from MySQLdb import escape_string as escape_ -> -> def create_in_clause(items): -> """Create an in clause for mysql""" -> in_clause = ', '.join("'{}'".format(x) for x in mescape(*items)) -> in_clause = '( {} )'.format(in_clause) -> return in_clause -> -> def mescape(*items): -> """Multiple escape""" -> return [escape_(str(item)).decode('utf8') for item in items] -> -> def escape(string_): -> return escape_(string_).decode('utf8') +``` +from MySQLdb import escape_string as escape_ + +def create_in_clause(items): + """Create an in clause for mysql""" + in_clause = ', '.join("'{}'".format(x) for x in mescape(*items)) + in_clause = '( {} )'.format(in_clause) + return in_clause + +def mescape(*items): + """Multiple escape""" + return [escape_(str(item)).decode('utf8') for item in items] + +def escape(string_): + return escape_(string_).decode('utf8') +``` ## Parameter Style @@ -67,16 +73,18 @@ In the section above, we show the most common parameter style used in most cases If you want to use a mapping object (dict), you have the option of using 
the '%()s' format for the query. In that case, we could rewrite the query above into something like: -> sample_data_dict = {f"sample_{idx}: key for idx,key in enumerate(sample_data.keys())} -> -> curr.execute( -> """ -> SELECT Strain.Name, Strain.Id FROM Strain, Species -> WHERE Strain.Name IN ({}) -> and Strain.SpeciesId=Species.Id -> and Species.name = %(species_name)s -> """.format(", ".join([f"%({key})s" for key in sample_data_dict.keys()])), -> {**sample_data_dict, "species_name": dataset.group.species}) +``` +sample_data_dict = {f"sample_{idx}": key for idx,key in enumerate(sample_data.keys())} + +curr.execute( + """ + SELECT Strain.Name, Strain.Id FROM Strain, Species + WHERE Strain.Name IN ({}) + and Strain.SpeciesId=Species.Id + and Species.name = %(species_name)s + """.format(", ".join([f"%({key})s" for key in sample_data_dict.keys()])), + {**sample_data_dict, "species_name": dataset.group.species}) +``` ## Final Note -- cgit v1.2.3