From 93eea6803c4206a1cdc7956413df746de60583ee Mon Sep 17 00:00:00 2001
From: Frederick Muriuki Muriithi
Date: Wed, 17 Aug 2022 14:29:12 +0300
Subject: Topic: Queries (On preventing SQL Injection).

---
 .../queries-and-prepared-statements-in-python.gmi  | 87 ++++++++++++++++++++++
 1 file changed, 87 insertions(+)
 create mode 100644 topics/queries-and-prepared-statements-in-python.gmi

(limited to 'topics/queries-and-prepared-statements-in-python.gmi')
diff --git a/topics/queries-and-prepared-statements-in-python.gmi b/topics/queries-and-prepared-statements-in-python.gmi
new file mode 100644
index 0000000..ca6510e
--- /dev/null
+++ b/topics/queries-and-prepared-statements-in-python.gmi
@@ -0,0 +1,87 @@
+# Queries and Prepared Statements in Python
+
+String interpolation when writing queries is a really bad idea; it leads to exposure to SQL Injection attacks. To mitigate against this, we need to write queries using placeholders for values, then passing in the values as arguments to the **execute** function.
+
+As a demonstration, using some existing code, do not write a query like this:
+
+> curr.execute(
+>     """
+>         SELECT Strain.Name, Strain.Id FROM Strain, Species
+>         WHERE Strain.Name IN {}
+>         and Strain.SpeciesId=Species.Id
+>         and Species.name = '{}'
+>     """.format(
+>         create_in_clause(list(sample_data.keys())),
+>         *mescape(dataset.group.species)))
+
+In the query above, we interpolate the values of the 'sample_data.keys()' values and that of the 'dataset.group.species' values.
+
+The code above can be rewritten to something like:
+
+> sample_data_keys = tuple(key for key in sample_data.keys())
+>
+> curr.execute(
+>     """
+>         SELECT Strain.Name, Strain.Id FROM Strain, Species
+>         WHERE Strain.Name IN ({})
+>         and Strain.SpeciesId=Species.Id
+>         and Species.name = %s
+>     """.format(", ".join(sample_data_keys)),
+>     (sample_data_keys + (dataset.group.species,)))
+
+In this new query, the IN clause ends up being a string of the form
+
+> %s, %s, %s, ...
+
+for the total number of items in the 'sample_data_key' tuple.
+
+There is one more '%s' placeholder for the 'Species.name' value, so, the final tuple we provide as an argument to execute needs to add the 'dataset.group.species' value.
+
+**IMPORTANT 01**: the total number of placeholders (%s) must be the same as the total number of arguments passed into the 'execute' function.
+
+**IMPORTANT 02**: the order of the values must correspond to the order of the placeholders.
+
+### Aside
+
+The functions 'create_in_clause' and 'mescape' are defined as below:
+
+> from MySQLdb import escape_string as escape_
+> 
+> def create_in_clause(items):
+>     """Create an in clause for mysql"""
+>     in_clause = ', '.join("'{}'".format(x) for x in mescape(*items))
+>     in_clause = '( {} )'.format(in_clause)
+>     return in_clause
+> 
+> def mescape(*items):
+>     """Multiple escape"""
+>     return [escape_(str(item)).decode('utf8') for item in items]
+>
+> def escape(string_):
+>     return escape_(string_).decode('utf8')
+
+
+## Parameter Style
+
+In the section above, we show the most common parameter style used in most cases.
+
+If you want to use a mapping object (dict), you have the option of using the '%(<text>)s' format for the query. In that case, we could rewrite the query above into something like:
+
+> sample_data_dict = {f"sample_{idx}: key for idx,key in enumerate(sample_data.keys())}
+>
+> curr.execute(
+>     """
+>         SELECT Strain.Name, Strain.Id FROM Strain, Species
+>         WHERE Strain.Name IN ({})
+>         and Strain.SpeciesId=Species.Id
+>         and Species.name = %(species_name)s
+>     """.format(", ".join([f"%({key})s" for key in sample_data_dict.keys()])),
+>     {**sample_data_dict, "species_name": dataset.group.species})
+
+## Final Note
+
+While this has dealt mostly with the MySQLdb driver for Python3, the idea is the same for the psycopg2 (PostgreSQL) driver and others (with some minor variation in the details).
+
+The concept is also similar in many other languages.
+
+The main takeaway is that you really should not be manually escaping the values - instead, you should let the driver do that for you, by providing placeholders in the query, and the values to use separately.
-- 
cgit v1.2.3


From 5cca46a2eed70cb440aa88bc29b0e321794f70c8 Mon Sep 17 00:00:00 2001
From: Frederick Muriuki Muriithi
Date: Wed, 17 Aug 2022 15:55:35 +0300
Subject: Use pre-formatted text blocks for source code

---
 .../queries-and-prepared-statements-in-python.gmi  | 94 ++++++++++++----------
 1 file changed, 51 insertions(+), 43 deletions(-)

(limited to 'topics/queries-and-prepared-statements-in-python.gmi')

diff --git a/topics/queries-and-prepared-statements-in-python.gmi b/topics/queries-and-prepared-statements-in-python.gmi
index ca6510e..642ed96 100644
--- a/topics/queries-and-prepared-statements-in-python.gmi
+++ b/topics/queries-and-prepared-statements-in-python.gmi
@@ -4,30 +4,34 @@ String interpolation when writing queries is a really bad idea; it leads to expo
 
 As a demonstration, using some existing code, do not write a query like this:
 
-> curr.execute(
->     """
->         SELECT Strain.Name, Strain.Id FROM Strain, Species
->         WHERE Strain.Name IN {}
->         and Strain.SpeciesId=Species.Id
->         and Species.name = '{}'
->     """.format(
->         create_in_clause(list(sample_data.keys())),
->         *mescape(dataset.group.species)))
+```
+curr.execute(
+    """
+        SELECT Strain.Name, Strain.Id FROM Strain, Species
+        WHERE Strain.Name IN {}
+        and Strain.SpeciesId=Species.Id
+        and Species.name = '{}'
+    """.format(
+        create_in_clause(list(sample_data.keys())),
+        *mescape(dataset.group.species)))
+```
 
 In the query above, we interpolate the values of the 'sample_data.keys()' values and that of the 'dataset.group.species' values.
 
 The code above can be rewritten to something like:
 
-> sample_data_keys = tuple(key for key in sample_data.keys())
->
-> curr.execute(
->     """
->         SELECT Strain.Name, Strain.Id FROM Strain, Species
->         WHERE Strain.Name IN ({})
->         and Strain.SpeciesId=Species.Id
->         and Species.name = %s
->     """.format(", ".join(sample_data_keys)),
->     (sample_data_keys + (dataset.group.species,)))
+```
+sample_data_keys = tuple(key for key in sample_data.keys())
+
+curr.execute(
+    """
+        SELECT Strain.Name, Strain.Id FROM Strain, Species
+        WHERE Strain.Name IN ({})
+        and Strain.SpeciesId=Species.Id
+        and Species.name = %s
+    """.format(", ".join(sample_data_keys)),
+    (sample_data_keys + (dataset.group.species,)))
+```
 
 In this new query, the IN clause ends up being a string of the form
 
@@ -45,20 +49,22 @@ There is one more '%s' placeholder for the 'Species.name' value, so, the final t
 
 The functions 'create_in_clause' and 'mescape' are defined as below:
 
-> from MySQLdb import escape_string as escape_
-> 
-> def create_in_clause(items):
->     """Create an in clause for mysql"""
->     in_clause = ', '.join("'{}'".format(x) for x in mescape(*items))
->     in_clause = '( {} )'.format(in_clause)
->     return in_clause
-> 
-> def mescape(*items):
->     """Multiple escape"""
->     return [escape_(str(item)).decode('utf8') for item in items]
->
-> def escape(string_):
->     return escape_(string_).decode('utf8')
+```
+from MySQLdb import escape_string as escape_
+
+def create_in_clause(items):
+    """Create an in clause for mysql"""
+    in_clause = ', '.join("'{}'".format(x) for x in mescape(*items))
+    in_clause = '( {} )'.format(in_clause)
+    return in_clause
+
+def mescape(*items):
+    """Multiple escape"""
+    return [escape_(str(item)).decode('utf8') for item in items]
+
+def escape(string_):
+    return escape_(string_).decode('utf8')
+```
 
 
 ## Parameter Style
@@ -67,16 +73,18 @@ In the section above, we show the most common parameter style used in most cases
 
 If you want to use a mapping object (dict), you have the option of using the '%(<text>)s' format for the query. In that case, we could rewrite the query above into something like:
 
-> sample_data_dict = {f"sample_{idx}: key for idx,key in enumerate(sample_data.keys())}
->
-> curr.execute(
->     """
->         SELECT Strain.Name, Strain.Id FROM Strain, Species
->         WHERE Strain.Name IN ({})
->         and Strain.SpeciesId=Species.Id
->         and Species.name = %(species_name)s
->     """.format(", ".join([f"%({key})s" for key in sample_data_dict.keys()])),
->     {**sample_data_dict, "species_name": dataset.group.species})
+```
+sample_data_dict = {f"sample_{idx}: key for idx,key in enumerate(sample_data.keys())}
+
+curr.execute(
+    """
+        SELECT Strain.Name, Strain.Id FROM Strain, Species
+        WHERE Strain.Name IN ({})
+        and Strain.SpeciesId=Species.Id
+        and Species.name = %(species_name)s
+    """.format(", ".join([f"%({key})s" for key in sample_data_dict.keys()])),
+    {**sample_data_dict, "species_name": dataset.group.species})
+```
 
 ## Final Note
 
-- 
cgit v1.2.3