author     BonfaceKilz  2022-02-24 15:38:54 +0300
committer  BonfaceKilz  2022-03-12 15:33:01 +0300
commit     bcc2af521787ac7ca7cbae01c2ed11d367aafe3d (patch)
tree       7d3ad9c29a95ceccc7995d06e6895997550fbd8e
parent     4fc948f38bdd81e8cd2afece3a7cc7cfca051bf0 (diff)
download   genenetwork2-bcc2af521787ac7ca7cbae01c2ed11d367aafe3d.tar.gz
metadata_edits.py: Use `csv_diff` and `remove_insignificant_edits`
* wqflask/wqflask/metadata_edits (update_phenotype): The logic for
  generating the csv diff and removing insignificant values from the edits
  was moved to gn3; use those functions instead of the manual approach.
-rw-r--r--  wqflask/wqflask/metadata_edits.py  197
1 file changed, 93 insertions(+), 104 deletions(-)
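For reference, a minimal sketch of the flow this patch switches to, assuming the gn3 helpers behave as the call sites in the diff suggest (csv_diff presumably wraps the csvdiff invocation the old code ran via run_cmd, and returns the parsed JSON with the "Modifications" key the old code inspected); the CSV content, strain names and the /tmp path below are purely illustrative:

    from gn3.csvcmp import csv_diff
    from gn3.csvcmp import remove_insignificant_edits

    # Illustrative sample data: what is currently stored vs. what the user uploaded.
    base_csv = ("Strain Name,Value,SE,Count\n"
                "BXD1,18.700,x,0\n"
                "BXD2,19.100,x,0")
    delta_csv = ("Strain Name,Value,SE,Count\n"
                 "BXD1,18.7001,x,0\n"
                 "BXD2,21.000,x,0")

    # csv_diff produces the diff dict; remove_insignificant_edits then drops
    # numeric changes smaller than epsilon, matching the manual filtering that
    # the old code did inline.
    diff_data = remove_insignificant_edits(
        diff_data=csv_diff(base_csv=base_csv,
                           delta_csv=delta_csv,
                           tmp_dir="/tmp"),
        epsilon=0.001)

    # Here the BXD1 edit (|18.700 - 18.7001| < 0.001) is discarded and only the
    # BXD2 modification survives; an entirely empty diff_data means the user did
    # not actually modify the downloaded csv file.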
diff --git a/wqflask/wqflask/metadata_edits.py b/wqflask/wqflask/metadata_edits.py
index b93e9ec4..87aa9d06 100644
--- a/wqflask/wqflask/metadata_edits.py
+++ b/wqflask/wqflask/metadata_edits.py
@@ -30,6 +30,8 @@ from gn3.authentication import get_highest_user_access_role
 from gn3.authentication import get_user_membership
 from gn3.commands import run_cmd
 from gn3.csvcmp import create_dirs_if_not_exists
+from gn3.csvcmp import csv_diff
+from gn3.csvcmp import remove_insignificant_edits
 from gn3.db import diff_from_dict
 from gn3.db import fetchall
 from gn3.db import fetchone
@@ -230,106 +232,39 @@ def update_phenotype(dataset_id: str, name: str):
             DIFF_DATADIR := os.path.join(SAMPLE_DATADIR, "diffs"),
             UPLOAD_DATADIR := os.path.join(SAMPLE_DATADIR, "updated")
         ])
+
         current_time = str(datetime.datetime.now().isoformat())
         _file_name = (f"{author}.{request.args.get('resource-id')}."
                       f"{current_time}")
-        new_file_name = (os.path.join(TMPDIR,
-                                      f"sample-data/updated/{_file_name}.csv"))
-        uploaded_file_name = (os.path.join(
-            TMPDIR, "sample-data/updated/",
-            f"{_file_name}.csv.uploaded"))
-        file_.save(new_file_name)
-        with open(uploaded_file_name, "w") as f_:
-            f_.write(get_trait_csv_sample_data(
-                conn=conn,
-                trait_name=str(name),
-                phenotype_id=str(phenotype_id)))
-        r = run_cmd(cmd=("csvdiff "
-                         f"'{uploaded_file_name}' '{new_file_name}' "
-                         "--format json"))
-        json_data = json.loads(r.get("output"))
-
-            # Only consider values where |ε| < 0.001; otherwise, use the
-            # old value in "Original".
-            _modifications = []
-            for m in json_data.get("Modifications"):
-                _original = m.get("Original").split(",")
-                _current = m.get("Current").split(",")
-                for i, (x, y) in enumerate(zip(_original, _current)):
-                    if (x.replace('.', '').isdigit()
-                        and y.replace('.', '').isdigit()
-                        and abs(float(x) - float(y)) < 0.001):
-                        _current[i] = x
-                if not (__o:=",".join(_original)) == (__c:=",".join(_current)):
-                        _modifications.append(
-                            {
-                                "Original": __o,
-                                "Current": __c,
-                            })
-            json_data['Modifications'] = _modifications
-
-            # Edge case where the csv file has not been edited!
-            if not any(json_data.values()):
-                flash(f"You have not modified the csv file you downloaded!",
-                      "warning")
-                return redirect(f"/datasets/{dataset_id}/traits/{name}"
-                                f"?resource-id={request.args.get('resource-id')}")
-            diff_output = (f"{TMPDIR}/sample-data/diffs/"
-                           f"{_file_name}.json")
-            with open(diff_output, "w") as f:
-                dict_ = json_data
-                dict_.update({
-                    "trait_name": str(name),
-                    "phenotype_id": str(phenotype_id),
-                    "author": author,
-                    "timestamp": datetime.datetime.now().strftime(
-                        "%Y-%m-%d %H:%M:%S")
-                })
-                f.write(json.dumps(dict_))
-            flash("Sample-data has been successfully uploaded", "success")
-        # Run updates:
-        phenotype_ = {
-            "pre_pub_description": data_.get("pre-pub-desc"),
-            "post_pub_description": data_.get("post-pub-desc"),
-            "original_description": data_.get("orig-desc"),
-            "units": data_.get("units"),
-            "pre_pub_abbreviation": data_.get("pre-pub-abbrev"),
-            "post_pub_abbreviation": data_.get("post-pub-abbrev"),
-            "lab_code": data_.get("labcode"),
-            "submitter": data_.get("submitter"),
-            "owner": data_.get("owner"),
-            "authorized_users": data_.get("authorized-users"),
-        }
-        updated_phenotypes = update(
-            conn, "Phenotype",
-            data=Phenotype(**phenotype_),
-            where=Phenotype(id_=data_.get("phenotype-id")))
-        diff_data = {}
-        if updated_phenotypes:
-            diff_data.update({"Phenotype": diff_from_dict(old={
-                k: data_.get(f"old_{k}") for k, v in phenotype_.items()
-                if v is not None}, new=phenotype_)})
-        publication_ = {
-            "abstract": data_.get("abstract"),
-            "authors": data_.get("authors"),
-            "title": data_.get("title"),
-            "journal": data_.get("journal"),
-            "volume": data_.get("volume"),
-            "pages": data_.get("pages"),
-            "month": data_.get("month"),
-            "year": data_.get("year")
-        }
-        updated_publications = update(
-            conn, "Publication",
-            data=Publication(**publication_),
-            where=Publication(id_=data_.get("pubmed-id",
-                                            data_.get("old_id_"))))
-        if updated_publications:
-            diff_data.update({"Publication": diff_from_dict(old={
-                k: data_.get(f"old_{k}") for k, v in publication_.items()
-                if v is not None}, new=publication_)})
-        if diff_data:
+        diff_data = remove_insignificant_edits(
+            diff_data=csv_diff(
+                base_csv=(base_csv:=get_trait_csv_sample_data(
+                    conn=conn,
+                    trait_name=str(name),
+                    phenotype_id=str(phenotype_id))),
+                delta_csv=(delta_csv:=file_.read().decode()),
+                tmp_dir=TMPDIR),
+            epsilon=0.001)
+        # Edge case where the csv file has not been edited!
+        if not any(diff_data.values()):
+            flash(f"You have not modified the csv file you downloaded!",
+                  "warning")
+            return redirect(f"/datasets/{dataset_id}/traits/{name}"
+                            f"?resource-id={request.args.get('resource-id')}")
+
+        with open(os.path.join(
+                UPLOAD_DATADIR,
+                f"{_file_name}.csv"), "w") as f_:
+            f_.write(base_csv)
+        with open(os.path.join(
+                UPLOAD_DATADIR,
+                f"{_file_name}.delta.csv"), "w") as f_:
+            f_.write(delta_csv)
+
+        with open(os.path.join(DIFF_DATADIR,
+                               f"{_file_name}.json"), "w") as f:
             diff_data.update({
+                "trait_name": str(name),
                 "phenotype_id": str(phenotype_id),
                 "dataset_id": name,
                 "resource_id": request.args.get('resource-id'),
@@ -339,14 +274,68 @@ def update_phenotype(dataset_id: str, name: str):
                               .now()
                               .strftime("%Y-%m-%d %H:%M:%S")),
             })
-            insert(conn,
-                   table="metadata_audit",
-                   data=MetadataAudit(dataset_id=name,
-                                      editor=author,
-                                      json_data=json.dumps(diff_data)))
-            flash(f"Diff-data: \n{diff_data}\nhas been uploaded", "success")
-        return redirect(f"/datasets/{dataset_id}/traits/{name}"
-                        f"?resource-id={request.args.get('resource-id')}")
+            f.write(json.dumps(diff_data))
+        flash("Sample-data has been successfully uploaded", "success")
+    # Run updates:
+    phenotype_ = {
+        "pre_pub_description": data_.get("pre-pub-desc"),
+        "post_pub_description": data_.get("post-pub-desc"),
+        "original_description": data_.get("orig-desc"),
+        "units": data_.get("units"),
+        "pre_pub_abbreviation": data_.get("pre-pub-abbrev"),
+        "post_pub_abbreviation": data_.get("post-pub-abbrev"),
+        "lab_code": data_.get("labcode"),
+        "submitter": data_.get("submitter"),
+        "owner": data_.get("owner"),
+        "authorized_users": data_.get("authorized-users"),
+    }
+    updated_phenotypes = update(
+        conn, "Phenotype",
+        data=Phenotype(**phenotype_),
+        where=Phenotype(id_=data_.get("phenotype-id")))
+    diff_data = {}
+    if updated_phenotypes:
+        diff_data.update({"Phenotype": diff_from_dict(old={
+            k: data_.get(f"old_{k}") for k, v in phenotype_.items()
+            if v is not None}, new=phenotype_)})
+    publication_ = {
+        "abstract": data_.get("abstract"),
+        "authors": data_.get("authors"),
+        "title": data_.get("title"),
+        "journal": data_.get("journal"),
+        "volume": data_.get("volume"),
+        "pages": data_.get("pages"),
+        "month": data_.get("month"),
+        "year": data_.get("year")
+    }
+    updated_publications = update(
+        conn, "Publication",
+        data=Publication(**publication_),
+        where=Publication(id_=data_.get("pubmed-id",
+                                        data_.get("old_id_"))))
+    if updated_publications:
+        diff_data.update({"Publication": diff_from_dict(old={
+            k: data_.get(f"old_{k}") for k, v in publication_.items()
+            if v is not None}, new=publication_)})
+    if diff_data:
+        diff_data.update({
+            "phenotype_id": str(phenotype_id),
+            "dataset_id": name,
+            "resource_id": request.args.get('resource-id'),
+            "author": author,
+            "timestamp": (datetime
+                          .datetime
+                          .now()
+                          .strftime("%Y-%m-%d %H:%M:%S")),
+        })
+        insert(conn,
+               table="metadata_audit",
+               data=MetadataAudit(dataset_id=name,
+                                  editor=author,
+                                  json_data=json.dumps(diff_data)))
+        flash(f"Diff-data: \n{diff_data}\nhas been uploaded", "success")
+    return redirect(f"/datasets/{dataset_id}/traits/{name}"
+                    f"?resource-id={request.args.get('resource-id')}")
 
 
 @metadata_edit.route("/traits/<name>", methods=("POST",))