diff options
Diffstat (limited to 'issues/gn-uploader')
-rw-r--r-- | issues/gn-uploader/AuthorisationError-gn-uploader.gmi | 66 | ||||
-rw-r--r-- | issues/gn-uploader/check-genotypes-in-database-too.gmi | 22 | ||||
-rw-r--r-- | issues/gn-uploader/gn-uploader-container-running-wrong-gn2.gmi | 2 | ||||
-rw-r--r-- | issues/gn-uploader/link-authentication-authorisation.gmi | 15 | ||||
-rw-r--r-- | issues/gn-uploader/move-uploader-to-tux02.gmi | 48 | ||||
-rw-r--r-- | issues/gn-uploader/provide-page-for-uploaded-data.gmi | 22 | ||||
-rw-r--r-- | issues/gn-uploader/replace-redis-with-sqlite3.gmi | 17 | ||||
-rw-r--r-- | issues/gn-uploader/resume-upload.gmi | 41 | ||||
-rw-r--r-- | issues/gn-uploader/samplelist-details.gmi | 17 | ||||
-rw-r--r-- | issues/gn-uploader/speed-up-rqtl2-qc.gmi | 30 | ||||
-rw-r--r-- | issues/gn-uploader/uploading-samples.gmi | 51 |
11 files changed, 330 insertions, 1 deletions
diff --git a/issues/gn-uploader/AuthorisationError-gn-uploader.gmi b/issues/gn-uploader/AuthorisationError-gn-uploader.gmi new file mode 100644 index 0000000..50a236d --- /dev/null +++ b/issues/gn-uploader/AuthorisationError-gn-uploader.gmi @@ -0,0 +1,66 @@ +# AuthorisationError in gn uploader + +## Tags +* assigned: fredm +* status: open +* priority: critical +* type: error +* key words: authorisation, permission + +## Description + +Trying to create population for Kilifish dataset in the gn-uploader webpage, +then encountered the following error: +```sh +Traceback (most recent call last): + File "/gnu/store/wxb6rqf7125sb6xqd4kng44zf9yzsm5p-profile/lib/python3.10/site-packages/flask/app.py", line 917, in full_dispatch_request + rv = self.dispatch_request() + File "/gnu/store/wxb6rqf7125sb6xqd4kng44zf9yzsm5p-profile/lib/python3.10/site-packages/flask/app.py", line 902, in dispatch_request + return self.ensure_sync(self.view_functions[rule.endpoint])(**view_args) # type: ignore[no-any-return] + File "/gnu/store/wxb6rqf7125sb6xqd4kng44zf9yzsm5p-profile/lib/python3.10/site-packages/uploader/authorisation.py", line 23, in __is_session_valid__ + return session.user_token().either( + File "/gnu/store/wxb6rqf7125sb6xqd4kng44zf9yzsm5p-profile/lib/python3.10/site-packages/pymonad/either.py", line 89, in either + return right_function(self.value) + File "/gnu/store/wxb6rqf7125sb6xqd4kng44zf9yzsm5p-profile/lib/python3.10/site-packages/uploader/authorisation.py", line 25, in <lambda> + lambda token: function(*args, **kwargs)) + File "/gnu/store/wxb6rqf7125sb6xqd4kng44zf9yzsm5p-profile/lib/python3.10/site-packages/uploader/population/views.py", line 185, in create_population + ).either( + File "/gnu/store/wxb6rqf7125sb6xqd4kng44zf9yzsm5p-profile/lib/python3.10/site-packages/pymonad/either.py", line 91, in either + return left_function(self.monoid[0]) + File "/gnu/store/wxb6rqf7125sb6xqd4kng44zf9yzsm5p-profile/lib/python3.10/site-packages/uploader/monadic_requests.py", line 99, 
in __fail__ + raise Exception(_data) +Exception: {'error': 'AuthorisationError', 'error-trace': 'Traceback (most recent call last): + File "/gnu/store/38iayxz7dgm86f2x76kfaa6gwicnnjg4-profile/lib/python3.10/site-packages/flask/app.py", line 917, in full_dispatch_request + rv = self.dispatch_request() + File "/gnu/store/38iayxz7dgm86f2x76kfaa6gwicnnjg4-profile/lib/python3.10/site-packages/flask/app.py", line 902, in dispatch_request + return self.ensure_sync(self.view_functions[rule.endpoint])(**view_args) # type: ignore[no-any-return] + File "/gnu/store/38iayxz7dgm86f2x76kfaa6gwicnnjg4-profile/lib/python3.10/site-packages/authlib/integrations/flask_oauth2/resource_protector.py", line 110, in decorated + return f(*args, **kwargs) + File "/gnu/store/38iayxz7dgm86f2x76kfaa6gwicnnjg4-profile/lib/python3.10/site-packages/gn_auth/auth/authorisation/resources/inbredset/views.py", line 95, in create_population_resource + ).then( + File "/gnu/store/38iayxz7dgm86f2x76kfaa6gwicnnjg4-profile/lib/python3.10/site-packages/pymonad/monad.py", line 152, in then + result = self.map(function) + File "/gnu/store/38iayxz7dgm86f2x76kfaa6gwicnnjg4-profile/lib/python3.10/site-packages/pymonad/either.py", line 106, in map + return self.__class__(function(self.value), (None, True)) + File "/gnu/store/38iayxz7dgm86f2x76kfaa6gwicnnjg4-profile/lib/python3.10/site-packages/gn_auth/auth/authorisation/resources/inbredset/views.py", line 98, in <lambda> + "resource": create_resource( + File "/gnu/store/38iayxz7dgm86f2x76kfaa6gwicnnjg4-profile/lib/python3.10/site-packages/gn_auth/auth/authorisation/resources/inbredset/models.py", line 25, in create_resource + return _create_resource(cursor, + File "/gnu/store/38iayxz7dgm86f2x76kfaa6gwicnnjg4-profile/lib/python3.10/site-packages/gn_auth/auth/authorisation/checks.py", line 56, in __authoriser__ + raise AuthorisationError(error_description) +gn_auth.auth.errors.AuthorisationError: Insufficient privileges to create a resource +', 'error_description': 
'Insufficient privileges to create a resource'} + +``` +The error above resulted from the attempt to upload the following information on the gn-uploader-`create population section` +Input details are as follows: +Full Name: Kilifish F2 Intercross Lines +Name: KF2_Lines +Population code: KF2 +Description: Kilifish second generation population +Family: Crosses, AIL, HS +Mapping Methods: GEMMA, QTLReaper, R/qtl +Genetic type: intercross + +And when the `Create Population` icon was pressed, it led to the error above. + diff --git a/issues/gn-uploader/check-genotypes-in-database-too.gmi b/issues/gn-uploader/check-genotypes-in-database-too.gmi new file mode 100644 index 0000000..4e034b7 --- /dev/null +++ b/issues/gn-uploader/check-genotypes-in-database-too.gmi @@ -0,0 +1,22 @@ +# Check Genotypes in the Database for R/qtl2 Uploads + +## Tags + +* type: bug +* assigned: fredm +* priority: high +* status: closed, completed, fixed +* keywords: gn-uploader, uploader, upload, genotypes, geno + +## Description + +Currently, the uploader expects that a R/qtl2 bundle be self-contained, i.e. it contains all the genotypes and other data that fully describe the data in that bundle. + +This is unnecessary, in a lot of situations, seeing as GeneNetwork might already have the appropriate genotypes in its database. + +This issue tracks the implementation for the check of the genotypes against both the genotypes provided in the bundle, and those already in the database. 
+ +### Updates + +Fixed in +=> https://git.genenetwork.org/gn-uploader/commit/?id=0e74a1589db9f367cdbc3dce232b1b6168e3aca1 this commit diff --git a/issues/gn-uploader/gn-uploader-container-running-wrong-gn2.gmi b/issues/gn-uploader/gn-uploader-container-running-wrong-gn2.gmi index d2c33e8..5a5cdfa 100644 --- a/issues/gn-uploader/gn-uploader-container-running-wrong-gn2.gmi +++ b/issues/gn-uploader/gn-uploader-container-running-wrong-gn2.gmi @@ -3,7 +3,7 @@ ## Tags * assigned: fredm, aruni -* status: open +* status: closed, completed * priority: high * type: bug * keywords: guix, gn-uploader diff --git a/issues/gn-uploader/link-authentication-authorisation.gmi b/issues/gn-uploader/link-authentication-authorisation.gmi new file mode 100644 index 0000000..90b8e5e --- /dev/null +++ b/issues/gn-uploader/link-authentication-authorisation.gmi @@ -0,0 +1,15 @@ +# Link Authentication/Authorisation + +## Tags + +* status: open +* assigned: fredm +* priority: critical +* type: feature request, feature-request +* keywords: gn-uploader, gn-auth, authorisation, authentication, uploader, upload + +## Description + +The last link in the chain for the uploads is the authentication/authorisation. Once the user uploads their data, they need access to it. The auth system, by default, will deny anyone/everyone access to any data that is not linked to a resource and which no user has any roles allowing them access to the data. + +We currently assign such data to the user manually, but that is not a sustainable way of working, especially as the uploader is exposed to more and more users. 
diff --git a/issues/gn-uploader/move-uploader-to-tux02.gmi b/issues/gn-uploader/move-uploader-to-tux02.gmi new file mode 100644 index 0000000..20c5b24 --- /dev/null +++ b/issues/gn-uploader/move-uploader-to-tux02.gmi @@ -0,0 +1,48 @@ +# Move Uploader to tux02 + +## Tags + +* type: migration +* assigned: fredm +* priority: high +* status: closed, completed, fixed +* keywords: gn-uploader, guix, container, deploy + +## Databases + +### MariaDB + +To avoid corrupting the data on CI/CD, we need to run a separate database server. +This implies separate configurations, and separate startup. + +Some of the things to do to enable this, then, are: + +* [x] Provide separate configs and run db server on separate port + - Configs put in /etc/mysql3307 + - Selected port 3307 + - datadir in /var/lib/mysql3307 -> /export5 +* [x] Provide separate data directory for the content + - extract backup +* [x] Maybe suffix the files with the port number, e.g. + ``` + datadir = /var/lib/mysql3307 + socket = /var/run/mysqld/mysqld3307.sock + ︙ + ``` + +### SQLite + +- [ ] Provide separate path for the SQLite database file +- [ ] Run migrations on SQLite database file +- [ ] Create admin user +- [ ] Make existing data public by default + +## Build Script + +- [x] Provide separate host directories that are writeable from the container(s) + +## Systemd + +- [x] Provide unit file for separate MariaDB running on different port + +## … diff --git a/issues/gn-uploader/provide-page-for-uploaded-data.gmi b/issues/gn-uploader/provide-page-for-uploaded-data.gmi new file mode 100644 index 0000000..60b154b --- /dev/null +++ b/issues/gn-uploader/provide-page-for-uploaded-data.gmi @@ -0,0 +1,22 @@ +# Provide Page/Link for/to Uploaded Data + +## Tags + +* status: open +* assigned: fredm +* priority: medium +* type: feature, feature request, feature-request +* keywords: gn-uploader, uploader, data dashboard + +## Description + +Once a user has uploaded their data, provide them with a landing page/dashboard 
for the data they have uploaded, with details on what that data is. + +* Should we provide a means to edit the data here (mostly to add metadata and the like)? +* Maybe the page should actually be shown on GN2? + +## Blockers + +Depends on + +=> /issues/gn-uploader/link-authentication-authorisation diff --git a/issues/gn-uploader/replace-redis-with-sqlite3.gmi b/issues/gn-uploader/replace-redis-with-sqlite3.gmi new file mode 100644 index 0000000..3e5020a --- /dev/null +++ b/issues/gn-uploader/replace-redis-with-sqlite3.gmi @@ -0,0 +1,17 @@ +# Replace Redis with SQL + +## Tags + +* status: open +* priority: low +* assigned: fredm +* type: feature, feature-request, feature request +* keywords: gn-uploader, uploader, redis, sqlite, sqlite3 + +## Description + +We currently (as of 2024-06-27) use Redis for tracking any asynchronous jobs (e.g. QC on uploaded files). + +A lot of what we use redis for, we can do in one of the many SQL databases (we'll probably use SQLite3 anyway), which are more standardised, and easier to migrate data from and to. It has the added advantage that we can open multiple connections to the database, enabling the different processes to update the status and metadata of the same job consistently. + +Changes done here can then be migrated to the other systems, i.e. GN2, GN3, and gn-auth, as necessary. diff --git a/issues/gn-uploader/resume-upload.gmi b/issues/gn-uploader/resume-upload.gmi new file mode 100644 index 0000000..0f9ba30 --- /dev/null +++ b/issues/gn-uploader/resume-upload.gmi @@ -0,0 +1,41 @@ +# gn-uploader: Resume Upload + +## Tags + +* status: closed, completed, fixed +* priority: medium +* assigned: fredm, flisso +* type: feature request, feature-request +* keywords: gn-uploader, uploader, upload, resume upload + +## Description + +If a user is uploading a particularly large file, we might need to provide a way for the user to resume their upload of the file. 
+ +Maybe this can wait until we have +=> /issues/gn-uploader/link-authentication-authorisation linked authentication/authorisation to gn-uploader. +In this way, each upload can be linked to a specific user. + +### TODOs + +* [x] Build UI to allow uploads +* [x] Build back-end to handle uploads +* [x] Handle upload failures/errors +* [x] Deploy to staging + +### Updates + +=> https://git.genenetwork.org/gn-uploader/commit/?id=9a8dddab072748a70d43416ac8e6db69ad6fb0cb +=> https://git.genenetwork.org/gn-uploader/commit/?id=df9da3d5b5e4382976ede1b54eb1aeb04c4c45e5 +=> https://git.genenetwork.org/gn-uploader/commit/?id=47c2ea64682064d7cb609e5459d7bd2e49efa17e +=> https://git.genenetwork.org/gn-uploader/commit/?id=a68fe177ae41f2e58a64b3f8dcf3f825d004eeca + +### Possible Resources + +=> https://javascript.info/resume-upload +=> https://github.com/23/resumable.js/ +=> https://www.dropzone.dev/ +=> https://stackoverflow.com/questions/69339582/what-hash-python-3-hashlib-yields-a-portable-hash-of-file-contents + + +This is mostly fixed. Any arising bugs can be tracked in separate issues. diff --git a/issues/gn-uploader/samplelist-details.gmi b/issues/gn-uploader/samplelist-details.gmi new file mode 100644 index 0000000..2e64d8a --- /dev/null +++ b/issues/gn-uploader/samplelist-details.gmi @@ -0,0 +1,17 @@ +# Explanation of how Sample Lists are handled in GN2 (and may be handled moving forward) + +## Tags + +* status: open +* assigned: fredm, zsloan +* priority: medium +* type: documentation +* keywords: strains, gn-uploader + +## Description + +Regarding the order of samples/strains, it can basically be whatever we decide it is. It just needs to stay consistent (like if there are multiple genotype files). It only really affects how the strains are displayed, and any other genotype files we use for mapping need to share the same order. + +I think this is the case regardless of whether it's strains or individuals (and both the code and files make no distinction). 
Sometimes it just logically makes sense to sort them in a particular way for display purposes (like BXD1, BXD2, etc), but technically everything would still work the same if you swapped those columns across all genotype files. Users would be confused about why BXD2 is before BXD1, but everything would still work and all calculations would give the same results. + +zsloan's proposal for handling sample lists in the future is to just store them in a JSON file in the genotype_files/genotype directory. diff --git a/issues/gn-uploader/speed-up-rqtl2-qc.gmi b/issues/gn-uploader/speed-up-rqtl2-qc.gmi new file mode 100644 index 0000000..43e6d49 --- /dev/null +++ b/issues/gn-uploader/speed-up-rqtl2-qc.gmi @@ -0,0 +1,30 @@ +# Speed Up QC on R/qtl2 Bundles + +## Tags + +## Description + +The default format for the CSV files in a R/qtl2 bundle is: + +``` +matrix of individuals × (markers/phenotypes/covariates/phenotype covariates/etc.) +``` + +(A) (f/F)ile(s) in the R/qtl2 bundle could however +=> https://kbroman.org/qtl2/assets/vignettes/input_files.html#csv-files be transposed, +which means the system needs to "un-transpose" the file(s) before processing. + +Currently, the system does this by reading all the files of a particular type, and then "un-transposing" the entire thing. This leads to a very slow system. + +This issue proposes to do the quality control/assurance processing on each file in isolation, where possible - this will allow parallelisation/multiprocessing of the QC checks. + +The main considerations that need to be handled are as follows: + +* Do QC on (founder) genotype files (when present) before any of the other files +* Genetic and physical maps (if present) can have QC run on them after the genotype files +* Do QC on phenotype files (when present) after genotype files but before any other files +* Covariate and phenotype covariate files come after the phenotype files +* Cross information files … ? +* Sex information files … ? 
+ +We should probably detail the type of QC checks done for each type of file diff --git a/issues/gn-uploader/uploading-samples.gmi b/issues/gn-uploader/uploading-samples.gmi new file mode 100644 index 0000000..11842b9 --- /dev/null +++ b/issues/gn-uploader/uploading-samples.gmi @@ -0,0 +1,51 @@ +# Uploading Samples + +## Tags + +* status: open +* assigned: fredm +* interested: acenteno, zachs, flisso +* priority: high +* type: feature-request +* keywords: gn-uploader, uploader, samples, strains + +## Description + +This will track the various notes regarding the upload of samples onto GeneNetwork. + +### Sample Lists + +From the email thread(s) with @zachs, @flisso and @acenteno + +``` +When there's a new set of individuals, it generally needs to be added as a new group. In the absence of genotype data, a "dummy" .geno file currently needs to be generated* in order to define the sample list (if you look at the list of .geno files in genotype_files/genotype you'll find some really small files that just have either a single marker or a bunch of fake markers calls "Marker1, Marker2, etc" - these are solely just used to get the samplelist from the columns). So in theory such a file could be generated as a part of the upload process in the absence of genotypes +``` + +We note, however, that, as @zachs mentions + +``` +This is really goofy and should probably change. I've brought up the idea of just replacing these with JSON files containing group metadata (including samplelist), but we've never actually gone through with making any change to this. I already did something sorta similar to this with the existing JSON files (in genotype_files/genotype), but those are currently only used in situations where there are either multiple genotype files, or a genotype file only contains a subset of samples/strains from a group (so the JSON file tells mapping to only use those samples/strains). 
+``` + +We need to explore whether such a change might need updates to the GN2/GN3 code to ensure code that depends on these dummy files can also use the new format JSON files too. + +Regarding the order of the samples, from the email thread: + +``` +Regarding the order of samples, it can basically be whatever we decide it is. It just needs to stay consistent (like if there are multiple genotype files). It only really affects how it's displayed, and any other genotype files we use for mapping needs to share the same order. +``` + +The ordering of the samples has no bearing on the analysis of the data, i.e. it does not affect the results of computations. + + +### Curation + +``` +But any time new samples are involved, there probably needs to be some explicit confirmation by a curator like Rob (since we want to avoid a situation where a sample/strain just has a typo or somethin and we treat it like a new sample/strain). +``` + +also + +``` +When there's a mix of existing individuals, I think it's usually the case that it's the same group (that is being expanded with new individuals), but anything that involves adding new samples should probably involve some sort of direct/explicit confirmation from a curator like Rob or something. +``` |