diff options
author | Pjotr Prins | 2023-03-07 10:24:10 +0100 |
---|---|---|
committer | Pjotr Prins | 2023-03-07 10:24:10 +0100 |
commit | c2d9aa4a000da885e26f601cb641725f2b052d10 (patch) | |
tree | 68dae0a7cdd1924460ff066d35142e0e2135306c /issues | |
parent | 52f430388fbaf38dd1681dcc18bbfd1f90639af2 (diff) | |
download | gn-gemtext-c2d9aa4a000da885e26f601cb641725f2b052d10.tar.gz |
mariadb fixes/speedup
Diffstat (limited to 'issues')
-rw-r--r-- | issues/correlation_speed_issue.gmi | 43 | ||||
-rw-r--r-- | issues/systems/mariadb/move-to-innodb.gmi | 74 |
2 files changed, 87 insertions, 30 deletions
diff --git a/issues/correlation_speed_issue.gmi b/issues/correlation_speed_issue.gmi index 714646a..9b2bff6 100644 --- a/issues/correlation_speed_issue.gmi +++ b/issues/correlation_speed_issue.gmi @@ -15,3 +15,46 @@ Some correlations (it specifically seems to be ones done against ProbeSet databa After looking into this, the cause seemed to be a specific query*, which leads me to think it might be related to the recent DB changes. All the actual calculations only take a couple seconds, but then this query takes like ~80 seconds. * https://github.com/genenetwork/genenetwork2/blob/98602d24c64ffafe2c4af150236b72f77709f8de/wqflask/wqflask/correlation/rust_correlation.py#L23-L55 + +The suspect is the mixing of innodb and myisam tables after a recent migration of tables. ProbeSet is one I haven't converted yet because it has a textual search field. + +## Info + +In the /var/log/mysql/mysql-slow.log slow queries started to appear after converting more tables to innodb. Such as: + +``` +SELECT ProbeSet.Name,ProbeSet.Chr,ProbeSet.Mb, + ProbeSet.Symbol,ProbeSetXRef.mean, + CONCAT_WS('; ', ProbeSet.description, ProbeSet.Probe_Target_Description) AS description, + ProbeSetXRef.additive,ProbeSetXRef.LRS,Geno.Chr, Geno.Mb + FROM ProbeSet INNER JOIN ProbeSetXRef + ON ProbeSet.Id=ProbeSetXRef.ProbeSetId + INNER JOIN Geno + ON ProbeSetXRef.Locus = Geno.Name + INNER JOIN Species + ON Geno.SpeciesId = Species.Id + WHERE ProbeSet.Name in ('10606948', '10434845', '10548978', '10586110', '10429520', '10477061', '10434719', '10467400', '10414360', '10466947', '10569319', '10363000', '10583056', '10478525', '10419038', '10583044', '10540897', '10456392', '10590909', '10347741', '10483381', '10560131', '10607467', '10539472', '10493990', '10454015', '10403352', '10457640', '10468869', '10449940', '10364675', '10461277', '10578427', '10585010', '10497817', '10514763', '10528207', '10430883', '10567355', '10547758', '10557459', '10512145', '10593174', '10483410', '10554240', '10349904', '10419223', '10548375', '10498076', '10463027', '10430892', '10578649', '10604528', '10600210', '10506454', '10570280', '10402353', '10500545', '10363231', '10446596', '10574498', '10458016', '10414262', '10452639', '10404077', '10372421', '10584787', '10578448', '10389231', '10419156', '10545895', '10366707', '10338580', '10598175', '10569890', '10473367', '10459530', '10559509', '10582837', '10453939', '10546929', '10425808', '10477012', '10560045', '10520371', '10577645', '10352439', '10443021', '10570291', '10513529', '10469923', '10584334', '10419154', '10508986', '10469322', '10388718', '10541301', '10500547', '10428018', '10490923', '10585331', '10474129', '10424245', '10567335', '10513818', '10397145', '10388869', '10562500', '10594812', '10540273', '10395277', '10379044', '10558134', '10407072', '10506301', '10441787', '10452030') AND + Species.Name = 'mouse' AND + ProbeSetXRef.ProbeSetFreezeId IN ( + SELECT ProbeSetFreeze.Id + FROM ProbeSetFreeze WHERE ProbeSetFreeze.Name = 'EPFLMouseLiverHFDRMA0818'); +``` + +also, the standard test-gsearch query has started to time out - and that shows up in sheepdog as + +``` +2023-03-07 07:01:42 +0000 FAIL tux01 mariadb-gsearch +``` + +I created a new test script that can be run as + +``` +tux01:/export/backup/scripts/tux01$ time mysql -u webqtlout -p db_webqtl < ../shared/sql/test-regression1.sql +real 0m29.855s +``` + +The good news is that after converting ProbeSet to innodb time changed to + +``` +real 0m0.012s +``` diff --git a/issues/systems/mariadb/move-to-innodb.gmi b/issues/systems/mariadb/move-to-innodb.gmi index 72520c5..d846ade 100644 --- a/issues/systems/mariadb/move-to-innodb.gmi +++ b/issues/systems/mariadb/move-to-innodb.gmi @@ -470,14 +470,11 @@ Or ``` SHOW CREATE TABLE ProbeSet; - -E.g. - - FULLTEXT KEY `SEARCH_GENE_IDX` (`Symbol`,`alias`), - FULLTEXT KEY `SEARCH_FULL_IDX` (`Name`,`description`,`Symbol`,`alias`,`GenbankId`,`UniGeneId`,`Probe_Target_Description`), - FULLTEXT KEY `RefSeq_FULL_IDX` (`RefSeq_TranscriptId`) + FULLTEXT KEY 'SEARCH_GENE_IDX' ('Symbol','alias'), + FULLTEXT KEY 'SEARCH_FULL_IDX' ('Name','description','Symbol','alias','GenbankId','UniGeneId','Probe_Target_Description'), + FULLTEXT KEY 'RefSeq_FULL_IDX' ('RefSeq_TranscriptId') +ENGINE=MyISAM AUTO_INCREMENT=12806592 DEFAULT CHARSET=latin1 ``` - To see all: ``` @@ -501,8 +498,11 @@ REPAIR TABLE pubmedsearch QUICK; Note that GN1 search only appears to use ProbeSet. Reindexing takes about 10 minutes on Tux02. -After updating to ProbeSet to innodb the following query failed +``` +ALTER TABLE ProbeSet ENGINE = InnoDB; +``` +After updating to ProbeSet to innodb the following query failed ``` SELECT distinct ProbeSet.Name as TNAME, 0 as thistable, ProbeSetXRef.Mean as TMEAN, ProbeSetXRef.LRS as TLRS, ProbeSetXRef.PVALUE as TPVALUE, ProbeSet.Chr_num as TCHR_NUM, ProbeSet.Mb as TMB, ProbeSet.Symbol as TSYMBOL, ProbeSet.name_num as TNAME_NUM FROM ProbeSetXRef, ProbeSet WHERE ((MATCH (ProbeSet.Name, ProbeSet.description, ProbeSet.symbol, alias, GenbankId, UniGeneId, Probe_Target_Description) AGAINST ('shh' IN BOOLEAN MODE)) or (MATCH (ProbeSet.symbol) AGAINST ('"Hx" "ShhNC" "9530036O11Rik" "Dsh" "Hhg1" "Hxl3" "M100081"' IN BOOLEAN MODE))) and ProbeSet.Id = ProbeSetXRef.ProbeSetId and ProbeSetXRef.ProbeSetFreezeId = 112 ORDER BY ProbeSet.symbol ASC; @@ -531,10 +531,10 @@ CREATE FULLTEXT INDEX ft_ProbeSet_Symbol ON ProbeSet(Symbol); Now we have ``` -FULLTEXT KEY `SEARCH_GENE_IDX` (`Symbol`,`alias`), -FULLTEXT KEY `SEARCH_FULL_IDX` (`Name`,`description`,`Symbol`,`alias`,`GenbankId`,`UniGeneId`,`Probe_Target_Description`), -FULLTEXT KEY `RefSeq_FULL_IDX` (`RefSeq_TranscriptId`), -FULLTEXT KEY `ft_ProbeSet_Symbol` (`Symbol`) +FULLTEXT KEY 'SEARCH_GENE_IDX' ('Symbol','alias'), +FULLTEXT KEY 'SEARCH_FULL_IDX' ('Name','description','Symbol','alias','GenbankId','UniGeneId','Probe_Target_Description'), +FULLTEXT KEY 'RefSeq_FULL_IDX' ('RefSeq_TranscriptId'), +FULLTEXT KEY 'ft_ProbeSet_Symbol' ('Symbol') ``` and the query works. @@ -597,24 +597,24 @@ select count(*) from ProbeSetFreeze limit 2; | 931 | +----------+ SHOW CREATE TABLE ProbeSetFreeze; -CREATE TABLE `ProbeSetFreeze` ( - `Id` smallint(5) unsigned NOT NULL AUTO_INCREMENT, - `ProbeFreezeId` smallint(5) unsigned NOT NULL DEFAULT 0, - `AvgID` smallint(5) unsigned NOT NULL DEFAULT 0, - `Name` varchar(40) DEFAULT NULL, - `Name2` varchar(100) NOT NULL DEFAULT '', - `FullName` varchar(100) NOT NULL DEFAULT '', - `ShortName` varchar(100) NOT NULL DEFAULT '', - `CreateTime` date NOT NULL DEFAULT '0000-00-00', - `OrderList` int(5) DEFAULT NULL, - `public` tinyint(4) NOT NULL DEFAULT 0, - `confidentiality` tinyint(4) NOT NULL DEFAULT 0, - `AuthorisedUsers` varchar(300) NOT NULL, - `DataScale` varchar(20) NOT NULL DEFAULT 'log2', - PRIMARY KEY (`Id`), - UNIQUE KEY `FullName` (`FullName`), - UNIQUE KEY `Name` (`Name`), - KEY `NameIndex` (`Name2`) +CREATE TABLE 'ProbeSetFreeze' ( + 'Id' smallint(5) unsigned NOT NULL AUTO_INCREMENT, + 'ProbeFreezeId' smallint(5) unsigned NOT NULL DEFAULT 0, + 'AvgID' smallint(5) unsigned NOT NULL DEFAULT 0, + 'Name' varchar(40) DEFAULT NULL, + 'Name2' varchar(100) NOT NULL DEFAULT '', + 'FullName' varchar(100) NOT NULL DEFAULT '', + 'ShortName' varchar(100) NOT NULL DEFAULT '', + 'CreateTime' date NOT NULL DEFAULT '0000-00-00', + 'OrderList' int(5) DEFAULT NULL, + 'public' tinyint(4) NOT NULL DEFAULT 0, + 'confidentiality' tinyint(4) NOT NULL DEFAULT 0, + 'AuthorisedUsers' varchar(300) NOT NULL, + 'DataScale' varchar(20) NOT NULL DEFAULT 'log2', + PRIMARY KEY ('Id'), + UNIQUE KEY 'FullName' ('FullName'), + UNIQUE KEY 'Name' ('Name'), + KEY 'NameIndex' ('Name2') ) ENGINE=MyISAM AUTO_INCREMENT=1054 DEFAULT CHARSET=latin1 ``` @@ -886,12 +886,26 @@ GeneRIF_BASIC pubmedsearch ``` +(probably should not try the following) + ``` REPAIR TABLE ProbeSet QUICK; REPAIR TABLE GeneRIF_BASIC QUICK; REPAIR TABLE pubmedsearch QUICK; ``` +after a repair I had to + +``` +root@tux01:/var/lib/mysql/db_webqtl# myisamchk ProbeSet -r +- recovering (with sort) MyISAM-table 'ProbeSet' +Data records: 0 +- Fixing index 1 +- Fixing index 2 +- Fixing index 3 +etc +``` + Note that GN1 search only appears to use ProbeSet. Reindexing takes about 10 minutes on Tux02. |