diff options
-rw-r--r-- | gn3/computations/partial_correlations.py | 85 | ||||
-rw-r--r-- | gn3/data_helpers.py | 15 | ||||
-rw-r--r-- | gn3/settings.py | 2 | ||||
-rw-r--r-- | tests/unit/computations/partial_correlations_test_data/pcor_rec_blackbox_test.txt | 127 | ||||
-rw-r--r-- | tests/unit/computations/test_partial_correlations.py | 112 | ||||
-rw-r--r-- | tests/unit/test_data_helpers.py | 15 |
6 files changed, 341 insertions, 15 deletions
diff --git a/gn3/computations/partial_correlations.py b/gn3/computations/partial_correlations.py index 07dc16d..07a67be 100644 --- a/gn3/computations/partial_correlations.py +++ b/gn3/computations/partial_correlations.py @@ -5,11 +5,14 @@ It is an attempt to migrate over the partial correlations feature from GeneNetwork1. """ +import math from functools import reduce -from typing import Any, Tuple, Sequence +from typing import Any, Tuple, Union, Sequence from scipy.stats import pearsonr, spearmanr -from gn3.settings import TEXTDIR +import pandas + +from gn3.settings import TEXTDIR, ROUND_TO from gn3.data_helpers import parse_csv_line def control_samples(controls: Sequence[dict], sampleslist: Sequence[str]): @@ -258,10 +261,27 @@ def compute_partial_correlations_fast(# pylint: disable=[R0913, R0914] else fetched_correlations[corr[0]][0:2]) for idx, corr in enumerate(all_correlations)) +def build_data_frame( + xdata: Tuple[float, ...], ydata: Tuple[float, ...], + zdata: Union[ + Tuple[float, ...], + Tuple[Tuple[float, ...], ...]]) -> pandas.DataFrame: + """ + Build a pandas DataFrame object from xdata, ydata and zdata + """ + x_y_df = pandas.DataFrame({"x": xdata, "y": ydata}) + if isinstance(zdata[0], float): + return x_y_df.join(pandas.DataFrame({"z": zdata})) + interm_df = x_y_df.join(pandas.DataFrame( + {"z{}".format(i): val for i, val in enumerate(row)} for row in zdata)) + if interm_df.shape[1] == 3: + return interm_df.rename(columns={"z0": "z"}) + return interm_df + def partial_correlation_matrix( xdata: Tuple[float, ...], ydata: Tuple[float, ...], - zdata: Tuple[float, ...], method: str = "pearsons", - omit_nones: bool = True) -> float: + zdata: Union[Tuple[float, ...], Tuple[Tuple[float, ...], ...]], + method: str = "pearson", omit_nones: bool = True) -> float: """ Computes the partial correlation coefficient using the 'variance-covariance matrix' method @@ -275,8 +295,8 @@ def partial_correlation_matrix( def partial_correlation_recursive( xdata: Tuple[float, ...], ydata: Tuple[float, ...], - zdata: Tuple[float, ...], method: str = "pearsons", - omit_nones: bool = True) -> float: + zdata: Union[Tuple[float, ...], Tuple[Tuple[float, ...], ...]], + method: str = "pearson", omit_nones: bool = True) -> float: """ Computes the partial correlation coefficient using the 'recursive formula' method @@ -286,4 +306,55 @@ def partial_correlation_recursive( GeneNetwork1, specifically the `pcor.rec` function written in the R programming language. """ - return 0 + assert method in ("pearson", "spearman", "kendall") + data = ( + build_data_frame(xdata, ydata, zdata).dropna(axis=0) + if omit_nones else + build_data_frame(xdata, ydata, zdata)) + + if data.shape[1] == 3: # z is a vector, not matrix + fields = { + "rxy": ("x", "y"), + "rxz": ("x", "z"), + "ryz": ("y", "z")} + tdata = { + corr_type: pandas.DataFrame( + {cols[0]: data[cols[0]], + cols[1]: data[cols[1]]}).dropna(axis=0) + for corr_type, cols in fields.items() + } + corrs = { + corr_type: tdata[corr_type][cols[0]].corr( + tdata[corr_type][cols[1]], method=method) + for corr_type, cols in fields.items() + } + return round(( + (corrs["rxy"] - corrs["rxz"] * corrs["ryz"]) / + (math.sqrt(1 - corrs["rxz"]**2) * + math.sqrt(1 - corrs["ryz"]**2))), ROUND_TO) + + remaining_cols = [ + colname for colname, series in data.items() + if colname not in ("x", "y", "z0") + ] + + new_xdata = tuple(data["x"]) + new_ydata = tuple(data["y"]) + zc = tuple( + tuple(row_series[1]) + for row_series in data[remaining_cols].iterrows()) + + rxy_zc = partial_correlation_recursive( + new_xdata, new_ydata, zc, method=method, + omit_nones=omit_nones) + rxz0_zc = partial_correlation_recursive( + new_xdata, tuple(data["z0"]), zc, method=method, + omit_nones=omit_nones) + ryz0_zc = partial_correlation_recursive( + new_ydata, tuple(data["z0"]), zc, method=method, + omit_nones=omit_nones) + + return round( + ((rxy_zc - rxz0_zc * ryz0_zc) /( + math.sqrt(1 - rxz0_zc**2) * math.sqrt(1 - ryz0_zc**2))), + ROUND_TO) diff --git a/gn3/data_helpers.py b/gn3/data_helpers.py index d3f942b..b72fbc5 100644 --- a/gn3/data_helpers.py +++ b/gn3/data_helpers.py @@ -24,6 +24,21 @@ def partition_all(num: int, items: Sequence[Any]) -> Tuple[Tuple[Any, ...], ...] in reduce( __compute_start_stop__, iterations, tuple())]) +def partition_by(partition_fn, items): + """ + Given a sequence `items`, return a tuple of tuples, each of which contain + the values in `items` partitioned such that the first item in each internal + tuple, when passed to `partition_function` returns True. + + This is an approximation of Clojure's `partition-by` function. + """ + def __partitioner__(accumulator, item): + if partition_fn(item): + return accumulator + ((item,),) + return accumulator[:-1] + (accumulator[-1] + (item,),) + + return reduce(__partitioner__, items, tuple()) + def parse_csv_line( line: str, delimiter: str = ",", quoting: Optional[str] = '"') -> Tuple[str, ...]: diff --git a/gn3/settings.py b/gn3/settings.py index 57c63df..eaf8f23 100644 --- a/gn3/settings.py +++ b/gn3/settings.py @@ -53,3 +53,5 @@ CORS_HEADERS = [ GNSHARE = os.environ.get("GNSHARE", "/gnshare/gn/") TEXTDIR = f"{GNSHARE}/web/ProbeSetFreeze_DataMatrix" + +ROUND_TO = 10 diff --git a/tests/unit/computations/partial_correlations_test_data/pcor_rec_blackbox_test.txt b/tests/unit/computations/partial_correlations_test_data/pcor_rec_blackbox_test.txt new file mode 100644 index 0000000..4b9f68a --- /dev/null +++ b/tests/unit/computations/partial_correlations_test_data/pcor_rec_blackbox_test.txt @@ -0,0 +1,127 @@ +------------------------------------------------------------ +function_name: pcor.rec +count: 18 +x: 39.43254314363, 71.7503248248249, -25.9237891994417, -71.4318438898772, 5.86411766707897, 20.5873795319349, -55.2685840055346, -8.46728710457683, -92.6864624489099, -93.9637628849596, 63.8542262371629, -57.1248637512326, -57.3486962355673, -15.5142558738589, -81.5054637845606, -52.3766598664224, 64.1838387586176, 59.4040210358799 +y: 1.37278931215405, 17.6336284726858, -81.062437640503, -71.6937384102494, 3.27541613951325, -61.2097083125263, -89.2936512362212, 58.8778289500624, -45.9098653402179, 1.99255612678826, -1.74913080409169, -13.9142457395792, 4.65260092169046, 12.5304204411805, 76.9336444325745, -28.4501533955336, -88.7515506241471, 48.6711137462407 +z: -32.2106098290533, 68.8010563142598, 71.8754313886166, 55.6917862966657, -81.8915933836251, 54.5552121941, -28.4821664914489, 84.0448261238635, -73.2388113159686, 57.5857586227357, 35.7404014561325, 80.0396463833749, 5.09524783119559, -91.1972944624722, -32.7593921218067, 73.9581868518144, 64.0648300759494, 45.4317939002067 +method: s +rm: FALSE +result: 0.0933987255899925 +------------------------------------------------------------ +function_name: pcor.rec +count: 16 +x: -54.4209572486579, -45.8115526009351, 33.5523693822324, 1.22122392058372, 3.21844927966595, -60.7970183715224, -83.0410700291395, 50.8907133247703, -22.4564190022647, -34.4268564134836, 63.6943627148867, 49.2209523916245, -17.8922688122839, -36.6935533937067, 52.1462481003255, -64.0409931540489 +y: -68.8048375304788, -35.6251731514931, -89.7739315405488, 43.3957693167031, -75.1052091363817, 95.1878732535988, 26.5241817105561, -3.53294685482979, -39.4754795823246, -24.8853640630841, -38.7773368973285, -21.1217492353171, 91.2531037349254, -65.4770888388157, -31.2665332108736, 93.3961927425116 +method: k +rm: FALSE +result: -0.197321176233531 +input.z.1: -73.6856204457581, -72.1338029019535, -8.49423431791365, 96.4111450593919, 69.9088138528168, -79.6609855722636, 53.6274126265198, 90.9375778865069, -3.22364578023553, 29.0517455432564, -14.7842853330076, 13.1408505141735, -81.6047978587449, -56.5155825112015, 84.1243132483214, 73.0853107757866 +input.z.2: -19.4999775383621, 47.0977590885013, -2.12466698139906, -37.686708336696, 65.1993685867637, -28.32176303491, -7.70075893960893, -90.5746295116842, -36.2292127218097, -9.05124093405902, -11.9998707436025, 49.7369820252061, 58.8602422736585, 30.8995170984417, 96.1009593680501, 66.8684390839189 +input.z.3: -46.8380375299603, -52.8264682274312, -97.0825299620628, 41.9147129170597, 23.6995006911457, -82.9784338828176, -77.9325462412089, -7.79568557627499, 29.9582084640861, 9.06890500336885, -40.2380415704101, 21.167817665264, -86.7719978559762, 39.0303079970181, 63.6500052642077, 97.9667663108557 +input.z.4: 86.9204596150666, -25.8097658865154, -1.28133394755423, 61.1777768004686, 15.653800778091, 11.6941955406219, -93.9451756421477, 78.9077403023839, -46.1673066485673, 96.1867772508413, 26.0938669089228, 60.3195236530155, -32.911190437153, -34.5624244306237, -74.3599003646523, 14.8807656019926 +input.z.5: 98.9267870783806, 36.2660525832325, -68.6460936442018, 86.7504182737321, -88.3716467302293, -29.2315890546888, 44.0522667020559, 21.4357518590987, 76.6611814498901, -42.3452171962708, 76.589828170836, 98.7857358530164, 35.6640399899334, 50.846758717671, -59.3500204384327, -0.00509298406541348 +input.z.6: -57.6999995857477, -82.2626783046871, 46.3180988095701, -24.4982566218823, 34.0653191320598, -77.0602269563824, -17.6961224991828, 80.3584903944284, 81.4667600672692, -61.6284600459039, -66.4861890021712, 6.89575211144984, -41.4117193315178, 30.7175636291504, 28.5187983885407, -13.7681076768786 +input.z.7: -41.7436920106411, 10.0865331478417, -2.38809078000486, -30.7737228926271, -81.4597308635712, 87.8097877837718, -16.6090088896453, 69.4210984744132, -68.126902077347, 41.2628519814461, 66.3250613957644, -4.5269432477653, -41.7136794887483, -18.0790721438825, 83.7037453427911, -53.3618018496782 +input.z.8: 59.7387420479208, -23.4883698634803, 37.5736472196877, -78.1687255483121, -77.1213553380221, -96.9213201664388, -30.2102203480899, -34.6717041451484, -88.6850557755679, 57.2000164538622, -30.3203931543976, -49.4191915728152, 77.6859395205975, -99.9760767444968, 30.1298272795975, -18.8501961529255 +------------------------------------------------------------ +function_name: pcor.rec +count: 12 +x: -67.8204851225019, -67.8738070186228, -50.9799968916923, -56.6631073132157, -97.7544205728918, 97.7393067907542, -96.9981655012816, -77.3418115451932, 56.8798351567239, 20.4237798694521, -64.5462201908231, 98.7875565420836 +y: 42.6409022882581, -91.7236181441694, -17.9849655367434, 37.4724933411926, -41.5671722963452, 77.3905807174742, 56.6901307553053, 6.92465561442077, 56.1498492956161, -28.8950334768742, 47.1627758815885, 16.8313813861459 +z: 25.840808916837, -51.1974649038166, -93.5578513890505, 70.4190298449248, -34.2187709640712, -31.2343313824385, 14.4085955806077, 79.3285765685141, 17.1590864192694, -27.2897508461028, -72.3316723946482, 52.0868620369583 +method: s +rm: TRUE +result: 0.286086890994449 +------------------------------------------------------------ +function_name: pcor.rec +count: 10 +x: -29.0385090280324, -3.96912447176874, -47.6545965764672, 21.2344757281244, -75.0501185189933, -65.4063744004816, 56.9108343217522, 94.6856036316603, -61.1419711727649, -21.2616088334471 +y: -87.6347944606096, -25.0790369231254, 38.3246153593063, 36.459594219923, 15.7592192292213, -23.3778812456876, 70.4604929778725, -84.8090231884271, 34.0032887645066, -88.9591474086046 +z: 48.4383501578122, -8.53351750411093, -31.6805249545723, 69.0797030925751, -19.9694957118481, 78.5090744029731, 23.8751287106425, -88.1592587567866, -14.9336404167116, -23.2747808098793 +method: p +rm: FALSE +result: -0.067405367029641 +------------------------------------------------------------ +function_name: pcor.rec +count: 19 +x: -73.2341736555099, -22.6851521991193, 73.2446681708097, -15.1385780889541, 32.3975902982056, -28.8684471510351, 0.588919408619404, 11.535478848964, 25.0071287155151, 12.3059609439224, -75.7648941129446, 98.2133234385401, -97.0771756023169, 27.9895041603595, 52.5587898679078, 7.36836437135935, -56.7817625124007, 69.9889135081321, 87.2643577400595 +y: 8.29132283106446, -76.8334563821554, 91.4037824142724, -51.617310475558, 86.0677725635469, 72.0950326882303, -7.40185175091028, -95.9184793289751, -59.8128827754408, -86.7349065840244, 98.207059269771, 27.2509966976941, -87.9419495817274, 21.4868438895792, -22.3735796753317, -74.0092439111322, -9.10649765282869, 59.1995907947421, 53.423271374777 +method: s +rm: FALSE +result: 0.814251360213814 +input.z.1: -86.7573534138501, -78.3108811359853, -90.7141804695129, 20.8892675116658, -91.2731793243438, 12.4221083242446, -48.4068923164159, 73.3695236034691, 50.0449348706752, -35.9727113973349, 26.3153513427824, -82.5342622119933, -96.8505010008812, -34.9071862176061, 27.9058467131108, 42.9564985912293, -45.2088835183531, 62.9711722023785, 74.5247499551624 +input.z.2: -79.6970147173852, 67.8865933325142, -92.2662578523159, 25.5234225187451, 50.5748279858381, 86.7062113247812, 75.1391254365444, -43.7841295730323, 66.8446023017168, 55.0580637063831, -65.2654378209263, 23.6318178009242, 93.2108672801405, -68.0372172966599, 79.0532948914915, -94.364460837096, -16.6068505961448, -61.8612750899047, 96.0427426267415 +input.z.3: 40.9217507578433, 83.3804386667907, -62.5007103662938, -7.85297965630889, -76.5989664942026, -22.2986672539264, -45.4036694485694, -21.4061794336885, 69.5465714670718, -25.5713499616832, 36.4734465721995, 36.9375838432461, 32.6723072677851, 70.6644219346344, 49.876768887043, -74.5363045018166, -33.0244093667716, 10.4065840598196, -1.73172312788665 +input.z.4: -10.7907691039145, -17.38386224024, 43.5880040284246, 16.086917091161, -16.9488569255918, -14.011207735166, 45.4824588261545, 60.7187359593809, -5.2096379455179, 85.9155170619488, 72.9566072579473, -9.66432858258486, -6.13841973245144, -79.1190144140273, 91.5309552568942, 98.9748722407967, -50.1585202291608, 91.2043817806989, 66.6650855448097 +input.z.5: -96.3273600675166, -69.5332479197532, -82.4410750996321, -97.9709683917463, 39.1531039960682, -43.9597781281918, -59.7700778860599, 43.593360343948, 20.8425134420395, 80.6880735326558, -29.0332242380828, 69.8170928284526, -87.8374603576958, 58.8284231256694, 16.4075888693333, 25.2064376138151, -14.7603216115385, -76.6239028889686, -6.44092066213489 +input.z.6: 9.45433415472507, -15.5354761518538, -12.1865042485297, -37.5650288071483, -64.8326196707785, -85.5531277600676, 71.388221764937, 69.2387981340289, -35.1823683828115, -53.9007833693177, -58.2558114547282, 99.7826437931508, -65.5693722888827, 17.514361301437, -5.62593601644039, -67.5470025744289, 34.4595825299621, -7.97500484623015, -8.5614780895412 +input.z.7: -7.37124560400844, -63.2605917751789, -42.9963525850326, 93.0190080311149, -44.6478553116322, -17.311972938478, -4.46714116260409, -27.6207268238068, -45.3168144449592, 53.9484368637204, 68.1760023348033, -75.7576355710626, -17.4947005696595, -63.5428718291223, -88.4344877209514, -80.6289754342288, -73.8872450776398, 0.910355197265744, 52.926555974409 +input.z.8: 19.0888123121113, 40.6167997512966, -46.38990778476, -70.8545834757388, -36.2336438614875, -40.9333631396294, -56.2934595625848, 73.1570843607187, -60.2570091374218, -5.13700363226235, -34.0326094534248, 23.2543344609439, 20.66769907251, 53.0113300774246, -72.2416108474135, -4.46344027295709, 21.0111503489316, -35.0955804809928, 91.8279080186039 +input.z.9: 28.0409298837185, 56.3033602200449, 59.1395729221404, -34.2144967522472, 94.9237512424588, 45.0175272766501, -53.4253435675055, -4.01845825836062, -57.4233544059098, -68.6469163745642, 71.3221471291035, -24.5776301715523, -38.2390833459795, 41.3620623759925, -71.5135616715997, -88.0712532903999, 70.1913930010051, -28.7702376022935, -63.8502344489098 +input.z.10: -21.9328049104661, 17.6567534916103, 14.6377096418291, 84.4626953359693, 22.6560408715159, -17.2372563742101, -22.671253234148, 86.8056834675372, -39.7697510197759, -62.9925069399178, 30.6044021155685, 82.5929053127766, 25.9118379559368, -49.5282203890383, 82.6023631729186, 80.466779274866, 66.4673592895269, 75.8482501842082, 27.1312859840691 +input.z.11: 31.2323299236596, 79.6605288051069, 91.6790826246142, 55.398157518357, 3.95502164028585, -37.2159007005394, -8.49848613142967, 48.5803676769137, -23.2938712928444, -26.5643768478185, -79.5425849035382, -7.04444167204201, -23.03784978576, -99.916556943208, 32.1567460894585, -45.6349844578654, -47.5025039631873, -21.7059692367911, -4.12281164899468 +input.z.12: 66.0603046417236, 82.6507363468409, 7.85638866946101, -94.7555633727461, 88.32175033167, -78.9715636055917, 89.9744458962232, 84.1503497678787, 48.0024141259491, 49.9335964210331, 41.1366487853229, 28.8708603475243, 97.2897793632001, -2.92221619747579, 43.6754964292049, -80.5523125454783, -6.66788783855736, -39.199779368937, 5.29502532444894 +input.z.13: 92.9124710150063, -66.4420946501195, 97.3967826925218, -5.97171736881137, 96.4472725521773, 85.0369708612561, -76.7937204800546, 30.6782084517181, -36.8599112611264, 33.9056268334389, 44.4440166000277, -4.0401806589216, -61.0703659243882, 75.8796263951808, -38.2074842229486, -46.0064077749848, -13.9765918254852, -8.38187239132822, -74.8116604983807 +input.z.14: -85.5263015255332, -5.4057159461081, -19.4283518008888, 86.0516140703112, -36.5200987551361, 30.0780310761184, -29.0378153789788, -78.5522460471839, 97.8414277546108, -5.13621103018522, 93.8663412816823, 2.06638658419251, 43.4202517382801, -91.4164004381746, 71.6018950566649, 28.6870362237096, -35.0583405699581, -93.1831272318959, 58.4984787739813 +input.z.15: -94.9607445392758, -82.2086645755917, -16.0494496114552, 27.903659036383, -7.8359093517065, -55.5805193260312, 41.416268190369, 50.9919991251081, -94.1964072175324, -7.3187300004065, -83.2794434390962, -49.7139678802341, 74.39328096807, -55.0389611162245, 97.8410188574344, 20.435424009338, 19.3925447762012, 89.979507541284, 25.4206706769764 +------------------------------------------------------------ +function_name: pcor.rec +count: 15 +x: 19.1210152581334, -90.7481685746461, 94.6057046763599, -61.290800338611, -37.1947812382132, 54.4369328767061, 55.7387878187001, 17.9015715140849, 31.0679465532303, 63.1330410018563, 52.1574283950031, 65.4397922568023, 91.4413943421096, 99.5237410068512, -74.9028561171144 +y: -81.3996239565313, -13.6488320305943, -5.80292693339288, -67.2151657752693, -36.4809868391603, 99.4002115912735, 1.03691508993506, 43.7332965899259, 0.0288900453597307, 85.2870703209192, -66.8704317882657, -25.5248280707747, -81.3251432962716, -23.7031722441316, -93.2608364615589 +z: 12.8577758558095, -22.8082065470517, -48.3178524300456, -83.7297586258501, 42.5842776428908, 32.5472959317267, 55.6511440314353, 3.36264544166625, 12.7249016426504, 9.32536264881492, -36.669804668054, 46.4725097641349, 0.145619735121727, -43.9962794072926, -13.7548735830933 +method: p +rm: FALSE +result: 0.252991642308749 +------------------------------------------------------------ +function_name: pcor.rec +count: 16 +x: -2.87382756359875, -30.5810853838921, -85.2656959090382, -57.8431877773255, 41.8040546122938, -4.51440489850938, -77.4139664601535, -78.0852025840431, 76.9753176253289, -39.8538885638118, 61.5029000211507, 25.2118390519172, -91.6211612056941, -13.2199198473245, -29.4821108691394, -16.201650025323 +y: 28.135217865929, -35.8999697491527, 65.1573278941214, 77.1730968728662, -62.5566221773624, 72.1754947211593, 65.3400977142155, 56.2912649475038, -4.19798144139349, 42.1437229495496, 40.1672286447138, 32.6568186283112, -14.3435334321111, -90.3321508318186, 86.114533431828, 12.6757733058184 +method: k +rm: FALSE +result: -0.253803690496807 +input.z.1: -4.89483792334795, 73.1407755985856, 98.5393163748085, 73.0779736768454, -28.1159524340183, 54.082156997174, 86.8587783072144, -79.4726002961397, -73.4560188837349, 27.0900287199765, -86.8956328835338, 96.6792982537299, 35.2100874297321, -60.1671026088297, -72.4173011258245, 4.11061025224626 +input.z.2: -34.0709479991347, 76.5315871220082, 43.3915213681757, 43.4308610390872, 76.321689831093, 46.0346157196909, -0.73040034621954, -41.1897071637213, 47.0153306610882, 30.9888483956456, 80.7444188278168, -52.3551567457616, -14.2577795311809, -72.9259013198316, 75.1937944442034, 88.6314859613776 +input.z.3: 30.184931633994, 25.1940117683262, 43.8190793152899, 44.0422522835433, -87.6267918385565, -33.915796270594, 78.6654303781688, -24.5026275049895, -8.16088672727346, 70.0028923340142, 41.1979887168854, 82.3699820786715, 6.11956720240414, 80.0185721367598, -91.9803010765463, -24.4723544921726 +input.z.4: 5.89864449575543, 83.7068512570113, 15.6911447178572, -2.80351233668625, -65.6391194555908, 14.5310245454311, 68.1848946027458, -16.2310080137104, 83.4109206683934, 99.3864068761468, 78.5966154187918, 92.34071765095, -6.43716314807534, -2.70818201825023, 1.35397682897747, 79.0387979708612 +input.z.5: -19.5283096283674, 33.9582368265837, -74.1643586196005, -33.1158923916519, -78.7567632738501, -95.0876677874476, -33.7155544664711, -76.497556315735, -45.8616196643561, 41.3556576240808, 28.7840550299734, -33.4265344310552, 61.1726825591177, 59.0956036001444, -79.8228676896542, -57.7531451825052 +input.z.6: 53.0482107773423, -14.0646053478122, 52.7205745689571, 41.7561090085655, 92.3762793187052, 76.5466017182916, -69.4168658461422, 23.8598483614624, 19.6098514366895, 73.5599991399795, 87.6351232174784, -6.20425450615585, 32.7709008008242, -1.08212679624557, 6.18844078853726, -63.7467356864363 +input.z.7: -48.3841958455741, -50.1150219701231, -15.5646015889943, -79.2570824734867, -19.6070665959269, -49.5304220821708, 53.2172558829188, 25.2054167445749, 18.3721250854433, -68.8817646354437, 18.6557998415083, 57.3686588555574, 28.6675109528005, -43.0730409454554, -4.50756121426821, -70.1776389963925 +input.z.8: -10.7248624786735, 76.593514252454, 28.891241364181, -0.018490431830287, -28.4583521541208, -33.1800576299429, -1.8756405916065, -75.8051007986069, 49.2299810517579, -1.69429946690798, -25.1867124810815, 69.2886833101511, -99.62286981754, 89.9874444585294, 30.4199578706175, -97.7524971589446 +------------------------------------------------------------ +function_name: pcor.rec +count: 12 +x: 15.6129707582295, -54.5765130314976, -62.1533300727606, -83.7492469232529, -75.3303173929453, -2.68199793063104, 60.322671988979, -22.6404232904315, 90.9686226863414, 75.999130262062, -84.101656684652, 97.3987545352429 +y: -40.038321306929, -38.4067192208022, -54.87005636096, -32.2858245577663, 86.0376148484647, -90.7305406406522, -19.5302437990904, 43.8807010184973, -33.8322110939771, 12.6573502551764, 17.2774605453014, -27.0399304572493 +z: 54.1844798717648, 76.2015995569527, -19.2299467977136, -96.2915348820388, -98.1550739146769, 4.9422649666667, 2.88800583221018, -21.8325999099761, -20.0546327978373, 50.5448115523905, -42.4167810473591, 44.39243148081 +method: p +rm: FALSE +result: 0.0258928698851513 +------------------------------------------------------------ +function_name: pcor.rec +count: 17 +x: -96.0152605548501, -2.73632658645511, 20.4153073020279, 27.1719563752413, 8.59699491411448, -14.5893115084618, -33.9465518947691, 39.2609817907214, -99.0190614480525, 62.5098573975265, 35.9342403709888, -30.6088973302394, 44.7166417259723, -93.9578357152641, 32.6512531843036, -70.8288522902876, 73.4201280400157 +y: -46.9669208861887, -36.2372543197125, 4.4616396073252, 38.7583258096129, 44.8999984189868, -31.204593880102, 35.038438020274, 95.6365440040827, -45.6151684280485, -56.2252663075924, -21.9110350590199, -38.5250215418637, 10.3221551980823, 1.9145913887769, -47.8505118750036, -54.9174412619323, 23.1537543702871 +method: k +rm: TRUE +result: 0.227414201271952 +input.z.1: -91.8689756188542, -18.8268605154008, 59.9834852851927, 39.8389992304146, -80.8505142573267, -18.3535169810057, -2.34811864793301, 24.1830998566002, -22.6586456876248, 68.8272529747337, -13.866110611707, 43.5314364265651, -67.6513452082872, -97.7143038064241, 32.9879198689014, -24.4230728130788, 31.1623202171177 +input.z.2: 39.7535849362612, 26.8228638451546, -5.83359580487013, 75.0341654755175, 75.1568455714732, -19.646413391456, -38.4626689366996, -52.5916866026819, 92.6454027183354, -6.59558856859803, 15.2207531500608, -59.9319984205067, 67.6257754210383, 62.5032463110983, -13.8798008672893, -74.1318658925593, 57.3057317174971 +input.z.3: -50.2737354487181, 67.5364006776363, -24.2570764385164, 48.7902171444148, 11.5434463135898, 67.6300485618412, 52.8123706579208, -10.3952446486801, -55.1109313499182, 2.81554046086967, 3.11438394710422, 24.7719271574169, 12.2765214182436, -18.9550118986517, -43.5645492281765, 76.3299115467817, 72.8231159504503 +input.z.4: 79.1319472249597, -64.6508545149118, -25.6458255462348, -67.6172162871808, -1.9231891259551, -87.36885599792, -51.9834969658405, 81.4505566377193, 90.3312657494098, -83.9177244342864, -56.4620361663401, -66.6789827868342, 87.5367146451026, 18.9934492576867, -81.9155515171587, 71.6108248569071, -58.1476961728185 +input.z.5: 96.9413920771331, 47.8810616768897, -0.438442826271057, 3.20979873649776, 20.6230421550572, 89.096216019243, -82.9677095636725, 91.9871860649437, 74.0136182866991, 38.2246119901538, -10.429418226704, 89.3207651097327, -13.5560743510723, 4.01417766697705, -34.4030613079667, 95.2031007036567, -43.0174002423882 +input.z.6: 80.8942574542016, -39.6212201565504, 14.2572369426489, -78.1328707933426, -96.1618753615767, 61.9455046951771, -42.8125618491322, 45.2630438376218, -26.2891847174615, -37.7531661652029, 71.7649383936077, -76.2287222314626, 86.0407523345202, 83.4333196282387, 11.858429107815, 90.0938509497792, 27.1023083478212 +input.z.7: -68.578722840175, -72.2220734227449, 83.0359090585262, 44.5527241565287, 19.0210536122322, 19.7943818289787, 42.1044544316828, -10.6518527492881, 85.2387520018965, -89.5295495167375, -14.2881830222905, 15.0138720404357, 73.6910053994507, 95.9490245208144, 29.4783721212298, -32.2806132957339, 98.4265907201916 +input.z.8: -63.3846850600094, -75.9456290863454, 98.109992640093, -8.30751904286444, -65.3464913833886, 57.9990320838988, -97.9716447182, 48.3666017185897, -11.2887284718454, 38.4293566923589, -4.15021106600761, -6.2844387255609, -20.7505342550576, -56.9003260694444, 96.397353708744, -90.6195733696222, -40.7526909839362 +input.z.9: 81.5076497849077, 13.3879841770977, -66.9347247574478, -4.78587108664215, 30.7318323291838, -55.0735270604491, 9.40470518544316, 66.8840528000146, -31.3702027779073, 69.8431192431599, -86.8119182996452, 37.1748886071146, -39.0971344895661, -6.33791824802756, -4.74258312024176, -37.5578073319048, 16.0260049626231 +------------------------------------------------------------ +function_name: pcor.rec +count: 15 +x: 13.3217854890972, 3.74524784274399, 42.3860765993595, 70.1574267819524, -36.6772301495075, 43.096883688122, 11.656632553786, 49.6097341645509, 35.055069392547, -34.170615952462, -12.3891159426421, 95.595167670399, 27.9552156571299, 60.3556469082832, -64.5174876786768 +y: 93.0693128611892, 94.2700367886573, -59.7466928884387, 75.3644049633294, 38.714852463454, -25.7440742570907, -26.9153070170432, -39.7653248626739, -2.73380884900689, -51.2801296077669, 55.7282803580165, 9.4015384092927, -56.5411465242505, -14.9695897940546, -58.2891712896526 +z: 48.0578206945211, -49.0533413831145, -62.5287163536996, 77.8333532158285, 70.9992234129459, -80.4808055050671, 63.4501236490905, 1.48282377049327, -52.8766040224582, -60.6905648484826, 75.8240396622568, -26.8130346667022, -66.6958931367844, -14.8776702582836, 57.7621683478355 +method: p +rm: FALSE +result: 0.223987436768198 +------------------------------------------------------------ diff --git a/tests/unit/computations/test_partial_correlations.py b/tests/unit/computations/test_partial_correlations.py index 83cb9d9..7cf8cd8 100644 --- a/tests/unit/computations/test_partial_correlations.py +++ b/tests/unit/computations/test_partial_correlations.py @@ -1,10 +1,18 @@ """Module contains tests for gn3.partial_correlations""" import csv -from unittest import TestCase, skip +from unittest import TestCase + +import pandas + +from gn3.settings import ROUND_TO +from gn3.function_helpers import compose +from gn3.data_helpers import partition_by + from gn3.computations.partial_correlations import ( fix_samples, control_samples, + build_data_frame, dictify_by_samples, tissue_correlation, find_identical_traits, @@ -112,9 +120,82 @@ def parse_test_data_csv(filename): "z": __str__to_tuple(line, "z"), "method": methods[line["method"]], "rm": line["rm"] == "TRUE", - "result": float(line["result"]) + "result": round(float(line["result"]), ROUND_TO) } for line in lines) +def parse_method(key_value): + """Parse the partial correlation method""" + key, value = key_value + if key == "method": + methods_dict = {"p": "pearson", "k": "kendall", "s": "spearman"} + return (key, methods_dict[value]) + return key_value + +def parse_count(key_value): + """Parse the value of count into an integer""" + key, value = key_value + if key == "count": + return (key, int(value)) + return key_value + +def parse_xyz(key_value): + """Parse the values of x, y, and z* items into sequences of floats""" + key, value = key_value + if (key in ("x", "y", "z")) or key.startswith("input.z"): + return ( + key.replace("input", "").replace(".", ""), + tuple(float(val.strip("\n\t ")) for val in value.split(","))) + return key_value + +def parse_rm(key_value): + """Parse the rm value into a python True/False value.""" + key, value = key_value + if key == "rm": + return (key, value == "TRUE") + return key_value + +def parse_result(key_value): + """Parse the result into a float value.""" + key, value = key_value + if key == "result": + return (key, float(value)) + return key_value + +parse_for_rec = compose( + parse_result, + parse_rm, + parse_xyz, + parse_count, + parse_method, + lambda k_v: tuple(item.strip("\n\t ") for item in k_v), + lambda s: s.split(":")) + +def parse_input_line(line, parser_function): + return tuple( + parser_function(item) for item in line if not item.startswith("------")) + +def merge_z(item): + without_z = { + key: val for key, val in item.items() if not key.startswith("z")} + return { + **without_z, + "z": item.get( + "z", + tuple(val for key, val in item.items() if key.startswith("z")))} + +def parse_input(lines, parser_function): + return tuple( + merge_z(dict(item)) + for item in (parse_input_line(line, parser_function) for line in lines) + if len(item) != 0) + +def parse_test_data(filename): + with open("pcor_rec_blackbox_attempt.txt", newline="\n") as fl: + input_lines = partition_by( + lambda s: s.startswith("------"), + (line.strip("\n\t ") for line in fl.readlines())) + + return parse_input(input_lines, parse_for_rec) class TestPartialCorrelations(TestCase): """Class for testing partial correlations computation functions""" @@ -298,7 +379,25 @@ class TestPartialCorrelations(TestCase): ("a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l")), (0, 4, 8, 10)) - @skip + def test_build_data_frame(self): + """ + Check that the function builds the correct data frame. + """ + for xdata, ydata, zdata, expected in ( + ((0.1, 1.1, 2.1), (2.1, 3.1, 4.1), (5.1, 6.1 ,7.1), + pandas.DataFrame({ + "x": (0.1, 1.1, 2.1), "y": (2.1, 3.1, 4.1), + "z": (5.1, 6.1 ,7.1)})), + ((0.1, 1.1, 2.1), (2.1, 3.1, 4.1), + ((5.1, 6.1 ,7.1), (5.2, 6.2, 7.2), (5.3, 6.3, 7.3)), + pandas.DataFrame({ + "x": (0.1, 1.1, 2.1), "y": (2.1, 3.1, 4.1), + "z0": (5.1, 5.2 ,5.3), "z1": (6.1, 6.2 ,6.3), + "z2": (7.1, 7.2 ,7.3)}))): + with self.subTest(xdata=xdata, ydata=ydata, zdata=zdata): + self.assertTrue( + build_data_frame(xdata, ydata, zdata).equals(expected)) + def test_partial_correlation_matrix(self): """ Test that `partial_correlation_matrix` computes the appropriate @@ -316,15 +415,14 @@ class TestPartialCorrelations(TestCase): method=sample["method"], omit_nones=sample["rm"]), sample["result"]) - @skip def test_partial_correlation_recursive(self): """ Test that `partial_correlation_recursive` computes the appropriate correlation value. """ - for sample in parse_test_data_csv( + for sample in parse_test_data( ("tests/unit/computations/partial_correlations_test_data/" - "pcor_rec_blackbox_test.csv")): + "pcor_rec_blackbox_test.txt")): with self.subTest( xdata=sample["x"], ydata=sample["y"], zdata=sample["z"], method=sample["method"], omit_nones=sample["rm"]): @@ -332,4 +430,4 @@ class TestPartialCorrelations(TestCase): partial_correlation_recursive( sample["x"], sample["y"], sample["z"], method=sample["method"], omit_nones=sample["rm"]), - sample["result"]) + round(sample["result"], ROUND_TO)) diff --git a/tests/unit/test_data_helpers.py b/tests/unit/test_data_helpers.py index 39aea45..3f76344 100644 --- a/tests/unit/test_data_helpers.py +++ b/tests/unit/test_data_helpers.py @@ -4,7 +4,7 @@ Test functions in gn3.data_helpers from unittest import TestCase -from gn3.data_helpers import partition_all, parse_csv_line +from gn3.data_helpers import partition_by, partition_all, parse_csv_line class TestDataHelpers(TestCase): """ @@ -59,3 +59,16 @@ class TestDataHelpers(TestCase): parse_csv_line( line=line, delimiter=delimiter, quoting=quoting), expected) + + def test_partition_by(self): + for part_fn, items, expected in ( + (lambda s: s.startswith("----"), + ("------", "a", "b", "-----", "c", "----", "d", "e", "---", + "f"), + (("------", "a", "b"), ("-----", "c"), + ("----", "d", "e", "---", "f"))), + (lambda x: (x % 2) == 0, + (0, 1, 3, 2, 4, 5, 7, 6, 9, 1), + ((0, 1, 3), (2,), (4, 5, 7), (6, 9, 1))),): + with self.subTest(items=items): + self.assertEqual(partition_by(part_fn, items), expected) |