Diffstat (limited to 'src/logistic.cpp')
-rw-r--r--   src/logistic.cpp   783
1 file changed, 783 insertions, 0 deletions
diff --git a/src/logistic.cpp b/src/logistic.cpp
new file mode 100644
index 0000000..1b47946
--- /dev/null
+++ b/src/logistic.cpp
@@ -0,0 +1,783 @@
+#include <stdio.h>
+#include <math.h>
+#include <gsl/gsl_matrix.h>
+#include <gsl/gsl_rng.h>
+#include <gsl/gsl_multimin.h>
+#include <gsl/gsl_sf.h>
+#include <gsl/gsl_linalg.h>
+#include "logistic.h"
+
+// Bundle together all the data that is passed to the function being optimized.
+typedef struct{
+ gsl_matrix_int *X;
+ gsl_vector_int *nlev;
+ gsl_vector *y;
+ gsl_matrix *Xc; // continuous covariates Matrix Nobs x Kc (NULL if not used)
+ double lambdaL1;
+ double lambdaL2;
+}fix_parm_mixed_T;
+
+
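+// fLogit_mixed returns the penalized negative log-likelihood
+//   -Sum_i [ y_i*eta_i - log(1 + exp(eta_i)) ] + lambdaL1*Sum_{j>=1}|beta_j| + (lambdaL2/2)*Sum_{j>=1}beta_j^2
+// where eta_i = beta_0 + the dummy-coded categorical terms + the continuous terms.
+// Each categorical column k of X contributes nlev[k]-1 coefficients; a value of 0
+// denotes the reference level and adds nothing to eta_i.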
+double fLogit_mixed(gsl_vector *beta
+ ,gsl_matrix_int *X
+ ,gsl_vector_int *nlev
+ ,gsl_matrix *Xc
+ ,gsl_vector *y
+ ,double lambdaL1
+ ,double lambdaL2)
+{
+ int n = y->size;
+ // int k = X->size2;
+ int npar = beta->size;
+ double total = 0;
+ double aux = 0;
+ /* omp_set_num_threads(ompthr); */
+ /* /\* Changed loop start at 1 instead of 0 to avoid regularization of beta_0*\/ */
+ /* /\*#pragma omp parallel for reduction (+:total)*\/ */
+ for(int i = 1; i < npar; ++i)
+ total += beta->data[i]*beta->data[i];
+ total = (-total*lambdaL2/2);
+ /* /\*#pragma omp parallel for reduction (+:aux)*\/ */
+ for(int i = 1; i < npar; ++i)
+ aux += (beta->data[i]>0 ? beta->data[i] : -beta->data[i]);
+ total = total-aux*lambdaL1;
+ /* #pragma omp parallel for schedule(static) shared(n,beta,X,nlev,y) reduction (+:total) */
+ for(int i = 0; i < n; ++i) {
+ double Xbetai=beta->data[0];
+ int iParm=1;
+ for(int k = 0; k < X->size2; ++k) {
+ if(gsl_matrix_int_get(X,i,k)>0)
+ Xbetai+=beta->data[gsl_matrix_int_get(X,i,k)-1+iParm];
+ iParm+=nlev->data[k]-1;
+ }
+ for(int k = 0; k < (Xc->size2); ++k)
+ Xbetai+= gsl_matrix_get(Xc,i,k)*beta->data[iParm++];
+ total += y->data[i]*Xbetai-gsl_sf_log_1plusx(gsl_sf_exp(Xbetai));
+ }
+ return -total;
+}
+
+
+void logistic_mixed_pred(gsl_vector *beta // Vector of parameters, length = 1 + Sum_k(C_k - 1) + Kc
+ ,gsl_matrix_int *X //Matrix Nobs x K
+ ,gsl_vector_int *nlev // Vector with number categories
+ ,gsl_matrix *Xc // continuous covariates Matrix Nobs x Kc (NULL if not used)
+ ,gsl_vector *yhat //Vector of prob. predicted by the logistic
+ )
+{
+ for(int i = 0; i < X->size1; ++i) {
+ double Xbetai=beta->data[0];
+ int iParm=1;
+ for(int k = 0; k < X->size2; ++k) {
+ if(gsl_matrix_int_get(X,i,k)>0)
+ Xbetai+=beta->data[gsl_matrix_int_get(X,i,k)-1+iParm];
+ iParm+=nlev->data[k]-1;
+ }
+ // Adding the continuous
+ for(int k = 0; k < (Xc->size2); ++k)
+ Xbetai+= gsl_matrix_get(Xc,i,k)*beta->data[iParm++];
+ yhat->data[i]=1/(1 + gsl_sf_exp(-Xbetai));
+ }
+}
+
+
+/* The gradient of f with respect to beta. */
+void
+wgsl_mixed_optim_df (const gsl_vector *beta, void *params,
+ gsl_vector *out)
+{
+ fix_parm_mixed_T *p = (fix_parm_mixed_T *)params;
+ int n = p->y->size;
+ int K = p->X->size2;
+ int Kc = p->Xc->size2;
+ int npar = beta->size;
+  // Initialize the gradient to zero (is this necessary?)
+ for(int i = 0; i < npar; ++i)
+ out->data[i]= 0;
+ /* Changed loop start at 1 instead of 0 to avoid regularization of beta 0 */
+ for(int i = 1; i < npar; ++i)
+ out->data[i]= p->lambdaL2*beta->data[i];
+ for(int i = 1; i < npar; ++i)
+ out->data[i]+= p->lambdaL1*((beta->data[i]>0)-(beta->data[i]<0));
+
+ for(int i = 0; i < n; ++i) {
+ double pn=0;
+ double Xbetai=beta->data[0];
+ int iParm=1;
+ for(int k = 0; k < K; ++k) {
+ if(gsl_matrix_int_get(p->X,i,k)>0)
+ Xbetai+=beta->data[gsl_matrix_int_get(p->X,i,k)-1+iParm];
+ iParm+=p->nlev->data[k]-1;
+ }
+ // Adding the continuous
+ for(int k = 0; k < Kc; ++k)
+ Xbetai+= gsl_matrix_get(p->Xc,i,k)*beta->data[iParm++];
+
+ pn= -( p->y->data[i] - 1/(1 + gsl_sf_exp(-Xbetai)) );
+
+ out->data[0]+= pn;
+ iParm=1;
+ for(int k = 0; k < K; ++k) {
+ if(gsl_matrix_int_get(p->X,i,k)>0)
+ out->data[gsl_matrix_int_get(p->X,i,k)-1+iParm]+=pn;
+ iParm+=p->nlev->data[k]-1;
+ }
+ // Adding the continuous
+ for(int k = 0; k < Kc; ++k) {
+ out->data[iParm++] += gsl_matrix_get(p->Xc,i,k)*pn;
+ }
+ }
+
+}
+
+
+/* The Hessian of f */
+void
+wgsl_mixed_optim_hessian (const gsl_vector *beta, void *params,
+ gsl_matrix *out)
+{
+ fix_parm_mixed_T *p = (fix_parm_mixed_T *)params;
+ int n = p->y->size;
+ int K = p->X->size2;
+ int Kc = p->Xc->size2;
+ int npar = beta->size;
+ gsl_vector *gn = gsl_vector_alloc(npar); /* gn */
+  // Initialize the Hessian to zero (is this necessary?)
+ gsl_matrix_set_zero(out);
+ /* Changed loop start at 1 instead of 0 to avoid regularization of beta 0*/
+ for(int i = 1; i < npar; ++i)
+ gsl_matrix_set(out,i,i,(p->lambdaL2)); // Double check this
+  // The L1 penalty is not handled properly yet, as it is not differentiable; coordinate descent (as in glmnet) may be needed.
+
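+  // Accumulate H += p_i*(1-p_i) * x_i * x_i^T, where x_i (stored in gn) is the
+  // expanded design row for observation i: intercept, category dummies, continuous covariates.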
+ for(int i = 0; i < n; ++i) {
+ double pn=0;
+ double aux=0;
+ double Xbetai=beta->data[0];
+ int iParm1=1;
+ for(int k = 0; k < K; ++k) {
+ if(gsl_matrix_int_get(p->X,i,k)>0)
+ Xbetai+=beta->data[gsl_matrix_int_get(p->X,i,k)-1+iParm1];
+ iParm1+=p->nlev->data[k]-1; //-1?
+ }
+ // Adding the continuous
+ for(int k = 0; k < Kc; ++k)
+ Xbetai+= gsl_matrix_get(p->Xc,i,k)*beta->data[iParm1++];
+
+ pn= 1/(1 + gsl_sf_exp(-Xbetai));
+ // Add a protection for pn very close to 0 or 1?
+ aux=pn*(1-pn);
+
+    // Build gn, the expanded design row for observation i (the gradient of eta_i w.r.t. beta)
+ gsl_vector_set_zero(gn);
+ gn->data[0]= 1;
+ iParm1=1;
+ for(int k = 0; k < K; ++k) {
+ if(gsl_matrix_int_get(p->X,i,k)>0)
+ gn->data[gsl_matrix_int_get(p->X,i,k)-1+iParm1]=1;
+ iParm1+=p->nlev->data[k]-1;
+ }
+ // Adding the continuous
+ for(int k = 0; k < Kc; ++k) {
+ gn->data[iParm1++] = gsl_matrix_get(p->Xc,i,k);
+ }
+
+ for(int k1=0;k1<npar; ++k1)
+ if(gn->data[k1]!=0)
+ for(int k2=0;k2<npar; ++k2)
+ if(gn->data[k2]!=0)
+ *gsl_matrix_ptr(out,k1,k2) += (aux * gn->data[k1] * gn->data[k2]);
+ }
+ gsl_vector_free(gn);
+}
+
+
+double wgsl_mixed_optim_f(gsl_vector *v, void *params)
+{
+ double mLogLik=0;
+ fix_parm_mixed_T *p = (fix_parm_mixed_T *)params;
+ mLogLik = fLogit_mixed(v,p->X,p->nlev,p->Xc,p->y,p->lambdaL1,p->lambdaL2);
+ return mLogLik;
+}
+
+
+/* Compute both f and df together. */
+void
+wgsl_mixed_optim_fdf (gsl_vector *x, void *params,
+ double *f, gsl_vector *df)
+{
+ *f = wgsl_mixed_optim_f(x, params);
+ wgsl_mixed_optim_df(x, params, df);
+}
+
+
+int logistic_mixed_fit(gsl_vector *beta
+ ,gsl_matrix_int *X
+ ,gsl_vector_int *nlev
+ ,gsl_matrix *Xc // continuous covariates Matrix Nobs x Kc (NULL if not used)
+ ,gsl_vector *y
+ ,double lambdaL1
+ ,double lambdaL2)
+{
+
+ double mLogLik=0;
+ fix_parm_mixed_T p;
+ int npar = beta->size;
+ int iter=0;
+ double maxchange=0;
+
+  // Initializing fixed parameters
+ p.X=X;
+ p.Xc=Xc;
+ p.nlev=nlev;
+ p.y=y;
+ p.lambdaL1=lambdaL1;
+ p.lambdaL2=lambdaL2;
+
+ //Initial fit
+ //#ifdef _RPR_DEBUG_
+ mLogLik = wgsl_mixed_optim_f(beta,&p);
+ //fprintf(stderr,"#Initial -log(Lik(0))=%lf\n",mLogLik);
+ //#endif //_RPR_DEBUG
+
+ gsl_matrix *myH = gsl_matrix_alloc(npar,npar); /* Hessian matrix*/
+ gsl_vector *stBeta = gsl_vector_alloc(npar); /* Direction to move */
+
+ gsl_vector *myG = gsl_vector_alloc(npar); /* Gradient*/
+ gsl_vector *tau = gsl_vector_alloc(npar); /* tau for QR*/
+
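+  // Newton-Raphson: solve H*step = gradient via a QR decomposition and update
+  // beta <- beta - step, stopping when the largest component of the step falls
+  // below 1e-4 or after 100 iterations.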
+ for(iter=0;iter<100;iter++){
+ wgsl_mixed_optim_hessian(beta,&p,myH); //Calculate Hessian
+ wgsl_mixed_optim_df(beta,&p,myG); //Calculate Gradient
+ gsl_linalg_QR_decomp(myH,tau); //Calculate next beta
+ gsl_linalg_QR_solve(myH,tau,myG,stBeta);
+ gsl_vector_sub(beta,stBeta);
+
+ //Monitor convergence
+ maxchange=0;
+ for(int i=0;i<npar; i++)
+ if(maxchange<fabs(stBeta->data[i]))
+ maxchange=fabs(stBeta->data[i]);
+
+#ifdef _RPR_DEBUG_
+ //mLogLik = wgsl_mixed_optim_f(beta,&p);
+ //fprintf(stderr,"#iter %d, -log(Lik(0))=%lf,%lf\n",(int)iter,mLogLik,maxchange);
+#endif //_RPR_DEBUG
+
+ if(maxchange<1E-4)
+ break;
+ }
+
+#ifdef _RPR_DEBUG_
+ //for (int i = 0; i < npar; i++)
+ // fprintf(stderr,"#par_%d= %lf\n",i,beta->data[i]);
+#endif //_RPR_DEBUG
+
+ //Final fit
+ mLogLik = wgsl_mixed_optim_f(beta,&p);
+ //fprintf(stderr,"#Final %d) -log(Lik(0))=%lf, maxchange %lf\n",iter,mLogLik,maxchange);
+
+ gsl_vector_free (tau);
+ gsl_vector_free (stBeta);
+ gsl_vector_free (myG);
+ gsl_matrix_free (myH);
+
+ return 0;
+}
+
+/***************/
+/* Categorical */
+/***************/
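+
+// The categorical-only functions below mirror the *_mixed_* versions above,
+// minus the continuous-covariate matrix Xc.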
+
+// Bundle together all the data that is passed to the function being optimized.
+typedef struct{
+ gsl_matrix_int *X;
+ gsl_vector_int *nlev;
+ gsl_vector *y;
+ double lambdaL1;
+ double lambdaL2;
+}fix_parm_cat_T;
+
+
+double fLogit_cat(gsl_vector *beta
+ ,gsl_matrix_int *X
+ ,gsl_vector_int *nlev
+ ,gsl_vector *y
+ ,double lambdaL1
+ ,double lambdaL2)
+{
+ int n = y->size;
+ // int k = X->size2;
+ int npar = beta->size;
+ double total = 0;
+ double aux = 0;
+ /* omp_set_num_threads(ompthr); */
+ /* /\* Changed loop start at 1 instead of 0 to avoid regularization of beta 0*\/ */
+ /* /\*#pragma omp parallel for reduction (+:total)*\/ */
+ for(int i = 1; i < npar; ++i)
+ total += beta->data[i]*beta->data[i];
+ total = (-total*lambdaL2/2);
+ /* /\*#pragma omp parallel for reduction (+:aux)*\/ */
+ for(int i = 1; i < npar; ++i)
+ aux += (beta->data[i]>0 ? beta->data[i] : -beta->data[i]);
+ total = total-aux*lambdaL1;
+ /* #pragma omp parallel for schedule(static) shared(n,beta,X,nlev,y) reduction (+:total) */
+ for(int i = 0; i < n; ++i) {
+ double Xbetai=beta->data[0];
+ int iParm=1;
+ for(int k = 0; k < X->size2; ++k) {
+ if(gsl_matrix_int_get(X,i,k)>0)
+ Xbetai+=beta->data[gsl_matrix_int_get(X,i,k)-1+iParm];
+ iParm+=nlev->data[k]-1;
+ }
+ total += y->data[i]*Xbetai-gsl_sf_log_1plusx(gsl_sf_exp(Xbetai));
+ }
+ return -total;
+}
+
+
+void logistic_cat_pred(gsl_vector *beta // Vector of parameters length = 1 + Sum_k(C_k - 1)
+ ,gsl_matrix_int *X //Matrix Nobs x K
+ ,gsl_vector_int *nlev // Vector with number categories
+ ,gsl_vector *yhat //Vector of prob. predicted by the logistic
+ )
+{
+ for(int i = 0; i < X->size1; ++i) {
+ double Xbetai=beta->data[0];
+ int iParm=1;
+ for(int k = 0; k < X->size2; ++k) {
+ if(gsl_matrix_int_get(X,i,k)>0)
+ Xbetai+=beta->data[gsl_matrix_int_get(X,i,k)-1+iParm];
+ iParm+=nlev->data[k]-1;
+ }
+ yhat->data[i]=1/(1 + gsl_sf_exp(-Xbetai));
+ }
+}
+
+
+/* The gradient of f with respect to beta. */
+void
+wgsl_cat_optim_df (const gsl_vector *beta, void *params,
+ gsl_vector *out)
+{
+ fix_parm_cat_T *p = (fix_parm_cat_T *)params;
+ int n = p->y->size;
+ int K = p->X->size2;
+ int npar = beta->size;
+  // Initialize the gradient to zero (is this necessary?)
+ for(int i = 0; i < npar; ++i)
+ out->data[i]= 0;
+ /* Changed loop start at 1 instead of 0 to avoid regularization of beta 0 */
+ for(int i = 1; i < npar; ++i)
+ out->data[i]= p->lambdaL2*beta->data[i];
+ for(int i = 1; i < npar; ++i)
+ out->data[i]+= p->lambdaL1*((beta->data[i]>0)-(beta->data[i]<0));
+
+ for(int i = 0; i < n; ++i) {
+ double pn=0;
+ double Xbetai=beta->data[0];
+ int iParm=1;
+ for(int k = 0; k < K; ++k) {
+ if(gsl_matrix_int_get(p->X,i,k)>0)
+ Xbetai+=beta->data[gsl_matrix_int_get(p->X,i,k)-1+iParm];
+ iParm+=p->nlev->data[k]-1;
+ }
+ // total += y->data[i]*Xbetai-log(1+gsl_sf_exp(Xbetai));
+ pn= -( p->y->data[i] - 1/(1 + gsl_sf_exp(-Xbetai)) );
+
+ out->data[0]+= pn;
+ iParm=1;
+ for(int k = 0; k < K; ++k) {
+ if(gsl_matrix_int_get(p->X,i,k)>0)
+ out->data[gsl_matrix_int_get(p->X,i,k)-1+iParm]+=pn;
+ iParm+=p->nlev->data[k]-1;
+ }
+ }
+}
+
+
+/* The Hessian of f */
+void
+wgsl_cat_optim_hessian (const gsl_vector *beta, void *params,
+ gsl_matrix *out)
+{
+ fix_parm_cat_T *p = (fix_parm_cat_T *)params;
+ int n = p->y->size;
+ int K = p->X->size2;
+ int npar = beta->size;
+  // Initialize the Hessian to zero (is this necessary?)
+ gsl_matrix_set_zero(out);
+ /* Changed loop start at 1 instead of 0 to avoid regularization of beta 0*/
+ for(int i = 1; i < npar; ++i)
+ gsl_matrix_set(out,i,i,(p->lambdaL2)); // Double check this
+  // The L1 penalty is not handled properly yet, as it is not differentiable; coordinate descent (as in glmnet) may be needed.
+
+
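+  // Same accumulation as the mixed case, H += p_i*(1-p_i) * x_i * x_i^T, but written
+  // out explicitly over intercept/dummy index pairs instead of via a gn vector.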
+ for(int i = 0; i < n; ++i) {
+ double pn=0;
+ double aux=0;
+ double Xbetai=beta->data[0];
+ int iParm2=1;
+ int iParm1=1;
+ for(int k = 0; k < K; ++k) {
+ if(gsl_matrix_int_get(p->X,i,k)>0)
+ Xbetai+=beta->data[gsl_matrix_int_get(p->X,i,k)-1+iParm1];
+ iParm1+=p->nlev->data[k]-1; //-1?
+ }
+ // total += y->data[i]*Xbetai-log(1+gsl_sf_exp(Xbetai));
+ pn= 1/(1 + gsl_sf_exp(-Xbetai));
+ // Add a protection for pn very close to 0 or 1?
+ aux=pn*(1-pn);
+ *gsl_matrix_ptr(out,0,0)+=aux;
+ iParm2=1;
+ for(int k2 = 0; k2 < K; ++k2) {
+ if(gsl_matrix_int_get(p->X,i,k2)>0)
+ *gsl_matrix_ptr(out,0,gsl_matrix_int_get(p->X,i,k2)-1+iParm2)+=aux;
+ iParm2+=p->nlev->data[k2]-1; //-1?
+ }
+ iParm1=1;
+ for(int k1 = 0; k1 < K; ++k1) {
+ if(gsl_matrix_int_get(p->X,i,k1)>0)
+ *gsl_matrix_ptr(out,gsl_matrix_int_get(p->X,i,k1)-1+iParm1,0)+=aux;
+ iParm2=1;
+ for(int k2 = 0; k2 < K; ++k2) {
+ if((gsl_matrix_int_get(p->X,i,k1)>0) && (gsl_matrix_int_get(p->X,i,k2)>0))
+ *gsl_matrix_ptr(out
+ ,gsl_matrix_int_get(p->X,i,k1)-1+iParm1
+ ,gsl_matrix_int_get(p->X,i,k2)-1+iParm2
+ )+=aux;
+ iParm2+=p->nlev->data[k2]-1; //-1?
+ }
+ iParm1+=p->nlev->data[k1]-1; //-1?
+ }
+ }
+}
+
+
+double wgsl_cat_optim_f(gsl_vector *v, void *params)
+{
+ double mLogLik=0;
+ fix_parm_cat_T *p = (fix_parm_cat_T *)params;
+ mLogLik = fLogit_cat(v,p->X,p->nlev,p->y,p->lambdaL1,p->lambdaL2);
+ return mLogLik;
+}
+
+
+/* Compute both f and df together. */
+void
+wgsl_cat_optim_fdf (gsl_vector *x, void *params,
+ double *f, gsl_vector *df)
+{
+ *f = wgsl_cat_optim_f(x, params);
+ wgsl_cat_optim_df(x, params, df);
+}
+
+
+int logistic_cat_fit(gsl_vector *beta
+ ,gsl_matrix_int *X
+ ,gsl_vector_int *nlev
+ ,gsl_vector *y
+ ,double lambdaL1
+ ,double lambdaL2)
+{
+ double mLogLik=0;
+ fix_parm_cat_T p;
+ int npar = beta->size;
+ int iter=0;
+ double maxchange=0;
+
+  // Initializing fixed parameters
+ p.X=X;
+ p.nlev=nlev;
+ p.y=y;
+ p.lambdaL1=lambdaL1;
+ p.lambdaL2=lambdaL2;
+
+ //Initial fit
+ //#ifdef _RPR_DEBUG_
+ mLogLik = wgsl_cat_optim_f(beta,&p);
+ //fprintf(stderr,"#Initial -log(Lik(0))=%lf\n",mLogLik);
+ //#endif //_RPR_DEBUG
+
+ gsl_matrix *myH = gsl_matrix_alloc(npar,npar); /* Hessian matrix*/
+ gsl_vector *stBeta = gsl_vector_alloc(npar); /* Direction to move */
+
+ gsl_vector *myG = gsl_vector_alloc(npar); /* Gradient*/
+ gsl_vector *tau = gsl_vector_alloc(npar); /* tau for QR*/
+
+ for(iter=0;iter<100;iter++){
+ wgsl_cat_optim_hessian(beta,&p,myH); //Calculate Hessian
+ wgsl_cat_optim_df(beta,&p,myG); //Calculate Gradient
+ gsl_linalg_QR_decomp(myH,tau); //Calculate next beta
+ gsl_linalg_QR_solve(myH,tau,myG,stBeta);
+ gsl_vector_sub(beta,stBeta);
+
+ //Monitor convergence
+ maxchange=0;
+ for(int i=0;i<npar; i++)
+ if(maxchange<fabs(stBeta->data[i]))
+ maxchange=fabs(stBeta->data[i]);
+
+#ifdef _RPR_DEBUG_
+ mLogLik = wgsl_cat_optim_f(beta,&p);
+ //fprintf(stderr,"#iter %d, -log(Lik(0))=%lf,%lf\n",(int)iter,mLogLik,maxchange);
+#endif //_RPR_DEBUG
+
+ if(maxchange<1E-4)
+ break;
+ }
+
+#ifdef _RPR_DEBUG_
+ //for (int i = 0; i < npar; i++)
+ // fprintf(stderr,"#par_%d= %lf\n",i,beta->data[i]);
+#endif //_RPR_DEBUG
+
+ //Final fit
+ mLogLik = wgsl_cat_optim_f(beta,&p);
+ //fprintf(stderr,"#Final %d) -log(Lik(0))=%lf, maxchange %lf\n",iter,mLogLik,maxchange);
+
+ gsl_vector_free (tau);
+ gsl_vector_free (stBeta);
+ gsl_vector_free (myG);
+ gsl_matrix_free (myH);
+
+ return 0;
+}
+
+
+/***************/
+/* Continuous */
+/***************/
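+
+// The continuous-only functions below mirror the *_mixed_* versions above,
+// with the categorical terms dropped.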
+
+// Bundle together all the data that is passed to the function being optimized.
+typedef struct{
+ gsl_matrix *Xc; // continuous covariates Matrix Nobs x Kc
+ gsl_vector *y;
+ double lambdaL1;
+ double lambdaL2;
+}fix_parm_cont_T;
+
+
+double fLogit_cont(gsl_vector *beta
+ ,gsl_matrix *Xc
+ ,gsl_vector *y
+ ,double lambdaL1
+ ,double lambdaL2)
+{
+ int n = y->size;
+ int npar = beta->size;
+ double total = 0;
+ double aux = 0;
+ /* omp_set_num_threads(ompthr); */
+ /* /\* Changed loop start at 1 instead of 0 to avoid regularization of beta_0*\/ */
+ /* /\*#pragma omp parallel for reduction (+:total)*\/ */
+ for(int i = 1; i < npar; ++i)
+ total += beta->data[i]*beta->data[i];
+ total = (-total*lambdaL2/2);
+ /* /\*#pragma omp parallel for reduction (+:aux)*\/ */
+ for(int i = 1; i < npar; ++i)
+ aux += (beta->data[i]>0 ? beta->data[i] : -beta->data[i]);
+ total = total-aux*lambdaL1;
+ /* #pragma omp parallel for schedule(static) shared(n,beta,X,nlev,y) reduction (+:total) */
+ for(int i = 0; i < n; ++i) {
+ double Xbetai=beta->data[0];
+ int iParm=1;
+ for(int k = 0; k < (Xc->size2); ++k)
+ Xbetai+= gsl_matrix_get(Xc,i,k)*beta->data[iParm++];
+ total += y->data[i]*Xbetai-gsl_sf_log_1plusx(gsl_sf_exp(Xbetai));
+ }
+ return -total;
+}
+
+
+void logistic_cont_pred(gsl_vector *beta // Vector of parameters, length = 1 + Kc
+ ,gsl_matrix *Xc // continuous covariates Matrix Nobs x Kc (NULL if not used)
+ ,gsl_vector *yhat //Vector of prob. predicted by the logistic
+ )
+{
+ for(int i = 0; i < Xc->size1; ++i) {
+ double Xbetai=beta->data[0];
+ int iParm=1;
+ for(int k = 0; k < (Xc->size2); ++k)
+ Xbetai+= gsl_matrix_get(Xc,i,k)*beta->data[iParm++];
+ yhat->data[i]=1/(1 + gsl_sf_exp(-Xbetai));
+ }
+}
+
+
+/* The gradient of f with respect to beta. */
+void
+wgsl_cont_optim_df (const gsl_vector *beta, void *params,
+ gsl_vector *out)
+{
+ fix_parm_cont_T *p = (fix_parm_cont_T *)params;
+ int n = p->y->size;
+ int Kc = p->Xc->size2;
+ int npar = beta->size;
+  // Initialize the gradient to zero (is this necessary?)
+ for(int i = 0; i < npar; ++i)
+ out->data[i]= 0;
+ /* Changed loop start at 1 instead of 0 to avoid regularization of beta 0 */
+ for(int i = 1; i < npar; ++i)
+ out->data[i]= p->lambdaL2*beta->data[i];
+ for(int i = 1; i < npar; ++i)
+ out->data[i]+= p->lambdaL1*((beta->data[i]>0)-(beta->data[i]<0));
+
+ for(int i = 0; i < n; ++i) {
+ double pn=0;
+ double Xbetai=beta->data[0];
+ int iParm=1;
+ for(int k = 0; k < Kc; ++k)
+ Xbetai+= gsl_matrix_get(p->Xc,i,k)*beta->data[iParm++];
+
+ pn= -( p->y->data[i] - 1/(1 + gsl_sf_exp(-Xbetai)) );
+
+ out->data[0]+= pn;
+ iParm=1;
+ // Adding the continuous
+ for(int k = 0; k < Kc; ++k) {
+ out->data[iParm++] += gsl_matrix_get(p->Xc,i,k)*pn;
+ }
+ }
+}
+
+
+/* The Hessian of f */
+void
+wgsl_cont_optim_hessian (const gsl_vector *beta, void *params,
+ gsl_matrix *out)
+{
+ fix_parm_cont_T *p = (fix_parm_cont_T *)params;
+ int n = p->y->size;
+ int Kc = p->Xc->size2;
+ int npar = beta->size;
+ gsl_vector *gn = gsl_vector_alloc(npar); /* gn */
+  // Initialize the Hessian to zero (is this necessary?)
+ gsl_matrix_set_zero(out);
+ /* Changed loop start at 1 instead of 0 to avoid regularization of beta 0*/
+ for(int i = 1; i < npar; ++i)
+ gsl_matrix_set(out,i,i,(p->lambdaL2)); // Double check this
+  // The L1 penalty is not handled properly yet, as it is not differentiable; coordinate descent (as in glmnet) may be needed.
+
+ for(int i = 0; i < n; ++i) {
+ double pn=0;
+ double aux=0;
+ double Xbetai=beta->data[0];
+ int iParm1=1;
+ for(int k = 0; k < Kc; ++k)
+ Xbetai+= gsl_matrix_get(p->Xc,i,k)*beta->data[iParm1++];
+
+ pn= 1/(1 + gsl_sf_exp(-Xbetai));
+ // Add a protection for pn very close to 0 or 1?
+ aux=pn*(1-pn);
+
+    // Build gn, the expanded design row for observation i (the gradient of eta_i w.r.t. beta)
+ gsl_vector_set_zero(gn);
+ gn->data[0]= 1;
+ iParm1=1;
+ for(int k = 0; k < Kc; ++k) {
+ gn->data[iParm1++] = gsl_matrix_get(p->Xc,i,k);
+ }
+
+ for(int k1=0;k1<npar; ++k1)
+ if(gn->data[k1]!=0)
+ for(int k2=0;k2<npar; ++k2)
+ if(gn->data[k2]!=0)
+ *gsl_matrix_ptr(out,k1,k2) += (aux * gn->data[k1] * gn->data[k2]);
+ }
+ gsl_vector_free(gn);
+}
+
+
+double wgsl_cont_optim_f(gsl_vector *v, void *params)
+{
+ double mLogLik=0;
+ fix_parm_cont_T *p = (fix_parm_cont_T *)params;
+ mLogLik = fLogit_cont(v,p->Xc,p->y,p->lambdaL1,p->lambdaL2);
+ return mLogLik;
+}
+
+
+/* Compute both f and df together. */
+void
+wgsl_cont_optim_fdf (gsl_vector *x, void *params,
+ double *f, gsl_vector *df)
+{
+ *f = wgsl_cont_optim_f(x, params);
+ wgsl_cont_optim_df(x, params, df);
+}
+
+
+int logistic_cont_fit(gsl_vector *beta
+ ,gsl_matrix *Xc // continuous covariates Matrix Nobs x Kc (NULL if not used)
+ ,gsl_vector *y
+ ,double lambdaL1
+ ,double lambdaL2)
+{
+
+ double mLogLik=0;
+ fix_parm_cont_T p;
+ int npar = beta->size;
+ int iter=0;
+ double maxchange=0;
+
+  // Initializing fixed parameters
+ p.Xc=Xc;
+ p.y=y;
+ p.lambdaL1=lambdaL1;
+ p.lambdaL2=lambdaL2;
+
+ //Initial fit
+ //#ifdef _RPR_DEBUG_
+ mLogLik = wgsl_cont_optim_f(beta,&p);
+ //fprintf(stderr,"#Initial -log(Lik(0))=%lf\n",mLogLik);
+ //#endif //_RPR_DEBUG
+
+ gsl_matrix *myH = gsl_matrix_alloc(npar,npar); /* Hessian matrix*/
+ gsl_vector *stBeta = gsl_vector_alloc(npar); /* Direction to move */
+
+ gsl_vector *myG = gsl_vector_alloc(npar); /* Gradient*/
+ gsl_vector *tau = gsl_vector_alloc(npar); /* tau for QR*/
+
+ for(iter=0;iter<100;iter++){
+ wgsl_cont_optim_hessian(beta,&p,myH); //Calculate Hessian
+ wgsl_cont_optim_df(beta,&p,myG); //Calculate Gradient
+ gsl_linalg_QR_decomp(myH,tau); //Calculate next beta
+ gsl_linalg_QR_solve(myH,tau,myG,stBeta);
+ gsl_vector_sub(beta,stBeta);
+
+ //Monitor convergence
+ maxchange=0;
+ for(int i=0;i<npar; i++)
+ if(maxchange<fabs(stBeta->data[i]))
+ maxchange=fabs(stBeta->data[i]);
+
+#ifdef _RPR_DEBUG_
+ mLogLik = wgsl_cont_optim_f(beta,&p);
+ //fprintf(stderr,"#iter %d, -log(Lik(0))=%lf,%lf\n",(int)iter,mLogLik,maxchange);
+#endif //_RPR_DEBUG
+
+ if(maxchange<1E-4)
+ break;
+ }
+
+#ifdef _RPR_DEBUG_
+ //for (int i = 0; i < npar; i++)
+ // fprintf(stderr,"#par_%d= %lf\n",i,beta->data[i]);
+#endif //_RPR_DEBUG
+
+ //Final fit
+ mLogLik = wgsl_cont_optim_f(beta,&p);
+ //fprintf(stderr,"#Final %d) -log(Lik(0))=%lf, maxchange %lf\n",iter,mLogLik,maxchange);
+
+ gsl_vector_free (tau);
+ gsl_vector_free (stBeta);
+ gsl_vector_free (myG);
+ gsl_matrix_free (myH);
+
+ return 0;
+}
+