module OpenTox

  # Fields shared by cross-validation records.
  # NOTE(review): `field` is not defined in this file; presumably it is the
  # Mongoid field macro, made available where this class is first defined -
  # confirm.
  class CrossValidation
    # Ids of the validations created for the individual folds.
    field :validation_ids, type: Array, default: []
    # Presumably the number of folds used - confirm against the creators.
    field :folds, type: Integer
    # Sum of nr_instances over all fold validations.
    field :nr_instances, type: Integer
    # Sum of nr_unpredicted over all fold validations.
    field :nr_unpredicted, type: Integer
    # Concatenated fold predictions; the creators in this file store them
    # sorted in descending order of element 3 (the confidence).
    field :predictions, type: Array
    # Time at which the cross-validation finished.
    field :finished_at, type: Time 
  end

  # Cross-validation of a classification model.
  #
  # One validation is created per fold of the model's training dataset; the
  # per-fold confusion matrices are summed and summary statistics are derived
  # from the summed matrices.
  class ClassificationCrossValidation < CrossValidation

    field :accept_values, type: Array
    field :confusion_matrix, type: Array
    field :weighted_confusion_matrix, type: Array
    field :accuracy, type: Float
    field :weighted_accuracy, type: Float
    field :true_rate, type: Hash
    field :predictivity, type: Hash
    # TODO auc, f-measure (usability??)

    # Run an n-fold cross-validation for a classification model and store the
    # aggregated results.
    #
    # @param model object responding to +prediction_feature_id+ and
    #   +training_dataset_id+
    # @param n [Integer] number of folds requested from the training dataset
    # @return [ClassificationCrossValidation] the saved cross-validation
    def self.create model, n=10
      cv = self.new
      validation_ids = []
      nr_instances = 0
      nr_unpredicted = 0
      predictions = []
      # e.g. "...ClassificationCrossValidation" -> "...ClassificationValidation"
      validation_class = Object.const_get(self.to_s.sub(/Cross/,''))
      accept_values = Feature.find(model.prediction_feature_id).accept_values
      nr_classes = accept_values.size
      # square matrices, indexed in the order of accept_values
      confusion_matrix = Array.new(nr_classes){Array.new(nr_classes,0)}
      weighted_confusion_matrix = Array.new(nr_classes){Array.new(nr_classes,0)}
      fold_nr = 1
      training_dataset = Dataset.find model.training_dataset_id
      training_dataset.folds(n).each do |fold|
        t = Time.now
        $logger.debug "Fold #{fold_nr}"
        validation = validation_class.create(model, fold[0], fold[1])
        validation_ids << validation.id
        nr_instances += validation.nr_instances
        nr_unpredicted += validation.nr_unpredicted
        predictions += validation.predictions
        # add this fold's matrices cell by cell to the running totals
        validation.confusion_matrix.each_with_index do |r,i|
          r.each_with_index do |c,j|
            confusion_matrix[i][j] += c
            weighted_confusion_matrix[i][j] += validation.weighted_confusion_matrix[i][j]
          end
        end
        $logger.debug "Fold #{fold_nr}:  #{Time.now-t} seconds"
        fold_nr +=1
      end
      true_rate = {}
      predictivity = {}
      accept_values.each_with_index do |v,i|
        # diagonal cell divided by its row sum
        true_rate[v] = confusion_matrix[i][i]/confusion_matrix[i].reduce(:+).to_f
        # diagonal cell divided by its column sum
        predictivity[v] = confusion_matrix[i][i]/confusion_matrix.collect{|row| row[i]}.reduce(:+).to_f
      end
      confidence_sum = weighted_confusion_matrix.flatten.reduce(0,:+)
      # Sum the whole diagonal so that accuracy is correct for any number of
      # classes (for two classes this equals [0][0] + [1][1]).
      diagonal = (0...nr_classes).to_a
      correct = diagonal.collect{|i| confusion_matrix[i][i]}.reduce(0,:+)
      weighted_correct = diagonal.collect{|i| weighted_confusion_matrix[i][i]}.reduce(0,:+)
      cv.update_attributes(
        folds: n,
        validation_ids: validation_ids,
        nr_instances: nr_instances,
        nr_unpredicted: nr_unpredicted,
        accept_values: accept_values,
        confusion_matrix: confusion_matrix,
        weighted_confusion_matrix: weighted_confusion_matrix,
        accuracy: correct/(nr_instances-nr_unpredicted).to_f,
        weighted_accuracy: weighted_correct/confidence_sum.to_f,
        true_rate: true_rate,
        predictivity: predictivity,
        predictions: predictions.sort{|a,b| b[3] <=> a[3]}, # sort according to confidence
        finished_at: Time.now
      )
      cv.save
      cv
    end

    #Average area under roc  0.646
    #Area under roc  0.646
    #F measure carcinogen: 0.769, noncarcinogen: 0.348
  end

  # Cross-validation of a regression model.
  #
  # One validation is created per fold of the model's training dataset; error
  # statistics are computed over the pooled predictions of all folds.
  # NOTE(review): inherits from Validation rather than CrossValidation, unlike
  # ClassificationCrossValidation - confirm that this is intended.
  class RegressionCrossValidation < Validation

    field :validation_ids, type: Array, default: []
    field :folds, type: Integer
    field :rmse, type: Float
    field :mae, type: Float
    field :weighted_rmse, type: Float
    field :weighted_mae, type: Float

    # Run an n-fold cross-validation for a regression model and store the
    # aggregated results.
    #
    # Each prediction is read as [compound_id, activity, prediction,
    # confidence]. Entries without activity or prediction are logged and
    # excluded from the error statistics. Statistics that cannot be computed
    # (no usable predictions, or a confidence sum of zero) are stored as nil.
    #
    # @param model object responding to +training_dataset_id+
    # @param n [Integer] number of folds requested from the training dataset
    # @return [RegressionCrossValidation] the saved cross-validation
    def self.create model, n=10
      cv = self.new
      validation_ids = []
      nr_instances = 0
      nr_unpredicted = 0
      predictions = []
      # e.g. "...RegressionCrossValidation" -> "...RegressionValidation"
      validation_class = Object.const_get(self.to_s.sub(/Cross/,''))
      fold_nr = 1
      training_dataset = Dataset.find model.training_dataset_id
      training_dataset.folds(n).each do |fold|
        t = Time.now
        $logger.debug "Predicting fold #{fold_nr}"

        validation = validation_class.create(model, fold[0], fold[1])
        validation_ids << validation.id
        nr_instances += validation.nr_instances
        nr_unpredicted += validation.nr_unpredicted
        predictions += validation.predictions
        $logger.debug "Fold #{fold_nr}:  #{Time.now-t} seconds"
        fold_nr +=1
      end
      # Float accumulators avoid integer division for integer inputs.
      squared_error_sum = 0.0
      weighted_squared_error_sum = 0.0
      abs_error_sum = 0.0
      weighted_abs_error_sum = 0.0
      confidence_sum = 0.0
      # Separate counter: the fold count in n must survive for the folds field.
      nr_evaluated = 0
      predictions.each do |pred|
        _compound_id,activity,prediction,confidence = pred
        if activity && prediction
          error = prediction-activity
          squared_error_sum += error**2
          weighted_squared_error_sum += confidence*error**2
          abs_error_sum += error.abs
          weighted_abs_error_sum += confidence*error.abs
          nr_evaluated += 1
          confidence_sum += confidence
        else
          # TODO: create warnings
          $logger.debug "Skipping prediction without activity or prediction: #{pred.inspect}"
        end
      end
      mae = rmse = weighted_mae = weighted_rmse = nil
      unless nr_evaluated.zero?
        mae = abs_error_sum/nr_evaluated
        rmse = Math.sqrt(squared_error_sum/nr_evaluated)
      end
      unless confidence_sum.zero?
        weighted_mae = weighted_abs_error_sum/confidence_sum
        weighted_rmse = Math.sqrt(weighted_squared_error_sum/confidence_sum)
      end
      cv.update_attributes(
        folds: n,
        validation_ids: validation_ids,
        nr_instances: nr_instances,
        nr_unpredicted: nr_unpredicted,
        predictions: predictions.sort{|a,b| b[3] <=> a[3]}, # sort according to confidence
        mae: mae,
        rmse: rmse,
        weighted_mae: weighted_mae,
        weighted_rmse: weighted_rmse
      )
      cv.save
      cv
    end

    # Draw a scatter plot of log(prediction) against log(measurement) through
    # R and write it as SVG to /tmp/<id>.svg.
    # NOTE(review): R and +name+ are not defined in this file; presumably an R
    # bridge object and an attribute inherited from Validation - confirm.
    #
    # @return [String] path of the SVG file
    def plot
      svg_path = "/tmp/#{id}.svg"
      # element 1 is the measured activity, element 2 the predicted value
      x = predictions.collect{|p| p[1]}
      y = predictions.collect{|p| p[2]}
      R.assign "Measurement", x
      R.assign "Prediction", y
      R.eval "par(pty='s')" # sets the plot type to be square
      #R.eval "fitline <- lm(log(Prediction) ~ log(Measurement))"
      #R.eval "error <- log(Measurement)-log(Prediction)"
      # RMSE and MAE are computed on the untransformed values
      R.eval "error <- Measurement-Prediction"
      R.eval "rmse <- sqrt(mean(error^2,na.rm=T))"
      R.eval "mae <- mean( abs(error), na.rm = TRUE)"
      # correlation is computed on the log-transformed values
      R.eval "r <- cor(log(Prediction),log(Measurement))"
      R.eval "svg(filename='#{svg_path}')"
      R.eval "plot(log(Prediction),log(Measurement),main='#{self.name}', sub=paste('RMSE: ',rmse, 'MAE :',mae, 'r^2: ',r^2),asp=1)"
      #R.eval "plot(log(Prediction),log(Measurement),main='#{self.name}', sub=paste('RMSE: ',rmse, 'MAE :',mae, 'r^2: '),asp=1)"
      #R.eval "plot(log(Prediction),log(Measurement),main='#{self.name}', ,asp=1)"
      R.eval "abline(0,1,col='blue')" # identity line
      #R.eval "abline(fitline,col='red')"
      R.eval "dev.off()"
      svg_path
    end
  end


end