PubChem classification download
[lazar] / lib / lazar.rb
1 require 'rubygems'
2 require "bundler/setup"
3 require "rest-client"
4 require 'addressable'
5 require 'yaml'
6 require 'json'
7 require 'logger'
8 require 'mongoid'
9 require 'rserve'
10 require "nokogiri"
11 require "base64"
12 require 'openbabel'
13
# Environment setup
# LAZAR_ENV selects the runtime environment; it defaults to "production" when unset.
ENV["LAZAR_ENV"] ||= "production"
# Compare against the complete value: a substring match would also accept
# values such as "staging-production", which none of the environment specific
# branches further down in this file (e.g. the logger setup) know about.
unless ["production", "development"].include?(ENV["LAZAR_ENV"])
  raise "Incorrect lazar environment variable LAZAR_ENV '#{ENV["LAZAR_ENV"]}', please set it to 'production' or 'development'."
end

# Propagate the validated environment to the variables read by other libraries.
ENV["MONGOID_ENV"] = ENV["LAZAR_ENV"]
ENV["RACK_ENV"] = ENV["LAZAR_ENV"] # should set sinatra environment
# search for a central mongo database in use
# http://opentox.github.io/installation/2017/03/07/use-central-mongodb-in-docker-environment
# grep prints the leading IPv4 address of every /etc/hosts line that mentions
# "mongodb". Only the first address is kept: with several matching lines the
# raw output would contain embedded newlines and yield an invalid host name.
# The constant is an empty string when nothing matches.
CENTRAL_MONGO_IP = `grep -oP '^\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}(?=.*mongodb)' /etc/hosts`.lines.first.to_s.chomp
# Mongoid uses the central database when one was found, otherwise a local server.
# The database is named after the current lazar environment.
Mongoid.load_configuration({
  :clients => {
    :default => {
      :database => ENV["LAZAR_ENV"],
      :hosts => (CENTRAL_MONGO_IP.blank? ? ["localhost:27017"] : ["#{CENTRAL_MONGO_IP}:27017"]),
    }
  }
})
Mongoid.raise_not_found_error = false # return nil if no document is found
# Separate driver level client for the same server and database.
$mongo = Mongo::Client.new("mongodb://#{(CENTRAL_MONGO_IP.blank? ? "127.0.0.1" : CENTRAL_MONGO_IP)}:27017/#{ENV['LAZAR_ENV']}")
# NOTE(review): presumably the GridFS bucket of that database - confirm against the mongo driver docs
$gridfs = $mongo.database.fs
34
# Logger setup
# Synchronous STDOUT for redirection, etc see
# http://stackoverflow.com/questions/8549443/why-doesnt-logger-output-to-stdout-get-redirected-to-files
STDOUT.sync = true
$logger = Logger.new(STDOUT) # STDERR did not work on my development machine (CH)
# lazar log level per environment; any other LAZAR_ENV value leaves the levels
# of both loggers untouched.
lazar_log_level = {
  "production" => Logger::WARN,
  "development" => Logger::DEBUG
}[ENV["LAZAR_ENV"]]
unless lazar_log_level.nil?
  $logger.level = lazar_log_level
  # the mongo driver logs at WARN in both environments
  Mongo::Logger.level = Logger::WARN
end
46
# R setup
# Directory "R" next to the directory of this file; used as R library path.
rlib = File.expand_path(File.join("..", "R"), File.dirname(__FILE__))
# should work on POSIX including os x
# http://stackoverflow.com/questions/19619582/number-of-processors-cores-in-command-line
NR_CORES = %x(getconf _NPROCESSORS_ONLN).to_i
R = Rserve::Connection.new
R.eval(".libPaths('#{rlib}')")
# R packages attached at startup, in this order, all taken from rlib.
r_packages = %w[labeling iterators foreach ggplot2 grid gridExtra pls caret doMC randomForest plyr]
# Assemble the startup script line by line; the empty first and last entries
# produce the leading and trailing newline of the script.
r_startup = ["", "suppressPackageStartupMessages({"]
r_startup.concat(r_packages.map { |pkg| "  library(#{pkg},lib=\"#{rlib}\")" })
r_startup << "  registerDoMC(#{NR_CORES})" << "})" << ""
R.eval(r_startup.join("\n"))
70
# Base URLs of the PubChem PUG REST and the ChEMBL molecule web services
PUBCHEM_URI = 'https://pubchem.ncbi.nlm.nih.gov/rest/pug/'
CHEMBL_URI = 'https://www.ebi.ac.uk/chembl/api/data/molecule/'

# OpenTox classes and includes
# Algorithm and Models are modules
CLASSES = %w[
  Feature
  Substance
  Dataset
  CrossValidation
  LeaveOneOutValidation
  RepeatedCrossValidation
]
76
# Library files of this directory, loaded one after the other.
# be aware of the require sequence as it affects class/method overwrites
lazar_sources = %w[
  overwrite.rb
  rest-client-wrapper.rb
  error.rb
  opentox.rb
  feature.rb
  physchem.rb
  substance.rb
  compound.rb
  nanoparticle.rb
  dataset.rb
  algorithm.rb
  similarity.rb
  feature_selection.rb
  model.rb
  classification.rb
  regression.rb
  caret.rb
  validation-statistics.rb
  validation.rb
  train-test-validation.rb
  leave-one-out-validation.rb
  crossvalidation.rb
  download.rb
]
# "import.rb" is commented out and therefore not loaded
lazar_sources.each do |source|
  require_relative source
end