require 'csv'

module OpenTox

  # Ruby wrapper for OpenTox Dataset Webservices (http://opentox.org/dev/apis/api-1.2/dataset).
  class Dataset

    attr_writer :features, :compounds, :data_entries

    def initialize uri=nil, subjectid=SUBJECTID
      super uri, subjectid
      @features = []
      @compounds = []
      @data_entries = []
    end

    # Get data (lazy loading from dataset service)
    # overrides {OpenTox#metadata} to only load the metadata instead of the whole dataset
    # @return [Hash] the metadata
    def metadata force_update=false
      if @metadata.empty? or force_update
        uri = File.join(@uri,"metadata")
        begin
          parse_ntriples RestClientWrapper.get(uri,{},{:accept => "text/plain", :subjectid => @subjectid})
        rescue # fall back to rdfxml
          parse_rdfxml RestClientWrapper.get(uri,{},{:accept => "application/rdf+xml", :subjectid => @subjectid})
        end
        @metadata = @rdf.to_hash[RDF::URI.new(@uri)].inject({}) { |h, (predicate, values)| h[predicate] = values.collect{|v| v.to_s}; h }
      end
      @metadata
    end

    # @return [Array] feature objects (NOT uris)
    def features force_update=false
      if @features.empty? or force_update
        uri = File.join(@uri,"features")
        uris = RestClientWrapper.get(uri,{},{:accept => "text/uri-list", :subjectid => @subjectid}).split("\n") # ordered datasets return ordered features
        @features = uris.collect{|uri| Feature.new(uri)}
      end
      @features
    end

    # @return [Array] compound objects (NOT uris)
    def compounds force_update=false
      if @compounds.empty? or force_update
        uri = File.join(@uri,"compounds")
        uris = RestClientWrapper.get(uri,{},{:accept => "text/uri-list", :subjectid => @subjectid}).split("\n") # ordered datasets return ordered compounds
        @compounds = uris.collect{|uri| Compound.new(uri)}
      end
      @compounds
    end

    # @return [Array] with two dimensions,
    #   first index: compounds, second index: features, values: compound feature values
    def data_entries force_update=false
      if @data_entries.empty? or force_update
        sparql = "SELECT ?cidx ?fidx ?value FROM <#{uri}> WHERE {
          ?data_entry <#{RDF::OLO.index}> ?cidx ;
                      <#{RDF::OT.values}> ?v .
          ?v          <#{RDF::OT.feature}> ?f;
                      <#{RDF::OT.value}> ?value .
          ?f          <#{RDF::OLO.index}> ?fidx.
          } ORDER BY ?fidx ?cidx"
        RestClientWrapper.get(service_uri,{:query => sparql},{:accept => "text/uri-list", :subjectid => @subjectid}).split("\n").each do |row|
          r,c,v = row.split("\t")
          @data_entries[r.to_i] ||= []
          #v = v.to_f if v.numeric?
          #v = nil if v.is_a? String and v.empty?
          @data_entries[r.to_i][c.to_i] = v
        end
        # TODO: fallbacks for external and unordered datasets
        # cast values according to feature type: values of purely numeric features
        # become Floats, nominal/string feature values stay Strings
        features.each_with_index do |feature,i|
          if feature[RDF.type].include? RDF::OT.NumericFeature
            if feature[RDF.type].include? RDF::OT.NominalFeature
              if feature[RDF.type].include? RDF::OT.StringFeature
                @data_entries.each { |row| row[i] = row[i].to_s if row[i] }
              else
                @data_entries.each { |row| row[i] = row[i] if row[i] }
              end
            else
              @data_entries.each { |row| row[i] = row[i].to_f if row[i] }
            end
          end
        end
      end
      @data_entries
    end

    # Find data entry values for a given compound and feature
    # @param compound [OpenTox::Compound] OpenTox Compound object
    # @param feature [OpenTox::Feature] OpenTox Feature object
    # @return [Array] Data entry values
    def values(compound, feature)
      rows = (0 ... compounds.length).select { |r| compounds[r].uri == compound.uri }
      col = features.collect{|f| f.uri}.index feature.uri
      rows.collect{|row| data_entries[row][col]}
    end
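    # Usage sketch (not part of the API): reading a dataset lazily from a dataset
    # service. The URI below is a placeholder, not a real service; all accessors
    # fetch their data on first use as implemented above.
    #
    #   dataset = OpenTox::Dataset.new "http://example.org/dataset/1"
    #   dataset.metadata                               # metadata only, not the whole dataset
    #   dataset.features.collect{|f| f.title}          # ordered feature objects
    #   dataset.compounds.size                         # ordered compound objects
    #   dataset.data_entries[0][1]                     # value of first compound, second feature
    #   dataset.values(dataset.compounds.first, dataset.features.first)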
    # Convenience methods to search by compound/feature URIs

    # Search a dataset for a feature given its URI
    # @param uri [String] Feature URI
    # @return [OpenTox::Feature] Feature object, or nil if not present
    def find_feature_uri(uri)
      features.select{|f| f.uri == uri}.first
    end

    # Search a dataset for a compound given its URI
    # @param uri [String] Compound URI
    # @return [OpenTox::Compound] Compound object, or nil if not present
    def find_compound_uri(uri)
      compounds.select{|c| c.uri == uri}.first
    end

    # for prediction result datasets
    # assumes that there are features with title prediction and confidence
    # @return [Array] of Hashes with keys { :compound, :value, :confidence } (compound value is an object, not a uri)
    def predictions
      predictions = []
      prediction_feature = nil
      confidence_feature = nil
      metadata[RDF::OT.predictedVariables].each do |uri|
        feature = OpenTox::Feature.new uri
        case feature.title
        when /prediction$/
          prediction_feature = feature
        when /confidence$/
          confidence_feature = feature
        end
      end
      if prediction_feature and confidence_feature
        compounds.each do |compound|
          value = values(compound,prediction_feature).first
          confidence = values(compound,confidence_feature).first
          predictions << {:compound => compound, :value => value, :confidence => confidence} if value and confidence
        end
      end
      predictions
    end

    # Adding data methods
    # (Alternatively, you can directly change @features and @compounds)

    # Create a dataset from a file (csv, sdf, ...)
    # @param filename [String]
    # @return [String] dataset uri
    def upload filename, wait=true
      uri = RestClientWrapper.put(@uri, {:file => File.new(filename)}, {:subjectid => @subjectid})
      wait_for_task uri if URI.task?(uri) and wait
      metadata true
      @uri
    end

    # @param compound [OpenTox::Compound]
    # @param feature [OpenTox::Feature]
    # @param value [Object] (will be converted to String)
    # @return [Array] data_entries
    def add_data_entry compound, feature, value
      @compounds << compound unless @compounds.collect{|c| c.uri}.include?(compound.uri)
      row = @compounds.collect{|c| c.uri}.index(compound.uri)
      @features << feature unless @features.collect{|f| f.uri}.include?(feature.uri)
      col = @features.collect{|f| f.uri}.index(feature.uri)
      @data_entries[row] ||= []
      if @data_entries[row][col] # duplicated values
        #row = @compounds.size
        @compounds << compound
        row = @compounds.collect{|c| c.uri}.rindex(compound.uri)
      end
      @data_entries[row][col] = value
    end
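    # Usage sketch (not part of the API): building a dataset entry by entry and
    # saving it. Compound and feature URIs and the value are placeholders; put is
    # assumed to be inherited from the OpenTox base class (as used in #split below).
    #
    #   dataset = OpenTox::Dataset.new
    #   compound = OpenTox::Compound.new "http://example.org/compound/1"
    #   feature = OpenTox::Feature.new "http://example.org/feature/1"
    #   dataset.add_data_entry compound, feature, "1.23"
    #   dataset.put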
    # TODO: remove? might be dangerous if feature ordering is incorrect
    # MG: I would not remove this because add_data_entry is very slow (searches arrays 4 times)
    # CH: do you have measurements? compound and feature arrays are not that big, I suspect that feature search/creation is the time critical step
    # @param row [Array]
    # @example
    #   d = Dataset.new
    #   d.features << Feature.new(a)
    #   d.features << Feature.new(b)
    #   d << [ Compound.new("c1ccccc1"), feature-value-a, feature-value-b ]
    def << row
      compound = row.shift # removes the compound from the array
      bad_request_error "Dataset features are empty." unless @features
      bad_request_error "Row size '#{row.size}' does not match features size '#{@features.size}'." unless row.size == @features.size
      bad_request_error "First column is not an OpenTox::Compound" unless compound.class == OpenTox::Compound
      @compounds << compound
      @data_entries << row
    end

    # Serialisation

    # converts dataset to csv format including compound smiles as first column, other column headers are feature titles
    # @return [String]
    def to_csv
      CSV.generate do |csv|
        csv << ["SMILES"] + features.collect{|f| f.title}
        compounds.each_with_index do |c,i|
          csv << [c.smiles] + data_entries[i]
        end
      end
    end
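    # Usage sketch (not part of the API): filling a dataset row-wise with << and
    # exporting it as CSV. URIs and values are placeholders; follows the @example
    # given for << above.
    #
    #   d = OpenTox::Dataset.new
    #   d.features << OpenTox::Feature.new("http://example.org/feature/a")
    #   d.features << OpenTox::Feature.new("http://example.org/feature/b")
    #   d << [ OpenTox::Compound.new("http://example.org/compound/1"), "1.2", "inactive" ]
    #   d.to_csv # first column SMILES, remaining column headers are feature titles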
    RDF_FORMATS.each do |format|

      # redefine rdf parse methods for all formats e.g. parse_rdfxml
      send :define_method, "parse_#{format}".to_sym do |rdf|
        # TODO: parse ordered dataset
        # TODO: parse data entries
        # TODO: parse metadata
        @rdf = RDF::Graph.new
        RDF::Reader.for(format).new(rdf) do |reader|
          reader.each_statement{ |statement| @rdf << statement }
        end
        query = RDF::Query.new({ :uri => { RDF.type => RDF::OT.Compound } })
        @compounds = query.execute(@rdf).collect { |solution| OpenTox::Compound.new solution.uri }
        query = RDF::Query.new({ :uri => { RDF.type => RDF::OT.Feature } })
        @features = query.execute(@rdf).collect { |solution| OpenTox::Feature.new solution.uri }
        @compounds.each_with_index do |c,i|
          @features.each_with_index do |f,j|
            # stub: fill @data_entries[i][j] once data entry parsing is implemented (see TODOs above)
          end
        end
      end

      # redefine rdf serialization methods
      send :define_method, "to_#{format}".to_sym do
        @metadata[RDF.type] = [RDF::OT.Dataset, RDF::OT.OrderedDataset]
        create_rdf
        @features.each_with_index do |feature,i|
          @rdf << [RDF::URI.new(feature.uri), RDF::URI.new(RDF.type), RDF::URI.new(RDF::OT.Feature)]
          @rdf << [RDF::URI.new(feature.uri), RDF::URI.new(RDF::OLO.index), RDF::Literal.new(i)]
        end
        @compounds.each_with_index do |compound,i|
          @rdf << [RDF::URI.new(compound.uri), RDF::URI.new(RDF.type), RDF::URI.new(RDF::OT.Compound)]
          if defined? @neighbors and neighbors.include? compound
            @rdf << [RDF::URI.new(compound.uri), RDF::URI.new(RDF.type), RDF::URI.new(RDF::OT.Neighbor)]
          end
          @rdf << [RDF::URI.new(compound.uri), RDF::URI.new(RDF::OLO.index), RDF::Literal.new(i)]
          data_entry_node = RDF::Node.new
          @rdf << [RDF::URI.new(@uri), RDF::URI.new(RDF::OT.dataEntry), data_entry_node]
          @rdf << [data_entry_node, RDF::URI.new(RDF.type), RDF::URI.new(RDF::OT.DataEntry)]
          @rdf << [data_entry_node, RDF::URI.new(RDF::OLO.index), RDF::Literal.new(i)]
          @rdf << [data_entry_node, RDF::URI.new(RDF::OT.compound), RDF::URI.new(compound.uri)]
          @data_entries[i].each_with_index do |value,j|
            value_node = RDF::Node.new
            @rdf << [data_entry_node, RDF::URI.new(RDF::OT.values), value_node]
            @rdf << [value_node, RDF::URI.new(RDF::OT.feature), RDF::URI.new(@features[j].uri)]
            @rdf << [value_node, RDF::URI.new(RDF::OT.value), RDF::Literal.new(value)]
          end
        end
        RDF::Writer.for(format).buffer do |writer|
          @rdf.each{|statement| writer << statement}
        end
      end

    end

    # TODO: fix bug that affects data_entry positions
=begin
    def to_ntriples # redefined string version for better performance
      ntriples = ""
      @metadata[RDF.type] = [ RDF::OT.Dataset, RDF::OT.OrderedDataset ]
      @metadata[RDF.type] ||= eval("RDF::OT."+self.class.to_s.split('::').last)
      @metadata[RDF::DC.date] ||= DateTime.now
      @metadata.each do |predicate,values|
        [values].flatten.each { |value| ntriples << "<#{@uri}> <#{predicate}> '#{value}' .\n" }
      end
      @parameters.each do |parameter|
        p_node = RDF::Node.new.to_s
        ntriples << "<#{@uri}> <#{RDF::OT.parameters}> #{p_node} .\n"
        ntriples << "#{p_node} <#{RDF.type}> <#{RDF::OT.Parameter}> .\n"
        parameter.each { |k,v| ntriples << "#{p_node} <#{k}> '#{v}' .\n" }
      end
      @features.each_with_index do |feature,i|
        ntriples << "<#{feature.uri}> <#{RDF.type}> <#{RDF::OT.Feature}> .\n"
        ntriples << "<#{feature.uri}> <#{RDF::OLO.index}> '#{i}' .\n"
      end
      @compounds.each_with_index do |compound,i|
        ntriples << "<#{compound.uri}> <#{RDF.type}> <#{RDF::OT.Compound}> .\n"
        if defined? @neighbors and neighbors.include? compound
          ntriples << "<#{compound.uri}> <#{RDF.type}> <#{RDF::OT.Neighbor}> .\n"
        end
        ntriples << "<#{compound.uri}> <#{RDF::OLO.index}> '#{i}' .\n"
        #data_entry_node = RDF::Node.new
        data_entry_node = "_:dataentry"+ i.to_s
        ntriples << "<#{@uri}> <#{RDF::OT.dataEntry}> #{data_entry_node} .\n"
        ntriples << "#{data_entry_node} <#{RDF.type}> <#{RDF::OT.DataEntry}> .\n"
        ntriples << "#{data_entry_node} <#{RDF::OLO.index}> '#{i}' .\n"
        ntriples << "#{data_entry_node} <#{RDF::OT.compound}> <#{compound.uri}> .\n"
        @data_entries[i].each_with_index do |value,j|
          value_node = data_entry_node+ "_value"+ j.to_s
          #value_node = RDF::Node.new
          ntriples << "#{data_entry_node} <#{RDF::OT.values}> #{value_node} .\n"
          ntriples << "#{value_node} <#{RDF::OT.feature}> <#{@features[j].uri}> .\n"
          ntriples << "#{value_node} <#{RDF::OT.value}> '#{value}' .\n"
        end
      end
      ntriples
    end
=end
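    # Usage sketch (not part of the API): round-tripping a dataset through RDF.
    # Serialisation and parsing methods (e.g. to_rdfxml, parse_rdfxml) are defined
    # dynamically above for every entry in RDF_FORMATS, so which methods exist
    # depends on that constant; data entry parsing is still a TODO.
    #
    #   rdfxml = dataset.to_rdfxml   # assumes :rdfxml is included in RDF_FORMATS
    #   copy = OpenTox::Dataset.new
    #   copy.parse_rdfxml rdfxml     # restores compounds and features, not data entries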
    # Methods for validation service

    # create a new dataset with the specified compounds and features
    # @param compound_indices [Array] compound indices (integers)
    # @param feats [Array] feature objects
    # @param metadata [Hash]
    # @return [OpenTox::Dataset]
    def split( compound_indices, feats, metadata)
      bad_request_error "Dataset.split : Please give compounds as indices" if compound_indices.size==0 or !compound_indices[0].is_a?(Fixnum)
      bad_request_error "Dataset.split : Please give features as feature objects (given: #{feats})" if feats!=nil and feats.size>0 and !feats[0].is_a?(OpenTox::Feature)
      dataset = OpenTox::Dataset.new
      dataset.metadata = metadata
      dataset.features = (feats ? feats : self.features)
      compound_indices.each do |c_idx|
        dataset << [ self.compounds[c_idx] ] + dataset.features.each_with_index.collect{|f,f_idx| self.data_entries[c_idx][f_idx]}
      end
      dataset.put
      dataset
    end

    # maps a compound-index from another dataset to a compound-index from this dataset
    # mapping works as follows:
    # (compound c is the compound identified by the compound-index of the other dataset)
    # * c occurs only once in this dataset? map compound-index of other dataset to index in this dataset
    # * c occurs >1 in this dataset?
    # ** number of occurrences is equal in both datasets? assume order is preserved(!) and map accordingly
    # ** number of occurrences is not equal in both datasets? cannot map, raise error
    # @param dataset [OpenTox::Dataset] dataset that should be mapped to this dataset (fully loaded)
    # @param compound_index [Fixnum] compound index in the other dataset
    # @return [Fixnum] compound index in this dataset
    def compound_index( dataset, compound_index )
      unless defined?(@index_map) and @index_map[dataset.uri]
        map = {}
        dataset.compounds.collect{|c| c.uri}.uniq.each do |compound|
          self_indices = compound_indices(compound)
          next unless self_indices
          dataset_indices = dataset.compound_indices(compound)
          if self_indices.size==1
            dataset_indices.size.times do |i|
              map[dataset_indices[i]] = self_indices[0]
            end
          elsif self_indices.size==dataset_indices.size
            # we assume that the order is preserved!
            dataset_indices.size.times do |i|
              map[dataset_indices[i]] = self_indices[i]
            end
          else
            raise "cannot map compound #{compound} from dataset #{dataset.uri} to dataset #{uri}, "+
              "compound occurs #{dataset_indices.size} times and #{self_indices.size} times"
          end
        end
        @index_map = {} unless defined?(@index_map)
        @index_map[dataset.uri] = map
      end
      @index_map[dataset.uri][compound_index]
    end

    # returns the indices of the compound in the dataset
    # @param compound [String] compound URI
    # @return [Array] compound index (position) of the compound in the dataset, array-size is 1 unless there are multiple occurrences
    def compound_indices( compound )
      unless defined?(@cmp_indices) and @cmp_indices.has_key?(compound)
        @cmp_indices = {}
        @compounds.size.times do |i|
          c = @compounds[i].uri
          if @cmp_indices[c]==nil
            @cmp_indices[c] = [i]
          else
            @cmp_indices[c] = @cmp_indices[c]+[i]
          end
        end
      end
      @cmp_indices[compound]
    end

    # returns the compound feature value using the compound-index and the feature_uri
    def data_entry_value(compound_index, feature_uri)
      data_entries(true) if @data_entries.empty?
      col = @features.collect{|f| f.uri}.index feature_uri
      @data_entries[compound_index] ? @data_entries[compound_index][col] : nil
    end

  end

end
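# Usage sketch (not part of the API): creating a validation split and mapping
# compound indices between datasets. Indices and metadata are illustrative only.
#
#   training = dataset.split [0, 1, 2], dataset.features, {RDF::DC.title => ["training set"]}
#   training.compound_index(dataset, 0) # index of dataset's first compound within training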