diff options
Diffstat (limited to 'lib/serializer.rb')
-rw-r--r-- | lib/serializer.rb | 297 |
1 files changed, 297 insertions, 0 deletions
diff --git a/lib/serializer.rb b/lib/serializer.rb new file mode 100644 index 0000000..3def252 --- /dev/null +++ b/lib/serializer.rb @@ -0,0 +1,297 @@ +require 'spreadsheet' +require 'yajl' + +module OpenTox + + module Serializer + + # modelled according to to http://n2.talis.com/wiki/RDF_JSON_Specification + class Owl + + attr_accessor :object + + def initialize + + @object = { + # this should come from opntox.owl + OT.Compound => { RDF["type"] => [{ "type" => "uri", "value" => OWL['Class'] }] } , + OT.Feature => { RDF["type"] => [{ "type" => "uri", "value" => OWL['Class'] }] } , + OT.NominalFeature => { RDF["type"] => [{ "type" => "uri", "value" => OWL['Class'] }] } , + OT.NumericFeature => { RDF["type"] => [{ "type" => "uri", "value" => OWL['Class'] }] } , + OT.StringFeature => { RDF["type"] => [{ "type" => "uri", "value" => OWL['Class'] }] } , + OT.Dataset => { RDF["type"] => [{ "type" => "uri", "value" => OWL['Class'] }] } , + OT.DataEntry => { RDF["type"] => [{ "type" => "uri", "value" => OWL['Class'] }] } , + OT.FeatureValue => { RDF["type"] => [{ "type" => "uri", "value" => OWL['Class'] }] } , + OT.Algorithm => { RDF["type"] => [{ "type" => "uri", "value" => OWL['Class'] }] } , + OT.Parameter => { RDF["type"] => [{ "type" => "uri", "value" => OWL['Class'] }] } , + + OT.compound => { RDF["type"] => [{ "type" => "uri", "value" => OWL.ObjectProperty }] } , + OT.feature => { RDF["type"] => [{ "type" => "uri", "value" => OWL.ObjectProperty }] } , + OT.dataEntry => { RDF["type"] => [{ "type" => "uri", "value" => OWL.ObjectProperty }] } , + OT.acceptValue => { RDF["type"] => [{ "type" => "uri", "value" => OWL.ObjectProperty }] } , + OT.values => { RDF["type"] => [{ "type" => "uri", "value" => OWL.ObjectProperty }] } , + #XSD.anyUri => { RDF["type"] => [{ "type" => "uri", "value" => OWL.ObjectProperty }] } , + OT.algorithm => { RDF["type"] => [{ "type" => "uri", "value" => OWL.ObjectProperty }] } , + OT.parameters => { RDF["type"] => [{ "type" => "uri", "value" => OWL.ObjectProperty }] } , + + DC.title => { RDF["type"] => [{ "type" => "uri", "value" => OWL.AnnotationProperty }] } , + DC.identifier => { RDF["type"] => [{ "type" => "uri", "value" => OWL.AnnotationProperty }] } , + DC.contributor => { RDF["type"] => [{ "type" => "uri", "value" => OWL.AnnotationProperty }] } , + DC.creator => { RDF["type"] => [{ "type" => "uri", "value" => OWL.AnnotationProperty }] } , + OT.isA => { RDF["type"] => [{ "type" => "uri", "value" => OWL.AnnotationProperty }] } , + + OT.hasSource => { RDF["type"] => [{ "type" => "uri", "value" => OWL.DatatypeProperty }] } , + OT.value => { RDF["type"] => [{ "type" => "uri", "value" => OWL.DatatypeProperty }] } , + OT.paramScope => { RDF["type"] => [{ "type" => "uri", "value" => OWL.DatatypeProperty }] } , + OT.paramValue => { RDF["type"] => [{ "type" => "uri", "value" => OWL.DatatypeProperty }] } , + + #Untyped Individual: http://localhost/algorithm + } + + @data_entries = {} + @values_id = 0 + @parameter_id = 0 + + @classes = Set.new + @object_properties = Set.new + @annotation_properties = Set.new + @datatype_properties = Set.new + + @objects = Set.new + end + + def add_compound(uri) + #@classes << OT.Compound unless @classes.include? OT.Compound + @object[uri] = { RDF["type"] => [{ "type" => "uri", "value" => OT.Compound }] } + end + + def add_feature(uri,metadata) + #@classes << OT.Feature unless @classes.include? OT.Feature + #@classes << OT.NominalFeature unless @classes.include? OT.NominalFeature + #@classes << OT.NumericFeature unless @classes.include? OT.NumericFeature + #@classes << OT.StringFeature unless @classes.include? OT.StringFeature + @object[uri] = { RDF["type"] => [{ "type" => "uri", "value" => OT.Feature }] } + add_metadata uri, metadata + end + + def add_dataset(dataset) + + @dataset = dataset.uri + + @object[dataset.uri] = { RDF["type"] => [{ "type" => "uri", "value" => OT.Dataset }] } + + add_metadata dataset.uri, dataset.metadata + + dataset.compounds.each { |compound| add_compound compound } + + dataset.features.each { |feature,metadata| add_feature feature,metadata } + + dataset.data_entries.each do |compound,entry| + entry.each do |feature,values| + values.each { |value| add_data_entry compound,feature,value } + end + end + + end + + def add_algorithm(uri,metadata,parameters) + @object[uri] = { RDF["type"] => [{ "type" => "uri", "value" => OT.Algorithm }] } + add_metadata uri, metadata + add_parameters uri, parameters + #metadata.each { |u,v| @object[uri][u] = [{"type" => type(v), "value" => v }] } + end + + def add_model(uri,metadata) + end + + def add_metadata(uri,metadata) + #@object[uri] = { RDF["type"] => [{ "type" => "uri", "value" => OT[type] }] } + metadata.each do |u,v| + @object[uri][u] = [{"type" => type(v), "value" => v }] + end + end + + def add_parameters(uri,parameters) + #@object[uri] = { RDF["type"] => [{ "type" => "uri", "value" => OT[type] }] } + @object[uri][OT.parameters] = [] unless @object[uri][OT.parameters] + parameters.each do |p| + parameter = "_:parameter#{@parameter_id}" + @parameter_id += 1 + @object[uri][OT.parameters] << {"type" => "bnode", "value" => parameter} + @object[parameter] = { RDF["type"] => [{ "type" => "uri", "value" => OT.Parameter }] } + add_metadata parameter, p + end + end + + def add_data_entry(compound,feature,value) + add_compound(compound) unless @object[compound] + add_feature(feature,{}) unless @object[feature] + unless data_entry = @data_entries[compound] + data_entry = "_:dataentry#{@data_entries.size}" + @data_entries[compound] = data_entry + @object[@dataset][OT.dataEntry] = [] unless @object[@dataset][OT.dataEntry] + @object[@dataset][OT.dataEntry] << {"type" => "bnode", "value" => data_entry} + @object[data_entry] = { + RDF["type"] => [{ "type" => "uri", "value" => OT.DataEntry }], + OT.compound => [{ "type" => "uri", "value" => compound }], + OT.values => [], + } + end + values = "_:values#{@values_id}" + @values_id += 1 + @object[data_entry][OT.values] << {"type" => "bnode", "value" => values} + case type(value) + when "uri" + v = [{ "type" => "uri", "value" => value}] + when "literal" + v = [{ "type" => "literal", "value" => value, "datatype" => datatype(value) }] + else + raise "Illegal type #{type(value)} for #{value}." + end + @object[values] = { + RDF["type"] => [{ "type" => "uri", "value" => OT.FeatureValue }], + OT.feature => [{ "type" => "uri", "value" => feature }], + OT.value => v + } + @object[feature][RDF["type"]] << { "type" => "uri", "value" => featuretype(value) } + end + + # Serializers + + def ntriples + + #rdf_types + @triples = Set.new + @object.each do |s,entry| + s = url(s) if type(s) == "uri" + entry.each do |p,objects| + p = url(p) + objects.each do |o| + case o["type"] + when "uri" + o = url(o["value"]) + when "literal" + o = literal(o["value"],datatype(o["value"])) + when "bnode" + o = o["value"] + end + @triples << [s,p,o] + end + end + end + @triples.sort.collect{ |s| s.join(' ').concat(" .") }.join("\n")+"\n" + end + + def rdfxml + Tempfile.open("owl-serializer"){|f| f.write(ntriples); @path = f.path} + `rapper -i ntriples -o rdfxml #{@path}` + end + + def json + #rdf_types + Yajl::Encoder.encode(@object) + end + + # Helpers for type detection + private + + def datatype(value) + if value.is_a? TrueClass or value.is_a? FalseClass + XSD.boolean + elsif value.is_a? Float + XSD.float + else + XSD.string + end + end + + def featuretype(value) + if value.is_a? TrueClass or value.is_a? FalseClass + datatype = OT.NominalFeature + elsif value.is_a? Float + datatype = OT.NumericFeature + else + datatype = OT.StringFeature + end + end + + def type(value) + begin + uri = URI.parse(value) + if uri.class == URI::HTTP or uri.class == URI::HTTPS + "uri" + elsif value.match(/^_/) + "bnode" + else + "literal" + end + rescue + "literal" + end + end + + def literal(value,type) + # concat and << are faster string concatination operators than + + '"'.concat(value.to_s).concat('"^^<').concat(type).concat('>') + end + + def url(uri) + # concat and << are faster string concatination operators than + + '<'.concat(uri).concat('>') + end + + def rdf_types + @classes.each { |c| @object[c] = { RDF["type"] => [{ "type" => "uri", "value" => OWL['Class'] }] } } + @object_properties.each { |p| @object[p] = { RDF["type"] => [{ "type" => "uri", "value" => OWL['ObjectProperty'] }] } } + @annotation_properties.each { |a| @object[a] = { RDF["type"] => [{ "type" => "uri", "value" => OWL['AnnotationProperty'] }] } } + @datatype_properties.each { |d| @object[d] = { RDF["type"] => [{ "type" => "uri", "value" => OWL['DatatypeProperty'] }] } } + end + + end + + class Spreadsheets # to avoid nameclash with Spreadsheet gem + + def initialize(dataset) + @rows = [] + @rows << ["SMILES"] + features = dataset.features.keys + @rows.first << features + @rows.first.flatten! + dataset.data_entries.each do |compound,entries| + smiles = Compound.new(compound).smiles + row = Array.new(@rows.first.size) + row[0] = smiles + entries.each do |feature, values| + i = features.index(feature)+1 + values.each do |value| + row[i] = value #TODO overwrites duplicated values + end + end + @rows << row + end + end + + def csv + @rows.collect{|r| r.join(", ")}.join("\n") + end + + def excel + Spreadsheet.client_encoding = 'UTF-8' + book = Spreadsheet::Workbook.new + sheet = book.create_worksheet(:name => '') + sheet.column(0).width = 100 + i = 0 + @rows.each do |row| + row.each do |c| + sheet.row(i).push c + end + i+=1 + end + book + end + + end + + + end +end |