<?xml version="1.0"?>

<!--
  Internal DTD subset. It declares general entities that act as shorthand
  for the namespace URIs used in the namespace declarations on the root
  element and in attribute values further down (for example the xsd and
  rdf entities).

  The document type name is rdf:RDF so that it matches the root element
  of this document.

  NOTE(review): the cc entity value ends in "/#", which is unusual for a
  namespace URI; confirm it is the intended Creative Commons namespace.
-->
<!DOCTYPE rdf:RDF [
  <!ENTITY rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#">
  <!ENTITY rdfs "http://www.w3.org/2000/01/rdf-schema#">
  <!ENTITY xsd "http://www.w3.org/2001/XMLSchema#">
  <!ENTITY owl "http://www.w3.org/2002/07/owl#">
  <!ENTITY cc "http://web.resource.org/cc/#">
  <!ENTITY project "http://ebiquity.umbc.edu/ontology/project.owl#">
  <!ENTITY person "http://ebiquity.umbc.edu/ontology/person.owl#">
  <!ENTITY pub "http://ebiquity.umbc.edu/ontology/publication.owl#">
  <!ENTITY assert "http://ebiquity.umbc.edu/ontology/assertion.owl#">
]>

<!--
  This ontology document is licensed under the Creative Commons
  Attribution License. To view a copy of this license, visit
  http://creativecommons.org/licenses/by/2.0/ or send a letter to
  Creative Commons, 559 Nathan Abbott Way, Stanford, California
  94305, USA.
-->

<rdf:RDF 
  xmlns:rdf = "&rdf;"
  xmlns:rdfs = "&rdfs;"
  xmlns:xsd = "&xsd;"
  xmlns:owl = "&owl;"
  xmlns:cc = "&cc;"
  xmlns:project = "&project;"
  xmlns:person = "&person;"
  xmlns:pub = "&pub;"
  xmlns:assert = "&assert;">
  <!--
    Publication record for the conference paper "The ICWSM 2009 Spinn3r
    Dataset": bibliographic fields, free-text tags, the ordered author
    list, and a separate statement naming the first author.
  -->
  <pub:InProceedings rdf:about="http://ebiquity.umbc.edu/paper/html/id/451/The-ICWSM-2009-Spinn3r-Dataset">
    <rdfs:label><![CDATA[The ICWSM 2009 Spinn3r Dataset]]></rdfs:label>
    <pub:title><![CDATA[The ICWSM 2009 Spinn3r Dataset]]></pub:title>
    <pub:publishedOn rdf:datatype="&xsd;dateTime">2009-05-17T00:00:00-05:00</pub:publishedOn>
    <!-- The abstract is literal text; its line breaks and spacing are part of the value. -->
    <pub:abstract><![CDATA[The dataset, provided by Spinn3r.com, is a set of 44 million blog posts made between August 1st and October 1st, 2008. The post includes the text as syndicated, as well as metadata such as the blog's homepage, timestamps, etc. The data is formatted in XML and is further arranged into tiers approximating to some degree search engine ranking. The total size of the dataset is 142 GB uncompressed, (27 GB compressed). 
This dataset spans a number of big news events (the Olympics; both US presidential nominating conventions; the beginnings of the financial crisis; ...) as well as everything else you might expect to find posted to blogs. 
To get access to the Spinn3r dataset, please download and sign the usage agreement , and email it to dataset-request (at) icwsm.org. Once your form is processed (usually within 1-3 days), you will be sent a URL and password where you can download the collection. 
Here is a sample of blog posts from the collection. The XML format is described on the Spinn3r website.
]]></pub:abstract>
    <!--
      NOTE(review): the note value below looks like a truncated HTML anchor.
      The intended text cannot be recovered from this file, so the value is
      kept verbatim; restore it from the upstream record if available.
    -->
    <pub:note><![CDATA[<a href=]]></pub:note>
    <pub:address><![CDATA[San Jose, CA]]></pub:address>
    <pub:googleKey>iEsSw9XtrUcJ</pub:googleKey>
    <pub:googleCitations>24</pub:googleCitations>
    <!-- Each tag is stated once; repeating an identical statement adds nothing to the RDF graph. -->
    <pub:tag><![CDATA[icwsm]]></pub:tag>
    <pub:tag><![CDATA[social media]]></pub:tag>
    <pub:tag><![CDATA[data]]></pub:tag>
    <pub:tag><![CDATA[social]]></pub:tag>
    <pub:tag><![CDATA[blog]]></pub:tag>
    <pub:booktitle><![CDATA[Third Annual Conference on Weblogs and Social Media (ICWSM 2009)]]></pub:booktitle>
    <pub:publisher><![CDATA[AAAI]]></pub:publisher>
    <!-- Ordered author list, written as an explicit rdf:List chain terminated by rdf:nil. -->
    <pub:author>
      <rdf:List>
        <rdf:first>
          <person:Collaborator rdf:about="http://ebiquity.umbc.edu/person/html/Kevin/Burton/">
            <person:name><![CDATA[Kevin  Burton]]></person:name>
            <rdfs:label><![CDATA[Kevin  Burton]]></rdfs:label>
          </person:Collaborator>
        </rdf:first>
        <rdf:rest>
          <rdf:List>
            <rdf:first>
              <person:Alumnus rdf:about="http://ebiquity.umbc.edu/person/html/Akshay/Java/">
                <person:name><![CDATA[Akshay  Java]]></person:name>
                <rdfs:label><![CDATA[Akshay  Java]]></rdfs:label>
              </person:Alumnus>
            </rdf:first>
            <rdf:rest>
              <rdf:List>
                <rdf:first>
                  <person:Collaborator rdf:about="http://ebiquity.umbc.edu/person/html/Ian/Soboroff/">
                    <person:name><![CDATA[Ian  Soboroff]]></person:name>
                    <rdfs:label><![CDATA[Ian  Soboroff]]></rdfs:label>
                  </person:Collaborator>
                </rdf:first>
                <rdf:rest rdf:resource="&rdf;nil" />
              </rdf:List>
            </rdf:rest>
          </rdf:List>
        </rdf:rest>
      </rdf:List>
    </pub:author>
    <!-- The first author is stated again on its own, using the same resource URI as the head of the list. -->
    <pub:firstAuthor>
      <person:Collaborator rdf:about="http://ebiquity.umbc.edu/person/html/Kevin/Burton/">
        <person:name><![CDATA[Kevin  Burton]]></person:name>
        <rdfs:label><![CDATA[Kevin  Burton]]></rdfs:label>
      </person:Collaborator>
    </pub:firstAuthor>
  </pub:InProceedings>

  <!--
    Statement about this document itself (the empty rdf:about is a
    same-document reference): it points at the Creative Commons
    Attribution 2.0 license.

    NOTE(review): cc:License is used here in property position although
    its capitalisation is that of a class name; confirm against the
    Creative Commons vocabulary whether the lower-case license property
    was intended. Left unchanged so the emitted statement stays the same.
  -->
  <rdf:Description rdf:about="">
    <cc:License rdf:resource="http://creativecommons.org/licenses/by/2.0/"/>
  </rdf:Description>

</rdf:RDF>

