<?xml version="1.0"?>

<!DOCTYPE owl [
  <!--
    Internal entities holding the namespace URIs used by this document.
    Each one is expanded in the matching xmlns declaration on the root
    element; "xsd" is also expanded inside the rdf:datatype attribute values.
    NOTE(review): the document type name "owl" differs from the root element
    (rdf:RDF); presumably this only matters to a validating parser. Confirm
    before enabling DTD validation.
  -->
  <!-- W3C core vocabularies -->
  <!ENTITY rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#">
  <!ENTITY rdfs "http://www.w3.org/2000/01/rdf-schema#">
  <!ENTITY xsd "http://www.w3.org/2001/XMLSchema#">
  <!ENTITY owl "http://www.w3.org/2002/07/owl#">
  <!-- Licensing vocabulary -->
  <!ENTITY cc "http://web.resource.org/cc/#">
  <!-- ebiquity ontologies -->
  <!ENTITY event "http://ebiquity.umbc.edu/ontology/event.owl#">
  <!ENTITY person "http://ebiquity.umbc.edu/ontology/person.owl#">
  <!ENTITY assert "http://ebiquity.umbc.edu/ontology/assertion.owl#">]>

<!--
  This ontology document is licensed under the Creative Commons
  Attribution License. To view a copy of this license, visit
  http://creativecommons.org/licenses/by/2.0/ or send a letter to
  Creative Commons, 559 Nathan Abbott Way, Stanford, California
  94305, USA.
-->

<rdf:RDF 
  xmlns:rdf = "&rdf;"
  xmlns:rdfs = "&rdfs;"
  xmlns:xsd = "&xsd;"
  xmlns:owl = "&owl;"
  xmlns:cc = "&cc;"
  xmlns:event = "&event;"
  xmlns:person = "&person;"
  xmlns:assert = "&assert;">
  <!-- Event record for a single talk, identified by the rdf:about URI. -->
  <event:Event
      rdf:about="http://ebiquity.umbc.edu/event/html/id/276/Textual-Representations-for-Corpus-Based-Bilingual-Retrieval-">
    <!-- The label and the title carry the same literal text. -->
    <rdfs:label><![CDATA[Textual Representations for Corpus-Based Bilingual Retrieval ]]></rdfs:label>
    <event:title><![CDATA[Textual Representations for Corpus-Based Bilingual Retrieval ]]></event:title>

    <!-- Speaker, described inline as a nested person resource. -->
    <event:speaker>
      <person:Alumnus rdf:about="http://ebiquity.umbc.edu/person/html/Paul/McNamee/">
        <person:name><![CDATA[Paul  McNamee]]></person:name>
        <rdfs:label><![CDATA[Paul  McNamee]]></rdfs:label>
      </person:Alumnus>
    </event:speaker>

    <!-- Start and end instants, typed xsd:dateTime with an explicit -05:00 offset. -->
    <event:startDate rdf:datatype="&xsd;dateTime">2008-11-24T09:00:00-05:00</event:startDate>
    <event:endDate rdf:datatype="&xsd;dateTime">2008-11-24T11:30:00-05:00</event:endDate>

    <!--
      The abstract is a CDATA literal: its line breaks and the <p> markers are
      part of the text value, so the lines below must not be re-indented.
    -->
    <event:abstract><![CDATA[The traditional approach to information retrieval is based on using
words as the indexing and search terms for documents. One part of this
research investigates alternative methods for representing text,
including a method based on overlapping sequences of characters called
n-gram tokenization. N-grams are studied in depth and one notable
finding is that they achieve a 20% improvement in retrieval
effectiveness over words in certain situations.
<p>
The other focus of this research is improving retrieval performance
when foreign language documents must be searched and translation is
required. In this scenario bilingual dictionaries are often used to
translate user queries; however even among the most commonly spoken
languages, for which large bilingual lexicons exist, dictionary-based
translation suffers from several significant problems. These include:
difficulty handling proper names, which are often missing; issues
related to morphological variation since entries, or query terms, may
not be lemmatized; and, an inability to robustly handle multiword
phrases, especially non-compositional expressions. These problems can
be addressed when translation is accomplished using parallel
collections, sets of documents available in more than one language.
Using parallel texts enables statistical translation of character
n-grams rather than words or stemmed words, and with this technique
highly effective bilingual retrieval performance is obtained.
Translation of multiword expressions is also explored.
<p>
In this dissertation I present an overview of the field of cross-
language information retrieval and then introduce the foundational
concepts in n-gram tokenization and corpus-based translation. Then
monolingual and bilingual experiments on test sets in 13 languages are
described. Analysis of these experiments gives insight into: the
relative efficacy of various tokenization methods; reasons why n-grams
are effective; the utility of automated relevance feedback, in both
monolingual and bilingual contexts; the interplay between tokenization
and translation; and, how translation resource selection and size
influence bilingual retrieval.
]]></event:abstract>

    <!-- Host, described inline as a nested person resource. -->
    <event:host>
      <person:Collaborator rdf:about="http://ebiquity.umbc.edu/person/html/Charles/Nicholas/">
        <person:name><![CDATA[Charles  Nicholas]]></person:name>
        <rdfs:label><![CDATA[Charles  Nicholas]]></rdfs:label>
      </person:Collaborator>
    </event:host>
  </event:Event>

  <!--
    Licensing statement about this document itself: the empty rdf:about
    resolves to the document's base URI. The predicate is the lowercase
    cc:license property. The capitalised cc:License names the license class
    in the Creative Commons vocabulary and is not a property, so it must not
    be used as the property element here.
  -->
  <rdf:Description rdf:about="">
    <cc:license rdf:resource="http://creativecommons.org/licenses/by/2.0/" />
  </rdf:Description>

</rdf:RDF>

