mirror of https://github.com/tdwg/dwc.git
920 lines
33 KiB
HTML
920 lines
33 KiB
HTML
<!DOCTYPE html>
|
|
|
|
<html lang="en">
|
|
<head>
|
|
<meta content="text/html; charset=utf-8" http-equiv="Content-Type">
|
|
|
|
<title>Darwin Core Text Guide</title>
|
|
<link href="http://rs.tdwg.org/dwc/" rel="schema.DwC">
|
|
<meta content="Darwin Core Text Guide" name="DC.title">
|
|
<meta content="Guidelines for implementing Darwin Core in Text files." name=
|
|
"DC.description">
|
|
<meta content="biodiversity, standards" name="DC.subject">
|
|
<meta content="Darwin Core Task Group" name="DC.creator">
|
|
<meta content="Tim Robertson (GBIF)" name="DC.contributor">
|
|
<meta content="John Wieczorek (MVZ)" name="DC.contributor">
|
|
<meta content="Markus Döring (GBIF)" name="DC.contributor">
|
|
<meta content="Renato De Giovanni (CRIA)" name="DC.contributor">
|
|
<meta content="Dave Vieglais (KUNHM)" name="DC.contributor">
|
|
<meta content="2009-02-12" name="DC.dateAccepted">
|
|
<meta content="http://rs.tdwg.org/dwc/2009-12-07/terms/guides/text/" name=
|
|
"DC.replaces">
|
|
<meta content="2014-11-08" name="DC.modified">
|
|
<meta content="http://rs.tdwg.org/dwc/2014-11-08/terms/guides/text" name=
|
|
"DC.identifier">
|
|
<meta content="text/html" name="DC.format">
|
|
<meta content="Biodiversity Information Standards TDWG" name="DC.publisher">
|
|
<meta content="http://creativecommons.org/licenses/by/4.0/legalcode" name=
|
|
"DC.license">
|
|
<meta content="public" name="DC.accessRights">
|
|
<meta content="Darwin Core Text Guide. 2014" name="DC.bibliographicCitation">
|
|
<meta content="en" name="DC.language">
|
|
<link href="http://www.tdwg.org/" rel="meta">
|
|
<link href="../../../DarwinCore_files/default.css" rel="stylesheet" type=
|
|
"text/css">
|
|
<script src="../../../DarwinCore_files/default.js" type="text/javascript"></script>
|
|
</head>
|
|
|
|
<body>
|
|
<table border="0" cellpadding="0" cellspacing="0" style=
|
|
"background-color: #617394" width="100%">
|
|
<tbody>
|
|
<tr>
|
|
<td>
|
|
<a href="http://www.tdwg.org"><img alt=
|
|
"Biodiversity Information Standards (TDWG) logo" height="89" src=
|
|
"../../../DarwinCore_files/TDWGlogo_Twiki.gif" width="148"></a>
|
|
</td>
|
|
|
|
<td align="right" height="100" valign="top" width="100%"></td>
|
|
</tr>
|
|
</tbody>
|
|
</table>
|
|
|
|
<table border="0" cellpadding="0" cellspacing="0" width="100%">
|
|
<tbody>
|
|
<tr>
|
|
<td style="background-color: #617394" width="15%">
|
|
<div id="menu">
|
|
<div class="topMenu" id="nav">
|
|
<div class="menuItem level1">
|
|
<a href="../../../index.htm" onfocus=
|
|
"blurLink(this);">Introduction</a>
|
|
</div>
|
|
<!-- <div class="menuItem"><a href="../../../index.htm#glossary" onfocus="blurLink(this);">Glossary</a></div> -->
|
|
|
|
<div class="subMenu">
|
|
<div class="menuItem">
|
|
<a href="../../../index.htm#references" onfocus=
|
|
"blurLink(this);">References</a>
|
|
</div>
|
|
</div>
|
|
|
|
<div class="menuItem level1">
|
|
<a href="../../index.htm" onfocus="blurLink(this);">Quick
|
|
Reference Guide</a>
|
|
</div>
|
|
|
|
<div class="menuItem level1">
|
|
<a href="../../simple/index.htm" onfocus=
|
|
"blurLink(this);">Simple Darwin Core</a>
|
|
</div>
|
|
|
|
<div class="separator">
|
|
<hr>
|
|
</div>
|
|
|
|
<div class="menuItem level1">
|
|
<a href="../../namespace/index.htm" onfocus=
|
|
"blurLink(this);">Namespace Policy</a>
|
|
</div>
|
|
|
|
<div class="menuItem level1 highlighted">
|
|
<a href="../text/index.htm" onfocus="blurLink(this);">Text
|
|
Guide</a>
|
|
</div>
|
|
|
|
<div class="subMenu">
|
|
<div class="menuItem">
|
|
<a href="./index.htm#introduction" onfocus=
|
|
"blurLink(this);">Introduction</a>
|
|
</div>
|
|
</div>
|
|
|
|
<div class="subMenu">
|
|
<div class="menuItem">
|
|
<a href="./index.htm#metafile" onfocus=
|
|
"blurLink(this);">Metafile Content</a>
|
|
</div>
|
|
</div>
|
|
|
|
<div class="subMenu">
|
|
<div class="menuItem">
|
|
<a href="./index.htm#implement" onfocus=
|
|
"blurLink(this);">Implementation Guide</a>
|
|
</div>
|
|
</div>
|
|
|
|
<div class="subMenu">
|
|
<div class="menuItem">
|
|
<a href="./index.htm#database" onfocus=
|
|
"blurLink(this);">Database Example</a>
|
|
</div>
|
|
</div>
|
|
|
|
<div class="subMenu">
|
|
<div class="menuItem">
|
|
<a href="./index.htm#tools" onfocus=
|
|
"blurLink(this);">Tools</a>
|
|
</div>
|
|
</div>
|
|
|
|
<div class="menuItem level1">
|
|
<a href="../xml/index.htm" onfocus="blurLink(this);">XML
|
|
Guide</a>
|
|
</div>
|
|
|
|
<div class="separator">
|
|
<hr>
|
|
</div>
|
|
|
|
<div class="menuItem level1">
|
|
<a href="../../history/index.htm" onfocus=
|
|
"blurLink(this);">Complete History</a>
|
|
</div>
|
|
|
|
<div class="menuItem level1">
|
|
<a href="../../history/decisions/index.htm" onfocus=
|
|
"blurLink(this);">Decision History</a>
|
|
</div>
|
|
|
|
<div class="menuItem level1">
|
|
<a href="../../history/dwctoabcd/index.htm" onfocus=
|
|
"blurLink(this);">Mapping to ABCD</a>
|
|
</div>
|
|
|
|
<div class="menuItem level1">
|
|
<a href="../../history/versions/index.htm" onfocus=
|
|
"blurLink(this);">Mapping to Old Versions</a>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
</td>
|
|
|
|
<td>
|
|
<center><h1>Darwin Core Text Guide</h1></center>
|
|
|
|
<p></p>
|
|
|
|
<table cellspacing="0" class="docinfo">
|
|
<tbody>
|
|
<tr>
|
|
<th>Title:</th>
|
|
|
|
<td>Darwin Core Text Guide</td>
|
|
</tr>
|
|
|
|
<tr>
|
|
<th>Date Issued:</th>
|
|
|
|
<td>2009-02-12</td>
|
|
</tr>
|
|
|
|
<tr>
|
|
<th>Date Modified:</th>
|
|
|
|
<td>2014-11-08</td>
|
|
</tr>
|
|
|
|
<tr>
|
|
<th>Abstract:</th>
|
|
|
|
<td>Guidelines for implementing Darwin Core in Text files.</td>
|
|
</tr>
|
|
|
|
<tr>
|
|
<th>Contributors:</th>
|
|
|
|
<td>Tim Robertson (GBIF), Markus Döring (GBIF), John
|
|
Wieczorek (MVZ), Renato De Giovanni (CRIA), Dave Vieglais
|
|
(KUNHM)</td>
|
|
</tr>
|
|
|
|
<tr>
|
|
<th>Legal:</th>
|
|
|
|
<td>This document is governed by the standard legal, copyright,
|
|
licensing provisions and disclaimers issued by the Taxonomic
|
|
Databases Working Group.</td>
|
|
</tr>
|
|
|
|
<tr>
|
|
<th>Part of TDWG Standard:</th>
|
|
|
|
<td>
|
|
<a href=
|
|
"http://www.tdwg.org/standards/450/">http://www.tdwg.org/standards/450/</a>
|
|
</td>
|
|
</tr>
|
|
|
|
<tr>
|
|
<th>Creator:</th>
|
|
|
|
<td>Darwin Core Task Group</td>
|
|
</tr>
|
|
|
|
<tr>
|
|
<th>Identifier:</th>
|
|
|
|
<td>http://rs.tdwg.org/dwc/2014-11-08/terms/guides/text/</td>
|
|
</tr>
|
|
|
|
<tr>
|
|
<th>Latest Version:</th>
|
|
|
|
<td>
|
|
<a href=
|
|
"http://rs.tdwg.org/dwc/terms/guides/text/index.htm">http://rs.tdwg.org/dwc/terms/guides/text/</a>
|
|
</td>
|
|
</tr>
|
|
|
|
<tr>
|
|
<th>Replaces:</th>
|
|
|
|
<td>
|
|
<a href=
|
|
"http://rs.tdwg.org/dwc/2009-12-07/terms/guides/text/index.htm">
|
|
http://rs.tdwg.org/dwc/2009-12-07/terms/guides/text/</a>
|
|
</td>
|
|
</tr>
|
|
|
|
<tr>
|
|
<th>Document Status:</th>
|
|
|
|
<td>Current Standard</td>
|
|
</tr>
|
|
</tbody>
|
|
</table>
|
|
</td>
|
|
</tr>
|
|
</tbody>
|
|
</table><a id="introduction" name="introduction"></a>
|
|
|
|
<h2>1. Introduction</h2><b>Audience</b>: This document is targeted toward
|
|
those who wish to use or share information based on the Darwin Core terms
|
|
using text files. It provides technical details on how to construct these
|
|
files and complementary metadata files that describe their content.
|
|
|
|
<p>This document provides guidelines for formatting and sharing Darwin Core
|
|
terms [<a href="../../index.htm">TERMS</a>] in <em>fielded text</em> formats,
|
|
such as one or more comma separated value (CSV) files. Data conforming to the
|
|
Simple Darwin Core [<a href="../../simple/index.htm">SIMPLEDWC</a>] (CSV
|
|
format and having the first row include Darwin Core standard term names) can
|
|
be shared in a single file, while a non-standard text file can be understood
|
|
using an [<a href="http://www.w3.org/XML/">XML</a>] metafile to describe its
|
|
contents and formatting.<br>
|
|
<img src="images/usage.png"><br>
|
|
More complex structure can be shared in multiple related files. The
|
|
description of content and relationships between files can be achieved using
|
|
the metafile. This guideline makes recommendations for the simple case of a
|
|
<em>core</em> file, upon which Darwin Core <em>records</em> are based, and
|
|
<em>extensions</em> that are linked to records in that core file.
|
|
Specifically, extension records have a <em>many-to-one</em> relationship with
|
|
records in the core file. For example, a core file might contain specimen
|
|
records, with one specimen per row in the file, while an extension file
|
|
contains one or more identifications for those specimens, with one
|
|
identification per row in the extension file, and with an identifier to the
|
|
specimen for each identification row. This example would allow many
|
|
identifications to be associated with each specimen.</p>
|
|
|
|
<h3>1.1 Simple Example Metafile Content</h3>A simple comma separated values
|
|
(CSV) data file with the following content:
|
|
<pre class="example">
|
|
ID,Species,Count
|
|
123,"Cryptantha gypsophila Reveal & C.R. Broome",12
|
|
124,"Buxbaumia piperi",2
|
|
</pre>can be described with the following Darwin Core metafile:
|
|
<pre class="example">
|
|
&lt;?xml version="1.0" encoding="UTF-8"?&gt;
|
|
&lt;archive xmlns="http://rs.tdwg.org/dwc/text/"
|
|
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
|
xmlns:xs="http://www.w3.org/2001/XMLSchema"
|
|
xsi:schemaLocation="http://rs.tdwg.org/dwc/text/ http://rs.tdwg.org/dwc/text/tdwg_dwc_text.xsd"&gt;
|
|
&lt;core rowType="http://rs.tdwg.org/dwc/xsd/simpledarwincore/SimpleDarwinRecord" ignoreHeaderLines="1"&gt;
|
|
&lt;files&gt;
|
|
&lt;location&gt;http://data.gbif.org/download/specimens.csv&lt;/location&gt;
|
|
&lt;/files&gt;
|
|
&lt;field index="0" term="http://rs.tdwg.org/dwc/terms/catalogNumber" /&gt;
|
|
&lt;field index="1" term="http://rs.tdwg.org/dwc/terms/scientificName" /&gt;
|
|
&lt;field index="2" term="http://rs.tdwg.org/dwc/terms/individualCount" /&gt;
|
|
&lt;!-- A constant value has no index, but applies to all rows --&gt;
|
|
&lt;field term="http://rs.tdwg.org/dwc/terms/datasetID" default="urn:lsid:tim.lsid.tdwg.org:collections:1"/&gt;
|
|
&lt;/core&gt;
|
|
&lt;/archive&gt;
|
|
</pre>
|
|
<p>These same data could be understood without the metafile if the first row
|
|
of the CSV file contained the term names:
|
|
<pre class="example">
|
|
type,institutionCode,collectionCode,catalogNumber,scientificName,individualCount,datasetID
|
|
PhysicalObject,ANSP,PH,123,"Cryptantha gypsophila Reveal & C.R. Broome",12,urn:lsid:tim.lsid.tdwg.org:collections:1
|
|
PhysicalObject,ANSP,PH,124,"Buxbaumia piperi",2,urn:lsid:tim.lsid.tdwg.org:collections:1
|
|
</pre>
|
|
|
|
<h3>1.2 XML versus <em>Fielded Text</em></h3>
|
|
|
|
<p>Many resources exist on the web describing the advantages of Extensible
|
|
Markup Language [<a href="http://www.w3.org/XML/">XML</a>] over less
|
|
structured content such as <em>fielded text</em>. The Darwin Core Text Guide
|
|
(this document) is not meant to promote the use of <em>fielded text</em> over
|
|
XML for data exchange, but rather to provide recommendations for how to
|
|
handle such data files when necessary.<br>
|
|
Two scenarios that might benefit from the use of <em>fielded text</em>
|
|
are:</p>
|
|
|
|
<ul>
|
|
<li>The transfer of large numbers of Darwin Core records and related data
|
|
from one database to another. Typically databases are very efficient at
|
|
exporting and importing comma separated text files.</li>
|
|
|
|
<li>The description of legacy data existing in a <em>fielded text</em>
|
|
format, such that it might be automatically understood and loaded into
|
|
another system. It could be that this system would then serve the data in
|
|
another format such as XML.</li>
|
|
</ul>
|
|
<p><a id="metafile" name="metafile"></a>
|
|
|
|
<h2>2. Metafile Content</h2>
|
|
|
|
<p>The text metafile schema [<a href=
|
|
"../../../text/tdwg_dwc_text.xsd">TEXTSCHEMA</a>] provides technical details
|
|
for the structure of a metafile by defining the elements and attributes
|
|
required to describe the contents and relationships between text files. These
|
|
elements and attributes, with descriptions and specifications for their use
|
|
in a metafile, are described in the following table.</p>
|
|
|
|
<h3>2.1 The &lt;archive&gt; element</h3>The &lt;archive&gt; element is the
|
|
container for the list of related files (one core and zero or more
|
|
extensions). The &lt;archive&gt; element has just one attribute,
|
|
<em>metadata</em>.
|
|
|
|
<p></p>Attributes
|
|
|
|
<table class="border">
|
|
<thead>
|
|
<tr>
|
|
<th>Attribute</th>
|
|
|
|
<th>Description</th>
|
|
|
|
<th>Required</th>
|
|
|
|
<th>Default</th>
|
|
</tr>
|
|
</thead>
|
|
|
|
<tbody>
|
|
<tr>
|
|
<td class=""><em>metadata</em></td>
|
|
|
|
<td>Contains a qualified Uniform Resource Locator (URL) defining the
|
|
location of a metadata description of the entire archive. The format of
|
|
the metadata is not prescribed, but a standardized format such as
|
|
Ecological Metadata Language (EML), Federal Geographic Data Committee
|
|
(FGDC), or ISO 19115 family is recommended.</td>
|
|
|
|
<td></td>
|
|
<td>
|
|
</tr>
|
|
</tbody>
|
|
</table>Elements
|
|
|
|
<table class="border">
|
|
<thead>
|
|
<tr>
|
|
<th>Element</th>
|
|
|
|
<th>Description</th>
|
|
</tr>
|
|
</thead>
|
|
|
|
<tbody>
|
|
<tr>
|
|
<td class="">
|
|
<a href="#coreTag">&lt;core&gt;</a>
|
|
</td>
|
|
|
|
<td>
|
|
An &lt;archive&gt; must contain exactly one <a href=
|
|
"#coreTag">&lt;core&gt;</a> element, representing the data entity
|
|
(the actual file and its column header mappings to Darwin Core terms)
|
|
upon which records are based.<br>
|
|
If extensions are being used, each record in the core data must have
|
|
a unique identifier. The field for this identifier must be specified
|
|
in an explicit &lt;id&gt; field in order to associate extension
|
|
records with the core record.
|
|
</td>
|
|
</tr>
|
|
|
|
<tr>
|
|
<td class="">
|
|
<a href="#coreTag">&lt;extension&gt;</a>
|
|
</td>
|
|
|
|
<td>
|
|
An &lt;archive&gt; may define zero or more <a href=
|
|
"#coreTag">&lt;extension&gt;</a> elements, each representing an
|
|
individual extension entity directly related to the core. In addition
|
|
to the general file attributes described below, every extension
|
|
entity must have an explicit &lt;coreId&gt; field to relate the
|
|
extension record to a row in the core entity. The extension itself
|
|
does not have to have a unique ID field and many rows can point to
|
|
the same core record.
|
|
</td>
|
|
</tr>
|
|
</tbody>
|
|
</table>
|
|
<p>
|
|
|
|
<h3><a id="coreTag" name="coreTag">2.2 The &lt;core&gt; or &lt;extension&gt;
|
|
element</a></h3>
|
|
|
|
<p></p>Attributes
|
|
|
|
<table class="border">
|
|
<thead>
|
|
<tr>
|
|
<th>Attribute</th>
|
|
|
|
<th>Description</th>
|
|
|
|
<th>Required</th>
|
|
|
|
<th>Default</th>
|
|
</tr>
|
|
</thead>
|
|
|
|
<tbody>
|
|
<tr>
|
|
<td class=""><em>rowType</em></td>
|
|
|
|
<td>
|
|
A Uniform Resource Identifier (URI) for the term identifying the
|
|
class of data represented by each row, for example,
|
|
http://rs.tdwg.org/dwc/terms/Occurrence for Occurrence records or
|
|
http://rs.tdwg.org/dwc/terms/Taxon for Taxon records. Additional
|
|
classes may be referenced by URI and defined outside the Darwin Core
|
|
specification. The row type defaults to the ambiguous
|
|
SimpleDarwinRecord. For convenience the URIs for classes defined by
|
|
the Darwin Core are listed below:
|
|
|
|
<dl>
|
|
<dt>Simple Darwin Record</dt>
|
|
|
|
<dd>
|
|
http://rs.tdwg.org/dwc/xsd/simpledarwincore/SimpleDarwinRecord</dd>
|
|
|
|
<dt>Occurrence</dt>
|
|
|
|
<dd>http://rs.tdwg.org/dwc/terms/Occurrence</dd>
|
|
|
|
<dt>Event</dt>
|
|
|
|
<dd>http://rs.tdwg.org/dwc/terms/Event</dd>
|
|
|
|
<dt>Location</dt>
|
|
|
|
<dd>http://purl.org/dc/terms/Location</dd>
|
|
|
|
<dt>GeologicalContext</dt>
|
|
|
|
<dd>http://purl.org/dc/terms/GeologicalContext</dd>
|
|
|
|
<dt>Identification</dt>
|
|
|
|
<dd>http://rs.tdwg.org/dwc/terms/Identification</dd>
|
|
|
|
<dt>Taxon</dt>
|
|
|
|
<dd>http://rs.tdwg.org/dwc/terms/Taxon</dd>
|
|
|
|
<dt>ResourceRelationship</dt>
|
|
|
|
<dd>http://rs.tdwg.org/dwc/terms/ResourceRelationship</dd>
|
|
|
|
<dt>MeasurementOrFact</dt>
|
|
|
|
<dd>http://rs.tdwg.org/dwc/terms/MeasurementOrFact</dd>
|
|
</dl>
|
|
</td>
|
|
|
|
<td>✓</td>
|
|
|
|
<td>http://rs.tdwg.org/dwc/xsd/simpledarwincore/SimpleDarwinRecord</td>
|
|
</tr>
|
|
|
|
<tr>
|
|
<td class=""><em>fieldsTerminatedBy</em></td>
|
|
|
|
<td>Specifies the delimiter between fields. Typical values might be ","
|
|
or "\t" for CSV or Tab files respectively.</td>
|
|
<td>
|
|
|
|
<td>,</td>
|
|
</tr>
|
|
|
|
<tr>
|
|
<td class=""><em>linesTerminatedBy</em></td>
|
|
|
|
<td>Specifies the row separator character.</td>
|
|
<td>
|
|
|
|
<td>\n</td>
|
|
</tr>
|
|
|
|
<tr>
|
|
<td class=""><em>fieldsEnclosedBy</em></td>
|
|
|
|
<td>Specifies the character used to enclose (mark the start and end of)
|
|
each field. CSV files frequently use the double quote character
|
|
("), but the default is no enclosing character. Note that a comma
|
|
separated value file that has commas within the content of any field
|
|
must have an enclosing character.</td>
|
|
<td>
|
|
|
|
<td>"</td>
|
|
</tr>
|
|
|
|
<tr>
|
|
<td class=""><em>encoding</em></td>
|
|
|
|
<td>
|
|
Specifies the <a href=
|
|
"http://en.wikipedia.org/wiki/Character_encoding">character
|
|
encoding</a> for the data file. The encoding is extremely important,
|
|
but often ignored. The most frequently used encodings are:
|
|
|
|
<dl>
|
|
<dt>UTF-8</dt>
|
|
|
|
<dd>8-bit Unicode Transformation Format.</dd>
|
|
|
|
<dt>UTF-16</dt>
|
|
|
|
<dd>16-bit Unicode Transformation Format.</dd>
|
|
|
|
<dt>ISO-8859-1</dt>
|
|
|
|
<dd>Commonly known as Latin-1 and a common default on systems
|
|
configured for a single western European language.</dd>
|
|
|
|
<dt>Windows-1252</dt>
|
|
|
|
<dd>Commonly known as WinLatin and a common default of legacy
|
|
versions of Microsoft Windows based operating systems.</dd>
|
|
</dl>
|
|
</td>
|
|
<td>
|
|
|
|
<td>UTF-8</td>
|
|
</tr>
|
|
|
|
<tr>
|
|
<td class=""><em>ignoreHeaderLines</em></td>
|
|
|
|
<td>Specifies the number of lines to ignore from the beginning of the
|
|
file. This can be used to skip column headings or preamble comments,
|
|
for example.</td>
|
|
<td>
|
|
|
|
<td>0</td>
|
|
</tr>
|
|
|
|
<tr>
|
|
<td class=""><em>dateFormat</em></td>
|
|
|
|
<td>
|
|
When verbatim dates are consistent in format, this field can be used
|
|
to indicate the format represented. It is recommended to use the
|
|
date, dateTime and time for field formats wherever possible, but
|
|
where verbatim dates are required, a format may be specified here.
|
|
This should be considered a 'hint' for consumers. It is recommended
|
|
that consumers support the minimum combinations of DD MM and YYYY
|
|
with the separators / and -. Examples:
|
|
|
|
<dl>
|
|
<dt>DDMMYYYY</dt>
|
|
|
|
<dd>For dates of the form 21121978</dd>
|
|
|
|
<dt>DD-MM-YYYY</dt>
|
|
|
|
<dd>For dates of the form 21-12-1978</dd>
|
|
|
|
<dt>MMDDYYYY</dt>
|
|
|
|
<dd>For dates of the form 12211978</dd>
|
|
|
|
<dt>MM-DD-YYYY</dt>
|
|
|
|
<dd>For dates of the form 12-21-1978</dd>
|
|
|
|
<dt>YYYYMMDD</dt>
|
|
|
|
<dd>For dates of the form 19781221</dd>
|
|
</dl>
|
|
</td>
|
|
<td>
|
|
|
|
<td>YYYY-MM-DD</td>
|
|
</tr>
|
|
</tbody>
|
|
</table>Elements
|
|
|
|
<table class="border">
|
|
<thead>
|
|
<tr>
|
|
<th>Element</th>
|
|
|
|
<th>Description</th>
|
|
</tr>
|
|
</thead>
|
|
|
|
<tbody>
|
|
<tr>
|
|
<td class="">
|
|
<a href="#files">&lt;files&gt;</a>
|
|
</td>
|
|
|
|
<td>
|
|
A <a href="#coreTag">&lt;core&gt;</a> or <a href=
|
|
"#coreTag">&lt;extension&gt;</a> element must contain one <a href=
|
|
"#files">&lt;files&gt;</a> element to locate the data being
|
|
described.
|
|
</td>
|
|
</tr>
|
|
|
|
<tr>
|
|
<td class="">&lt;id&gt;</td>
|
|
|
|
<td>
|
|
If extensions are being used, the <a href="#coreTag">&lt;core&gt;</a>
|
|
must contain an &lt;id&gt; element that indicates the identifier for
|
|
a record.
|
|
</td>
|
|
</tr>
|
|
|
|
<tr>
|
|
<td class="">&lt;coreId&gt;</td>
|
|
|
|
<td>
|
|
If extensions are being used, the <a href=
|
|
"#coreTag">&lt;extension&gt;</a> element must contain a
|
|
&lt;coreId&gt; element that indicates the column in the extension
|
|
file that contains the core record identifier (the matching
|
|
&lt;id&gt; in the core file).
|
|
</td>
|
|
</tr>
|
|
|
|
<tr>
|
|
<td class="">
|
|
<a href="#fieldTag">&lt;field&gt;</a>
|
|
</td>
|
|
|
|
<td>
|
|
A <a href="#coreTag">&lt;core&gt;</a> or <a href=
|
|
"#coreTag">&lt;extension&gt;</a> element must contain one or more
|
|
<a href="#fieldTag">&lt;field&gt;</a> elements, each representing a
|
|
'column' in the row.
|
|
</td>
|
|
</tr>
|
|
</tbody>
|
|
</table>
|
|
<p>
|
|
|
|
<h3><a id="files" name="files">2.3 The &lt;files&gt; element</a></h3>
|
|
|
|
<p>The files element must contain one or more &lt;location&gt; elements, each
|
|
defining where a file resides. Each core or extension entity can be composed
|
|
from one or more files. If an entity has data in more than one file, use the
|
|
&lt;location&gt; element multiple times, once for each file that makes up the
|
|
entity.</p>
|
|
|
|
<p></p>Elements
|
|
|
|
<table class="border">
|
|
<thead>
|
|
<tr>
|
|
<th>Element</th>
|
|
|
|
<th>Description</th>
|
|
</tr>
|
|
</thead>
|
|
|
|
<tbody>
|
|
<tr>
|
|
<td class=""><em>location</em></td>
|
|
|
|
<td>
|
|
Specifies the location of the file being described, which may take
|
|
either of the following forms:
|
|
|
|
<ul>
|
|
<li>A web accessible URL such as
|
|
"http://www.gbif.org/data/specimen.csv" or
|
|
"ftp://ftp.gbif.org/tim/specimen.txt".</li>
|
|
|
|
<li>A filepath relative to the location of the metafile such as
|
|
"specimen.txt","./specimen.txt", "data/specimen.txt".</li>
|
|
</ul>
|
|
</td>
|
|
</tr>
|
|
</tbody>
|
|
</table>
|
|
<p>
|
|
|
|
<h3><a id="fieldTag" name="fieldTag">2.4 The &lt;field&gt;
|
|
element</a></h3>The field element is used to specify the location and content
|
|
of data within a file. There must be one field element for every term being
|
|
shared for the entity, whether explicitly or through the use of a default
|
|
value for all rows in the file.
|
|
|
|
<p></p>Attributes
|
|
|
|
<table class="border">
|
|
<thead>
|
|
<tr>
|
|
<th>Attribute</th>
|
|
|
|
<th>Description</th>
|
|
|
|
<th>Required</th>
|
|
|
|
<th>Default</th>
|
|
</tr>
|
|
</thead>
|
|
|
|
<tbody>
|
|
<tr>
|
|
<td class=""><em>index</em></td>
|
|
|
|
<td>Specifies the position of the column in the row. The first column
|
|
has an index of 0, the second column 1, etc. If no column index is
|
|
specified, then the term and the default may be used to define a
|
|
constant value for all rows.</td>
|
|
<td>
|
|
<td>
|
|
</tr>
|
|
|
|
<tr>
|
|
<td class=""><em>term</em></td>
|
|
|
|
<td>A Uniform Resource Identifier (URI) for the term represented by
|
|
this field. For example, a field containing the scientific name would
|
|
have term="http://rs.tdwg.org/dwc/terms/scientificName".
|
|
Terms outside of the Darwin Core specification may be used, such as
|
|
those from the Dublin Core Metadata Initiative, for example,
|
|
dcterms:modified would be
|
|
term="http://purl.org/dc/terms/modified".</td>
|
|
|
|
<td>✓</td>
|
|
<td>
|
|
</tr>
|
|
|
|
<tr>
|
|
<td class=""><em>default</em></td>
|
|
|
|
<td>Specifies the value to use if one is not supplied for the field in a
|
|
given row. If no index is supplied, the default can be used to define a
|
|
constant for all rows for a field that is not in the data file.</td>
|
|
<td>
|
|
<td>
|
|
</tr>
|
|
|
|
<tr>
|
|
<td class=""><em>vocabulary</em></td>
|
|
|
|
<td>A Uniform Resource Identifier (URI) for a vocabulary that the
|
|
source values for this field are based on. The URI ideally should
|
|
resolve to some machine readable definition like SKOS, RDF or at least
|
|
some simple text or html file often found for ISO or RFC standards. For
|
|
example http://rs.gbif.org/vocabulary/gbif/nomenclatural_code.xml,
|
|
http://www.ietf.org/rfc/rfc3066.txt or
|
|
http://www.iso.org/iso/list-en1-semic-3.txt .</td>
|
|
<td>
|
|
|
|
<td></td>
|
|
</tr>
|
|
</tbody>
|
|
</table>
|
|
<p><a id="implement" name="implement"></a>
|
|
|
|
<h2>3. Implementation Guide</h2>
|
|
|
|
<h3>3.1 Extension example</h3>The following example illustrates the use of
|
|
extensions. In this example there are three files in the archive, all of
|
|
which are located in the same directory as the metafile. The whales.txt file
|
|
acts as a core file of Taxon records. The whales.txt file is extended by two
|
|
other files, types.csv and distribution.csv. The types.csv file contains
|
|
records of a type specified in an external definition at
|
|
http://rs.gbif.org/terms/1.0/Types and consists of Dublin Core and
|
|
Darwin Core terms, while the distribution.csv file contains records of a type
|
|
specified at http://rs.gbif.org/terms/1.0/Distribution and consists of
|
|
Darwin Core terms plus an additional term for threatStatus. Both extension
|
|
files are related to the core file by its taxonID field. Presumably,
|
|
this archive contains information about whale species, type specimen records
|
|
for those species, and lists of countries and the threat status for those
|
|
species.<br>
|
|
<img src="images/extension.png"><br>
|
|
<pre class="example">
|
|
&lt;?xml version="1.0" encoding="UTF-8"?&gt;
|
|
&lt;archive xmlns="http://rs.tdwg.org/dwc/text/"
|
|
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
|
xmlns:xs="http://www.w3.org/2001/XMLSchema"
|
|
xsi:schemaLocation="http://rs.tdwg.org/dwc/text/ http://rs.tdwg.org/dwc/text/tdwg_dwc_text.xsd"&gt;
|
|
|
|
&lt;core encoding="UTF-8" fieldsTerminatedBy="\t" linesTerminatedBy="\n" ignoreHeaderLines="1"
|
|
rowType="http://rs.tdwg.org/dwc/terms/Taxon"&gt;
|
|
&lt;files&gt;
|
|
&lt;location&gt;whales.txt&lt;/location&gt;
|
|
&lt;/files&gt;
|
|
&lt;id index="0" /&gt;
|
|
&lt;field index="0" term="http://rs.tdwg.org/dwc/terms/taxonID" /&gt;
|
|
&lt;field index="1" term="http://purl.org/dc/terms/modified" /&gt;
|
|
&lt;field index="2" term="http://rs.tdwg.org/dwc/terms/scientificName"/&gt;
|
|
&lt;field index="3" term="http://rs.tdwg.org/dwc/terms/acceptedNameUsageID"/&gt;
|
|
&lt;field index="4" term="http://rs.tdwg.org/dwc/terms/parentNameUsageID"/&gt;
|
|
&lt;field index="5" term="http://rs.tdwg.org/dwc/terms/originalNameUsageID"/&gt;
|
|
&lt;/core&gt;
|
|
|
|
&lt;extension encoding="UTF-8" fieldsTerminatedBy="," linesTerminatedBy="\n" fieldsEnclosedBy='"' ignoreHeaderLines="1"
|
|
rowType="http://rs.gbif.org/terms/1.0/Types"&gt;
|
|
&lt;files&gt;
|
|
&lt;location&gt;types.csv&lt;/location&gt;
|
|
&lt;/files&gt;
|
|
&lt;coreId index="0" /&gt;
|
|
&lt;field index="1" term="http://purl.org/dc/terms/bibliographicCitation"/&gt;
|
|
&lt;field index="2" term="http://rs.tdwg.org/dwc/terms/catalogNumber"/&gt;
|
|
&lt;field index="3" term="http://rs.tdwg.org/dwc/terms/collectionCode"/&gt;
|
|
&lt;field index="4" term="http://rs.tdwg.org/dwc/terms/institutionCode"/&gt;
|
|
&lt;field index="5" term="http://rs.tdwg.org/dwc/terms/typeStatus"/&gt;
|
|
&lt;/extension&gt;
|
|
|
|
&lt;extension encoding="UTF-8" fieldsTerminatedBy="," linesTerminatedBy="\n" fieldsEnclosedBy='"' ignoreHeaderLines="1"
|
|
rowType="http://rs.gbif.org/terms/1.0/Distribution"&gt;
|
|
&lt;files&gt;
|
|
&lt;location&gt;distribution.csv&lt;/location&gt;
|
|
&lt;/files&gt;
|
|
&lt;coreId index="0" /&gt;
|
|
&lt;field index="1" term="http://rs.tdwg.org/dwc/terms/countryCode"/&gt;
|
|
&lt;field index="2" term="http://rs.gbif.org/terms/1.0/threatStatus"/&gt;
|
|
&lt;field index="3" term="http://rs.tdwg.org/dwc/terms/occurrenceStatus"/&gt;
|
|
&lt;/extension&gt;
|
|
&lt;/archive&gt;
|
|
</pre><a id="database" name="database"></a>
|
|
|
|
<h2>4. Database Example</h2>
|
|
|
|
<h3>4.1 MySQL</h3>It is very easy to produce <em>fielded text</em> using the
|
|
<code>SELECT INTO outfile</code> command from MySQL. The encoding of the
|
|
resulting file will depend on the server variables and collations used, and
|
|
might need to be modified before the operation is done. Note that MySQL will
|
|
export NULL values as \N by default. Use the IFNULL() function as shown in
|
|
the following example to avoid this.
|
|
<pre class="example">
|
|
SELECT
|
|
IFNULL(id, ''), IFNULL(scientific_name, ''), IFNULL(count,'')
|
|
INTO outfile '/tmp/dwc.txt'
|
|
FIELDS TERMINATED BY ','
|
|
OPTIONALLY ENCLOSED BY '"'
|
|
LINES TERMINATED BY '\n'
|
|
FROM
|
|
dwc;
|
|
</pre><a id="tools" name="tools"></a>
|
|
|
|
<h2>5. Tools</h2>
|
|
|
|
<p>For tools and applications, including a Java-based application to read
|
|
Darwin Core text archives, see the <i>Darwin Core Tools and Applications</i>
|
|
page [<a href=
|
|
"https://github.com/tdwg/dwc-documentation/blob/master/doc/resources.md">TOOLS</a>].</p>
|
|
<hr>
|
|
<!-- Footer -->
|
|
|
|
<p>Except where otherwise noted, content on this site is licensed under a
|
|
<a href="http://creativecommons.org/licenses/by/4.0/" rel="license">Creative
|
|
Commons Attribution 4.0 International License</a>.</p>
|
|
|
|
<p><a href="http://creativecommons.org/licenses/by/4.0/" rel=
|
|
"license"><img alt="Creative Commons License" id="creative_commons_icon" src=
|
|
"http://i.creativecommons.org/l/by/4.0/88x31.png"></a> Copyright 2011-2014 -
|
|
Biodiversity Information Standards - TDWG - <a href=
|
|
"http://www.tdwg.org/about-tdwg/contact-us/">Contact Us</a><br>
|
|
<!-- The following is used to register Google Analytics for Darwin Core -->
|
|
<script src="http://www.google-analytics.com/urchin.js" type=
|
|
"text/javascript"></script> <script type="text/javascript">
|
|
|
|
_uacct = "UA-386033-4";
|
|
urchinTracker();
|
|
</script></p>
|
|
</body>
|
|
</html> |