mnage uniquement
darcs-hash:20060505083659-72cb0-ebd538f09b6178702c3deccf96fa684a1f80a8d7.gz
This commit is contained in:
parent
80b70a333d
commit
56e9249400
1 changed files with 0 additions and 150 deletions
|
@ -1,69 +1,9 @@
|
||||||
<?xml version="1.0" encoding="UTF-8"?>
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
<!--
|
|
||||||
sitemap_gen.py example configuration script
|
|
||||||
|
|
||||||
This file specifies a set of sample input parameters for the
|
|
||||||
sitemap_gen.py client.
|
|
||||||
|
|
||||||
You should copy this file into "config.xml" and modify it for
|
|
||||||
your server.
|
|
||||||
|
|
||||||
|
|
||||||
********************************************************* -->
|
|
||||||
|
|
||||||
|
|
||||||
<!-- ** MODIFY **
|
|
||||||
The "site" node describes your basic web site.
|
|
||||||
|
|
||||||
Required attributes:
|
|
||||||
base_url - the top-level URL of the site being mapped
|
|
||||||
store_into - the webserver path to the desired output file.
|
|
||||||
This should end in '.xml' or '.xml.gz'
|
|
||||||
(the script will create this file)
|
|
||||||
|
|
||||||
Optional attributes:
|
|
||||||
verbose - an integer from 0 (quiet) to 3 (noisy) for
|
|
||||||
how much diagnostic output the script gives
|
|
||||||
suppress_search_engine_notify="1"
|
|
||||||
- disables notifying search engines about the new map
|
|
||||||
(same as the "testing" command-line argument.)
|
|
||||||
default_encoding
|
|
||||||
- names a character encoding to use for URLs and
|
|
||||||
file paths. (Example: "UTF-8")
|
|
||||||
-->
|
|
||||||
<site
|
<site
|
||||||
base_url="http://www.crans.org/"
|
base_url="http://www.crans.org/"
|
||||||
store_into="/var/www/site_www/www.xml.gz"
|
store_into="/var/www/site_www/www.xml.gz"
|
||||||
verbose="1"
|
verbose="1"
|
||||||
>
|
>
|
||||||
|
|
||||||
<!-- ********************************************************
|
|
||||||
INPUTS
|
|
||||||
|
|
||||||
All the various nodes in this section control where the script
|
|
||||||
looks to find URLs.
|
|
||||||
|
|
||||||
MODIFY or DELETE these entries as appropriate for your server.
|
|
||||||
********************************************************* -->
|
|
||||||
|
|
||||||
<!-- ** MODIFY or DELETE **
|
|
||||||
"url" nodes specify individual URLs to include in the map.
|
|
||||||
|
|
||||||
Required attributes:
|
|
||||||
href - the URL
|
|
||||||
|
|
||||||
Optional attributes:
|
|
||||||
lastmod - timestamp of last modification (ISO8601 format)
|
|
||||||
changefreq - how often content at this URL is usually updated
|
|
||||||
priority - value 0.0 to 1.0 of relative importance in your site
|
|
||||||
<url href="http://www.example.com/stats?q=name" />
|
|
||||||
<url
|
|
||||||
href="http://www.example.com/stats?q=age"
|
|
||||||
lastmod="2004-11-14T01:00:00-07:00"
|
|
||||||
changefreq="yearly"
|
|
||||||
priority="0.3"
|
|
||||||
/>
|
|
||||||
-->
|
|
||||||
<url
|
<url
|
||||||
href="http://www.crans.org/"
|
href="http://www.crans.org/"
|
||||||
lastmod="2006-05-04T22:40:00+02:00"
|
lastmod="2006-05-04T22:40:00+02:00"
|
||||||
|
@ -83,99 +23,9 @@
|
||||||
priority="0.5"
|
priority="0.5"
|
||||||
/>
|
/>
|
||||||
|
|
||||||
<!-- ** MODIFY or DELETE **
|
|
||||||
"urllist" nodes name text files with lists of URLs.
|
|
||||||
An example file "example_urllist.txt" is provided.
|
|
||||||
|
|
||||||
Required attributes:
|
|
||||||
path - path to the file
|
|
||||||
|
|
||||||
Optional attributes:
|
|
||||||
encoding - encoding of the file if not US-ASCII
|
|
||||||
<urllist path="example_urllist.txt" encoding="UTF-8" />
|
|
||||||
-->
|
|
||||||
|
|
||||||
|
|
||||||
<!-- ** MODIFY or DELETE **
|
|
||||||
"directory" nodes tell the script to walk the file system
|
|
||||||
and include all files and directories in the Sitemap.
|
|
||||||
|
|
||||||
Required attributes:
|
|
||||||
path - path to begin walking from
|
|
||||||
url - URL equivalent of that path
|
|
||||||
|
|
||||||
Optional attributes:
|
|
||||||
default_file - name of the index or default file for directory URLs
|
|
||||||
<directory path="/var/www/icons" url="http://www.example.com/images/" />
|
|
||||||
<directory
|
|
||||||
path="/var/www/docroot"
|
|
||||||
url="http://www.example.com/"
|
|
||||||
default_file="index.html"
|
|
||||||
/>
|
|
||||||
-->
|
|
||||||
|
|
||||||
|
|
||||||
<!-- ** MODIFY or DELETE **
|
|
||||||
"accesslog" nodes tell the script to scan webserver log files to
|
|
||||||
extract URLs on your site. Both Common Logfile Format (Apache's default
|
|
||||||
logfile) and Extended Logfile Format (IIS's default logfile) can be read.
|
|
||||||
|
|
||||||
Required attributes:
|
|
||||||
path - path to the file
|
|
||||||
|
|
||||||
Optional attributes:
|
|
||||||
encoding - encoding of the file if not US-ASCII
|
|
||||||
<accesslog path="/etc/httpd/logs/access.log" encoding="UTF-8" />
|
|
||||||
<accesslog path="/etc/httpd/logs/access.log.0" encoding="UTF-8" />
|
|
||||||
<accesslog path="/etc/httpd/logs/access.log.1.gz" encoding="UTF-8" />
|
|
||||||
-->
|
|
||||||
|
|
||||||
|
|
||||||
<!-- ** MODIFY or DELETE **
|
|
||||||
"sitemap" nodes tell the script to scan other Sitemap files. This can
|
|
||||||
be useful to aggregate the results of multiple runs of this script into
|
|
||||||
a single Sitemap.
|
|
||||||
|
|
||||||
Required attributes:
|
|
||||||
path - path to the file
|
|
||||||
-->
|
|
||||||
<sitemap path="/var/www/site_www/www.xml.gz" />
|
<sitemap path="/var/www/site_www/www.xml.gz" />
|
||||||
|
|
||||||
|
|
||||||
<!-- ********************************************************
|
|
||||||
FILTERS
|
|
||||||
|
|
||||||
Filters specify wild-card patterns that the script compares
|
|
||||||
against all URLs it finds. Filters can be used to exclude
|
|
||||||
certain URLs from your Sitemap, for instance if you have
|
|
||||||
hidden content that you hope the search engines don't find.
|
|
||||||
|
|
||||||
Filters can be either type="wildcard", which means standard
|
|
||||||
path wildcards (* and ?) are used to compare against URLs,
|
|
||||||
or type="regexp", which means regular expressions are used
|
|
||||||
to compare.
|
|
||||||
|
|
||||||
Filters are applied in the order specified in this file.
|
|
||||||
|
|
||||||
An action="drop" filter causes exclusion of matching URLs.
|
|
||||||
An action="pass" filter causes inclusion of matching URLs,
|
|
||||||
shortcutting any other later filters that might also match.
|
|
||||||
If no filter at all matches a URL, the URL will be included.
|
|
||||||
Together you can build up fairly complex rules.
|
|
||||||
|
|
||||||
The default action is "drop".
|
|
||||||
The default type is "wildcard".
|
|
||||||
|
|
||||||
You can MODIFY or DELETE these entries as appropriate for
|
|
||||||
your site. However, unlike above, the example entries in
|
|
||||||
this section are not contrived and may be useful to you as
|
|
||||||
they are.
|
|
||||||
********************************************************* -->
|
|
||||||
|
|
||||||
<!-- Exclude URLs that end with a '~' (IE: emacs backup files) -->
|
|
||||||
<filter action="drop" type="wildcard" pattern="*~" />
|
<filter action="drop" type="wildcard" pattern="*~" />
|
||||||
|
|
||||||
<!-- Exclude URLs within UNIX-style hidden files or directories -->
|
|
||||||
<filter action="drop" type="regexp" pattern="/\.[^/]*" />
|
<filter action="drop" type="regexp" pattern="/\.[^/]*" />
|
||||||
|
|
||||||
<filter action="drop" type="wildcard" pattern="*?action=fullsearch*" />
|
<filter action="drop" type="wildcard" pattern="*?action=fullsearch*" />
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue