ménage uniquement
darcs-hash:20060505083659-72cb0-ebd538f09b6178702c3deccf96fa684a1f80a8d7.gz
This commit is contained in:
parent
80b70a333d
commit
56e9249400
1 changed file with 0 additions and 150 deletions
|
@@ -1,69 +1,9 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!--
|
||||
sitemap_gen.py example configuration script
|
||||
|
||||
This file specifies a set of sample input parameters for the
|
||||
sitemap_gen.py client.
|
||||
|
||||
You should copy this file into "config.xml" and modify it for
|
||||
your server.
|
||||
|
||||
|
||||
********************************************************* -->
|
||||
|
||||
|
||||
<!-- ** MODIFY **
|
||||
The "site" node describes your basic web site.
|
||||
|
||||
Required attributes:
|
||||
base_url - the top-level URL of the site being mapped
|
||||
store_into - the webserver path to the desired output file.
|
||||
This should end in '.xml' or '.xml.gz'
|
||||
(the script will create this file)
|
||||
|
||||
Optional attributes:
|
||||
verbose - an integer from 0 (quiet) to 3 (noisy) for
|
||||
how much diagnostic output the script gives
|
||||
suppress_search_engine_notify="1"
|
||||
- disables notifying search engines about the new map
|
||||
(same as the "testing" command-line argument.)
|
||||
default_encoding
|
||||
- names a character encoding to use for URLs and
|
||||
file paths. (Example: "UTF-8")
|
||||
-->
|
||||
<site
|
||||
base_url="http://www.crans.org/"
|
||||
store_into="/var/www/site_www/www.xml.gz"
|
||||
verbose="1"
|
||||
>
|
||||
|
||||
<!-- ********************************************************
|
||||
INPUTS
|
||||
|
||||
All the various nodes in this section control where the script
|
||||
looks to find URLs.
|
||||
|
||||
MODIFY or DELETE these entries as appropriate for your server.
|
||||
********************************************************* -->
|
||||
|
||||
<!-- ** MODIFY or DELETE **
|
||||
"url" nodes specify individual URLs to include in the map.
|
||||
|
||||
Required attributes:
|
||||
href - the URL
|
||||
|
||||
Optional attributes:
|
||||
lastmod - timestamp of last modification (ISO8601 format)
|
||||
changefreq - how often content at this URL is usually updated
|
||||
priority - value 0.0 to 1.0 of relative importance in your site
|
||||
<url href="http://www.example.com/stats?q=name" />
|
||||
<url
|
||||
href="http://www.example.com/stats?q=age"
|
||||
lastmod="2004-11-14T01:00:00-07:00"
|
||||
changefreq="yearly"
|
||||
priority="0.3"
|
||||
/>
|
||||
-->
|
||||
<url
|
||||
href="http://www.crans.org/"
|
||||
lastmod="2006-05-04T22:40:00+02:00"
|
||||
|
@@ -83,99 +23,9 @@
|
|||
priority="0.5"
|
||||
/>
|
||||
|
||||
<!-- ** MODIFY or DELETE **
|
||||
"urllist" nodes name text files with lists of URLs.
|
||||
An example file "example_urllist.txt" is provided.
|
||||
|
||||
Required attributes:
|
||||
path - path to the file
|
||||
|
||||
Optional attributes:
|
||||
encoding - encoding of the file if not US-ASCII
|
||||
<urllist path="example_urllist.txt" encoding="UTF-8" />
|
||||
-->
|
||||
|
||||
|
||||
<!-- ** MODIFY or DELETE **
|
||||
"directory" nodes tell the script to walk the file system
|
||||
and include all files and directories in the Sitemap.
|
||||
|
||||
Required attributes:
|
||||
path - path to begin walking from
|
||||
url - URL equivalent of that path
|
||||
|
||||
Optional attributes:
|
||||
default_file - name of the index or default file for directory URLs
|
||||
<directory path="/var/www/icons" url="http://www.example.com/images/" />
|
||||
<directory
|
||||
path="/var/www/docroot"
|
||||
url="http://www.example.com/"
|
||||
default_file="index.html"
|
||||
/>
|
||||
-->
|
||||
|
||||
|
||||
<!-- ** MODIFY or DELETE **
|
||||
"accesslog" nodes tell the script to scan webserver log files to
|
||||
extract URLs on your site. Both Common Logfile Format (Apache's default
|
||||
logfile) and Extended Logfile Format (IIS's default logfile) can be read.
|
||||
|
||||
Required attributes:
|
||||
path - path to the file
|
||||
|
||||
Optional attributes:
|
||||
encoding - encoding of the file if not US-ASCII
|
||||
<accesslog path="/etc/httpd/logs/access.log" encoding="UTF-8" />
|
||||
<accesslog path="/etc/httpd/logs/access.log.0" encoding="UTF-8" />
|
||||
<accesslog path="/etc/httpd/logs/access.log.1.gz" encoding="UTF-8" />
|
||||
-->
|
||||
|
||||
|
||||
<!-- ** MODIFY or DELETE **
|
||||
"sitemap" nodes tell the script to scan other Sitemap files. This can
|
||||
be useful to aggregate the results of multiple runs of this script into
|
||||
a single Sitemap.
|
||||
|
||||
Required attributes:
|
||||
path - path to the file
|
||||
-->
|
||||
<sitemap path="/var/www/site_www/www.xml.gz" />
|
||||
|
||||
|
||||
<!-- ********************************************************
|
||||
FILTERS
|
||||
|
||||
Filters specify wild-card patterns that the script compares
|
||||
against all URLs it finds. Filters can be used to exclude
|
||||
certain URLs from your Sitemap, for instance if you have
|
||||
hidden content that you hope the search engines don't find.
|
||||
|
||||
Filters can be either type="wildcard", which means standard
|
||||
path wildcards (* and ?) are used to compare against URLs,
|
||||
or type="regexp", which means regular expressions are used
|
||||
to compare.
|
||||
|
||||
Filters are applied in the order specified in this file.
|
||||
|
||||
An action="drop" filter causes exclusion of matching URLs.
|
||||
An action="pass" filter causes inclusion of matching URLs,
|
||||
shortcutting any other later filters that might also match.
|
||||
If no filter at all matches a URL, the URL will be included.
|
||||
Together you can build up fairly complex rules.
|
||||
|
||||
The default action is "drop".
|
||||
The default type is "wildcard".
|
||||
|
||||
You can MODIFY or DELETE these entries as appropriate for
|
||||
your site. However, unlike above, the example entries in
|
||||
this section are not contrived and may be useful to you as
|
||||
they are.
|
||||
********************************************************* -->
|
||||
|
||||
<!-- Exclude URLs that end with a '~' (i.e., Emacs backup files) -->
|
||||
<filter action="drop" type="wildcard" pattern="*~" />
|
||||
|
||||
<!-- Exclude URLs within UNIX-style hidden files or directories -->
|
||||
<filter action="drop" type="regexp" pattern="/\.[^/]*" />
|
||||
|
||||
<filter action="drop" type="wildcard" pattern="*?action=fullsearch*" />
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue