pekko/config/storage-conf.xml
2009-10-01 07:49:17 +02:00

320 lines
14 KiB
XML

<!--
~ Licensed to the Apache Software Foundation (ASF) under one
~ or more contributor license agreements. See the NOTICE file
~ distributed with this work for additional information
~ regarding copyright ownership. The ASF licenses this file
~ to you under the Apache License, Version 2.0 (the
~ "License"); you may not use this file except in compliance
~ with the License. You may obtain a copy of the License at
~
~ http://www.apache.org/licenses/LICENSE-2.0
~
~ Unless required by applicable law or agreed to in writing,
~ software distributed under the License is distributed on an
~ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
~ KIND, either express or implied. See the License for the
~ specific language governing permissions and limitations
~ under the License.
-->
<Storage>
<!--======================================================================-->
<!-- Basic Configuration -->
<!--======================================================================-->
<!--
~ The name of this cluster. This is mainly used to prevent machines in
~ one logical cluster from joining another.
-->
<ClusterName>Test Cluster</ClusterName>
<!--
~ Keyspaces and ColumnFamilies:
~ A ColumnFamily is the Cassandra concept closest to a relational
~ table. Keyspaces are separate groups of ColumnFamilies. Except in
~ very unusual circumstances you will have one Keyspace per application.
~ There is an implicit keyspace named 'system' for Cassandra internals.
~ The default ColumnSort is Time for standard column families. For
~ super column families, specifying ColumnSort is not supported; the
~ supercolumns themselves are always name-sorted and their subcolumns
~ are always time-sorted.
-->
<Keyspaces>
<Keyspace Name="akka">
<!--
~ The CompareWith attribute tells Cassandra how to sort the columns
~ for slicing operations. The default is BytesType, which is a
~ straightforward lexical comparison of the bytes in each column.
~ Other options are AsciiType, UTF8Type, LexicalUUIDType, TimeUUIDType,
~ and LongType. You can also specify the fully-qualified class
~ name to a class of your choice extending
~ org.apache.cassandra.db.marshal.AbstractType.
~
~ SuperColumns have a similar CompareSubcolumnsWith attribute.
~
~ BytesType: Simple sort by byte value. No validation is performed.
~ AsciiType: Like BytesType, but validates that the input can be
~ parsed as US-ASCII.
~ UTF8Type: A string encoded as UTF8
~ LongType: A 64bit long
~ LexicalUUIDType: A 128bit UUID, compared lexically (by byte value)
~ TimeUUIDType: a 128bit version 1 UUID, compared by timestamp
~
~ (To get the closest approximation to 0.3-style supercolumns, you
~ would use CompareWith=UTF8Type CompareSubcolumnsWith=LongType.)
~ If FlushPeriodInMinutes is configured and positive, it will be
~ flushed to disk with that period whether it is dirty or not. This
~ is intended for lightly-used columnfamilies so that they do not
~ prevent commitlog segments from being purged.
-->
<ColumnFamily CompareWith="UTF8Type" Name="map" />
<!-- FIXME: change vector to a super column -->
<ColumnFamily CompareWith="UTF8Type" Name="vector"/>
<ColumnFamily CompareWith="UTF8Type" Name="ref"/>
<!--ColumnFamily CompareWith="BytesType"
Name="Standard1"
FlushPeriodInMinutes="60"/>
<ColumnFamily CompareWith="UTF8Type" Name="Standard2"/>
<ColumnFamily CompareWith="TimeUUIDType" Name="StandardByUUID1"/>
<ColumnFamily ColumnType="Super"
CompareWith="UTF8Type"
CompareSubcolumnsWith="UTF8Type"
Name="Super1"/-->
</Keyspace>
</Keyspaces>
<!--
~ Partitioner: any IPartitioner may be used, including your own as long
~ as it is on the classpath. Out of the box, Cassandra provides
~ org.apache.cassandra.dht.RandomPartitioner,
~ org.apache.cassandra.dht.OrderPreservingPartitioner, and
~ org.apache.cassandra.dht.CollatingOrderPreservingPartitioner.
~ (CollatingOPP colates according to EN,US rules, not naive byte
~ ordering. Use this as an example if you need locale-aware collation.)
~ Range queries require using an order-preserving partitioner.
~
~ Achtung! Changing this parameter requires wiping your data
~ directories, since the partitioner can modify the sstable on-disk
~ format.
-->
<Partitioner>org.apache.cassandra.dht.RandomPartitioner</Partitioner>
<!--
~ If you are using an order-preserving partitioner and you know your key
~ distribution, you can specify the token for this node to use. (Keys
~ are sent to the node with the "closest" token, so distributing your
~ tokens equally along the key distribution space will spread keys
~ evenly across your cluster.) This setting is only checked the first
~ time a node is started.
~ This can also be useful with RandomPartitioner to force equal spacing
~ of tokens around the hash space, especially for clusters with a small
~ number of nodes.
-->
<InitialToken></InitialToken>
<!--
~ EndPointSnitch: Setting this to the class that implements
~ IEndPointSnitch which will see if two endpoints are in the same data
~ center or on the same rack. Out of the box, Cassandra provides
~ org.apache.cassandra.locator.EndPointSnitch
-->
<EndPointSnitch>org.apache.cassandra.locator.EndPointSnitch</EndPointSnitch>
<!--
~ Strategy: Setting this to the class that implements
~ IReplicaPlacementStrategy will change the way the node picker works.
~ Out of the box, Cassandra provides
~ org.apache.cassandra.locator.RackUnawareStrategy and
~ org.apache.cassandra.locator.RackAwareStrategy (place one replica in
~ a different datacenter, and the others on different racks in the same
~ one.)
-->
<ReplicaPlacementStrategy>org.apache.cassandra.locator.RackUnawareStrategy</ReplicaPlacementStrategy>
<!-- Number of replicas of the data -->
<ReplicationFactor>1</ReplicationFactor>
<!--
~ Directories: Specify where Cassandra should store different data on
~ disk. Keep the data disks and the CommitLog disks separate for best
~ performance
-->
<CommitLogDirectory>~/tmp/cassandra/commitlog</CommitLogDirectory>
<DataFileDirectories>
<DataFileDirectory>~/tmp/lib/cassandra/data</DataFileDirectory>
</DataFileDirectories>
<CalloutLocation>~/tmp/cassandra/callouts</CalloutLocation>
<BootstrapFileDirectory>~/tmp/cassandra/bootstrap</BootstrapFileDirectory>
<StagingFileDirectory>~/tmp/cassandra/staging</StagingFileDirectory>
<!--
~ Addresses of hosts that are deemed contact points. Cassandra nodes
~ use this list of hosts to find each other and learn the topology of
~ the ring. You must change this if you are running multiple nodes!
-->
<Seeds>
<Seed>127.0.0.1</Seed>
</Seeds>
<!-- Miscellaneous -->
<!-- Time to wait for a reply from other nodes before failing the command -->
<RpcTimeoutInMillis>5000</RpcTimeoutInMillis>
<!-- Size to allow commitlog to grow to before creating a new segment -->
<CommitLogRotationThresholdInMB>128</CommitLogRotationThresholdInMB>
<!-- Local hosts and ports -->
<!--
~ Address to bind to and tell other nodes to connect to. You _must_
~ change this if you want multiple nodes to be able to communicate!
~
~ Leaving it blank leaves it up to InetAddress.getLocalHost(). This
~ will always do the Right Thing *if* the node is properly configured
~ (hostname, name resolution, etc), and the Right Thing is to use the
~ address associated with the hostname (it might not be).
-->
<ListenAddress>localhost</ListenAddress>
<!-- TCP port, for commands and data -->
<StoragePort>7000</StoragePort>
<!-- UDP port, for membership communications (gossip) -->
<ControlPort>7001</ControlPort>
<!--
~ The address to bind the Thrift RPC service to. Unlike ListenAddress
~ above, you *can* specify 0.0.0.0 here if you want Thrift to listen on
~ all interfaces.
~
~ Leaving this blank has the same effect it does for ListenAddress,
~ (i.e. it will be based on the configured hostname of the node).
-->
<ThriftAddress>localhost</ThriftAddress>
<!-- Thrift RPC port (the port clients connect to). -->
<ThriftPort>9160</ThriftPort>
<!--
~ Whether or not to use a framed transport for Thrift. If this option
~ is set to true then you must also use a framed transport on the
~ client-side, (framed and non-framed transports are not compatible).
-->
<ThriftFramedTransport>false</ThriftFramedTransport>
<!--======================================================================-->
<!-- Memory, Disk, and Performance -->
<!--======================================================================-->
<!--
~ Buffer size to use when performing contiguous column slices. Increase
~ this to the size of the column slices you typically perform.
~ (Name-based queries are performed with a buffer size of
~ ColumnIndexSizeInKB.)
-->
<SlicedBufferSizeInKB>64</SlicedBufferSizeInKB>
<!--
~ Buffer size to use when flushing memtables to disk. (Only one
~ memtable is ever flushed at a time.) Increase (decrease) the index
~ buffer size relative to the data buffer if you have few (many)
~ columns per key. Bigger is only better _if_ your memtables get large
~ enough to use the space. (Check in your data directory after your
~ app has been running long enough.) -->
<FlushDataBufferSizeInMB>32</FlushDataBufferSizeInMB>
<FlushIndexBufferSizeInMB>8</FlushIndexBufferSizeInMB>
<!--
~ Add column indexes to a row after its contents reach this size.
~ Increase if your column values are large, or if you have a very large
~ number of columns. The competing causes are, Cassandra has to
~ deserialize this much of the row to read a single column, so you want
~ it to be small - at least if you do many partial-row reads - but all
~ the index data is read for each access, so you don't want to generate
~ that wastefully either.
-->
<ColumnIndexSizeInKB>64</ColumnIndexSizeInKB>
<!--
~ The maximum amount of data to store in memory per ColumnFamily before
~ flushing to disk. Note: There is one memtable per column family, and
~ this threshold is based solely on the amount of data stored, not
~ actual heap memory usage (there is some overhead in indexing the
~ columns).
-->
<MemtableSizeInMB>64</MemtableSizeInMB>
<!--
~ The maximum number of columns in millions to store in memory per
~ ColumnFamily before flushing to disk. This is also a per-memtable
~ setting. Use with MemtableSizeInMB to tune memory usage.
-->
<MemtableObjectCountInMillions>0.1</MemtableObjectCountInMillions>
<!--
~ Unlike most systems, in Cassandra writes are faster than reads, so
~ you can afford more of those in parallel. A good rule of thumb is 2
~ concurrent reads per processor core. Increase ConcurrentWrites to
~ the number of clients writing at once if you enable CommitLogSync +
~ CommitLogSyncDelay. -->
<ConcurrentReads>8</ConcurrentReads>
<ConcurrentWrites>32</ConcurrentWrites>
<!--
~ CommitLogSync may be either "periodic" or "batch." When in batch
~ mode, Cassandra won't ack writes until the commit log has been
~ fsynced to disk. It will wait up to CommitLogSyncBatchWindowInMS
~ milliseconds for other writes, before performing the sync.
~ This is less necessary in Cassandra than in traditional databases
~ since replication reduces the odds of losing data from a failure
~ after writing the log entry but before it actually reaches the disk.
~ So the other option is "timed," where writes may be acked immediately
~ and the CommitLog is simply synced every CommitLogSyncPeriodInMS
~ milliseconds.
-->
<CommitLogSync>periodic</CommitLogSync>
<!--
~ Interval at which to perform syncs of the CommitLog in periodic mode.
~ Usually the default of 1000ms is fine; increase it only if the
~ CommitLog PendingTasks backlog in jmx shows that you are frequently
~ scheduling a second sync while the first has not yet been processed.
-->
<CommitLogSyncPeriodInMS>1000</CommitLogSyncPeriodInMS>
<!--
~ Delay (in milliseconds) during which additional commit log entries
~ may be written before fsync in batch mode. This will increase
~ latency slightly, but can vastly improve throughput where there are
~ many writers. Set to zero to disable (each entry will be synced
~ individually). Reasonable values range from a minimal 0.1 to 10 or
~ even more if throughput matters more than latency.
-->
<!-- <CommitLogSyncBatchWindowInMS>1</CommitLogSyncBatchWindowInMS> -->
<!--
~ Time to wait before garbage-collection deletion markers. Set this to
~ a large enough value that you are confident that the deletion marker
~ will be propagated to all replicas by the time this many seconds has
~ elapsed, even in the face of hardware failures. The default value is
~ ten days.
-->
<GCGraceSeconds>864000</GCGraceSeconds>
<!--
~ Number of threads to run when flushing memtables to disk. Set this to
~ the number of disks you physically have in your machine allocated for DataDirectory * 2.
~ If you are planning to use the Binary Memtable, its recommended to increase the max threads
~ to maintain a higher quality of service while under load when normal memtables are flushing to disk.
-->
<FlushMinThreads>1</FlushMinThreads>
<FlushMaxThreads>1</FlushMaxThreads>
<!--
~ The threshold size in megabytes the binary memtable must grow to, before it's submitted for flushing to disk.
-->
<BinaryMemtableSizeInMB>256</BinaryMemtableSizeInMB>
</Storage>