269 lines
12 KiB
XML
269 lines
12 KiB
XML
<!--
|
|
~ Licensed to the Apache Software Foundation (ASF) under one
|
|
~ or more contributor license agreements. See the NOTICE file
|
|
~ distributed with this work for additional information
|
|
~ regarding copyright ownership. The ASF licenses this file
|
|
~ to you under the Apache License, Version 2.0 (the
|
|
~ "License"); you may not use this file except in compliance
|
|
~ with the License. You may obtain a copy of the License at
|
|
~
|
|
~ http://www.apache.org/licenses/LICENSE-2.0
|
|
~
|
|
~ Unless required by applicable law or agreed to in writing,
|
|
~ software distributed under the License is distributed on an
|
|
~ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
~ KIND, either express or implied. See the License for the
|
|
~ specific language governing permissions and limitations
|
|
~ under the License.
|
|
-->
|
|
<Storage>
|
|
<!--======================================================================-->
|
|
<!-- Basic Configuration -->
|
|
<!--======================================================================-->
|
|
|
|
<!-- The name of this cluster. This is mainly used to prevent machines in
|
|
one logical cluster from joining any other cluster. -->
|
|
<ClusterName>akka</ClusterName>
|
|
|
|
<!-- Keyspaces and ColumnFamilies
|
|
A ColumnFamily is the Cassandra concept closest to a relational table.
|
|
Keyspaces are separate groups of ColumnFamilies. Except in very
|
|
unusual circumstances you will have one Keyspace per application.
|
|
|
|
There is an implicit keyspace named 'system' for Cassandra internals.
|
|
|
|
The default ColumnSort is Time for standard column families.
|
|
For super column families, specifying ColumnSort is not supported;
|
|
the supercolumns themselves are always name-sorted and their subcolumns
|
|
are always time-sorted.
|
|
-->
|
|
<Keyspaces>
|
|
<Keyspace Name="akka">
|
|
<!-- The fraction of keys per sstable whose locations we
|
|
keep in memory in "mostly LRU" order. (JUST the key
|
|
locations, NOT any column values.)
|
|
|
|
The amount of memory used by the default setting of
|
|
0.01 is comparable to the amount used by the internal
|
|
per-sstable key index. Increasing this is
|
|
fine if you have fewer, wider rows. Set to 0 to
|
|
disable entirely.
|
|
-->
|
|
<KeysCachedFraction>0.01</KeysCachedFraction>
|
|
<!--
|
|
The CompareWith attribute tells Cassandra how to sort the columns
|
|
for slicing operations. For backwards compatibility, the default
|
|
is to use AsciiType, which is probably NOT what you want.
|
|
Other options are BytesType, UTF8Type, LexicalUUIDType, TimeUUIDType, and LongType.
|
|
You can also specify the fully-qualified class name to a class
|
|
of your choice implementing org.apache.cassandra.db.marshal.IType.
|
|
|
|
SuperColumns have a similar CompareSubcolumnsWith attribute.
|
|
|
|
BytesType: simple sort by byte value. No validation is performed.
|
|
AsciiType: like BytesType, but validates that the input can be parsed as US-ASCII.
|
|
UTF8Type: A string encoded as UTF8
|
|
LongType: A 64bit long
|
|
LexicalUUIDType: a 128bit UUID, compared lexically (by byte value)
|
|
TimeUUIDType: a 128bit version 1 UUID, compared by timestamp
|
|
|
|
(To get the closest approximation to 0.3-style supercolumns,
|
|
you would use CompareWith=UTF8Type CompareSubcolumnsWith=LongType.)
|
|
|
|
If FlushPeriodInMinutes is configured and positive, the column family will be
|
|
flushed to disk with that period whether it is dirty or not.
|
|
This is intended for lightly-used columnfamilies so that they
|
|
do not prevent commitlog segments from being purged.
|
|
|
|
-->
|
|
<ColumnFamily CompareWith="UTF8Type" Name="map"/>
|
|
<!-- FIXME: change vector to a super column -->
|
|
<ColumnFamily CompareWith="UTF8Type" Name="vector"/>
|
|
<ColumnFamily CompareWith="UTF8Type" Name="ref"/>
|
|
|
|
<!--ColumnFamily CompareWith="UTF8Type" Name="Standard1" FlushPeriodInMinutes="60"/>
|
|
<ColumnFamily CompareWith="TimeUUIDType" Name="StandardByUUID1"/>
|
|
<ColumnFamily ColumnType="Super" CompareWith="UTF8Type" CompareSubcolumnsWith="UTF8Type" Name="Super1"/-->
|
|
</Keyspace>
|
|
</Keyspaces>
|
|
|
|
<!-- Partitioner: any IPartitioner may be used, including your own
|
|
as long as it is on the classpath. Out of the box,
|
|
Cassandra provides
|
|
org.apache.cassandra.dht.RandomPartitioner,
|
|
org.apache.cassandra.dht.OrderPreservingPartitioner, and
|
|
org.apache.cassandra.dht.CollatingOrderPreservingPartitioner.
|
|
(CollatingOPP collates according to EN,US rules, not naive byte ordering.
|
|
Use this as an example if you need locale-aware collation.)
|
|
Range queries require using OrderPreservingPartitioner or a subclass.
|
|
|
|
Achtung! Changing this parameter requires wiping your data directories,
|
|
since the partitioner can modify the sstable on-disk format.
|
|
-->
|
|
<Partitioner>org.apache.cassandra.dht.RandomPartitioner</Partitioner>
|
|
|
|
<!-- If you are using the OrderPreservingPartitioner and you know your key
|
|
distribution, you can specify the token for this node to use.
|
|
(Keys are sent to the node with the "closest" token, so distributing
|
|
your tokens equally along the key distribution space will spread
|
|
keys evenly across your cluster.) This setting is only checked the
|
|
first time a node is started.
|
|
|
|
This can also be useful with RandomPartitioner to force equal
|
|
spacing of tokens around the hash space, especially for
|
|
clusters with a small number of nodes. -->
|
|
<InitialToken></InitialToken>
|
|
|
|
|
|
<!-- EndPointSnitch: Set this to the class that implements IEndPointSnitch,
|
|
which will see if two endpoints are in the same data center or on the same rack.
|
|
Out of the box, Cassandra provides
|
|
org.apache.cassandra.locator.EndPointSnitch
|
|
-->
|
|
<EndPointSnitch>org.apache.cassandra.locator.EndPointSnitch</EndPointSnitch>
|
|
|
|
<!-- Strategy: Setting this to the class that implements IReplicaPlacementStrategy
|
|
will change the way the node picker works.
|
|
Out of the box, Cassandra provides
|
|
org.apache.cassandra.locator.RackUnawareStrategy
|
|
org.apache.cassandra.locator.RackAwareStrategy
|
|
(place one replica in a different datacenter, and the
|
|
others on different racks in the same one.)
|
|
-->
|
|
<ReplicaPlacementStrategy>org.apache.cassandra.locator.RackUnawareStrategy</ReplicaPlacementStrategy>
|
|
|
|
<!-- Number of replicas of the data-->
|
|
<ReplicationFactor>1</ReplicationFactor>
|
|
|
|
<!-- Directories: Specify where Cassandra should store different data on disk.
|
|
Keep the data disks and the CommitLog disks separate for best performance
|
|
-->
|
|
<CommitLogDirectory>cassandra/commitlog</CommitLogDirectory>
|
|
<DataFileDirectories>
|
|
<DataFileDirectory>cassandra/data</DataFileDirectory>
|
|
</DataFileDirectories>
|
|
<CalloutLocation>cassandra/callouts</CalloutLocation>
|
|
<BootstrapFileDirectory>cassandra/bootstrap</BootstrapFileDirectory>
|
|
<StagingFileDirectory>cassandra/staging</StagingFileDirectory>
|
|
|
|
|
|
<!-- Addresses of hosts that are deemed contact points. Cassandra nodes use
|
|
this list of hosts to find each other and learn the topology of the ring.
|
|
You must change this if you are running multiple nodes!
|
|
-->
|
|
<Seeds>
|
|
<Seed>127.0.0.1</Seed>
|
|
</Seeds>
|
|
|
|
|
|
<!-- Miscellaneous -->
|
|
|
|
<!-- time to wait for a reply from other nodes before failing the command -->
|
|
<RpcTimeoutInMillis>5000</RpcTimeoutInMillis>
|
|
<!-- size to allow commitlog to grow to before creating a new segment -->
|
|
<CommitLogRotationThresholdInMB>128</CommitLogRotationThresholdInMB>
|
|
|
|
|
|
<!-- Local hosts and ports -->
|
|
|
|
<!-- Address to bind to and tell other nodes to connect to.
|
|
You _must_ change this if you want multiple nodes to be able
|
|
to communicate!
|
|
|
|
Leaving it blank leaves it up to InetAddress.getLocalHost().
|
|
This will always do the Right Thing *if* the node is properly
|
|
configured (hostname, name resolution, etc), and the Right
|
|
Thing is to use the address associated with the hostname (it
|
|
might not be). -->
|
|
<ListenAddress>localhost</ListenAddress>
|
|
<!-- TCP port, for commands and data -->
|
|
<StoragePort>7000</StoragePort>
|
|
<!-- UDP port, for membership communications (gossip) -->
|
|
<ControlPort>7001</ControlPort>
|
|
|
|
<!-- The address to bind the Thrift RPC service to. Unlike
|
|
ListenAddress above, you *can* specify 0.0.0.0 here if you want
|
|
Thrift to listen on all interfaces.
|
|
|
|
Leaving this blank has the same effect it does for ListenAddress,
|
|
(i.e. it will be based on the configured hostname of the node).
|
|
-->
|
|
<ThriftAddress>localhost</ThriftAddress>
|
|
<!-- Thrift RPC port (the port clients connect to). -->
|
|
<ThriftPort>9160</ThriftPort>
|
|
|
|
|
|
<!--======================================================================-->
|
|
<!-- Memory, Disk, and Performance -->
|
|
<!--======================================================================-->
|
|
|
|
<!-- Buffer size to use when flushing memtables to disk.
|
|
(Only one memtable is ever flushed at a time.)
|
|
Increase (decrease) the index buffer size relative to the data buffer
|
|
if you have few (many) columns per key.
|
|
Bigger is only better _if_ your memtables get large enough to use the space.
|
|
(Check in your data directory after your app has been running long enough.)
|
|
-->
|
|
<FlushDataBufferSizeInMB>32</FlushDataBufferSizeInMB>
|
|
<FlushIndexBufferSizeInMB>8</FlushIndexBufferSizeInMB>
|
|
|
|
<!-- Add column indexes to a row after its contents reach this size.
|
|
Increase if your column values are large, or if you have a very large
|
|
number of columns. The competing concerns are: Cassandra has to
|
|
deserialize this much of the row to read a single column, so you
|
|
want it to be small - at least if you do many partial-row reads
|
|
- but all the index data is read for each access, so
|
|
you don't want to generate that wastefully either. -->
|
|
<ColumnIndexSizeInKB>64</ColumnIndexSizeInKB>
|
|
|
|
<!--
|
|
The maximum amount of data to store in memory per ColumnFamily before flushing to
|
|
disk. Note: There is one memtable per column family, and this threshold
|
|
is based solely on the amount of data stored, not actual heap memory
|
|
usage (there is some overhead in indexing the columns).
|
|
-->
|
|
<MemtableSizeInMB>64</MemtableSizeInMB>
|
|
<!--
|
|
The maximum number of columns in millions to store in memory per ColumnFamily
|
|
before flushing to disk. This is also a per-memtable setting.
|
|
Use with MemtableSizeInMB to tune memory usage.
|
|
-->
|
|
<MemtableObjectCountInMillions>0.1</MemtableObjectCountInMillions>
|
|
|
|
<!-- Unlike most systems, in Cassandra writes are faster than
|
|
reads, so you can afford more of those in parallel.
|
|
A good rule of thumb is 2 concurrent reads per processor core.
|
|
Increase ConcurrentWrites to the number of clients writing
|
|
at once if you enable CommitLogSync + CommitLogSyncDelay. -->
|
|
<ConcurrentReads>8</ConcurrentReads>
|
|
<ConcurrentWrites>32</ConcurrentWrites>
|
|
|
|
<!-- Turn on CommitLogSync to improve durability.
|
|
When enabled, Cassandra won't ack writes until the commit log
|
|
has been synced to disk. This is less necessary in Cassandra
|
|
than in traditional databases since replication reduces the
|
|
odds of losing data from a failure after writing the log
|
|
entry but before it actually reaches the disk.
|
|
-->
|
|
<CommitLogSync>false</CommitLogSync>
|
|
<!-- Delay (in microseconds) during which additional commit log
|
|
entries may be written before fsync. This will increase
|
|
latency slightly, but can vastly improve throughput where
|
|
there are many writers. Set to zero to disable
|
|
(each entry will be synced individually).
|
|
Reasonable values range from a minimal 100 to even 10000
|
|
if throughput matters more than latency. (10000us = 10ms
|
|
write latency isn't even that bad by traditional db
|
|
standards.)
|
|
-->
|
|
<CommitLogSyncDelay>1000</CommitLogSyncDelay>
|
|
|
|
|
|
<!-- Time to wait before garbage-collecting deletion markers.
|
|
Set this to a large enough value that you are confident
|
|
that the deletion marker will be propagated to all replicas
|
|
by the time this many seconds have elapsed, even in the
|
|
face of hardware failures. The default value is ten days.
|
|
-->
|
|
<GCGraceSeconds>864000</GCGraceSeconds>
|
|
</Storage>
|