<!-- 392 lines, 17 KiB, XML -->
<!--
|
|
~ Licensed to the Apache Software Foundation (ASF) under one
|
|
~ or more contributor license agreements. See the NOTICE file
|
|
~ distributed with this work for additional information
|
|
~ regarding copyright ownership. The ASF licenses this file
|
|
~ to you under the Apache License, Version 2.0 (the
|
|
~ "License"); you may not use this file except in compliance
|
|
~ with the License. You may obtain a copy of the License at
|
|
~
|
|
~ http://www.apache.org/licenses/LICENSE-2.0
|
|
~
|
|
~ Unless required by applicable law or agreed to in writing,
|
|
~ software distributed under the License is distributed on an
|
|
~ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
~ KIND, either express or implied. See the License for the
|
|
~ specific language governing permissions and limitations
|
|
~ under the License.
|
|
-->
|
|
<Storage>
|
|
<!--======================================================================-->
|
|
<!-- Basic Configuration -->
|
|
<!--======================================================================-->
|
|
|
|
<!--
|
|
~ The name of this cluster. This is mainly used to prevent machines in
|
|
~ one logical cluster from joining another.
|
|
-->
|
|
<ClusterName>akka</ClusterName>
|
|
|
|
<!--
|
|
~ Turn on to make new [non-seed] nodes automatically migrate the right data
|
|
~ to themselves. (If no InitialToken is specified, they will pick one
|
|
~ such that they will get half the range of the most-loaded node.)
|
|
~ If a node starts up without bootstrapping, it will mark itself bootstrapped
|
|
~ so that you can't subsequently accidentally bootstrap a node with
|
|
~ data on it. (You can reset this by wiping your data and commitlog
|
|
~ directories.)
|
|
~
|
|
~ Off by default so that new clusters and upgraders from 0.4 don't
|
|
~ bootstrap immediately. You should turn this on when you start adding
|
|
~ new nodes to a cluster that already has data on it. (If you are upgrading
|
|
~ from 0.4, start your cluster with it off once before changing it to true.
|
|
~ Otherwise, no data will be lost but you will incur a lot of unnecessary
|
|
~ I/O before your cluster starts up.)
|
|
-->
|
|
<AutoBootstrap>false</AutoBootstrap>
|
|
|
|
<!--
|
|
~ Keyspaces and ColumnFamilies:
|
|
~ A ColumnFamily is the Cassandra concept closest to a relational
|
|
~ table. Keyspaces are separate groups of ColumnFamilies. Except in
|
|
~ very unusual circumstances you will have one Keyspace per application.
|
|
|
|
~ There is an implicit keyspace named 'system' for Cassandra internals.
|
|
-->
|
|
<Keyspaces>
|
|
<Keyspace Name="akka">
|
|
<!--
|
|
~ ColumnFamily definitions have one required attribute (Name)
|
|
~ and several optional ones.
|
|
~
|
|
~ The CompareWith attribute tells Cassandra how to sort the columns
|
|
~ for slicing operations. The default is BytesType, which is a
|
|
~ straightforward lexical comparison of the bytes in each column.
|
|
~ Other options are AsciiType, UTF8Type, LexicalUUIDType, TimeUUIDType,
|
|
~ and LongType. You can also specify the fully-qualified class
|
|
~ name to a class of your choice extending
|
|
~ org.apache.cassandra.db.marshal.AbstractType.
|
|
~
|
|
~ SuperColumns have a similar CompareSubcolumnsWith attribute.
|
|
~
|
|
~ BytesType: Simple sort by byte value. No validation is performed.
|
|
~ AsciiType: Like BytesType, but validates that the input can be
|
|
~ parsed as US-ASCII.
|
|
~ UTF8Type: A string encoded as UTF8
|
|
~ LongType: A 64bit long
|
|
~ LexicalUUIDType: A 128bit UUID, compared lexically (by byte value)
|
|
~ TimeUUIDType: a 128bit version 1 UUID, compared by timestamp
|
|
~
|
|
~ (To get the closest approximation to 0.3-style supercolumns, you
|
|
~ would use CompareWith=UTF8Type CompareSubcolumnsWith=LongType.)
|
|
~
|
|
~ An optional `Comment` attribute may be used to attach additional
|
|
~ human-readable information about the column family to its definition.
|
|
~
|
|
~ The optional KeysCached attribute specifies
|
|
~ the number of keys per sstable whose locations we keep in
|
|
~ memory in "mostly LRU" order. (JUST the key locations, NOT any
|
|
~ column values.) Specify a fraction (value less than 1), a percentage
|
|
~ (ending in a % sign) or an absolute number of keys to cache.
|
|
~ KeysCached defaults to 200000 keys.
|
|
~
|
|
~ The optional RowsCached attribute specifies the number of rows
|
|
~ whose entire contents we cache in memory. Do not use this on
|
|
~ ColumnFamilies with large rows, or ColumnFamilies with high write:read
|
|
~ ratios. Specify a fraction (value less than 1), a percentage (ending in
|
|
~ a % sign) or an absolute number of rows to cache.
|
|
~ RowsCached defaults to 0, i.e., row cache is off by default.
|
|
~
|
|
~ Remember, when using caches as a percentage, they WILL grow with
|
|
~ your data set!
|
|
-->
|
|
<!--
  ~ Column family "map": columns are sorted as UTF8Type for slicing
  ~ operations, and key locations are cached for 100% of the keys.
-->
<ColumnFamily Name="map"
              CompareWith="UTF8Type"
              KeysCached="100%" />
|
|
<!-- FIXME: change vector to a super column -->
|
|
<!--
  ~ Column family "vector": columns are sorted as UTF8Type for slicing
  ~ operations, and key locations are cached for 100% of the keys.
-->
<ColumnFamily Name="vector"
              CompareWith="UTF8Type"
              KeysCached="100%" />
|
|
<!--
  ~ Column family "ref": columns are sorted as UTF8Type for slicing
  ~ operations, and key locations are cached for 100% of the keys.
-->
<ColumnFamily Name="ref"
              CompareWith="UTF8Type"
              KeysCached="100%" />
|
|
|
|
<!--ColumnFamily Name="Standard1" CompareWith="BytesType"/>
|
|
<ColumnFamily Name="Standard2"
|
|
CompareWith="UTF8Type"
|
|
KeysCached="100%"/>
|
|
<ColumnFamily Name="StandardByUUID1" CompareWith="TimeUUIDType" />
|
|
<ColumnFamily Name="Super1"
|
|
ColumnType="Super"
|
|
CompareWith="BytesType"
|
|
CompareSubcolumnsWith="BytesType" />
|
|
<ColumnFamily Name="Super2"
|
|
ColumnType="Super"
|
|
CompareWith="UTF8Type"
|
|
CompareSubcolumnsWith="UTF8Type"
|
|
RowsCached="10000"
|
|
KeysCached="50%"
|
|
Comment="A column family with supercolumns, whose column and subcolumn names are UTF8 strings"/-->
|
|
|
|
<!--
|
|
~ Strategy: Setting this to the class that implements
|
|
~ IReplicaPlacementStrategy will change the way the node picker works.
|
|
~ Out of the box, Cassandra provides
|
|
~ org.apache.cassandra.locator.RackUnawareStrategy and
|
|
~ org.apache.cassandra.locator.RackAwareStrategy (place one replica in
|
|
~ a different datacenter, and the others on different racks in the same
|
|
~ one.)
|
|
-->
|
|
<ReplicaPlacementStrategy>org.apache.cassandra.locator.RackUnawareStrategy</ReplicaPlacementStrategy>
|
|
|
|
<!-- Number of replicas of the data -->
|
|
<ReplicationFactor>1</ReplicationFactor>
|
|
|
|
<!--
|
|
~ EndPointSnitch: Setting this to the class that implements
|
|
~ AbstractEndpointSnitch, which lets Cassandra know enough
|
|
~ about your network topology to route requests efficiently.
|
|
~ Out of the box, Cassandra provides org.apache.cassandra.locator.EndPointSnitch,
|
|
~ and PropertyFileEndPointSnitch is available in contrib/.
|
|
-->
|
|
<EndPointSnitch>org.apache.cassandra.locator.EndPointSnitch</EndPointSnitch>
|
|
|
|
</Keyspace>
|
|
</Keyspaces>
|
|
|
|
<!--
|
|
~ Authenticator: any IAuthenticator may be used, including your own as long
|
|
~ as it is on the classpath. Out of the box, Cassandra provides
|
|
~ org.apache.cassandra.auth.AllowAllAuthenticator and
|
|
~ org.apache.cassandra.auth.SimpleAuthenticator
|
|
~ (SimpleAuthenticator uses access.properties and passwd.properties by
|
|
~ default).
|
|
~
|
|
~ If you don't specify an authenticator, AllowAllAuthenticator is used.
|
|
-->
|
|
<Authenticator>org.apache.cassandra.auth.AllowAllAuthenticator</Authenticator>
|
|
|
|
<!--
|
|
~ Partitioner: any IPartitioner may be used, including your own as long
|
|
~ as it is on the classpath. Out of the box, Cassandra provides
|
|
~ org.apache.cassandra.dht.RandomPartitioner,
|
|
~ org.apache.cassandra.dht.OrderPreservingPartitioner, and
|
|
~ org.apache.cassandra.dht.CollatingOrderPreservingPartitioner.
|
|
~ (CollatingOPP collates according to EN,US rules, not naive byte
|
|
~ ordering. Use this as an example if you need locale-aware collation.)
|
|
~ Range queries require using an order-preserving partitioner.
|
|
~
|
|
~ Achtung! Changing this parameter requires wiping your data
|
|
~ directories, since the partitioner can modify the sstable on-disk
|
|
~ format.
|
|
-->
|
|
<Partitioner>org.apache.cassandra.dht.RandomPartitioner</Partitioner>
|
|
|
|
<!--
|
|
~ If you are using an order-preserving partitioner and you know your key
|
|
~ distribution, you can specify the token for this node to use. (Keys
|
|
~ are sent to the node with the "closest" token, so distributing your
|
|
~ tokens equally along the key distribution space will spread keys
|
|
~ evenly across your cluster.) This setting is only checked the first
|
|
~ time a node is started.
|
|
|
|
~ This can also be useful with RandomPartitioner to force equal spacing
|
|
~ of tokens around the hash space, especially for clusters with a small
|
|
~ number of nodes.
|
|
-->
|
|
<InitialToken></InitialToken>
|
|
|
|
<!--
|
|
~ Directories: Specify where Cassandra should store different data on
|
|
~ disk. Keep the data disks and the CommitLog disks separate for best
|
|
~ performance
|
|
-->
|
|
<CommitLogDirectory>cassandra/commitlog</CommitLogDirectory>
|
|
<!-- Data file locations; a single directory is configured here. -->
<DataFileDirectories>
    <DataFileDirectory>cassandra/data</DataFileDirectory>
</DataFileDirectories>
|
|
|
|
|
|
<!--
|
|
~ Addresses of hosts that are deemed contact points. Cassandra nodes
|
|
~ use this list of hosts to find each other and learn the topology of
|
|
~ the ring. You must change this if you are running multiple nodes!
|
|
-->
|
|
<!-- Contact-point hosts; only the loopback address is listed here. -->
<Seeds>
    <Seed>127.0.0.1</Seed>
</Seeds>
|
|
|
|
|
|
<!-- Miscellaneous -->
|
|
|
|
<!-- Time to wait for a reply from other nodes before failing the command -->
|
|
<RpcTimeoutInMillis>10000</RpcTimeoutInMillis>
|
|
<!-- Size to allow commitlog to grow to before creating a new segment -->
|
|
<CommitLogRotationThresholdInMB>128</CommitLogRotationThresholdInMB>
|
|
|
|
|
|
<!-- Local hosts and ports -->
|
|
|
|
<!--
|
|
~ Address to bind to and tell other nodes to connect to. You _must_
|
|
~ change this if you want multiple nodes to be able to communicate!
|
|
~
|
|
~ Leaving it blank leaves it up to InetAddress.getLocalHost(). This
|
|
~ will always do the Right Thing *if* the node is properly configured
|
|
~ (hostname, name resolution, etc), and the Right Thing is to use the
|
|
~ address associated with the hostname (it might not be).
|
|
-->
|
|
<ListenAddress>localhost</ListenAddress>
|
|
<!-- internal communications port -->
|
|
<StoragePort>7000</StoragePort>
|
|
|
|
<!--
|
|
~ The address to bind the Thrift RPC service to. Unlike ListenAddress
|
|
~ above, you *can* specify 0.0.0.0 here if you want Thrift to listen on
|
|
~ all interfaces.
|
|
~
|
|
~ Leaving this blank has the same effect it does for ListenAddress,
|
|
~ (i.e. it will be based on the configured hostname of the node).
|
|
-->
|
|
<ThriftAddress>localhost</ThriftAddress>
|
|
<!-- Thrift RPC port (the port clients connect to). -->
|
|
<ThriftPort>9160</ThriftPort>
|
|
<!--
|
|
~ Whether or not to use a framed transport for Thrift. If this option
|
|
~ is set to true then you must also use a framed transport on the
|
|
~ client-side, (framed and non-framed transports are not compatible).
|
|
-->
|
|
<ThriftFramedTransport>false</ThriftFramedTransport>
|
|
|
|
|
|
<!--======================================================================-->
|
|
<!-- Memory, Disk, and Performance -->
|
|
<!--======================================================================-->
|
|
|
|
<!--
|
|
~ Access mode. mmapped i/o is substantially faster, but only practical on
|
|
~ a 64bit machine (which notably does not include EC2 "small" instances)
|
|
~ or relatively small datasets. "auto", the safe choice, will enable
|
|
~ mmapping on a 64bit JVM. Other values are "mmap", "mmap_index_only"
|
|
~ (which may allow you to get part of the benefits of mmap on a 32bit
|
|
~ machine by mmapping only index files) and "standard".
|
|
~ (The buffer size settings that follow only apply to standard,
|
|
~ non-mmapped i/o.)
|
|
-->
|
|
<DiskAccessMode>auto</DiskAccessMode>
|
|
|
|
<!--
|
|
~ Size of compacted row above which to log a warning. (If compacted
|
|
~ rows do not fit in memory, Cassandra will crash. This is explained
|
|
~ in http://wiki.apache.org/cassandra/CassandraLimitations and is
|
|
~ scheduled to be fixed in 0.7.)
|
|
-->
|
|
<RowWarningThresholdInMB>512</RowWarningThresholdInMB>
|
|
|
|
<!--
|
|
~ Buffer size to use when performing contiguous column slices. Increase
|
|
~ this to the size of the column slices you typically perform.
|
|
~ (Name-based queries are performed with a buffer size of
|
|
~ ColumnIndexSizeInKB.)
|
|
-->
|
|
<SlicedBufferSizeInKB>64</SlicedBufferSizeInKB>
|
|
|
|
<!--
|
|
~ Buffer size to use when flushing memtables to disk. (Only one
|
|
~ memtable is ever flushed at a time.) Increase (decrease) the index
|
|
~ buffer size relative to the data buffer if you have few (many)
|
|
~ columns per key. Bigger is only better _if_ your memtables get large
|
|
~ enough to use the space. (Check in your data directory after your
|
|
~ app has been running long enough.) -->
|
|
<FlushDataBufferSizeInMB>32</FlushDataBufferSizeInMB>
|
|
<FlushIndexBufferSizeInMB>8</FlushIndexBufferSizeInMB>
|
|
|
|
<!--
|
|
~ Add column indexes to a row after its contents reach this size.
|
|
~ Increase if your column values are large, or if you have a very large
|
|
~ number of columns. The competing concerns are: Cassandra has to
|
|
~ deserialize this much of the row to read a single column, so you want
|
|
~ it to be small - at least if you do many partial-row reads - but all
|
|
~ the index data is read for each access, so you don't want to generate
|
|
~ that wastefully either.
|
|
-->
|
|
<ColumnIndexSizeInKB>64</ColumnIndexSizeInKB>
|
|
|
|
<!--
|
|
~ Flush memtable after this much data has been inserted, including
|
|
~ overwritten data. There is one memtable per column family, and
|
|
~ this threshold is based solely on the amount of data stored, not
|
|
~ actual heap memory usage (there is some overhead in indexing the
|
|
~ columns).
|
|
-->
|
|
<MemtableThroughputInMB>64</MemtableThroughputInMB>
|
|
<!--
|
|
~ Throughput setting for Binary Memtables. Typically these are
|
|
~ used for bulk load so you want them to be larger.
|
|
-->
|
|
<BinaryMemtableThroughputInMB>256</BinaryMemtableThroughputInMB>
|
|
<!--
|
|
~ The maximum number of columns in millions to store in memory per
|
|
~ ColumnFamily before flushing to disk. This is also a per-memtable
|
|
~ setting. Use with MemtableThroughputInMB to tune memory usage.
|
|
-->
|
|
<MemtableOperationsInMillions>0.3</MemtableOperationsInMillions>
|
|
<!--
|
|
~ The maximum time to leave a dirty memtable unflushed.
|
|
~ (While any affected columnfamilies have unflushed data from a
|
|
~ commit log segment, that segment cannot be deleted.)
|
|
~ This needs to be large enough that it won't cause a flush storm
|
|
~ of all your memtables flushing at once because none has hit
|
|
~ the size or count thresholds yet. For production, a larger
|
|
~ value such as 1440 is recommended.
|
|
-->
|
|
<MemtableFlushAfterMinutes>60</MemtableFlushAfterMinutes>
|
|
|
|
<!--
|
|
~ Unlike most systems, in Cassandra writes are faster than reads, so
|
|
~ you can afford more of those in parallel. A good rule of thumb is 2
|
|
~ concurrent reads per processor core. Increase ConcurrentWrites to
|
|
~ the number of clients writing at once if you use batch CommitLogSync
|
|
~ with a CommitLogSyncBatchWindowInMS delay. -->
|
|
<ConcurrentReads>8</ConcurrentReads>
|
|
<ConcurrentWrites>32</ConcurrentWrites>
|
|
|
|
<!--
|
|
~ CommitLogSync may be either "periodic" or "batch." When in batch
|
|
~ mode, Cassandra won't ack writes until the commit log has been
|
|
~ fsynced to disk. It will wait up to CommitLogSyncBatchWindowInMS
|
|
~ milliseconds for other writes, before performing the sync.
|
|
|
|
~ This is less necessary in Cassandra than in traditional databases
|
|
~ since replication reduces the odds of losing data from a failure
|
|
~ after writing the log entry but before it actually reaches the disk.
|
|
~ So the other option is "periodic," where writes may be acked immediately
|
|
~ and the CommitLog is simply synced every CommitLogSyncPeriodInMS
|
|
~ milliseconds.
|
|
-->
|
|
<CommitLogSync>periodic</CommitLogSync>
|
|
<!--
|
|
~ Interval at which to perform syncs of the CommitLog in periodic mode.
|
|
~ Usually the default of 10000ms is fine; increase it if your i/o
|
|
~ load is such that syncs are taking excessively long times.
|
|
-->
|
|
<CommitLogSyncPeriodInMS>10000</CommitLogSyncPeriodInMS>
|
|
<!--
|
|
~ Delay (in milliseconds) during which additional commit log entries
|
|
~ may be written before fsync in batch mode. This will increase
|
|
~ latency slightly, but can vastly improve throughput where there are
|
|
~ many writers. Set to zero to disable (each entry will be synced
|
|
~ individually). Reasonable values range from a minimal 0.1 to 10 or
|
|
~ even more if throughput matters more than latency.
|
|
-->
|
|
<!-- <CommitLogSyncBatchWindowInMS>1</CommitLogSyncBatchWindowInMS> -->
|
|
|
|
<!--
|
|
~ Time to wait before garbage-collecting deletion markers. Set this to
|
|
~ a large enough value that you are confident that the deletion marker
|
|
~ will be propagated to all replicas by the time this many seconds have
|
|
~ elapsed, even in the face of hardware failures. The default value is
|
|
~ ten days.
|
|
-->
|
|
<GCGraceSeconds>864000</GCGraceSeconds>
|
|
</Storage>
|