pekko/config/storage-conf.xml

<!--
 ~ Licensed to the Apache Software Foundation (ASF) under one
 ~ or more contributor license agreements.  See the NOTICE file
 ~ distributed with this work for additional information
 ~ regarding copyright ownership.  The ASF licenses this file
 ~ to you under the Apache License, Version 2.0 (the
 ~ "License"); you may not use this file except in compliance
 ~ with the License.  You may obtain a copy of the License at
 ~
 ~    http://www.apache.org/licenses/LICENSE-2.0
 ~
 ~ Unless required by applicable law or agreed to in writing,
 ~ software distributed under the License is distributed on an
 ~ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 ~ KIND, either express or implied.  See the License for the
 ~ specific language governing permissions and limitations
 ~ under the License.
 -->
<Storage>
  <!--======================================================================-->
  <!-- Basic Configuration                                                  -->
  <!--======================================================================-->

  <!-- The name of this cluster. This is mainly used to prevent machines in
one logical cluster from joining any other cluster. -->
  <ClusterName>akka</ClusterName>

  <!-- Keyspaces and ColumnFamilies
       A ColumnFamily is the Cassandra concept closest to a relational table.
       Keyspaces are separate groups of ColumnFamilies.  Except in very
       unusual circumstances you will have one Keyspace per application.

       There is an implicit keyspace named 'system' for Cassandra internals.

       The default ColumnSort is Time for standard column families.
       For super column families, specifying ColumnSort is not supported;
       the supercolumns themselves are always name-sorted and their subcolumns
       are always time-sorted.
  -->
  <Keyspaces>
    <Keyspace Name="akka">
      <!-- The fraction of keys per sstable whose locations we
           keep in memory in "mostly LRU" order.  (JUST the key
           locations, NOT any column values.)

           The amount of memory used by the default setting of
           0.01 is comparable to the amount used by the internal
           per-sstable key index.  Consider increasing this if you
           have fewer, wider rows.  Set to 0 to disable entirely.
      -->
      <KeysCachedFraction>0.01</KeysCachedFraction>
      <!--
           The CompareWith attribute tells Cassandra how to sort the columns
           for slicing operations.  For backwards compatibility, the default
           is to use AsciiType, which is probably NOT what you want.
           Other options are BytesType, UTF8Type, LexicalUUIDType, TimeUUIDType, and LongType.
           You can also specify the fully-qualified class name to a class
           of your choice implementing org.apache.cassandra.db.marshal.IType.

           SuperColumns have a similar CompareSubcolumnsWith attribute.

           BytesType: simple sort by byte value.  No validation is performed.
           AsciiType: like BytesType, but validates that the input can be parsed as US-ASCII.
           UTF8Type: A string encoded as UTF8
           LongType: A 64bit long
           LexicalUUIDType: a 128bit UUID, compared lexically (by byte value)
           TimeUUIDType: a 128bit version 1 UUID, compared by timestamp

           (To get the closest approximation to 0.3-style supercolumns,
           you would use CompareWith=UTF8Type CompareSubcolumnsWith=LongType.)

           If FlushPeriodInMinutes is configured and positive, the column
           family will be flushed to disk with that period whether it is
           dirty or not.  This is intended for lightly-used columnfamilies
           so that they do not prevent commitlog segments from being purged.
      -->
      <!-- Active column families of this keyspace.  All three sort their
           columns with UTF8Type and set no FlushPeriodInMinutes. -->
      <ColumnFamily CompareWith="UTF8Type" Name="map"/>
      <!-- FIXME: change vector to a super column -->
      <ColumnFamily CompareWith="UTF8Type" Name="vector"/>
      <ColumnFamily CompareWith="UTF8Type" Name="ref"/>

      <!-- The definitions below are commented out and therefore inactive;
           they show the FlushPeriodInMinutes, TimeUUIDType and Super
           (CompareSubcolumnsWith) variants described above. -->
      <!--ColumnFamily CompareWith="UTF8Type" Name="Standard1" FlushPeriodInMinutes="60"/>
      <ColumnFamily CompareWith="TimeUUIDType" Name="StandardByUUID1"/>
      <ColumnFamily ColumnType="Super" CompareWith="UTF8Type" CompareSubcolumnsWith="UTF8Type" Name="Super1"/-->
    </Keyspace>
  </Keyspaces>

  <!-- Partitioner: any IPartitioner may be used, including your own
       as long as it is on the classpath.  Out of the box,
       Cassandra provides
       org.apache.cassandra.dht.RandomPartitioner,
       org.apache.cassandra.dht.OrderPreservingPartitioner, and
       org.apache.cassandra.dht.CollatingOrderPreservingPartitioner.
       (CollatingOPP collates according to EN,US rules, not naive byte ordering.
       Use this as an example if you need locale-aware collation.)
       Range queries require using OrderPreservingPartitioner or a subclass.

       Achtung!  Changing this parameter requires wiping your data directories,
       since the partitioner can modify the sstable on-disk format.
  -->
  <Partitioner>org.apache.cassandra.dht.RandomPartitioner</Partitioner>

  <!-- If you are using the OrderPreservingPartitioner and you know your key
distribution, you can specify the token for this node to use.
(Keys are sent to the node with the "closest" token, so distributing
your tokens equally along the key distribution space will spread
keys evenly across your cluster.)  This setting is only checked the
first time a node is started.

This can also be useful with RandomPartitioner to force equal
spacing of tokens around the hash space, especially for
clusters with a small number of nodes. -->
  <InitialToken></InitialToken>


  <!-- EndPointSnitch: Set this to a class that implements IEndPointSnitch,
       which is used to determine whether two endpoints are in the same
       data center or on the same rack.
       Out of the box, Cassandra provides
       org.apache.cassandra.locator.EndPointSnitch
  -->
  <EndPointSnitch>org.apache.cassandra.locator.EndPointSnitch</EndPointSnitch>

  <!-- Strategy: Setting this to the class that implements IReplicaPlacementStrategy
       will change the way the node picker works.
       Out of the box, Cassandra provides
       org.apache.cassandra.locator.RackUnawareStrategy
       org.apache.cassandra.locator.RackAwareStrategy
       (place one replica in a different datacenter, and the
       others on different racks in the same one.)
  -->
  <ReplicaPlacementStrategy>org.apache.cassandra.locator.RackUnawareStrategy</ReplicaPlacementStrategy>

  <!-- Number of replicas of the data -->
  <ReplicationFactor>1</ReplicationFactor>

  <!-- Directories: Specify where Cassandra should store different data on disk.
       Keep the data disks and the CommitLog disks separate for best performance
  -->
  <CommitLogDirectory>cassandra/commitlog</CommitLogDirectory>
  <DataFileDirectories>
    <DataFileDirectory>cassandra/data</DataFileDirectory>
  </DataFileDirectories>
  <CalloutLocation>cassandra/callouts</CalloutLocation>
  <BootstrapFileDirectory>cassandra/bootstrap</BootstrapFileDirectory>
  <StagingFileDirectory>cassandra/staging</StagingFileDirectory>


  <!-- Addresses of hosts that are deemed contact points. Cassandra nodes use
       this list of hosts to find each other and learn the topology of the ring.
       You must change this if you are running multiple nodes!
  -->
  <Seeds>
    <Seed>127.0.0.1</Seed>
  </Seeds>


  <!-- Miscellaneous -->

  <!-- time to wait for a reply from other nodes before failing the command -->
  <RpcTimeoutInMillis>5000</RpcTimeoutInMillis>
  <!-- size to allow commitlog to grow to before creating a new segment -->
  <CommitLogRotationThresholdInMB>128</CommitLogRotationThresholdInMB>


  <!-- Local hosts and ports -->

  <!-- Address to bind to and tell other nodes to connect to.
You _must_ change this if you want multiple nodes to be able
to communicate!

Leaving it blank leaves it up to InetAddress.getLocalHost().
This will always do the Right Thing *if* the node is properly
configured (hostname, name resolution, etc), and the Right
Thing is to use the address associated with the hostname (it
might not be). -->
  <ListenAddress>localhost</ListenAddress>
  <!-- TCP port, for commands and data -->
  <StoragePort>7000</StoragePort>
  <!-- UDP port, for membership communications (gossip) -->
  <ControlPort>7001</ControlPort>

  <!-- The address to bind the Thrift RPC service to. Unlike
       ListenAddress above, you *can* specify 0.0.0.0 here if you want
       Thrift to listen on all interfaces.

       Leaving this blank has the same effect it does for ListenAddress,
       (i.e. it will be based on the configured hostname of the node).
  -->
  <ThriftAddress>localhost</ThriftAddress>
  <!-- Thrift RPC port (the port clients connect to). -->
  <ThriftPort>9160</ThriftPort>


  <!--======================================================================-->
  <!-- Memory, Disk, and Performance                                        -->
  <!--======================================================================-->

  <!-- Buffer size to use when flushing memtables to disk.
     (Only one memtable is ever flushed at a time.)
     Increase (decrease) the index buffer size relative to the data buffer
     if you have few (many) columns per key.
     Bigger is only better _if_ your memtables get large enough to use the space.
     (Check in your data directory after your app has been running long enough.)
  -->
  <FlushDataBufferSizeInMB>32</FlushDataBufferSizeInMB>
  <FlushIndexBufferSizeInMB>8</FlushIndexBufferSizeInMB>

  <!-- Add column indexes to a row after its contents reach this size.
Increase if your column values are large, or if you have a very large
number of columns.  The competing concerns are that Cassandra has to
deserialize this much of the row to read a single column, so you
want it to be small - at least if you do many partial-row reads
- but all the index data is read for each access, so
you don't want to generate that wastefully either. -->
  <ColumnIndexSizeInKB>64</ColumnIndexSizeInKB>

  <!--
    The maximum amount of data to store in memory per ColumnFamily before flushing to
    disk. Note: There is one memtable per column family, and this threshold
    is based solely on the amount of data stored, not actual heap memory
    usage (there is some overhead in indexing the columns).
  -->
  <MemtableSizeInMB>64</MemtableSizeInMB>
  <!--
    The maximum number of columns in millions to store in memory per ColumnFamily
    before flushing to disk.  This is also a per-memtable setting.
    Use with MemtableSizeInMB to tune memory usage.
  -->
  <MemtableObjectCountInMillions>0.1</MemtableObjectCountInMillions>

  <!-- Unlike most systems, in Cassandra writes are faster than
reads, so you can afford more of those in parallel.
A good rule of thumb is 2 concurrent reads per processor core.
Increase ConcurrentWrites to the number of clients writing
at once if you enable CommitLogSync + CommitLogSyncDelay. -->
  <ConcurrentReads>8</ConcurrentReads>
  <ConcurrentWrites>32</ConcurrentWrites>

  <!-- Turn on CommitLogSync to improve durability.
       When enabled, Cassandra won't ack writes until the commit log
       has been synced to disk.  This is less necessary in Cassandra
       than in traditional databases since replication reduces the
       odds of losing data from a failure after writing the log
       entry but before it actually reaches the disk.
  -->
  <CommitLogSync>false</CommitLogSync>
  <!-- Delay (in microseconds) during which additional commit log
       entries may be written before fsync.  This will increase
       latency slightly, but can vastly improve throughput where
       there are many writers.  Set to zero to disable
       (each entry will be synced individually).
       Reasonable values range from a minimal 100 to even 10000
       if throughput matters more than latency.  (10000us = 10ms
       write latency isn't even that bad by traditional db
       standards.)
  -->
  <CommitLogSyncDelay>1000</CommitLogSyncDelay>


  <!-- Time to wait before garbage-collecting deletion markers.
       Set this to a large enough value that you are confident
       that the deletion marker will be propagated to all replicas
       by the time this many seconds have elapsed, even in the
       face of hardware failures.  The default value is ten days.
  -->
  <GCGraceSeconds>864000</GCGraceSeconds>
</Storage>