upgrade cassandra to 0.5.0

2010-02-17 15:25:35 +01:00 · 2010-02-17 15:25:35 +01:00 · a73b66a9c7
commit a73b66a9c7
parent 963d76beae
3 changed files with 189 additions and 121 deletions
--- a/akka-persistence/akka-persistence-cassandra/pom.xml
+++ b/akka-persistence/akka-persistence-cassandra/pom.xml
@@ -24,7 +24,7 @@
    <dependency>
      <groupId>org.apache.cassandra</groupId>
      <artifactId>cassandra</artifactId>
-      <version>0.4.1</version>
+      <version>0.5.0</version>
    </dependency>
     <dependency>
      <groupId>log4j</groupId>
--- a/config/storage-conf.xml
+++ b/config/storage-conf.xml
@@ -15,27 +15,43 @@
 ~ KIND, either express or implied.  See the License for the
 ~ specific language governing permissions and limitations
 ~ under the License.
- -->
+-->
 <Storage>
  <!--======================================================================-->
  <!-- Basic Configuration                                                  -->
  <!--======================================================================-->

-  <!-- The name of this cluster. This is mainly used to prevent machines in
-one logical cluster from joining any other cluster. -->
+  <!-- 
+   ~ The name of this cluster.  This is mainly used to prevent machines in
+   ~ one logical cluster from joining another.
+  -->
  <ClusterName>akka</ClusterName>

-  <!-- Keyspaces and ColumnFamilies
-       A ColumnFamily is the Cassandra concept closest to a relational table.
-       Keyspaces are separate groups of ColumnFamilies.  Except in very
-       unusual circumstances you will have one Keyspace per application.
+  <!--
+   ~ Turn on to make new [non-seed] nodes automatically migrate the right data 
+   ~ to themselves.  (If no InitialToken is specified, they will pick one 
+   ~ such that they will get half the range of the most-loaded node.)
+   ~ If a node starts up without bootstrapping, it will mark itself bootstrapped
+   ~ so that you can't subsequently accidentally bootstrap a node with
+   ~ data on it.  (You can reset this by wiping your data and commitlog
+   ~ directories.)
+   ~
+   ~ Off by default so that new clusters and upgraders from 0.4 don't
+   ~ bootstrap immediately.  You should turn this on when you start adding
+   ~ new nodes to a cluster that already has data on it.  (If you are upgrading
+   ~ from 0.4, start your cluster with it off once before changing it to true.
+   ~ Otherwise, no data will be lost but you will incur a lot of unnecessary
+   ~ I/O before your cluster starts up.)
+  -->
+  <AutoBootstrap>false</AutoBootstrap>

-       There is an implicit keyspace named 'system' for Cassandra internals.
+  <!--
+   ~ Keyspaces and ColumnFamilies:
+   ~ A ColumnFamily is the Cassandra concept closest to a relational
+   ~ table.  Keyspaces are separate groups of ColumnFamilies.  Except in
+   ~ very unusual circumstances you will have one Keyspace per application.

-       The default ColumnSort is Time for standard column families.
-       For super column families, specifying ColumnSort is not supported;
-       the supercolumns themselves are always name-sorted and their subcolumns
-       are always time-sorted.
+   ~ There is an implicit keyspace named 'system' for Cassandra internals.
  -->
  <Keyspaces>
    <Keyspace Name="akka">
@@ -87,183 +103,235 @@ one logical cluster from joining any other cluster. -->
    </Keyspace>
  </Keyspaces>

-  <!-- Partitioner: any IPartitioner may be used, including your own
-       as long as it is on the classpath.  Out of the box,
-       Cassandra provides
-       org.apache.cassandra.dht.RandomPartitioner,
-       org.apache.cassandra.dht.OrderPreservingPartitioner, and
-       org.apache.cassandra.dht.CollatingOrderPreservingPartitioner.
-       (CollatingOPP colates according to EN,US rules, not naive byte ordering.
-       Use this as an example if you need locale-aware collation.)
-       Range queries require using OrderPreservingPartitioner or a subclass.
-
-       Achtung!  Changing this parameter requires wiping your data directories,
-       since the partitioner can modify the sstable on-disk format.
+  <!--
+   ~ Partitioner: any IPartitioner may be used, including your own as long
+   ~ as it is on the classpath.  Out of the box, Cassandra provides
+   ~ org.apache.cassandra.dht.RandomPartitioner,
+   ~ org.apache.cassandra.dht.OrderPreservingPartitioner, and
+   ~ org.apache.cassandra.dht.CollatingOrderPreservingPartitioner.
+   ~ (CollatingOPP collates according to EN,US rules, not naive byte
+   ~ ordering.  Use this as an example if you need locale-aware collation.)
+   ~ Range queries require using an order-preserving partitioner.
+   ~
+   ~ Achtung!  Changing this parameter requires wiping your data
+   ~ directories, since the partitioner can modify the sstable on-disk
+   ~ format.
  -->
  <Partitioner>org.apache.cassandra.dht.RandomPartitioner</Partitioner>

-  <!-- If you are using the OrderPreservingPartitioner and you know your key
-distribution, you can specify the token for this node to use.
-(Keys are sent to the node with the "closest" token, so distributing
-your tokens equally along the key distribution space will spread
-keys evenly across your cluster.)  This setting is only checked the
-first time a node is started.
+  <!--
+   ~ If you are using an order-preserving partitioner and you know your key
+   ~ distribution, you can specify the token for this node to use. (Keys
+   ~ are sent to the node with the "closest" token, so distributing your
+   ~ tokens equally along the key distribution space will spread keys
+   ~ evenly across your cluster.)  This setting is only checked the first
+   ~ time a node is started. 

-This can also be useful with RandomPartitioner to force equal
-spacing of tokens around the hash space, especially for
-clusters with a small number of nodes. -->
+   ~ This can also be useful with RandomPartitioner to force equal spacing
+   ~ of tokens around the hash space, especially for clusters with a small
+   ~ number of nodes.
+  -->
  <InitialToken></InitialToken>

-
-  <!-- EndPointSnitch: Setting this to the class that implements IEndPointSnitch
-       which will see if two endpoints are in the same data center or on the same rack.
-       Out of the box, Cassandra provides
-       org.apache.cassandra.locator.EndPointSnitch
+  <!--
+   ~ EndPointSnitch: Setting this to the class that implements
+   ~ IEndPointSnitch which will see if two endpoints are in the same data
+   ~ center or on the same rack. Out of the box, Cassandra provides
+   ~ org.apache.cassandra.locator.EndPointSnitch
  -->
  <EndPointSnitch>org.apache.cassandra.locator.EndPointSnitch</EndPointSnitch>

-  <!-- Strategy: Setting this to the class that implements IReplicaPlacementStrategy
-       will change the way the node picker works.
-       Out of the box, Cassandra provides
-       org.apache.cassandra.locator.RackUnawareStrategy
-       org.apache.cassandra.locator.RackAwareStrategy
-       (place one replica in a different datacenter, and the
-       others on different racks in the same one.)
+  <!--
+   ~ Strategy: Setting this to the class that implements
+   ~ IReplicaPlacementStrategy will change the way the node picker works.
+   ~ Out of the box, Cassandra provides
+   ~ org.apache.cassandra.locator.RackUnawareStrategy and
+   ~ org.apache.cassandra.locator.RackAwareStrategy (place one replica in
+   ~ a different datacenter, and the others on different racks in the same
+   ~ one.)
  -->
  <ReplicaPlacementStrategy>org.apache.cassandra.locator.RackUnawareStrategy</ReplicaPlacementStrategy>

-  <!-- Number of replicas of the data-->
+  <!-- Number of replicas of the data -->
  <ReplicationFactor>1</ReplicationFactor>

-  <!-- Directories: Specify where Cassandra should store different data on disk
-       Keep the data disks and the CommitLog disks separate for best performance
+  <!--
+   ~ Directories: Specify where Cassandra should store different data on
+   ~ disk.  Keep the data disks and the CommitLog disks separate for best
+   ~ performance
  -->
  <CommitLogDirectory>cassandra/commitlog</CommitLogDirectory>
  <DataFileDirectories>
-    <DataFileDirectory>cassandra/data</DataFileDirectory>
+      <DataFileDirectory>cassandra/data</DataFileDirectory>
  </DataFileDirectories>
  <CalloutLocation>cassandra/callouts</CalloutLocation>
-  <BootstrapFileDirectory>cassandra/bootstrap</BootstrapFileDirectory>
  <StagingFileDirectory>cassandra/staging</StagingFileDirectory>


-  <!-- Addresses of hosts that are deemed contact points. Cassandra nodes use
-       this list of hosts to find each other and learn the topology of the ring.
-       You must change this if you are running multiple nodes!
+  <!--
+   ~ Addresses of hosts that are deemed contact points. Cassandra nodes
+   ~ use this list of hosts to find each other and learn the topology of
+   ~ the ring. You must change this if you are running multiple nodes!
  -->
  <Seeds>
-    <Seed>127.0.0.1</Seed>
+      <Seed>127.0.0.1</Seed>
  </Seeds>


  <!-- Miscellaneous -->

-  <!-- time to wait for a reply from other nodes before failing the command -->
+  <!-- Time to wait for a reply from other nodes before failing the command -->
  <RpcTimeoutInMillis>5000</RpcTimeoutInMillis>
-  <!-- size to allow commitlog to grow to before creating a new segment -->
+  <!-- Size to allow commitlog to grow to before creating a new segment -->
  <CommitLogRotationThresholdInMB>128</CommitLogRotationThresholdInMB>


  <!-- Local hosts and ports -->

-  <!-- Address to bind to and tell other nodes to connect to.
-You _must_ change this if you want multiple nodes to be able
-to communicate!
-
-Leaving it blank leaves it up to InetAddress.getLocalHost().
-This will always do the Right Thing *if* the node is properly
-configured (hostname, name resolution, etc), and the Right
-Thing is to use the address associated with the hostname (it
-might not be). -->
+  <!-- 
+   ~ Address to bind to and tell other nodes to connect to.  You _must_
+   ~ change this if you want multiple nodes to be able to communicate!  
+   ~
+   ~ Leaving it blank leaves it up to InetAddress.getLocalHost(). This
+   ~ will always do the Right Thing *if* the node is properly configured
+   ~ (hostname, name resolution, etc), and the Right Thing is to use the
+   ~ address associated with the hostname (it might not be).
+  -->
  <ListenAddress>localhost</ListenAddress>
  <!-- TCP port, for commands and data -->
  <StoragePort>7000</StoragePort>
  <!-- UDP port, for membership communications (gossip) -->
  <ControlPort>7001</ControlPort>

-  <!-- The address to bind the Thrift RPC service to. Unlike
-       ListenAddress above, you *can* specify 0.0.0.0 here if you want
-       Thrift to listen on all interfaces.
-
-       Leaving this blank has the same effect it does for ListenAddress,
-       (i.e. it will be based on the configured hostname of the node).
+  <!--
+   ~ The address to bind the Thrift RPC service to. Unlike ListenAddress
+   ~ above, you *can* specify 0.0.0.0 here if you want Thrift to listen on
+   ~ all interfaces.
+   ~
+   ~ Leaving this blank has the same effect it does for ListenAddress,
+   ~ (i.e. it will be based on the configured hostname of the node).
  -->
  <ThriftAddress>localhost</ThriftAddress>
  <!-- Thrift RPC port (the port clients connect to). -->
  <ThriftPort>9160</ThriftPort>
+  <!-- 
+   ~ Whether or not to use a framed transport for Thrift. If this option
+   ~ is set to true then you must also use a framed transport on the 
+   ~ client-side, (framed and non-framed transports are not compatible).
+  -->
+  <ThriftFramedTransport>false</ThriftFramedTransport>


  <!--======================================================================-->
  <!-- Memory, Disk, and Performance                                        -->
  <!--======================================================================-->

-  <!-- Buffer size to use when flushing memtables to disk.
-     (Only one memtable is ever flushed at a time.)
-     Increase (decrease) the index buffer size relative to the data buffer
-     if you have few (many) columns per key.
-     Bigger is only better _if_ your memtables get large enough to use the space.
-     (Check in your data directory after your app has been running long enough.)
+  <!--
+   ~ Buffer size to use when performing contiguous column slices. Increase
+   ~ this to the size of the column slices you typically perform. 
+   ~ (Name-based queries are performed with a buffer size of 
+   ~ ColumnIndexSizeInKB.)
  -->
+  <SlicedBufferSizeInKB>64</SlicedBufferSizeInKB>
+
+  <!--
+   ~ Buffer size to use when flushing memtables to disk. (Only one 
+   ~ memtable is ever flushed at a time.) Increase (decrease) the index
+   ~ buffer size relative to the data buffer if you have few (many) 
+   ~ columns per key.  Bigger is only better _if_ your memtables get large
+   ~ enough to use the space. (Check in your data directory after your
+   ~ app has been running long enough.) -->
  <FlushDataBufferSizeInMB>32</FlushDataBufferSizeInMB>
  <FlushIndexBufferSizeInMB>8</FlushIndexBufferSizeInMB>

-  <!-- Add column indexes to a row after its contents reach this size.
-Increase if your column values are large, or if you have a very large
-number of columns.  The competing causes are, Cassandra has to
-deserialize this much of the row to read a single column, so you
-want it to be small - at least if you do many partial-row reads
- but all the index data is read for each access, so
-you don't want to generate that wastefully either. -->
+  <!--
+   ~ Add column indexes to a row after its contents reach this size.
+   ~ Increase if your column values are large, or if you have a very large
+   ~ number of columns.  The competing causes are, Cassandra has to
+   ~ deserialize this much of the row to read a single column, so you want
+   ~ it to be small - at least if you do many partial-row reads - but all
+   ~ the index data is read for each access, so you don't want to generate
+   ~ that wastefully either.
+  -->
  <ColumnIndexSizeInKB>64</ColumnIndexSizeInKB>

  <!--
-    The maximum amount of data to store in memory per ColumnFamily before flushing to
-    disk. Note: There is one memtable per column family, and this threshold
-    is based solely on the amount of data stored, not actual heap memory
-    usage (there is some overhead in indexing the columns).
+   ~ The maximum amount of data to store in memory per ColumnFamily before
+   ~ flushing to disk.  Note: There is one memtable per column family, and 
+   ~ this threshold is based solely on the amount of data stored, not
+   ~ actual heap memory usage (there is some overhead in indexing the
+   ~ columns).
  -->
  <MemtableSizeInMB>64</MemtableSizeInMB>
  <!--
-    The maximum number of columns in millions to store in memory per ColumnFamily
-    before flushing to disk.  This is also a per-memtable setting.
-    Use with MemtableSizeInMB to tune memory usage.
+   ~ The maximum number of columns in millions to store in memory per
+   ~ ColumnFamily before flushing to disk.  This is also a per-memtable
+   ~ setting.  Use with MemtableSizeInMB to tune memory usage.
  -->
  <MemtableObjectCountInMillions>0.1</MemtableObjectCountInMillions>
+  <!--
+   ~ The maximum time to leave a dirty memtable unflushed.
+   ~ (While any affected columnfamilies have unflushed data from a
+   ~ commit log segment, that segment cannot be deleted.)
+   ~ This needs to be large enough that it won't cause a flush storm
+   ~ of all your memtables flushing at once because none has hit
+   ~ the size or count thresholds yet.  For production, a larger
+   ~ value such as 1440 is recommended.
+  -->
+  <MemtableFlushAfterMinutes>60</MemtableFlushAfterMinutes>

-  <!-- Unlike most systems, in Cassandra writes are faster than
-reads, so you can afford more of those in parallel.
-A good rule of thumb is 2 concurrent reads per processor core.
-Increase ConcurrentWrites to the number of clients writing
-at once if you enable CommitLogSync + CommitLogSyncDelay. -->
+  <!--
+   ~ Unlike most systems, in Cassandra writes are faster than reads, so
+   ~ you can afford more of those in parallel.  A good rule of thumb is 2
+   ~ concurrent reads per processor core.  Increase ConcurrentWrites to
+   ~ the number of clients writing at once if you enable CommitLogSync +
+   ~ CommitLogSyncDelay. -->
  <ConcurrentReads>8</ConcurrentReads>
  <ConcurrentWrites>32</ConcurrentWrites>

-  <!-- Turn on CommitLogSync to improve durability.
-       When enabled, Cassandra won't ack writes until the commit log
-       has been synced to disk.  This is less necessary in Cassandra
-       than in traditional databases since replication reduces the
-       odds of losing data from a failure after writing the log
-       entry but before it actually reaches the disk.
-  -->
-  <CommitLogSync>false</CommitLogSync>
-  <!-- Delay (in microseconds) during which additional commit log
-       entries may be written before fsync.  This will increase
-       latency slightly, but can vastly improve throughput where
-       there are many writers.  Set to zero to disable
-       (each entry will be synced individually).
-   Reasonable values range from a minimal 100 to even 10000
-       if throughput matters more than latency.  (10000us = 10ms
-       write latency isn't even that bad by traditional db
-       standards.)
-  -->
-  <CommitLogSyncDelay>1000</CommitLogSyncDelay>
+  <!--
+   ~ CommitLogSync may be either "periodic" or "batch."  When in batch
+   ~ mode, Cassandra won't ack writes until the commit log has been
+   ~ fsynced to disk.  It will wait up to CommitLogSyncBatchWindowInMS
+   ~ milliseconds for other writes, before performing the sync.

+   ~ This is less necessary in Cassandra than in traditional databases
+   ~ since replication reduces the odds of losing data from a failure
+   ~ after writing the log entry but before it actually reaches the disk.
+   ~ So the other option is "periodic," where writes may be acked immediately
+   ~ and the CommitLog is simply synced every CommitLogSyncPeriodInMS
+   ~ milliseconds.
+  -->
+  <CommitLogSync>periodic</CommitLogSync>
+  <!--
+   ~ Interval at which to perform syncs of the CommitLog in periodic mode.
+   ~ Usually the default of 10000ms is fine; increase it if your i/o
+   ~ load is such that syncs are taking excessively long times.
+  -->
+  <CommitLogSyncPeriodInMS>10000</CommitLogSyncPeriodInMS>
+  <!--
+   ~ Delay (in milliseconds) during which additional commit log entries
+   ~ may be written before fsync in batch mode.  This will increase
+   ~ latency slightly, but can vastly improve throughput where there are
+   ~ many writers.  Set to zero to disable (each entry will be synced
+   ~ individually).  Reasonable values range from a minimal 0.1 to 10 or
+   ~ even more if throughput matters more than latency.
+  -->
+  <!-- <CommitLogSyncBatchWindowInMS>1</CommitLogSyncBatchWindowInMS> --> 

-  <!-- Time to wait before garbage-collection deletion markers.
-       Set this to a large enough value that you are confident
-       that the deletion marker will be propagated to all replicas
-       by the time this many seconds has elapsed, even in the
-       face of hardware failures.  The default value is ten days.
+  <!--
+   ~ Time to wait before garbage-collecting deletion markers.  Set this to
+   ~ a large enough value that you are confident that the deletion marker
+   ~ will be propagated to all replicas by the time this many seconds has
+   ~ elapsed, even in the face of hardware failures.  The default value is
+   ~ ten days.
  -->
  <GCGraceSeconds>864000</GCGraceSeconds>
+
+  <!--
+   ~ The threshold size in megabytes the binary memtable must grow to,
+   ~ before it's submitted for flushing to disk.
+  -->
+  <BinaryMemtableSizeInMB>256</BinaryMemtableSizeInMB>
+
 </Storage>
--- a/embedded-repo/org/apache/cassandra/cassandra/0.5.0/cassandra-0.5.0.jar
+++ b/embedded-repo/org/apache/cassandra/cassandra/0.5.0/cassandra-0.5.0.jar