Snappy(也称 Zippy)是 Google 基于 LZ77 的思路用 C++ 语言编写的快速数据压缩与解压程序库,在 2011 年开源。
介绍
Snappy 的特点是非常高的速度和合理的压缩率:
- 使用一个运行在 64 位酷睿 i7 处理器的单个核心,压缩速度约 250 MB/s,解压速度约 500 MB/s
- 压缩率比 gzip 低:压缩后的文件比 gzip 的结果大 20-100%
- Stream format
- 应用
- Google 内部的 BigTable、MapReduce 等
- 开源数据库,如 Cassandra、Couchbase、Hadoop、LevelDB、MongoDB、RocksDB、Lucene、Spark 和 InfluxDB
- 源码
- lib 库实现
安装
snappy lib
# Install the build toolchain (CMake, GCC, make and friends).
apt update
apt install cmake gcc build-essential -y
# Alternative: build from the release tarball instead of a git checkout.
# Plain wget names the download after the last URL component (1.1.10.tar.gz),
# so -O is used to give it the name that the tar command below expects.
# NOTE(review): the tarball presumably lacks the git submodules fetched below — confirm before using this path.
# wget -O snappy-1.1.10.tar.gz https://github.com/google/snappy/archive/refs/tags/1.1.10.tar.gz
# tar -zxvf snappy-1.1.10.tar.gz
# cd snappy-1.1.10
# Check out the 1.1.10 tag and pull in its git submodules.
git clone https://github.com/google/snappy.git -b 1.1.10
cd snappy
git submodule update --init
# Out-of-source build in ./build, then install.
mkdir build
cd build
cmake ../
make
make install
# Alternative: the same configure/build/install driven entirely through cmake,
# using the RelWithDebInfo build type.
# cmake -S $(pwd) -B build -DCMAKE_BUILD_TYPE=RelWithDebInfo
# cmake --build build --target install --config "RelWithDebInfo"
# 安装在如下目录
-- Up-to-date: /usr/local/lib/libsnappy.a
-- Up-to-date: /usr/local/include/snappy-c.h
-- Up-to-date: /usr/local/include/snappy-sinksource.h
-- Up-to-date: /usr/local/include/snappy.h
-- Up-to-date: /usr/local/include/snappy-stubs-public.h
说明:
- libsnappy.a 是 snappy 的静态库
- snappy-stubs-public.h 等 .h 文件是 snappy 的头文件
验证 snappy lib
可跳过该步骤
#include "snappy.h"
#include <string>
#include <iostream>
// Demo: compress a tiny string with snappy, then print the input, the
// compressed buffer and both sizes.
int main() {
    const std::string input = "abc";
    std::string packed;

    std::cout << "original:" << input << std::endl;

    // Fills `packed` with the compressed representation of `input`.
    snappy::Compress(input.data(), input.size(), &packed);

    // The compressed buffer is written to stdout as-is; it is binary data,
    // so a terminal may not render every byte literally.
    std::cout << "result:" << packed << std::endl;
    std::cout << "original-size:" << input.size()
              << " result-size:" << packed.size() << std::endl;
    return 0;
}
$ g++ -o example test.cc -lsnappy
$ ./example
original:abc
resultabc
original-size:3 result-size:5
snzip
- snzip 基于snappy的压缩/解压缩命令行工具
# The upstream snappy build only produces libsnappy.a, so install the
# distribution's libsnappy-dev package as well; see
# https://packages.ubuntu.com/search?keywords=libsnappy-dev
apt install libsnappy-dev
# Download and unpack the snzip 1.0.5 release.
wget https://github.com/kubo/snzip/releases/download/v1.0.5/snzip-1.0.5.tar.gz
tar xvfz snzip-1.0.5.tar.gz
cd snzip-1.0.5
# Configure with "snzip" as the default file format, then build and install.
./configure --with-default-format=snzip
make
make install
- ps:基于 snappy 1.1.10 构建时,需要为 CMakeLists.txt 打补丁(增加静态库构建目标),参考 Debian 打包文件 snappy_1.1.10-1.debian.tar.xz,如下:
$ cat patches/build_static_lib.patch
Description: add static library build
Add upstream missed static library target.
Author: Laszlo Boszormenyi (GCS) <gcs@debian.org>
Last-Update: 2021-12-04
---
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -249,6 +249,25 @@ if(BUILD_SHARED_LIBS)
set_target_properties(snappy PROPERTIES WINDOWS_EXPORT_ALL_SYMBOLS ON)
endif(BUILD_SHARED_LIBS)
+ADD_LIBRARY(snappy-static STATIC
+ snappy-c.cc
+ snappy-c.h
+ snappy-sinksource.cc
+ snappy-sinksource.h
+ snappy-stubs-internal.cc
+ snappy-stubs-public.h
+ snappy.cc
+ snappy.h)
+
+target_include_directories(snappy-static
+ PUBLIC
+ $<BUILD_INTERFACE:${PROJECT_BINARY_DIR}>
+ $<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}>
+ $<INSTALL_INTERFACE:include>
+)
+
+SET_TARGET_PROPERTIES(snappy-static PROPERTIES OUTPUT_NAME snappy)
+
if(SNAPPY_BUILD_TESTS OR SNAPPY_BUILD_BENCHMARKS)
add_library(snappy_test_support "")
target_sources(snappy_test_support
@@ -359,7 +378,7 @@ endif(SNAPPY_FUZZING_BUILD)
include(GNUInstallDirs)
if(SNAPPY_INSTALL)
- install(TARGETS snappy
+ install(TARGETS snappy snappy-static
EXPORT SnappyTargets
RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
help
$ snzip -h
snzip 1.0.5
Usage: snzip [option ...] [file ...]
general options:
-c output to standard output, keep original files unchanged
-d decompress
-k keep (don't delete) input files
-t name file format name. see below. The default format is snzip.
-h give this help
raw_format option:
-s size size of input data when compressing.
The default value is the file size if available.
tuning options:
-b num internal block size in bytes
-B num internal block size. 'num'-th power of two.
-R num size of read buffer in bytes
-W num size of write buffer in bytes
-T trace for debug
supported formats:
NAME SUFFIX URL
---- ------ ---
framing2 sz https://github.com/google/snappy/blob/master/framing_format.txt
hadoop-snappy snappy https://code.google.com/p/hadoop-snappy/
raw raw https://github.com/google/snappy/blob/master/format_description.txt
iwa iwa https://github.com/obriensp/iWorkFileFormat/blob/master/Docs/index.md#snappy-compression
framing sz https://github.com/google/snappy/blob/0755c815197dacc77d8971ae917c86d7aa96bf8e/framing_format.txt
snzip snz https://github.com/kubo/snzip
snappy-java snappy https://github.com/xerial/snappy-java
snappy-in-java snappy https://github.com/dain/snappy
comment-43 snappy http://code.google.com/p/snappy/issues/detail?id=34#c43
使用示例
# 压缩
$ snzip snzip-1.0.5.tar.gz
# 解压
$ snzip -d snzip-1.0.5.tar.gz.snz
lib
Python
pip install python-snappy
# python2
import snappy

# Read the compressed payload as raw bytes; the context manager closes the
# file handle as soon as the read is done.
with open('/tmp/temp.snappy', 'rb') as fh:
    compressed = fh.read()
snappy.uncompress(compressed)
# python3
import snappy

# Read the compressed payload as bytes; the context manager closes the file
# handle as soon as the read is done.
with open('/tmp/temp.snappy', 'rb') as fh:
    compressed = fh.read()

# Decode the decompressed bytes as UTF-8, dropping any undecodable sequences.
print(snappy.uncompress(compressed).decode(encoding='utf-8', errors="ignore"))