Skip to content

Commit

Permalink
Add docs and examples
Browse files Browse the repository at this point in the history
  • Loading branch information
silentsokolov committed Jan 2, 2019
1 parent 11c5f3e commit e95ff61
Show file tree
Hide file tree
Showing 4 changed files with 130 additions and 20 deletions.
15 changes: 15 additions & 0 deletions .travis.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
sudo: false
language: rust
cache: cargo

rust:
- stable
- beta
- nightly

matrix:
allow_failures:
- rust: nightly

script:
cargo test --verbose --all
3 changes: 3 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -11,5 +11,8 @@ keywords = ["compression", "string", "smaz"]
categories = ["compression"]
license = "MIT"

[badges]
travis-ci = { repository = "silentsokolov/rust-smaz" }

[dependencies]
lazy_static = "1.*"
29 changes: 16 additions & 13 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
# rust-smaz

[![Build Status](https://travis-ci.org/silentsokolov/rust-smaz.svg?branch=master)](https://travis-ci.org/silentsokolov/rust-smaz)
[![Crate](https://img.shields.io/crates/v/smaz.svg)](https://crates.io/crates/smaz)
[![Docs](https://docs.rs/smaz/badge.svg)](https://docs.rs/smaz)

rust-smaz is a pure Rust implementation of smaz, an algorithm for compressing very short strings. See the original [C implementation of smaz by antirez](http://github.com/antirez/smaz) for information on smaz and the algorithm itself.


Expand All @@ -12,22 +16,21 @@ Add this to your `Cargo.toml`:
smaz = "0.1.0"
```

## Quick start

## F.A.Q.

- Why HashMap?
```rust
extern crate smaz;

Benchmark match statement and HashMap:

```
$ cargo bench
use smaz::{compress,decompress};

Finished release [optimized] target(s) in 0.04s
Running target/release/deps/smaz-07673d33e2751e17
fn main() {
let s = "string";

running 2 tests
test tests::lookup_bench ... bench: 22 ns/iter (+/- 7)
test tests::map_bench ... bench: 80 ns/iter (+/- 13)
let compressed = compress(&s.as_bytes());
println!("compress bytes: {:?}", &compressed);

test result: ok. 0 passed; 0 failed; 0 ignored; 2 measured; 0 filtered out
let decompressed = decompress(&compressed).unwrap();
let origin = std::str::from_utf8(&decompressed).unwrap();
assert_eq!(s, origin);
}
```
103 changes: 96 additions & 7 deletions src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,58 @@
//! This crate implements the smaz algorithm for compressing very short strings.
//!
//! Smaz is not good for compressing general purpose data, but can compress
//! text by 40-50% in the average case (works better with English), and is able to
//! perform a bit of compression for HTML and urls as well. The important point is
//! that Smaz is able to compress even strings of two or three bytes!
//!
//! See the original [library by antirez](http://github.com/antirez/smaz) for information on smaz and the algorithm itself.
//!
//!
//! # Quick Start
//!
//! ```
//! extern crate smaz;
//!
//! use smaz::{compress,decompress};
//!
//! fn main() {
//! let s = "my long string";
//!
//! let compressed = compress(&s.as_bytes());
//! println!("bytes: {:?}", &compressed);
//!
//! let decompressed = decompress(&compressed);
//! if let Ok(v) = decompressed {
//! println!("bytes: {:?}", &v);
//! }
//! }
//! ```
//!
//!
//! ## Compression examples
//!
//! - `This is a small string` compressed by 50%
//! - `foobar` compressed by 34%
//! - `the end` compressed by 58%
//! - `not-a-g00d-Exampl333` *enlarged* by 15%
//! - `Smaz is a simple compression library` compressed by 39%
//! - `Nothing is more difficult, and therefore more precious, than to be able to decide` compressed by 49%
//! - `this is an example of what works very well with smaz` compressed by 49%
//! - `1000 numbers 2000 will 10 20 30 compress very little` compressed by 10%
//! - `and now a few italian sentences:` compressed by 41%
//! - `Nel mezzo del cammin di nostra vita, mi ritrovai in una selva oscura` compressed by 33%
//! - `Mi illumino di immenso` compressed by 37%
//! - `L'autore di questa libreria vive in Sicilia` compressed by 28%
//! - `try it against urls` compressed by 37%
//! - `http://google.com` compressed by 59%
//! - `http://programming.reddit.com` compressed by 52%
#![deny(
missing_copy_implementations,
missing_debug_implementations,
missing_docs
)]

#[macro_use]
extern crate lazy_static;

Expand All @@ -7,6 +62,7 @@ use std::fmt;
use std::result;
use std::str;

/// Compression codebook, used for compression
pub static CODEBOOK: [&str; 254] = [
" ", "the", "e", "t", "a", "of", "o", "and", "i", "n", "s", "e ", "r", " th", " t", "in", "he",
"th", "h", "he ", "to", "\r\n", "l", "s ", "d", " a", "an", "er", "c", " o", "d ", "on", " of",
Expand Down Expand Up @@ -38,7 +94,10 @@ lazy_static! {
};
}

#[derive(Debug, Clone)]
/// The error type for the decompress operation.
///
/// Often this error occurs due to invalid data.
#[derive(Debug, Clone, Copy)]
pub struct DecompressError;

impl fmt::Display for DecompressError {
Expand All @@ -53,6 +112,7 @@ impl Error for DecompressError {
}
}

/// A specialized Result type for the decompress operation.
pub type Result<T> = result::Result<T, DecompressError>;

fn flush_verbatim(verbatim: &[u8]) -> Vec<u8> {
Expand All @@ -69,8 +129,19 @@ fn flush_verbatim(verbatim: &[u8]) -> Vec<u8> {
chunk
}

/// Returns compressed data as a vector of bytes.
///
/// # Examples
///
/// ```
/// use smaz::compress;
///
/// let s = "string";
/// let compressed = compress(&s.as_bytes());
/// assert_eq!(vec![77, 114, 84], compressed);
/// ```
pub fn compress(input: &[u8]) -> Vec<u8> {
let mut out = Vec::with_capacity(input.len()/2);
let mut out: Vec<u8> = Vec::with_capacity(input.len() / 2);
let mut verbatim: Vec<u8> = Vec::new();
let mut input_index = 0;

Expand All @@ -81,7 +152,7 @@ pub fn compress(input: &[u8]) -> Vec<u8> {
max_len = input.len() - input_index
}

for i in (0..max_len + 1).rev() {
for i in (0..=max_len).rev() {
let code = CODEBOOK_MAP.get(&input[input_index..input_index + i]);
if let Some(v) = code {
if !verbatim.is_empty() {
Expand Down Expand Up @@ -112,8 +183,26 @@ pub fn compress(input: &[u8]) -> Vec<u8> {
out
}

/// Returns decompressed data as a vector of bytes.
///
/// # Errors
///
/// If the compressed data is invalid or encoded incorrectly, a
/// [`DecompressError`](struct.DecompressError.html) is returned.
///
/// # Examples
///
/// ```
/// use std::str;
/// use smaz::decompress;
///
/// let v = vec![77, 114, 84];
/// let decompressed = decompress(&v).unwrap();
/// let origin = str::from_utf8(&decompressed).unwrap();
/// assert_eq!("string", origin);
/// ```
pub fn decompress(input: &[u8]) -> Result<Vec<u8>> {
let mut out: Vec<u8> = Vec::with_capacity(input.len()*3);
let mut out: Vec<u8> = Vec::with_capacity(input.len() * 3);
let mut i: usize = 0;

while i < input.len() {
Expand All @@ -127,7 +216,7 @@ pub fn decompress(input: &[u8]) -> Result<Vec<u8>> {
if i + input[i + 1] as usize + 2 >= input.len() {
return Err(DecompressError);
}
for j in 0..input[i + 1] + 1 {
for j in 0..=input[i + 1] {
out.push(input[i + 2 + j as usize])
}
i += 3 + input[i + 1] as usize
Expand Down Expand Up @@ -179,8 +268,8 @@ mod tests {

if s.len() > 0 {
let level = 100i8 - ((100 * compressed.len()) / s.as_bytes().len()) as i8;
let word = if level > 0 {"compressed"} else {"enlarged"};
println!("{} {} by {}%", s, word, level.abs());
let word = if level > 0 { "compressed" } else { "enlarged" };
println!("\"{}\" {} by {}%", s, word, level.abs());
}
}
}
Expand Down

0 comments on commit e95ff61

Please sign in to comment.