1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
// Copyright 2017, CZ.NIC z.s.p.o. (http://www.nic.cz/)
//
// This file is part of the pakon system.
//
// Pakon is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
//  (at your option) any later version.
//
// Pakon is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with Pakon.  If not, see <http://www.gnu.org/licenses/>.

//! Data structures related to representing the flows.

use std::borrow::Cow;
use std::collections::BTreeSet;
use std::time::{Duration, SystemTime, UNIX_EPOCH};

use serde::{Deserialize, Deserializer};

/// Amount of data, measured in bytes.
///
/// This is a newtype around `u64`, so a byte count can't be accidentally mixed up with some other
/// plain integer (for example with a `Count` of packets). The wrapped number is public and
/// available as `.0`.
#[derive(Add, AddAssign, Clone, Copy, Debug, Default, Deserialize, Eq, From, Hash, Into, Ord,
         PartialEq, PartialOrd, Serialize, Sub, SubAssign)]
pub struct Bytes(pub u64);

/// Number of something (usually packets).
///
/// This counts discrete items. It is a newtype around `u64`, so it can't be accidentally mixed up
/// with some other plain integer (for example with an amount of `Bytes`). The wrapped number is
/// public and available as `.0`.
#[derive(Add, AddAssign, Clone, Copy, Debug, Default, Deserialize, Eq, From, Hash, Into, Ord,
         PartialEq, PartialOrd, Serialize, Sub, SubAssign)]
pub struct Count(pub u64);

/// A transfer speed.
///
/// This specifies the speed of transfer in bytes per second, rounded to whole bytes per second
/// (the wrapped value is a `u64`, available as `.0`).
///
/// Use `Speed::compute` to derive a speed from an amount of data and a duration.
#[derive(Add, AddAssign, Clone, Copy, Debug, Default, Deserialize, Eq, From, Hash, Into, Ord,
         PartialEq, PartialOrd, Serialize)]
pub struct Speed(pub u64);

impl Speed {
    /// Derives a transfer speed from an amount of data and the time the transfer took.
    ///
    /// The result is `bytes` / `time` in bytes per second, rounded to the nearest whole number.
    /// The caller doesn't have to deal with unit and type conversions or with rounding.
    ///
    /// # Note
    ///
    /// Any `time` shorter than one second is treated as exactly one second. This guards against:
    ///
    /// * Division by zero.
    /// * Unrealistic spikes for single-packet or very short flows (several packets may land in a
    ///   buffer almost at once without the network actually being that fast).
    ///
    /// # Parameters
    ///
    /// * `bytes`: The amount of data transferred.
    /// * `time`: How long the transfer took.
    pub fn compute(bytes: Bytes, time: Duration) -> Self {
        // Work in floating point so the final rounding is to-nearest, not truncation.
        let whole_seconds = time.as_secs() as f64;
        let fractional_seconds = f64::from(time.subsec_nanos()) / 1_000_000_000.0;
        // Nothing is considered shorter than one second; this also rules out a zero divisor.
        let elapsed = (whole_seconds + fractional_seconds).max(1.0);
        let per_second = bytes.0 as f64 / elapsed;
        // As the divisor is at least 1, the result never exceeds `bytes`, so it fits into u64.
        Speed(per_second.round() as u64)
    }
}

/// The direction in which the flow was initiated.
///
/// Note that packets usually flow in both directions once the flow is started, so this says
/// nothing about where the bulk of the data goes.
///
/// The serde representation uses `SCREAMING_SNAKE_CASE` variant names, that is `IN` and `OUT`.
#[derive(Clone, Copy, Debug, Deserialize, Eq, Hash, Ord, PartialEq, PartialOrd, Serialize)]
#[serde(rename_all = "SCREAMING_SNAKE_CASE")]
pub enum Direction {
    /// Inbound flow.
    In,
    /// Outbound flow.
    Out,
}

/// A DNS name.
///
/// It is kept in lower-case, for two reasons. One, some resolvers do case randomization
/// (`eXAmplE.ORg`), which is probably some kind of security thing and fine by the RFCs, but ugly
/// to look at. Another, DNS names are case insensitive and this helps with comparing them.
///
/// The lower-casing is done by `Name::new` (and therefore by deserialization, which goes through
/// it). The inner string is public, so constructing `Name(..)` directly bypasses the
/// normalization.
///
/// # Note
///
/// DNS names *can* contain non-valid utf8. But there's no way these get into this application,
/// so we simply use a string, not something like `OsString`. Also, there are things like dots in
/// the middle of label, which is also legal, etc. We simply don't care about these right now (but
/// having the type allows us to care in the future).
#[derive(Clone, Debug, Eq, Hash, PartialEq, Ord, PartialOrd, Serialize)]
pub struct Name(pub String);

impl Name {
    /// Builds a DNS name from anything that can be viewed as a string slice.
    ///
    /// The input is converted to lower-case, so names differing only in letter case end up equal.
    pub fn new<S: AsRef<str>>(s: S) -> Self {
        let lowered = str::to_lowercase(s.as_ref());
        Name(lowered)
    }
}

/// An alias for a set of domain names.
///
/// It is backed by a `BTreeSet`, so each name is held at most once and iteration goes in sorted
/// order.
pub type Names = BTreeSet<Name>;

impl<'de> Deserialize<'de> for Name {
    fn deserialize<D: Deserializer<'de>>(deserializer: D) -> Result<Self, D::Error> {
        let orig: Cow<str> = Deserialize::deserialize(deserializer)?;
        Ok(Self::new(orig))
    }
}

/// The protocol on top of IP.
///
/// The serde representation uses `SCREAMING_SNAKE_CASE` variant names (`TCP`, `UDP`), except for
/// `Other`, which is explicitly renamed to `?`.
#[derive(Clone, Copy, Debug, Deserialize, Eq, Hash, Ord, PartialEq, PartialOrd, Serialize)]
#[serde(rename_all = "SCREAMING_SNAKE_CASE")]
pub enum IpProto {
    /// The TCP protocol.
    Tcp,
    /// The UDP protocol.
    Udp,
    /// Some other protocol.
    ///
    /// The raw value of that protocol can be seen in the `IpProtoRaw` column. Also note that the
    /// enum might be expanded in the future, so some values that are unknown now can become known.
    #[serde(rename = "?")]
    Other,
}

/// Builds the `SystemTime` that lies `ms` milliseconds after the Unix epoch.
///
/// The guts daemon reports its timestamps in this form.
pub fn system_time_from_ms(ms: u64) -> SystemTime {
    let since_epoch = Duration::from_millis(ms);
    UNIX_EPOCH + since_epoch
}

/// A helper function to deserialize the `SystemTime` from number of milliseconts.
///
/// This can be used with serde's `deserialize_with` attribute to decode on-wire 64bit number of
/// seconds since the epoch as a system time.
pub fn system_time_deserialize<'de, D: Deserializer<'de>>(d: D) -> Result<SystemTime, D::Error> {
    let ms: u64 = Deserialize::deserialize(d)?;
    Ok(system_time_from_ms(ms))
}

/// Converts a system time to the number of whole milliseconds since the Unix epoch.
///
/// This is used mostly to export (serialize) the data when sending it out. Any sub-millisecond
/// part of the time is dropped (rounded down).
///
/// Times too far in the future to be expressed as a `u64` number of milliseconds produce
/// `u64::MAX` instead of overflowing.
///
/// # Panics
///
/// If `t` lies before the Unix epoch, as such a time has no unsigned representation.
pub fn system_time_to_ms(t: SystemTime) -> u64 {
    let dur = t
        .duration_since(UNIX_EPOCH)
        .expect("system time lies before the Unix epoch");
    let subsec_ms = u64::from(dur.subsec_nanos()) / 1_000_000;
    // Saturate rather than multiply plainly: a plain `*` would panic in debug builds and
    // silently wrap around in release builds for very distant times.
    dur.as_secs().saturating_mul(1_000).saturating_add(subsec_ms)
}

/// The raw number of the protocol carried on top of IP.
///
/// This is the numeric counterpart of `IpProto`; it stays available even when the protocol is
/// not one of the named ones there. The number is a single byte (`u8`).
#[derive(Clone, Deserialize, Debug, Eq, Ord, PartialEq, PartialOrd, Serialize)]
pub struct IpProtoRaw(pub u8);

/// The port number.
///
/// The value is already in the machine's (host) endianness, so it can be used directly as a
/// number without any further byte swapping.
#[derive(Clone, Deserialize, Debug, Eq, Ord, PartialEq, PartialOrd, Serialize)]
pub struct Port(pub u16);

/// The name assigned to a MAC address.
///
/// Some names can be configured for local computers. As the computers would be distinguished by
/// their MAC addresses, this is where the name goes.
///
/// This is a newtype around `String`, to keep such names apart from other strings (for example
/// from DNS names, which have their own `Name` type).
#[derive(Clone, Deserialize, Debug, Eq, Ord, PartialEq, PartialOrd, Serialize)]
pub struct MacName(pub String);

#[cfg(test)]
mod tests {
    use std::time::UNIX_EPOCH;

    use super::*;

    /// Checks that milliseconds survive a round trip through `SystemTime` unchanged and that the
    /// produced time sits at the expected distance from the epoch.
    #[test]
    fn system_time_ms() {
        for ms in 1..1_000_000 {
            let time = system_time_from_ms(ms);
            assert_eq!(ms, system_time_to_ms(time));
            let since_epoch = time.duration_since(UNIX_EPOCH).unwrap();
            assert_eq!(Duration::from_millis(ms), since_epoch);
        }
    }

    /// Checks the transfer speed computation, including rounding and sub-second durations.
    #[test]
    fn speed_compute() {
        // Each case is (bytes transferred, duration in seconds, expected bytes per second).
        let cases: [(u64, f64, u64); 7] = [
            (100, 1.0, 100),
            (0, 100.0, 0),
            (100, 2.5, 40),
            (3, 2.0, 2),
            (3, 2.1, 1),
            // Anything shorter than a second counts as a full second.
            (100, 0.5, 100),
            (100, 0.0, 100),
        ];
        for &(bytes, seconds, expected) in cases.iter() {
            let elapsed = Duration::from_millis((seconds * 1000.0) as u64);
            let computed = Speed::compute(Bytes(bytes), elapsed);
            assert_eq!(Speed(expected), computed);
        }
    }
}