1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
//! Implementation of the DBSCAN clustering algorithm.

use std::iter;
use geometry::Point2D;
use distance::DistancePoint2D;

pub fn dbscan(data: &Vec<Point2D<f64>>, eps: f64, minpts: usize) -> Vec<isize> {

    let mut s = ClusterDbscan::new(data, eps, minpts);
    s.compute()
}

struct ClusterDbscan<'a> {
    data: &'a Vec<Point2D<f64>>,
    eps: f64,
    minpts: usize,
    visited: Vec<bool>,
    cluster: Vec<isize>,
    c: isize
}

impl <'a> ClusterDbscan<'a> {
    pub fn new(data: &'a Vec<Point2D<f64>>, eps: f64, minpts: usize) -> ClusterDbscan {
        ClusterDbscan {
            data: data,
            eps: eps,
            minpts: minpts,
            visited: iter::repeat(false).take(data.len()).collect(),
            cluster: iter::repeat(-2).take(data.len()).collect(),
            c: -1
        }
    }

    fn visited(&self, pos: usize) -> bool {
        *self.visited.get(pos).unwrap()
    }

    fn visit(&mut self, pos: usize) {
        let visited = self.visited.get_mut(pos).unwrap();
        *visited = true;
    }

    fn neighbours(&self, pos: usize) -> Vec<usize> {
        let p = self.data.get(pos).unwrap();
        self.data.iter()
            .enumerate()
            .map(|(idx, q)| (idx, p.euclid(q)))
            .filter(|&(_idx, d)| d <= self.eps)
            .map(|(idx, _d)| idx)
            .collect()
    }

    fn noise(&mut self, pos: usize) {
        self.set_cluster(pos, -1);
    }

    pub fn compute(&mut self) -> Vec<isize> {

        for (idx, _p) in self.data.iter().enumerate() {
            if !self.visited(idx) {
                self.visit(idx);
                let neighbours = self.neighbours(idx);
                if neighbours.len() < self.minpts {
                    self.noise(idx);
                } else {
                    self.c += 1;
                    self.expand_cluster(idx, neighbours);
                }
            }
        }
        self.cluster.clone()
    }

    fn set_cluster(&mut self, pos: usize, c: isize) {
        let cl = self.cluster.get_mut(pos).unwrap();
        *cl = c;
    }

    fn get_cluster(&self, pos: usize) -> isize {
        *self.cluster.get(pos).unwrap()
    }

    fn expand_cluster(&mut self, ppos: usize, neighbours: Vec<usize>) {
        let c = self.c;
        self.set_cluster(ppos, c);
        
        let mut q = neighbours.clone();
        while q.len() > 0 {
            let pp = q.pop().unwrap();
            if !self.visited(pp) {
                self.visit(pp);
                let neighbours = self.neighbours(pp);
                if neighbours.len() >= self.minpts {
                    for n in neighbours {
                        q.push(n);
                    }
                }
            }
            if self.get_cluster(pp) == -2 {
                self.set_cluster(pp, c);
            }
        }
    }
}


#[cfg(test)]
mod tests {
    use super::*;
    use geometry::Point2D;

    #[test]
    fn test_dbscan() {

        let data = vec![
            Point2D::new(1.0, 2.0),
            Point2D::new(5.0, 3.0),
            Point2D::new(2.0, 4.0),
            Point2D::new(3.0, 1.0),
            Point2D::new(2.0, 2.0),
            Point2D::new(4.0, 3.0),
            Point2D::new(1.0, 2.0),
            Point2D::new(2.0, 5.0),
            Point2D::new(14.0, 13.0),
            Point2D::new(11.0, 12.0),
            Point2D::new(12.0, 15.0),
            Point2D::new(32.0, 25.0),
        ];

        let r = dbscan(&data, 5.0, 3);
        assert_eq!(r, vec![0,0,0,0,0,0,0,0,1,1,1,-1]);
    }
}