Concurrency!
We haven't really done much that shows Rust shining, yet. So let's fix that.
In lib.rs, let's build:
#![allow(unused)] fn main() { use rayon::prelude::*; #[no_mangle] pub extern "C" fn is_prime_slow(n: i32) -> bool { if n < 2 { return false; } for i in 2..n/2+ 1 { if n % i == 0 { return false; } } true } #[no_mangle] pub extern "C" fn count_primes(slice: *const i32, len: usize) -> usize { // Safety, as much as we can assert!(!slice.is_null()); assert!(len > 0); // Get the slice let slice = unsafe { std::slice::from_raw_parts(slice, len) }; slice.par_iter().filter(|n| is_prime_slow(**n)).count() } }
Then add Rayon to your dependencies with `cargo add rayon`. We've made a deliberately SLOW prime number detector, and then a function that uses Rayon to automatically parallelize it across all of your CPU cores and count the results.
Now for the C. Let's start by making sure it works:
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <stdbool.h>
#include <time.h>
// Number of random integers to generate and test.
#define ARR_SIZE 1000
// Link to the Rust versions. These declarations must mirror the Rust signatures:
//   is_prime_slow(n: i32) -> bool
//   count_primes(slice: *const i32, len: usize) -> usize
// Rust's usize corresponds to size_t, and *const i32 to a pointer-to-const.
bool is_prime_slow(int32_t n);
size_t count_primes(const int32_t *arr, size_t len);
// Smoke test: fill an array with random numbers and let the Rust library
// count how many of them are prime.
int main(void) {
    time_t start, end;

    // Allocate memory for the array
    printf("Allocating memory for the array...\n");
    int32_t *arr = malloc(ARR_SIZE * sizeof *arr);
    if (arr == NULL) {
        // Without this check a failed allocation would be dereferenced below.
        fprintf(stderr, "Failed to allocate memory for the array\n");
        return EXIT_FAILURE;
    }

    // Populate the array with random numbers
    printf("Populating the array with random numbers...\n");
    for (int32_t i = 0; i < ARR_SIZE; i++) {
        arr[i] = rand();
    }

    // Count the primes in the array with Rust (cheating a little: it uses Rayon)
    start = time(NULL);
    int64_t sum_rust = count_primes(arr, ARR_SIZE);
    end = time(NULL);
    // Cast explicitly: int64_t and time_t are not guaranteed to match %ld.
    printf("Count (from Rust, Parallel): %lld. Seconds: %ld\n",
           (long long)sum_rust, (long)(end - start));

    // Free the allocated memory
    free(arr);
    return 0;
}
And here's a long version that tests all of it:
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <stdbool.h>
#include <time.h>
// Number of random integers to generate and test.
#define ARR_SIZE 1000
// Link to the Rust versions. These declarations must mirror the Rust signatures:
//   is_prime_slow(n: i32) -> bool
//   count_primes(slice: *const i32, len: usize) -> usize
// Rust's usize corresponds to size_t, and *const i32 to a pointer-to-const.
bool is_prime_slow(int32_t n);
size_t count_primes(const int32_t *arr, size_t len);
// A native C version of the same thing.
//
// Deliberately slow primality test: returns false for n < 2, otherwise
// trial-divides n by every integer from 2 up to and including n / 2.
// The bound is inclusive so that n / 2 itself is tried; with an exclusive
// bound 4 would be reported as prime, because its only candidate divisor
// (2 == 4 / 2) would be skipped.
bool is_prime_slow_c(int32_t n) {
    if (n < 2) return false;
    for (int32_t i = 2; i <= n / 2; i++) {
        if (n % i == 0) return false;
    }
    return true;
}
// Benchmark: count the primes in the same random array three ways and
// print the count and the wall-clock seconds for each:
//   1. one call into Rust, which parallelizes with Rayon;
//   2. a sequential C loop calling the Rust is_prime_slow;
//   3. a sequential C loop calling the native is_prime_slow_c.
int main(void) {
    time_t start, end;

    // Allocate memory for the array
    printf("Allocating memory for the array...\n");
    int32_t *arr = malloc(ARR_SIZE * sizeof *arr);
    if (arr == NULL) {
        // Without this check a failed allocation would be dereferenced below.
        fprintf(stderr, "Failed to allocate memory for the array\n");
        return EXIT_FAILURE;
    }

    // Populate the array with random numbers
    printf("Populating the array with random numbers...\n");
    for (int32_t i = 0; i < ARR_SIZE; i++) {
        arr[i] = rand();
    }

    // Count the primes in the array with Rust (cheating a little: it uses Rayon)
    start = time(NULL);
    int64_t sum_rust = count_primes(arr, ARR_SIZE);
    end = time(NULL);
    // Cast explicitly: int64_t and time_t are not guaranteed to match %ld.
    printf("Count (from Rust, Parallel): %lld. Seconds: %ld\n",
           (long long)sum_rust, (long)(end - start));

    // Count the primes in the array with a C loop that calls the Rust function
    printf("Counting the primes in the array with a C loop calling Rust...\n");
    start = time(NULL);
    int64_t sum = 0;
    for (int32_t i = 0; i < ARR_SIZE; i++) {
        if (is_prime_slow(arr[i])) sum++;
    }
    end = time(NULL);
    printf("Count (from C): %lld. Seconds: %ld\n",
           (long long)sum, (long)(end - start));

    // Count the primes in the array with a C loop using native code
    printf("Counting the primes in the array with a C loop (native)...\n");
    start = time(NULL);
    sum = 0;
    for (int32_t i = 0; i < ARR_SIZE; i++) {
        if (is_prime_slow_c(arr[i])) sum++;
    }
    end = time(NULL);
    printf("Count (from native C): %lld. Seconds: %ld\n",
           (long long)sum, (long)(end - start));

    // Free the allocated memory
    free(arr);
    return 0;
}
On my system at the office:
- The parallel Rust version completes in 4 seconds.
- A C loop calling the Rust function completes in 33 seconds.
- A C loop calling the native C function completes in 33 seconds.
So we've learned:
- Rayon makes it easy to get a big win if you have multiple CPUs - and Rust makes concurrency much less scary.
- There really isn't a performance penalty for calling into Rust.