Initial commit :)

This commit is contained in:
Nathan Lamy 2025-08-21 13:49:30 +02:00
commit 36e10cf8d2
12 changed files with 1211 additions and 0 deletions

4
.gitignore vendored Normal file
View file

@ -0,0 +1,4 @@
/target
/prod
config.toml
Cargo.lock

20
Cargo.toml Normal file
View file

@ -0,0 +1,20 @@
[package]
name = "worker"
version = "0.1.0"
edition = "2024"
[dependencies]
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"
reqwest = { version = "0.12", features = ["blocking", "json"] }
scraper = "0.18"
tokio = { version = "1", features = ["full"] }
chrono = { version = "0.4", features = ["serde", "clock"] }
url = "2.5.4"
redis = { version = "0.32.3", features = ["tokio-comp"] }
pdf-extract = "0.9.0"
tempfile = "3.20.0"
config = "0.15.13"
tokio-cron-scheduler = "0.14.0"
sha2 = "0.10.9"
cron = "0.15.0"

92
src/api.rs Normal file
View file

@ -0,0 +1,92 @@
use chrono::NaiveDateTime;
use reqwest::{self, Client};
use serde_json::json;
use crate::configuration::{Settings, get_config};
/// Sends a single colle to the backend API.
///
/// The body is `{ "colle": <colle>, "className": <class_name> }` and is POSTed to
/// `<api url>/colles`. The API URL and the bearer token are read from `config` through
/// `get_config(config, "api")` and `get_config(config, "token")`.
///
/// # Errors
/// Returns an error when the request cannot be sent, when the body of a failed response
/// cannot be read, or when the server answers with a non-success status (the status and the
/// response body are logged to stderr first).
pub async fn post_colle(
    colle: &Colle,
    class_name: &str,
    config: &Settings,
) -> Result<(), Box<dyn std::error::Error>> {
    let api_url = get_config(config, "api");
    let api_token = get_config(config, "token");
    let url = format!("{api_url}/colles");
    let payload = json!({
        "colle": colle,
        "className": class_name,
    });

    let response = Client::new()
        .post(&url)
        // `.json()` serializes the payload and sets `Content-Type: application/json` itself,
        // so adding the same header again through `.header()` is redundant.
        .json(&payload)
        .header("Accept", "application/json")
        // Bearer token for authentication
        .bearer_auth(&api_token)
        .send()
        .await?;

    // Read the status before the response is consumed by `.text()`.
    let status = response.status();
    if !status.is_success() {
        eprintln!("Failed to post colle: HTTP {}", status);
        eprintln!("Response: {:?}", response.text().await?);
        return Err("Failed to post colle".into());
    }
    Ok(())
}
/// Sends a batch of upcoming colles to the backend API.
///
/// The body is `{ "colles": [...], "className": <class_name> }` and is POSTed to
/// `<api url>/colles`. The API URL and the bearer token are read from `config` through
/// `get_config(config, "api")` and `get_config(config, "token")`.
///
/// # Errors
/// Returns an error when the request cannot be sent, when the body of a failed response
/// cannot be read, or when the server answers with a non-success status (the status and the
/// response body are logged to stderr first).
pub async fn post_upcoming_colles(
    colles: &[Colle],
    class_name: &str,
    config: &Settings,
) -> Result<(), Box<dyn std::error::Error>> {
    let api_url = get_config(config, "api");
    let api_token = get_config(config, "token");
    let url = format!("{api_url}/colles");
    let payload = json!({
        "colles": colles,
        "className": class_name,
    });

    let response = Client::new()
        .post(&url)
        // `.json()` serializes the payload and sets `Content-Type: application/json` itself,
        // so adding the same header again through `.header()` is redundant.
        .json(&payload)
        .header("Accept", "application/json")
        // Bearer token for authentication
        .bearer_auth(&api_token)
        .send()
        .await?;

    // Read the status before the response is consumed by `.text()`.
    let status = response.status();
    if !status.is_success() {
        eprintln!("Failed to post upcoming colles: HTTP {}", status);
        eprintln!("Response: {:?}", response.text().await?);
        return Err("Failed to post upcoming colles".into());
    }
    Ok(())
}
/// A colle as it is serialized and sent to the backend API.
///
/// Built either from a colle page (identifiers, grade, content, comment and attachments
/// filled in) or from the upcoming-colles PDF (all optional fields left as `None`).
#[derive(Debug, serde::Serialize)]
pub struct Colle {
/// Date and time of the colle (naive, no time zone attached).
pub date: NaiveDateTime,
/// Name of the examiner.
pub examiner: String,
/// Room in which the colle takes place.
pub room: String,
/// Subject of the colle.
pub subject: String,
/// Name of the student.
pub student: String,
/// BJColle identifier of the colle, when known.
pub bjid: Option<String>,
/// BJColle secret associated with `bjid`, when known.
pub bjsecret: Option<String>,
/// Grade given to the student, when one is available.
pub grade: Option<f32>,
/// Content of the colle, when available.
pub content: Option<String>,
/// Examiner's comment, when available.
pub comment: Option<String>,
/// Files attached to the colle, when available.
pub attachments: Option<Vec<ColleAttachment>>, // (file_url, file_name)
}
/// A file attached to a colle, serialized as part of [`Colle`].
#[derive(Debug, serde::Serialize)]
pub struct ColleAttachment {
/// Link to the attached file.
pub url: String,
/// Display name of the attached file.
pub name: String,
}

77
src/configuration.rs Normal file
View file

@ -0,0 +1,77 @@
use config::{Config, ConfigError, File};
use serde::Deserialize;
use std::collections::HashMap;
/// Credentials of one account, deserialized from the `accounts` section of the configuration.
// NOTE(review): `Debug` is derived, so formatting this struct with `{:?}` prints the password.
#[derive(Debug, Deserialize)]
pub struct Account {
/// Login name of the account.
pub username: String,
/// Password of the account.
pub password: String,
}
/// Application settings deserialized from the configuration file.
///
/// Every section is optional; the accessor functions of this module fall back to empty
/// values when a section or a key is missing.
#[derive(Debug, Deserialize)]
pub struct Settings {
/// Redis section; `get_config` reads its `url` key.
redis: Option<HashMap<String, String>>,
/// API section; `get_config` reads its `url` and `token` keys.
api: Option<HashMap<String, String>>,
/// Accounts keyed by class name.
accounts: Option<HashMap<String, Account>>,
/// Lists of cron expressions keyed by job family name.
cron: Option<HashMap<String, Vec<String>>>,
}
/// Loads the application settings from the configuration source named `config`.
///
/// # Errors
/// Returns a `ConfigError` when the source cannot be built or when its content does not
/// deserialize into [`Settings`].
pub fn load_config() -> Result<Settings, ConfigError> {
    let builder = Config::builder().add_source(File::with_name("config"));
    let raw = builder.build()?;
    raw.try_deserialize::<Settings>()
}
/// Returns a single configuration value selected by `section`.
///
/// * `"redis"` - the `url` key of the redis section,
/// * `"api"` - the `url` key of the api section,
/// * `"token"` - the `token` key of the api section.
///
/// A missing section, a missing key or an unknown `section` name yields an empty string.
/// A missing redis section is additionally reported on stdout.
pub fn get_config(config: &Settings, section: &str) -> String {
    // Both "api" and "token" read from the same map, only the key differs.
    let api_value = |key: &str| {
        config
            .api
            .as_ref()
            .and_then(|api| api.get(key))
            .cloned()
            .unwrap_or_default()
    };

    match section {
        "redis" => match config.redis.as_ref() {
            Some(redis) => redis.get("url").cloned().unwrap_or_default(),
            None => {
                println!("Redis configuration not found.");
                String::new()
            }
        },
        "api" => api_value("url"),
        "token" => api_value("token"),
        _ => String::new(),
    }
}
/// Returns the cron expressions configured under `section`.
///
/// An absent cron section or an unknown `section` name yields an empty vector.
pub fn get_cron(config: &Settings, section: &str) -> Vec<String> {
    config
        .cron
        .as_ref()
        .and_then(|schedules| schedules.get(section))
        .cloned()
        .unwrap_or_default()
}
/// Looks up the account configured for `class_name`.
///
/// Returns `None` when there is no accounts section or no entry for that class.
pub fn get_account_by_class<'a>(config: &'a Settings, class_name: &str) -> Option<&'a Account> {
    let accounts = config.accounts.as_ref()?;
    accounts.get(class_name)
}
/// Lists the class names that have an account configured.
///
/// The names are the keys of the accounts map, so their order is unspecified. An absent
/// accounts section yields an empty vector.
pub fn list_classes(config: &Settings) -> Vec<String> {
    match &config.accounts {
        Some(accounts) => accounts.keys().cloned().collect(),
        None => Vec::new(),
    }
}

303
src/main.rs Normal file
View file

@ -0,0 +1,303 @@
use std::{str::FromStr, sync::Arc};
use crate::{
api::{post_colle, post_upcoming_colles},
configuration::{get_config, get_cron, list_classes, load_config},
parser::{authenticate, fetch_class_colles, fetch_colle, fetch_upcoming_colles},
};
use chrono::{DateTime, Utc};
use cron::Schedule;
use redis::AsyncCommands;
use redis::aio::MultiplexedConnection;
use redis::{Client, Connection, RedisError, RedisResult, TypedCommands};
use serde::Serialize;
use serde_json::Value;
use tokio::sync::Mutex;
use tokio_cron_scheduler::{Job, JobScheduler};
mod api;
mod configuration;
mod parser;
/// Worker entry point.
///
/// Loads the configuration, opens the Redis connections, subscribes to the `jobs_queue`
/// pubsub channel, registers the cron jobs and then processes incoming job messages forever.
///
/// # Errors
/// Returns a `RedisError` when the configuration cannot be loaded, when a Redis connection
/// or the subscription fails, or when receiving a message from the channel fails.
#[tokio::main]
async fn main() -> redis::RedisResult<()> {
    // Load configuration
    let config = match load_config() {
        Ok(config) => config,
        Err(err) => {
            eprintln!("Failed to load configuration: {}", err);
            return Err(redis::RedisError::from((
                redis::ErrorKind::InvalidClientConfig,
                "Configuration loading failed",
            )));
        }
    };
    println!("Configuration loaded successfully");

    // Connect to Redis
    let redis_url = get_config(&config, "redis");
    let client = Client::open(redis_url)?;

    // Create separate connections for pubsub and job processing
    let mut pubsub_con = client.get_connection()?;
    let mut job_con = client.get_connection()?;
    // Propagate the failure like the other connections instead of panicking.
    let scheduler_con = client.get_multiplexed_async_connection().await?;

    let mut pubsub = pubsub_con.as_pubsub();
    pubsub.subscribe("jobs_queue")?;
    println!("Connected to Redis pubsub channel 'jobs_queue'");

    // Schedule cron jobs
    schedule_cron_jobs(&config, scheduler_con).await;
    println!("Cron jobs scheduled successfully");

    loop {
        let msg = pubsub.get_message()?;
        let payload: String = msg.get_payload()?;

        // A payload that is not valid JSON cannot be a job: report it and keep listening.
        let event = match serde_json::from_str::<Value>(&payload) {
            Ok(event) => event,
            Err(err) => {
                eprintln!("Ignoring malformed job payload {:?}: {}", payload, err);
                continue;
            }
        };

        match process_job(&event, &mut job_con, &config).await {
            Ok(_) => {
                println!("Job processed successfully: {:?}", event);
            }
            Err(e) => {
                eprintln!("Error processing job: {:?}, Error: {}", event, e);
            }
        }
    }
}
/// Job description that the cron jobs serialize to JSON and publish on the `jobs_queue`
/// channel.
#[derive(Serialize)]
struct JobMessage {
/// Numeric job type. The scheduler emits `1` for class colles and `2` for upcoming colles.
r#type: u8,
/// Name of the class the job applies to.
class_name: String,
}
/// Registers the periodic jobs of every configured class and starts the scheduler.
///
/// For each class, one job per `upcoming_colles` cron expression (job type 2) and one job
/// per `class_colles` cron expression (job type 1) is added. All jobs share `con` to publish
/// their message.
///
/// # Panics
/// Panics when the scheduler cannot be created or started, or when a job cannot be added.
async fn schedule_cron_jobs(config: &configuration::Settings, con: MultiplexedConnection) {
    let scheduler = JobScheduler::new().await.unwrap();
    let shared_con = Arc::new(Mutex::new(con));
    let class_exprs = get_cron(config, "class_colles");
    let upcoming_exprs = get_cron(config, "upcoming_colles");

    for class_name in list_classes(config) {
        // Upcoming colles (job type 2)
        let upcoming_message = JobMessage {
            r#type: 2,
            class_name: class_name.clone(),
        };
        for expr in upcoming_exprs.iter() {
            let job = start_job(&upcoming_message, expr, Arc::clone(&shared_con)).await;
            scheduler.add(job).await.unwrap();
        }
        println!("Scheduled job for upcoming colles: {}", class_name);

        // Class colles (job type 1)
        let class_message = JobMessage {
            r#type: 1,
            class_name: class_name.clone(),
        };
        for expr in class_exprs.iter() {
            let job = start_job(&class_message, expr, Arc::clone(&shared_con)).await;
            scheduler.add(job).await.unwrap();
        }
        println!("Scheduled job for class colles: {}", class_name);
    }

    scheduler.start().await.unwrap();
}
/// Builds a cron job that publishes `message` (as JSON) on the `jobs_queue` channel each
/// time the `cron` expression fires.
///
/// A failed publish is logged to stderr and does not stop the job.
///
/// # Panics
/// Panics when `message` cannot be serialized or when the job cannot be created from `cron`.
async fn start_job(
    message: &JobMessage,
    cron: &str,
    con: Arc<Mutex<MultiplexedConnection>>,
) -> Job {
    let payload = serde_json::to_string(message).unwrap();

    // Register cron job
    let scheduled = Job::new_async(cron, move |_uuid, _l| {
        // Each run needs its own copy of the payload and its own handle on the connection.
        let payload = payload.clone();
        let con = Arc::clone(&con);
        Box::pin(async move {
            let mut conn = con.lock().await;
            let outcome: Result<(), RedisError> = conn.publish("jobs_queue", payload).await;
            if let Err(err) = outcome {
                eprintln!("Failed to publish job: {}", err);
            }
        })
    });
    scheduled.unwrap()
}
/// Processes one job received from the `jobs_queue` channel.
///
/// The job is a JSON object with a `class_name` and a numeric `type`:
/// * `0` - fetch ONE colle (`colle_id`, `colle_secret`) and post it to the API;
/// * `1` - list the colles of the class (optionally for `date`) and publish one type-0 job
///   per colle, then record the sync timestamps in Redis;
/// * `2` - fetch the upcoming colles of the class and post them to the API.
///
/// Types 1 and 2 authenticate first and use a content hash stored in Redis to skip unchanged
/// pages. The hash is persisted only after the work it guards has succeeded, so that a failed
/// publish or post is retried on the next run instead of being skipped as "unchanged".
///
/// # Errors
/// Returns a `RedisError` when the class name is missing, when authentication fails, when
/// posting to the API fails, or when a Redis command fails. Fetch failures are logged and
/// reported as `Ok(())`.
async fn process_job(
    job: &Value,
    con: &mut Connection,
    config: &configuration::Settings,
) -> RedisResult<()> {
    // Retrieve the class name from the job
    println!("Processing job: {:?}", job);
    let Some(class_name) = job["class_name"].as_str() else {
        eprintln!("Job does not contain a class name.");
        return Err(redis::RedisError::from((
            redis::ErrorKind::InvalidClientConfig,
            "Job missing class name",
        )));
    };

    /*
     * Fetch ONE colle (id and secret)
     */
    if job["type"] == 0 {
        let colle_id = job["colle_id"].as_str().unwrap_or("unknown");
        let colle_secret = job["colle_secret"].as_str().unwrap_or("unknown");
        // NOTE(review): this line writes the colle secret to the logs.
        println!("Fetched colle: ID={}, Secret={}", colle_id, colle_secret);

        match fetch_colle(colle_id, colle_secret).await {
            Ok(None) => {
                eprintln!("No colle found for ID: {}", colle_id);
            }
            Ok(Some(mut colle)) => {
                // The colle page may not name the examiner; fall back to the one from the job.
                if colle.examiner.is_empty() {
                    colle.examiner = job["examiner_name"]
                        .as_str()
                        .unwrap_or("Unknown")
                        .to_string();
                }
                if let Err(e) = post_colle(&colle, class_name, config).await {
                    eprintln!("Failed to post colle: {}", e);
                    return Err(redis::RedisError::from((
                        redis::ErrorKind::ResponseError,
                        "Failed to post colle",
                    )));
                }
            }
            Err(e) => {
                eprintln!("Failed to fetch colle {}: {}", colle_id, e);
            }
        }
        return Ok(());
    }

    // Authenticate
    let session = match authenticate(class_name, &mut *con, config).await {
        Ok(session) => session,
        Err(e) => {
            eprintln!("Failed to authenticate: {}", e);
            return Err(redis::RedisError::from((
                redis::ErrorKind::AuthenticationFailed,
                "Authentication failed",
            )));
        }
    };
    println!("Authenticated successfully: {}", session);

    /*
     * Fetch class colles (class name and optionally date)
     */
    if job["type"] == 1 {
        let last_hash_key = format!("class_colles_hash_{}", class_name);
        let last_hash = get_last_hash(con, &last_hash_key);

        // Fetch class colles
        match fetch_class_colles(&session, job["date"].as_str(), &last_hash).await {
            Ok((colles, hash)) => {
                // Add each colle to Redis queue pubsub
                for (colle_id, colle_secret, examiner_name) in colles {
                    let colle_job = serde_json::json!({
                        "type": 0,
                        "colle_id": colle_id,
                        "colle_secret": colle_secret,
                        "class_name": class_name,
                        "examiner_name": examiner_name,
                    });
                    let colle_job_str =
                        serde_json::to_string(&colle_job).unwrap_or_else(|_| "{}".to_string());
                    // Publish the colle job to the Redis channel
                    con.publish("jobs_queue", &colle_job_str)?;
                    println!("Published colle job: {}", &colle_job_str);
                }

                // Remember the page hash only once every colle job has been queued: storing
                // it earlier would hide the remaining colles if a publish failed.
                set_last_hash(con, &last_hash_key, &hash)?;

                // Set the last sync date in Redis
                let last_sync_key = format!("last_sync_{}", class_name);
                con.set(&last_sync_key, Utc::now().to_rfc3339().as_str())?;

                // Record the next scheduled run as the "healthy until" deadline.
                let class_colles_crons = get_cron(config, "class_colles");
                let healthy_until = next_run_any(&class_colles_crons).unwrap_or_default();
                let healthy_until_key = format!("healthy_until_{}", class_name);
                con.set(&healthy_until_key, healthy_until.to_rfc3339().as_str())?;
            }
            Err(e) => {
                eprintln!("Failed to fetch class colles: {}", e);
            }
        }
    /*
     * Fetch upcoming colles (class name)
     */
    } else if job["type"] == 2 {
        // Retrieve the last hash from Redis and fetch upcoming colles
        let last_hash_key = format!("upcoming_colles_hash_{}", class_name);
        let last_hash = get_last_hash(con, &last_hash_key);
        let (colles, hash) = fetch_upcoming_colles(&session, &last_hash).await;

        if colles.is_empty() {
            // Nothing to post, so the hash can be stored right away.
            set_last_hash(con, &last_hash_key, &hash)?;
            return Ok(());
        }

        if let Err(e) = post_upcoming_colles(&colles, class_name, config).await {
            eprintln!("Failed to post upcoming colles: {}", e);
            return Err(redis::RedisError::from((
                redis::ErrorKind::ResponseError,
                "Failed to post upcoming colles",
            )));
        }

        // Store the hash only after the API accepted the colles, so that a failed post is
        // retried on the next run instead of being skipped as unchanged.
        set_last_hash(con, &last_hash_key, &hash)?;
        println!("Posted upcoming colles successfully.");
    /*
     * Handle unknown job types
     * This is a catch-all for any job types that are not recognized.
     */
    } else {
        eprintln!("Unknown job type: {}", job["type"]);
    }

    Ok(())
}
/// Reads the hash stored under `key` in Redis.
///
/// Returns an empty string when the key does not exist or when the lookup fails (the failure
/// is logged to stderr).
fn get_last_hash(con: &mut Connection, key: &str) -> String {
    match con.get(key) {
        Ok(Some(hash)) => hash,
        // If the last hash is not found, use an empty string
        Ok(None) => String::new(),
        Err(e) => {
            eprintln!("Failed to get last hash: {}", e);
            String::new()
        }
    }
}
/// Stores `hash` under `key` in Redis.
///
/// # Errors
/// Returns the `RedisError` of the underlying `SET` command.
fn set_last_hash(con: &mut Connection, key: &str, hash: &str) -> RedisResult<()> {
    con.set(key, hash).map(|_| ())
}
/// Returns the earliest upcoming run among the given cron expressions.
///
/// Expressions that fail to parse, or that have no run after the current instant, are
/// ignored. Returns `None` when no expression yields a run.
fn next_run_any<I>(expressions: I) -> Option<DateTime<Utc>>
where
    I: IntoIterator,
    I::Item: AsRef<str>,
{
    let now = Utc::now();
    let mut earliest: Option<DateTime<Utc>> = None;
    for expr in expressions {
        let Ok(schedule) = Schedule::from_str(expr.as_ref()) else {
            continue;
        };
        let Some(next) = schedule.after(&now).next() else {
            continue;
        };
        earliest = match earliest {
            Some(current) if current <= next => Some(current),
            _ => Some(next),
        };
    }
    earliest
}

169
src/parser/auth.rs Normal file
View file

@ -0,0 +1,169 @@
/**
* Authenticate on BJColle.
*/
use crate::{configuration::{get_account_by_class, Settings}};
use redis::{Commands, Connection, RedisError, RedisResult};
use reqwest::{self, header::HeaderValue, redirect::Policy};
/// Requests a fresh session from the BJColle access page.
///
/// Returns the raw value of the first `set-cookie` header of the response.
///
/// # Errors
/// Fails when the request fails, when the response carries no `set-cookie` header, or when
/// the header value is not valid visible ASCII.
pub async fn request_session() -> Result<String, Box<dyn std::error::Error>> {
    let response = reqwest::get("https://bjcolle.fr/acces.php").await?;

    // Get response headers
    let cookie = response
        .headers()
        .get("set-cookie")
        .ok_or("Failed to get session ID")?;
    Ok(cookie.to_str()?.to_string())
}
/// Logs in on BJColle with the given credentials, using `session_id` as the `Cookie` header.
///
/// Redirects are not followed so that the cookies set by the login response can be read.
/// Returns the `bjid=...` and `bjp=...` cookies of the response joined with `"; "`.
///
/// # Errors
/// Fails when the request fails or when the response sets neither cookie.
pub async fn login(
    username: &str,
    password: &str,
    session_id: &str,
) -> Result<String, Box<dyn std::error::Error>> {
    let form = [
        ("USERNAME_ACCES", username),
        ("PASSWORD_ACCES", password),
        ("SOUVENIR", "on"),
        ("valider_ident", "Valider"),
    ];
    let client = reqwest::Client::builder()
        .redirect(Policy::none())
        .build()?;
    let response = client
        .post("https://bjcolle.fr/acces.php")
        .header("Cookie", session_id)
        .form(&form)
        .send()
        .await?;

    // Get session ID from cookies
    let cookies: Vec<String> = response
        .headers()
        .get_all("set-cookie")
        .iter()
        .filter_map(|value: &HeaderValue| {
            // Keep the `name=value` part before the first semicolon
            let pair = value.to_str().ok()?.split(';').next()?.trim();
            // Only keep "bjid" or "bjp"
            let wanted = pair.starts_with("bjid=") || pair.starts_with("bjp=");
            wanted.then(|| pair.to_string())
        })
        .collect();

    if cookies.is_empty() {
        return Err("Failed to get session ID".into());
    }
    Ok(cookies.join("; "))
}
/// Obtains a new session from the stored authentication `cookie`.
///
/// Redirects are not followed. Returns the raw value of the first `set-cookie` header of the
/// response.
///
/// # Errors
/// Fails when the request fails, when the response carries no `set-cookie` header, or when
/// the header value is not valid visible ASCII.
pub async fn refresh_session(cookie: &str) -> Result<String, Box<dyn std::error::Error>> {
    let client = reqwest::Client::builder()
        .redirect(Policy::none())
        .build()?;
    let response = client
        .get("https://bjcolle.fr/acces_cookies.php")
        .header("Cookie", cookie)
        .send()
        .await?;

    // Get session ID from cookies
    let refreshed = response
        .headers()
        .get("set-cookie")
        .ok_or("Failed to refresh session ID")?;
    Ok(refreshed.to_str()?.to_string())
}
/// Returns a usable BJColle session for `class_name`.
///
/// Steps, in order:
/// 1. reuse the session stored in Redis under `session_<class>` if there is one;
/// 2. otherwise request a fresh session;
/// 3. if an authentication cookie is stored under `auth_cookie_<class>`, refresh the session
///    from it; when the refresh fails the cookie is deleted and authentication restarts;
/// 4. otherwise log in with the account configured for the class and store the cookie.
///
/// The resulting session is stored in Redis for 15 minutes; failures of these writes are
/// ignored.
///
/// # Errors
/// Returns a `RedisError` when a Redis read fails, when the session request or the login
/// fails, or when no account is configured for the class.
pub async fn authenticate(
    class_name: &str,
    con: &mut Connection,
    config: &Settings,
) -> RedisResult<String> {
    let session_key = format!("session_{}", class_name);
    let session_duration = 900; // 15 minutes

    // Try reusing an existing session if available
    let stored_session: RedisResult<Option<String>> = con.get(&session_key);
    if let Some(id) = stored_session? {
        println!("Reusing existing session: {}", id);
        return Ok(id);
    }
    println!("No existing session found for class: {}", class_name);

    // Request a session ID (valid for 15 minutes)
    let session_id = match request_session().await {
        Ok(id) => id,
        Err(err) => {
            eprintln!("Failed to request session: {}", err);
            return Err(redis::RedisError::from((
                redis::ErrorKind::AuthenticationFailed,
                "Session request failed",
            )));
        }
    };
    println!("Session requested: {}", session_id);

    // Try to get the cached authentication cookie
    let cookie_key = format!("auth_cookie_{}", class_name);
    let stored_cookie: RedisResult<Option<String>> = con.get(&cookie_key);
    if let Some(cookie) = stored_cookie? {
        // Attempt to refresh session
        return match refresh_session(&cookie).await {
            Ok(new_session_id) => {
                println!("Session refreshed: {}", new_session_id);
                // Set the new session id (for 15 minutes)
                let _: Result<(), RedisError> =
                    con.set_ex(&session_key, &new_session_id, session_duration);
                Ok(new_session_id)
            }
            Err(err) => {
                eprintln!("Failed to refresh session: {}", err);
                // Remove the cached cookie if refresh fails and retry authentication
                let _: Result<(), RedisError> = con.del(&cookie_key);
                Box::pin(authenticate(class_name, con, config)).await
            }
        };
    }

    // No cookie found, authenticate with credentials
    let Some(account) = get_account_by_class(config, class_name) else {
        eprintln!("No account found for class: {}", class_name);
        return Err(redis::RedisError::from((
            redis::ErrorKind::AuthenticationFailed,
            "No account found",
        )));
    };
    println!("Using account: {}", account.username);

    let cookie = match login(&account.username, &account.password, &session_id).await {
        Ok(cookie) => cookie,
        Err(err) => {
            eprintln!("Failed to login: {}", err);
            return Err(redis::RedisError::from((
                redis::ErrorKind::AuthenticationFailed,
                "Login failed",
            )));
        }
    };
    println!("Login successful: {}", cookie);
    let _: Result<(), RedisError> = con.set(&cookie_key, cookie.clone());
    // Set the new session id (for 15 minutes)
    let _: Result<(), RedisError> = con.set_ex(&session_key, &session_id, session_duration);
    Ok(session_id)
}

View file

@ -0,0 +1,75 @@
/**
* List class colles from BJColle (list ID and secret).
* URL: https://bjcolle.fr/students_dashboard_class.php
*/
use reqwest::{self, Client};
use scraper::{Html, Selector};
use std::collections::HashMap;
use url::Url;
use crate::parser::utils::hash_text;
/// Lists the colles shown on the class dashboard.
///
/// Requests the dashboard with the `session` cookie (for `date` when given, otherwise with
/// `erasedate=1`) and hashes the page. When the hash equals `last_hash` the page is not parsed
/// and an empty list is returned. Otherwise every `#Choix > a.bouton_eleve2` link yields a
/// `(colle_id, colle_secret, examiner_name)` tuple.
///
/// Returns the tuples together with the hash of the page.
///
/// # Errors
/// Fails when the HTTP request or reading its body fails.
pub async fn fetch(
    session: &str,
    date: Option<&str>,
    last_hash: &str,
) -> Result<(Vec<(String, String, String)>, String), Box<dyn std::error::Error>> {
    let url = match date {
        Some(date) => format!(
            "https://bjcolle.fr/students_dashboard_class.php?go={}",
            date
        ),
        None => String::from("https://bjcolle.fr/students_dashboard_class.php?erasedate=1"),
    };

    // Retrieve page content
    let response = Client::new()
        .get(url)
        .header("Cookie", session)
        .send()
        .await?
        .text()
        .await?;

    // Calculate the hash of the response
    let hash = hash_text(&response);
    if hash == last_hash {
        return Ok((Vec::new(), hash));
    }

    let document = Html::parse_document(&response);
    // Select the links containing colles
    let row_selector =
        Selector::parse("#Choix > a.bouton_eleve2").expect("static selector is valid");

    let mut colles = Vec::new();
    for row in document.select(&row_selector) {
        // Extract ID and secret from each row
        let Some(href) = row.value().attr("href") else {
            eprintln!("Row does not contain a valid href attribute.");
            continue;
        };
        let Some((colle_id, colle_secret)) = parse_colle_info(href) else {
            eprintln!("Failed to parse colle info from URL: {}", href);
            continue;
        };
        // The examiner name is the third text node of the link. A link with fewer text
        // nodes yields an empty name instead of panicking on an out-of-bounds index.
        let examiner_name = row
            .text()
            .nth(2)
            .map(str::trim)
            .unwrap_or_default()
            .to_string();
        colles.push((colle_id, colle_secret, examiner_name));
    }
    Ok((colles, hash))
}
/// Extracts the colle ID (`colle`) and secret (`hgfebrgl8ri3h`) from the query string of a
/// relative link.
///
/// Returns `None` when the link cannot be parsed or when either parameter is missing. When a
/// parameter is repeated, its last value is used.
fn parse_colle_info(raw_url: &str) -> Option<(String, String)> {
    // The link is relative, so it is resolved against a placeholder host to be parseable.
    let parsed = Url::parse(&format!("https://dummy.host/{}", raw_url)).ok()?;

    let mut params: HashMap<String, String> = HashMap::new();
    for (name, value) in parsed.query_pairs() {
        params.insert(name.into_owned(), value.into_owned());
    }

    let colle_id = params.remove("colle")?;
    let colle_secret = params.remove("hgfebrgl8ri3h")?;
    Some((colle_id, colle_secret))
}

163
src/parser/colles/fetch.rs Normal file
View file

@ -0,0 +1,163 @@
/*
Request a colle from BJColle given its ID (and secret).
URL : https://bjcolle.fr/students_oral_disp.php?colle=<colle_id>&hgfebrgl8ri3h=<secret>
*/
use crate::{api::{Colle, ColleAttachment}, parser::utils};
use reqwest;
use scraper::{ElementRef, Html, Selector};
pub async fn fetch(
colle_id: &str,
secret: &str,
) -> Result<Option<Colle>, Box<dyn std::error::Error>> {
// Retrieve page content
let url = format!(
"https://bjcolle.fr/students_oral_disp.php?colle={}&hgfebrgl8ri3h={}",
colle_id, secret
);
let response = reqwest::get(url).await?.text().await?;
let document = Html::parse_document(&response);
let header_selector = "#envelope > header > table > tbody h2";
let header_selector = Selector::parse(header_selector).unwrap();
// Expect ONLY ONE header
let Some(header) = document.select(&header_selector).next() else {
// Header not found error
eprint!("Header not found in the document.");
return Ok(None);
};
// Use the header text to extract informations :
// - subject
// - examiner
// - date
// - room
// - student name
let header_text = header.text().collect::<Vec<_>>();
let (title, date, room, student) = if header_text.len() >= 4 {
(
header_text[0].trim().to_string(),
header_text[1].trim().to_string(),
header_text[2].trim().to_string(),
header_text[3].trim().to_string(),
)
} else {
// TODO: Handle the case where no header is found
print!("Header text does not contain enough elements.");
return Ok(None);
};
let (subject, examiner) = title
.split("avec")
.map(|s| s.trim())
.collect::<Vec<_>>()
.split_first()
.map(|(subject, examiner)| {
(
subject
.to_string()
.replace("Colle de", "")
.replace("Colle d'", "")
.trim()
.to_string(),
examiner.join(" "),
)
})
.unwrap_or((String::new(), String::new()));
// Parse the date and room
let date = utils::parse_french_datetime(&date)?;
let room = room.replace("Salle :", "").trim().to_string();
// Parse grades and comments
let grade_selector = "#haut > div > input#NOTE_ELEVE";
let grade_selector = Selector::parse(grade_selector).unwrap();
let grade = document
.select(&grade_selector)
.next()
.and_then(|e| e.value().attr("value"))
.unwrap_or("")
.to_string()
.replace(",", ".")
.parse()
.unwrap_or(-1.0);
// Attachments
let attachment_selector = Selector::parse(".bj > a.bouton_eleve").unwrap();
let files = document
.select(&attachment_selector)
.filter_map(|el| {
let href = el.value().attr("href")?.to_string();
let text = el.text().collect::<String>().trim().to_string();
Some(ColleAttachment {
url: href,
name: text,
})
})
.collect();
// Return a JSON object with the parsed information
Ok(Some(Colle {
date,
room,
student,
subject,
examiner,
bjid: Some(colle_id.to_string()),
bjsecret: Some(secret.to_string()),
grade: if grade >= 0.0 { Some(grade) } else { None },
content: Some(extract_section(&document, "Sujet", true)),
comment: Some(extract_section(&document, "Commentaire", false)),
attachments: Some(files),
}))
}
/// Extracts the HTML of a section ("Sujet" or "Commentaire") of a colle page.
///
/// Returns an empty string unless some `fieldset.bj > legend.bj2` contains `keyword`. The
/// content is then taken from the first (`use_first_span`) or the last `fieldset.bj > span`
/// of the document. Its child elements other than `<br>` are joined with `<br>`, or the span
/// itself is used when it has no such child. The result goes through `utils::clean_content`.
fn extract_section(document: &Html, keyword: &str, use_first_span: bool) -> String {
    let legend_selector = Selector::parse("fieldset.bj > legend.bj2").unwrap();
    let span_selector = Selector::parse("fieldset.bj > span").unwrap();

    // Check if a legend contains the keyword ("Sujet" or "Commentaire")
    let keyword_present = document
        .select(&legend_selector)
        .map(|legend| legend.text().collect::<String>())
        .any(|text| text.contains(keyword));
    if !keyword_present {
        return String::new();
    }

    // Pick the first or the last matching span
    let mut candidates = document.select(&span_selector);
    let chosen = if use_first_span {
        candidates.next()
    } else {
        candidates.last()
    };

    let fragments: Vec<String> = match chosen {
        None => Vec::new(),
        Some(span) => {
            // Keep child elements != <br>
            let elements: Vec<String> = span
                .children()
                .filter_map(ElementRef::wrap)
                .filter(|el| el.value().name() != "br")
                .map(|el| el.html())
                .collect();
            if elements.is_empty() {
                vec![span.html()]
            } else {
                elements
            }
        }
    };
    utils::clean_content(&fragments.join("<br>"))
}

7
src/parser/colles/mod.rs Normal file
View file

@ -0,0 +1,7 @@
mod class;
mod fetch;
mod upcoming;
pub use class::fetch as fetch_class_colles;
pub use fetch::fetch as fetch_colle;
pub use upcoming::fetch as fetch_upcoming_colles;

View file

@ -0,0 +1,191 @@
use crate::{
api::Colle,
parser::utils::{hash_text, parse_french_date, with_time},
};
use chrono::{Datelike, NaiveDate};
use reqwest::Client;
use std::{collections::HashMap, io::Write};
use tempfile::NamedTempFile;
// Splits the lines of the extracted text into groups keyed by date.
//
// Lines are accumulated until a date line is met; the accumulated lines are then stored under
// that date and the accumulator is reset. A date line is a line that mentions the current
// year, or the previous/next one so that a listing spanning New Year is still split, and that
// `parse_french_date` accepts. Lines accumulated after the last date line are not returned.
fn split_dates(text: Vec<&str>) -> HashMap<NaiveDate, Vec<String>> {
    // Compute the year markers once instead of on every line.
    let current_year = chrono::Utc::now().year();
    let current = current_year.to_string();
    let adjacent = [
        (current_year - 1).to_string(),
        (current_year + 1).to_string(),
    ];

    let mut result = HashMap::new();
    let mut content: Vec<String> = Vec::new();
    for line in text {
        let has_current = line.contains(&current);
        let has_adjacent = adjacent.iter().any(|year| line.contains(year.as_str()));

        if has_current || has_adjacent {
            match parse_french_date(line) {
                Ok(date) => {
                    if !content.is_empty() {
                        result.insert(date, std::mem::take(&mut content));
                    }
                    continue;
                }
                Err(_) if has_current => {
                    // If parsing failed, just continue
                    eprintln!("Failed to parse date from line: {}", line);
                    continue;
                }
                // A line that merely mentions an adjacent year is ordinary content.
                Err(_) => {}
            }
        }
        content.push(line.to_string());
    }
    result
}
// Tells whether the trimmed line starts with a time such as "16h20": one or more ASCII
// digits, the letter 'h', then at least two ASCII digits. Only the first 'h' of the line is
// considered.
fn is_time(line: &str) -> bool {
    let Some((hours, rest)) = line.trim().split_once('h') else {
        return false;
    };
    // Everything before 'h' must be digits, and there must be at least one
    if hours.is_empty() || !hours.chars().all(|c| c.is_ascii_digit()) {
        return false;
    }
    // The two characters right after 'h' must both be digits
    let mut minutes = rest.chars();
    matches!(
        (minutes.next(), minutes.next()),
        (Some(tens), Some(units)) if tens.is_ascii_digit() && units.is_ascii_digit()
    )
}
// Tells whether the trimmed line starts with a last name, i.e. whether its first word
// (everything before the first space) is made only of uppercase letters and '-'.
// A line without any space is never a name.
fn is_name(line: &str) -> bool {
    match line.trim().split_once(' ') {
        Some((first_word, _)) => !first_word
            .chars()
            .any(|c| c != '-' && !c.is_uppercase()),
        None => false,
    }
}
// Returns the names listed on a line: the line is cut on '/', every piece is trimmed and
// empty pieces are dropped.
fn extract_names(line: &str) -> Vec<String> {
    let mut names = Vec::new();
    for piece in line.split('/') {
        let name = piece.trim();
        if !name.is_empty() {
            names.push(name.to_string());
        }
    }
    names
}
// Parses a header line into (time, teacher, room).
//
// The time is the leading "<digits>h<2 digits>" token (e.g. "16h20" or "9h30"); the rest of
// the line is split on "Salle :" into the teacher (before) and the room (after), both
// trimmed. A line that does not start with such a token falls back to "the first 5 bytes are
// the time". Missing parts are returned as empty strings; the function never panics, even on
// lines shorter than 5 bytes.
fn parse_header(line: &str) -> (String, String, String) {
    // Byte length of the leading time token, when the line starts with one.
    let time_len = line.find('h').and_then(|h_pos| {
        let end = h_pos + 3;
        let hours = &line[..h_pos];
        let minutes = line.get(h_pos + 1..end)?;
        let well_formed = !hours.is_empty()
            && hours.bytes().all(|b| b.is_ascii_digit())
            && minutes.bytes().all(|b| b.is_ascii_digit());
        well_formed.then_some(end)
    });

    let (time, rest) = match time_len {
        Some(end) => line.split_at(end),
        None => (line.get(0..5).unwrap_or(""), line.get(5..).unwrap_or("")),
    };

    // Split the rest on "Salle :"
    let mut parts = rest.split("Salle :");
    let teacher = parts.next().map_or("", str::trim).to_string();
    let room = parts.next().map_or("", str::trim).to_string();
    (time.to_string(), teacher, room)
}
// Turns the lines of the extracted PDF text into upcoming colles.
//
// Within each date group, a time line sets the current header (time, examiner, room), a name
// line produces one colle per listed student using the current header and subject, and any
// other non-empty line becomes the current subject. A time that cannot be combined with the
// date falls back to the default `NaiveDateTime`.
fn parse_upcoming(text: Vec<&str>) -> Vec<Colle> {
    let mut colles = Vec::new();

    for (date, lines) in split_dates(text) {
        let mut header = (String::new(), String::new(), String::new());
        let mut subject = String::new();

        for raw_line in &lines {
            let line = raw_line.trim();
            if line.is_empty() {
                continue;
            }
            if is_time(line) {
                header = parse_header(line);
                continue;
            }
            if !is_name(line) {
                subject = line.to_string();
                continue;
            }

            // Name line: one colle per student listed on it
            let (time, examiner, room) = &header;
            for student in extract_names(line) {
                colles.push(Colle {
                    date: with_time(date, time).unwrap_or_default(),
                    examiner: examiner.clone(),
                    room: room.clone(),
                    subject: subject.clone(),
                    student,
                    bjid: None,
                    bjsecret: None,
                    grade: None,
                    content: None,
                    comment: None,
                    attachments: None,
                });
            }
        }
    }
    colles
}
pub async fn fetch(session: &str, last_hash: &str) -> (Vec<Colle>, String) {
let url = "https://bjcolle.fr/oral_choice_week_billboard_cdt.php";
// Start date is 3 days ago, end date is 14 days from now (2 weeks)
let start_date = chrono::Utc::now().date_naive() - chrono::Duration::days(3);
let end_date = chrono::Utc::now().date_naive() + chrono::Duration::days(14);
let response = Client::new()
.post(url)
.header("Cookie", session)
.form(&[
("datepicker_1", &start_date.format("%d/%m/%Y").to_string()),
("datepicker_2", &end_date.format("%d/%m/%Y").to_string()),
("VALIDER_SEMAINE", &"Valider".to_string()),
])
.send()
.await
.expect("Failed to send request");
if !response.status().is_success() {
eprintln!(
"Failed to fetch upcoming colles: HTTP {}",
response.status()
);
return (Vec::new(), String::new());
}
// Save PDF to a temporary file
let body = response
.bytes()
.await
.expect("Failed to read response body");
let mut tmpfile = NamedTempFile::new().expect("Failed to create temp file");
tmpfile
.write_all(&body)
.expect("Failed to write PDF to temp file");
// Extract text using pdf_extract
let pdf_text = pdf_extract::extract_text(tmpfile.path()).expect("Failed to extract PDF text");
// Calculate the hash of the PDF file
let hash = hash_text(&pdf_text);
if hash == last_hash || pdf_text.contains("Aucune colle") {
return (Vec::new(), hash);
}
// Split the text into lines and filter out empty lines
let lines: Vec<&str> = pdf_text
.lines()
.filter(|line| !line.trim().is_empty())
.collect();
// Remove the first line
let lines: Vec<&str> = lines.into_iter().skip(1).collect();
// Parse the lines into Upcoming structs
let upcoming_colles = parse_upcoming(lines);
if upcoming_colles.is_empty() {
eprintln!("No upcoming colles found.");
} else {
println!("Found {} upcoming colles.", upcoming_colles.len());
}
(upcoming_colles, hash)
}

9
src/parser/mod.rs Normal file
View file

@ -0,0 +1,9 @@
mod auth;
mod utils;
mod colles;
pub use colles::fetch_class_colles;
pub use colles::fetch_colle;
pub use colles::fetch_upcoming_colles;
pub use auth::authenticate;

101
src/parser/utils.rs Normal file
View file

@ -0,0 +1,101 @@
use chrono::{NaiveDate, NaiveDateTime, NaiveTime};
use sha2::{Digest, Sha256};
use std::collections::HashMap;
use std::error::Error;
/// Parses a French date such as `"vendredi 6 juin 2025"` into a `NaiveDate`.
///
/// The string is split on whitespace into weekday, day, month name and year; extra words
/// after the year are ignored. Non-digit characters of the day are dropped (so `"1er"` is
/// read as 1) and the month name is matched case-insensitively.
///
/// # Errors
/// Fails when fewer than four words are present, when the day or year is not a number, when
/// the month name is unknown, or when the components do not form a valid date.
pub fn parse_french_date(french_date: &str) -> Result<NaiveDate, Box<dyn Error>> {
    let months: HashMap<&str, u32> = HashMap::from([
        ("janvier", 1),
        ("février", 2),
        ("mars", 3),
        ("avril", 4),
        ("mai", 5),
        ("juin", 6),
        ("juillet", 7),
        ("août", 8),
        ("septembre", 9),
        ("octobre", 10),
        ("novembre", 11),
        ("décembre", 12),
    ]);

    // parts layout: ["vendredi", "6", "juin", "2025"]
    let parts: Vec<&str> = french_date.split_whitespace().collect();
    let [_, day, month_name, year, ..] = parts.as_slice() else {
        return Err("Date string too short".into());
    };

    // Extract day (digits only)
    let day: u32 = day
        .chars()
        .filter(|c| c.is_ascii_digit())
        .collect::<String>()
        .parse()?;

    // Month number; lowercase the name so that "Juin" or "JUIN" is accepted too
    let month = *months
        .get(month_name.to_lowercase().as_str())
        .ok_or_else(|| format!("Unknown month: {}", month_name))?;

    // Year
    let year: i32 = year.parse()?;

    let date = NaiveDate::from_ymd_opt(year, month, day).ok_or("Invalid date components")?;
    Ok(date)
}
/// Parses a French date and time such as `"vendredi 6 juin 2025 à 19 h 00"`.
///
/// The first four words are handed to `parse_french_date`; the hour is the sixth word and
/// the minute the eighth.
///
/// # Errors
/// Fails when fewer than eight words are present, when the date part is invalid, when the
/// hour or minute is not a number, or when they do not form a valid time.
pub fn parse_french_datetime(french_date: &str) -> Result<NaiveDateTime, Box<dyn Error>> {
    // tokens layout: ["vendredi", "6", "juin", "2025", "à", "19", "h", "00"]
    let tokens: Vec<&str> = french_date.split_whitespace().collect();
    let [_, _, _, _, _, hour, _, minute, ..] = tokens.as_slice() else {
        return Err("DateTime string too short".into());
    };

    // Parse the date part using the existing function
    let date = parse_french_date(&tokens[..4].join(" "))?;

    // Extract hour and minute
    let hour: u32 = hour.parse()?;
    let minute: u32 = minute.parse()?;

    Ok(date
        .and_hms_opt(hour, minute, 0)
        .ok_or("Invalid time components")?)
}
/// Cleans a fragment of HTML content: removes `<p>` and `</p>` tags, turns `&nbsp;` into a
/// plain space and trims the result. Other tags and entities are left untouched.
pub fn clean_content(content: &str) -> String {
    // Applied in order, each on the output of the previous one.
    const REPLACEMENTS: [(&str, &str); 3] = [("<p>", ""), ("</p>", ""), ("&nbsp;", " ")];

    let replaced = REPLACEMENTS
        .iter()
        .fold(content.to_string(), |text, &(from, to)| text.replace(from, to));
    replaced.trim().to_string()
}
/// Combines `date` with a time written like `"16h20"`.
///
/// # Errors
/// Returns `"Invalid time format: <time_str>"` when the time cannot be parsed as
/// hours and minutes.
pub fn with_time(date: NaiveDate, time_str: &str) -> Result<NaiveDateTime, String> {
    // Replace 'h' with ':' so that the value can be parsed as "%H:%M"
    let normalized = time_str.replace('h', ":");
    NaiveTime::parse_from_str(&normalized, "%H:%M")
        .map(|time| date.and_time(time))
        .map_err(|_| format!("Invalid time format: {}", time_str))
}
/// Returns the SHA-256 digest of `text` as a lowercase hexadecimal string.
pub fn hash_text(text: &str) -> String {
    let digest = Sha256::digest(text.as_bytes());
    format!("{:x}", digest)
}