Skip to content

Feat - implement queue #2081

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 4 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
117 changes: 117 additions & 0 deletions database/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,16 @@ impl FromStr for CommitType {
}
}

impl ToString for CommitType {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is not super important, but usually it's idiomatic to implement Display, which is more general, and then get an implementation of ToString "for free" (there is a blanket impl of ToString for types that implement Display).

fn to_string(&self) -> String {
match self {
CommitType::Try => "try",
CommitType::Master => "master",
}
.to_string()
}
}

#[derive(Debug, Clone, serde::Deserialize, serde::Serialize)]
pub struct Commit {
pub sha: String,
Expand Down Expand Up @@ -791,3 +801,110 @@ pub struct ArtifactCollection {
pub duration: Duration,
pub end_time: DateTime<Utc>,
}

/// Lifecycle state of a commit job in the collectors' work queue.
#[derive(Debug, Clone, serde::Deserialize, serde::Serialize)]
pub enum CommitJobStatus {
    /// Waiting in the queue; not yet claimed by any machine.
    Queued,
    /// Claimed by a collector machine and currently being worked on.
    InProgress,
    /// Work on the job has completed.
    Finished,
}

impl FromStr for CommitJobStatus {
    type Err = String;

    /// Parses the lowercase database representation of a job status.
    /// Input is case-insensitive; unknown values produce an error message.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        let normalized = s.to_ascii_lowercase();
        match normalized.as_str() {
            "queued" => Ok(CommitJobStatus::Queued),
            "in_progress" => Ok(CommitJobStatus::InProgress),
            "finished" => Ok(CommitJobStatus::Finished),
            _ => Err(format!("{} is not a valid `CommitJobStatus`", s)),
        }
    }
}

/// Lowercase textual form of the job status, used when persisting it to the
/// database (must round-trip with the `FromStr` impl for this type).
///
/// Implemented as `Display` rather than `ToString`: the standard library's
/// blanket impl gives `to_string()` for free, which is the idiomatic route.
impl std::fmt::Display for CommitJobStatus {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str(match self {
            CommitJobStatus::Queued => "queued",
            CommitJobStatus::InProgress => "in_progress",
            CommitJobStatus::Finished => "finished",
        })
    }
}

/// Represents a job in the work queue for collectors
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CommitJob {
    /// Commit hash identifying the job.
    pub sha: String,
    /// Parent commit hash, if known.
    // NOTE(review): if the invariant holds that a parent SHA is always
    // present when inserting into the queue, this should not be nullable —
    // confirm before hardening the type.
    pub parent_sha: Option<String>,
    /// Whether this is a try or a master commit.
    pub commit_type: CommitType,
    /// Pull request number associated with the commit.
    pub pr: u32,
    /// Time the commit was made.
    pub commit_time: Date,
    /// Target this job should be benchmarked on.
    pub target: Target,
    /// Machine that claimed the job; `None` until it is dequeued
    /// (see `CommitJob::new`, which initializes it to `None`).
    pub machine_id: Option<String>,
    /// Set when a machine starts the job; `None` while still queued.
    pub started_at: Option<Date>,
    /// Set when the job completes; `None` before that.
    pub finished_at: Option<Date>,
    /// Current lifecycle state of the job.
    // NOTE(review): consider folding machine_id/started_at/finished_at into
    // the status enum variants (e.g. `InProgress { started_at }`) so invalid
    // combinations are unrepresentable; DB layer would still flatten them.
    pub status: CommitJobStatus,
}

impl CommitJob {
    /// Create a new commit job.
    ///
    /// The job starts out `Queued` on the default x86-64 Linux target,
    /// with no machine assigned and no start/finish timestamps.
    pub fn new(
        sha: String,
        parent_sha: Option<String>,
        pr: u32,
        commit_type: CommitType,
        commit_time: Date,
    ) -> Self {
        Self {
            sha,
            parent_sha,
            pr,
            commit_type,
            commit_time,
            target: Target::X86_64UnknownLinuxGnu,
            status: CommitJobStatus::Queued,
            machine_id: None,
            started_at: None,
            finished_at: None,
        }
    }

    /// Reconstruct a job from the columns of a database row.
    pub fn from_db(
        sha: String,
        parent_sha: Option<String>,
        commit_type: CommitType,
        pr: u32,
        commit_time: Date,
        target: Target,
        machine_id: Option<String>,
        started_at: Option<Date>,
        finished_at: Option<Date>,
        status: CommitJobStatus,
    ) -> Self {
        Self {
            sha,
            parent_sha,
            pr,
            commit_type,
            commit_time,
            target,
            status,
            machine_id,
            started_at,
            finished_at,
        }
    }

    /// Column names used when INSERTing a job into the queue table; the
    /// order here must match the parameter order built by the DB layer.
    pub fn get_enqueue_column_names() -> Vec<String> {
        [
            "sha",
            "parent_sha",
            "commit_type",
            "pr",
            "commit_time",
            "status",
            "target",
        ]
        .iter()
        .map(|column| column.to_string())
        .collect()
    }
}
10 changes: 9 additions & 1 deletion database/src/pool.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
use crate::{
ArtifactCollection, ArtifactId, ArtifactIdNumber, CodegenBackend, CompileBenchmark, Target,
ArtifactCollection, ArtifactId, ArtifactIdNumber, CodegenBackend, CommitJob, CompileBenchmark, Target
};
use crate::{CollectionId, Index, Profile, QueuedCommit, Scenario, Step};
use chrono::{DateTime, Utc};
Expand Down Expand Up @@ -178,6 +178,14 @@ pub trait Connection: Send + Sync {

/// Removes all data associated with the given artifact.
async fn purge_artifact(&self, aid: &ArtifactId);

/* @Queue - Adds a job - we want to "double up" by adding one per `Target` */
/// Add a job to the queue
async fn enqueue_commit_job(&self, target: Target, jobs: &[CommitJob]);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Isn't the target already a part of CommitJob?


/* @Queue - currently extracts everything out of the queue as a SELECT */
/// Dequeue jobs
async fn dequeue_commit_job(&self, machine_id: String, target: Target) -> Option<String>;
}

#[async_trait::async_trait]
Expand Down
215 changes: 214 additions & 1 deletion database/src/pool/postgres.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
use crate::pool::{Connection, ConnectionManager, ManagedConnection, Transaction};
use crate::{
ArtifactCollection, ArtifactId, ArtifactIdNumber, Benchmark, CodegenBackend, CollectionId,
Commit, CommitType, CompileBenchmark, Date, Index, Profile, QueuedCommit, Scenario, Target,
Commit, CommitJob, CommitJobStatus, CommitType, CompileBenchmark, Date, Index, Profile,
QueuedCommit, Scenario, Target,
};
use anyhow::Context as _;
use chrono::{DateTime, TimeZone, Utc};
Expand All @@ -12,6 +13,7 @@ use std::str::FromStr;
use std::sync::Arc;
use std::time::Duration;
use tokio::sync::Mutex;
use tokio_postgres::types::{FromSql, ToSql};
use tokio_postgres::GenericClient;
use tokio_postgres::Statement;

Expand Down Expand Up @@ -1365,6 +1367,217 @@ where
.await
.unwrap();
}

/* @Queue */
/// Inserts all `jobs` into the `commit_queue` table with a single
/// multi-row INSERT statement.
///
/// NOTE(review): every row is written with the `target` argument — the
/// per-job `CommitJob::target` field is ignored here. Confirm that is
/// intended, or drop the redundant parameter.
async fn enqueue_commit_job(&self, target: Target, jobs: &[CommitJob]) {
    let row_count = jobs.len();
    // An INSERT with zero VALUES groups is invalid SQL; nothing to do.
    if row_count == 0 {
        return;
    }

    let column_names = CommitJob::get_enqueue_column_names();
    let column_string_names = column_names.join(", ");
    let column_count: usize = column_names.len();
    // Generate the placeholders like ($1, $2, ..., $7), ($8, $9, ..., $14), ...
    let placeholders = (0..row_count)
        .map(|i| {
            let offset = i * column_count;
            let group = (1..=column_count)
                .map(|j| format!("${}", offset + j))
                .collect::<Vec<_>>()
                .join(", ");
            format!("({})", group)
        })
        .collect::<Vec<_>>()
        .join(", ");

    let sql = format!(
        "INSERT INTO commit_queue ({}) VALUES {}",
        column_string_names, placeholders,
    );

    // Flatten the jobs into one parameter list; the order of these fields
    // must stay in sync with `CommitJob::get_enqueue_column_names`.
    let params: Vec<&(dyn tokio_postgres::types::ToSql + Sync)> = jobs
        .iter()
        .flat_map(|job| {
            vec![
                &job.sha as &(dyn tokio_postgres::types::ToSql + Sync),
                &job.parent_sha,
                &job.commit_type,
                &job.pr,
                &job.commit_time,
                &job.status,
                &target,
            ]
        })
        .collect();

    self.conn().execute(&sql, &params).await.unwrap();
}

/* @Queue */
/// Claims a job for `machine_id` on `target`, returning the SHA of the
/// claimed commit, or `None` when there is nothing to hand out.
///
/// Claiming happens in three stages, each an `UPDATE ... RETURNING` whose
/// candidate row is selected with `FOR UPDATE SKIP LOCKED` so concurrent
/// collectors cannot grab the same row:
///   1. re-claim a job this machine already had `in_progress` (it may have
///      gone offline mid-job and restarted);
///   2. pick up a commit another target has already started, so targets do
///      not drift apart;
///   3. otherwise take the next `queued` job for this target.
async fn dequeue_commit_job(&self, machine_id: String, target: Target) -> Option<String> {
    /* Check to see if this machine possibly went offline while doing
     * a previous job - if it did we'll take that job */
    let maybe_previous_job = self
        .conn()
        .query_opt(
            "
            WITH job_to_update AS (
                SELECT sha
                FROM commit_queue
                WHERE machine_id = $1
                AND target = $2
                AND status = 'in_progress'
                ORDER BY started_at
                LIMIT 1
                FOR UPDATE SKIP LOCKED
            )
            UPDATE commit_queue
            SET started_at = NOW(),
                status = 'in_progress'
            WHERE machine_id = $1
            AND target = $2
            AND sha = (SELECT sha FROM job_to_update)
            RETURNING sha;
            ",
            &[&machine_id, &target],
        )
        .await
        .unwrap();

    /* If it was we will take that job */
    if let Some(row) = maybe_previous_job {
        return Some(row.get("sha"));
    }

    // Stage 2: "drift" query. The CTE selects a SHA that some other target
    // has started (or finished) but that no other target has finished, then
    // claims this target's row for that SHA.
    // NOTE(review): the inner NOT IN filter uses `target != $1` in both the
    // outer and inner queries — confirm this selects exactly the intended
    // "in progress elsewhere, not yet done" set.
    let maybe_drift_job = self
        .conn()
        .query_opt(
            "
            WITH job_to_update AS (
                SELECT *
                FROM commit_queue
                WHERE target != $1
                AND status IN ('finished', 'in_progress')
                AND sha NOT IN (
                    SELECT sha
                    FROM commit_queue
                    WHERE target != $1
                    AND status = 'finished'
                )
                ORDER BY started_at
                LIMIT 1
                FOR UPDATE SKIP LOCKED
            )
            UPDATE commit_queue
            SET started_at = NOW(),
                status = 'in_progress',
                machine_id = $2
            WHERE
                target = $1
                AND sha = (SELECT sha FROM job_to_update)
            RETURNING sha;
            ",
            &[&target, &machine_id],
        )
        .await
        .unwrap();

    /* If we are, we will take that job */
    if let Some(row) = maybe_drift_job {
        return Some(row.get("sha"));
    }

    /* See if there are any jobs that need taking care of */
    // Ordering: lowest PR first, then commit_type, then sha, so dequeue
    // order is deterministic across machines.
    let job = self
        .conn()
        .query_opt("
            WITH job_to_update AS (
                SELECT sha
                FROM commit_queue
                WHERE target = $1
                AND status = 'queued'
                ORDER BY pr ASC, commit_type, sha
                LIMIT 1
                FOR UPDATE SKIP LOCKED
            )
            UPDATE commit_queue
            SET started_at = NOW(),
                status = 'in_progress',
                machine_id = $2
            WHERE
                sha = (SELECT sha FROM job_to_update)
                AND target = $1
            RETURNING sha;
        ", &[&target, &machine_id])
        .await
        .unwrap();

    /* If there is one, we will take that job */
    if let Some(row) = job {
        return Some(row.get("sha"));
    }

    /* There are no jobs in the queue */
    return None;
}
}

/// Implements `tokio_postgres::types::ToSql` for a type by routing through
/// its `to_string()` (i.e. its `Display`/`ToString` impl), so the value is
/// stored in the database as text.
#[macro_export]
macro_rules! impl_to_postgresql_via_to_string {
    ($t:ty) => {
        impl tokio_postgres::types::ToSql for $t {
            fn to_sql(
                &self,
                ty: &tokio_postgres::types::Type,
                out: &mut bytes::BytesMut,
            ) -> Result<tokio_postgres::types::IsNull, Box<dyn std::error::Error + Sync + Send>>
            {
                // Delegate the actual wire encoding to String's ToSql impl.
                self.to_string().to_sql(ty, out)
            }

            fn accepts(ty: &tokio_postgres::types::Type) -> bool {
                // Accept exactly the SQL types a String can be written to.
                <String as tokio_postgres::types::ToSql>::accepts(ty)
            }

            // Only compile if the type is acceptable
            tokio_postgres::types::to_sql_checked!();
        }
    };
}

// Enum-ish domain types persisted as their textual representation.
impl_to_postgresql_via_to_string!(Target);
impl_to_postgresql_via_to_string!(CommitType);
impl_to_postgresql_via_to_string!(CommitJobStatus);

/// Serialize a `Date` to Postgres by forwarding to the wrapped
/// `DateTime<Utc>` (i.e. it is stored as a plain timestamp column).
impl ToSql for Date {
    fn to_sql(
        &self,
        ty: &tokio_postgres::types::Type,
        out: &mut bytes::BytesMut,
    ) -> Result<tokio_postgres::types::IsNull, Box<dyn std::error::Error + Sync + Send>> {
        // `Date` is a newtype over `DateTime<Utc>`; delegate directly.
        self.0.to_sql(ty, out)
    }

    fn accepts(ty: &tokio_postgres::types::Type) -> bool {
        // Accept exactly the SQL types a DateTime<Utc> can be written to.
        <DateTime<Utc> as ToSql>::accepts(ty)
    }

    tokio_postgres::types::to_sql_checked!();
}

impl<'a> FromSql<'a> for Date {
fn from_sql(
ty: &tokio_postgres::types::Type,
raw: &'a [u8],
) -> Result<Date, Box<dyn std::error::Error + Sync + Send>> {
let dt = DateTime::<Utc>::from_sql(ty, raw)?;
Ok(Date(dt))
}

fn accepts(ty: &tokio_postgres::types::Type) -> bool {
<DateTime<Utc> as FromSql>::accepts(ty)
}
}

fn parse_artifact_id(ty: &str, sha: &str, date: Option<DateTime<Utc>>) -> ArtifactId {
Expand Down
Loading