Merge pull request #13 from tdaron/user-agent
This commit is contained in:
Коммит
e3c388f0c5
|
@ -1,11 +1,7 @@
|
||||||
use std::{
|
use std::{collections::HashSet, path::Path, sync::Arc, time::Duration};
|
||||||
collections::HashSet,
|
|
||||||
path::{Path, PathBuf},
|
|
||||||
time::Duration,
|
|
||||||
};
|
|
||||||
|
|
||||||
use anyhow::{bail, Context, Result};
|
use anyhow::{bail, Context, Result};
|
||||||
use hyper::{Client, StatusCode};
|
use hyper::{Client, Method, Request, StatusCode};
|
||||||
use hyper_tls::HttpsConnector;
|
use hyper_tls::HttpsConnector;
|
||||||
use regex::Regex;
|
use regex::Regex;
|
||||||
use tokio::{
|
use tokio::{
|
||||||
|
@ -13,7 +9,10 @@ use tokio::{
|
||||||
time::sleep,
|
time::sleep,
|
||||||
};
|
};
|
||||||
|
|
||||||
use crate::git_parsing::{parse_hash, parse_head, parse_log, parse_object, GitObject};
|
use crate::{
|
||||||
|
git_parsing::{parse_hash, parse_head, parse_log, parse_object, GitObject},
|
||||||
|
Args,
|
||||||
|
};
|
||||||
|
|
||||||
lazy_static::lazy_static! {
|
lazy_static::lazy_static! {
|
||||||
static ref REGEX_OBJECT_PATH: Regex = Regex::new(r"[\da-f]{2}/[\da-f]{38}").unwrap();
|
static ref REGEX_OBJECT_PATH: Regex = Regex::new(r"[\da-f]{2}/[\da-f]{38}").unwrap();
|
||||||
|
@ -42,7 +41,9 @@ struct DownloadedFile {
|
||||||
pub tx: UnboundedSender<DownloadedFile>,
|
pub tx: UnboundedSender<DownloadedFile>,
|
||||||
}
|
}
|
||||||
|
|
||||||
pub async fn download_all(base_url: String, base_path: PathBuf, max_task_count: u16) {
|
pub async fn download_all(args: Arc<Args>) {
|
||||||
|
let base_url = &args.url;
|
||||||
|
let base_path = &args.path;
|
||||||
let mut cache = HashSet::<String>::new();
|
let mut cache = HashSet::<String>::new();
|
||||||
|
|
||||||
// TODO: try out unbounded channel too
|
// TODO: try out unbounded channel too
|
||||||
|
@ -75,8 +76,9 @@ pub async fn download_all(base_url: String, base_path: PathBuf, max_task_count:
|
||||||
|
|
||||||
let url = format!("{}{}", &base_url, &message.path);
|
let url = format!("{}{}", &base_url, &message.path);
|
||||||
let base_path = base_path.clone();
|
let base_path = base_path.clone();
|
||||||
|
let cloned_args = args.clone();
|
||||||
let handle = tokio::spawn(async move {
|
let handle = tokio::spawn(async move {
|
||||||
let file_bytes = match download(&url).await {
|
let file_bytes = match download(&url, cloned_args).await {
|
||||||
Ok(content) => content,
|
Ok(content) => content,
|
||||||
Err(e) => {
|
Err(e) => {
|
||||||
println!("Error while downloading file {url}: {}", e);
|
println!("Error while downloading file {url}: {}", e);
|
||||||
|
@ -99,7 +101,7 @@ pub async fn download_all(base_url: String, base_path: PathBuf, max_task_count:
|
||||||
|
|
||||||
threads.push(handle);
|
threads.push(handle);
|
||||||
|
|
||||||
while threads.len() >= (max_task_count as usize) {
|
while threads.len() >= (args.tasks as usize) {
|
||||||
// sleep
|
// sleep
|
||||||
sleep(Duration::from_millis(10)).await;
|
sleep(Duration::from_millis(10)).await;
|
||||||
|
|
||||||
|
@ -109,9 +111,25 @@ pub async fn download_all(base_url: String, base_path: PathBuf, max_task_count:
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn download(url: &str) -> Result<Vec<u8>> {
|
async fn download(url: &str, args: Arc<Args>) -> Result<Vec<u8>> {
|
||||||
let client = Client::builder().build::<_, hyper::Body>(HttpsConnector::new());
|
let client = Client::builder().build::<_, hyper::Body>(HttpsConnector::new());
|
||||||
let resp = client.get(url.parse().unwrap()).await;
|
let req = Request::builder()
|
||||||
|
.method(Method::GET)
|
||||||
|
.uri(url)
|
||||||
|
.header(
|
||||||
|
"User-Agent",
|
||||||
|
args.user_agent
|
||||||
|
.clone()
|
||||||
|
.unwrap_or(
|
||||||
|
"Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)"
|
||||||
|
.into(),
|
||||||
|
)
|
||||||
|
.clone(),
|
||||||
|
)
|
||||||
|
.body(hyper::Body::empty())
|
||||||
|
.expect("Failed to build the request");
|
||||||
|
|
||||||
|
let resp = client.request(req).await;
|
||||||
match resp {
|
match resp {
|
||||||
Ok(resp) => match resp.status() {
|
Ok(resp) => match resp.status() {
|
||||||
StatusCode::OK => {
|
StatusCode::OK => {
|
||||||
|
|
10
src/main.rs
10
src/main.rs
|
@ -1,4 +1,4 @@
|
||||||
use std::path::PathBuf;
|
use std::{path::PathBuf, sync::Arc};
|
||||||
|
|
||||||
use clap::Parser;
|
use clap::Parser;
|
||||||
|
|
||||||
|
@ -7,11 +7,13 @@ mod git_parsing;
|
||||||
|
|
||||||
#[derive(Parser, Debug)]
|
#[derive(Parser, Debug)]
|
||||||
#[command(author, version, about, long_about = None)]
|
#[command(author, version, about, long_about = None)]
|
||||||
struct Cli {
|
pub struct Args {
|
||||||
/// The url of the exposed .git directory
|
/// The url of the exposed .git directory
|
||||||
#[arg()]
|
#[arg()]
|
||||||
url: String,
|
url: String,
|
||||||
|
|
||||||
|
#[arg(short, long)]
|
||||||
|
user_agent: Option<String>,
|
||||||
/// The directory to download to
|
/// The directory to download to
|
||||||
#[arg(default_value = "git-dumped")]
|
#[arg(default_value = "git-dumped")]
|
||||||
path: PathBuf,
|
path: PathBuf,
|
||||||
|
@ -23,13 +25,13 @@ struct Cli {
|
||||||
|
|
||||||
#[tokio::main]
|
#[tokio::main]
|
||||||
async fn main() -> Result<(), Box<dyn std::error::Error>> {
|
async fn main() -> Result<(), Box<dyn std::error::Error>> {
|
||||||
let args = Cli::parse();
|
let args = Args::parse();
|
||||||
|
|
||||||
// println!("URL: {url}");
|
// println!("URL: {url}");
|
||||||
// println!("PATH: {path}");
|
// println!("PATH: {path}");
|
||||||
|
|
||||||
std::fs::create_dir_all(args.path.join(".git"))?;
|
std::fs::create_dir_all(args.path.join(".git"))?;
|
||||||
dump_git::download_all(args.url.clone(), args.path, args.tasks).await;
|
dump_git::download_all(Arc::new(args)).await;
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
Загрузка…
Ссылка в новой задаче