changes
This commit is contained in:
parent
6790061e22
commit
f7a3ca8fd7
10
Crawler.toml
10
Crawler.toml
@ -6,9 +6,7 @@ surreal_ns = "test"
|
|||||||
surreal_db = "v1.21.1"
|
surreal_db = "v1.21.1"
|
||||||
|
|
||||||
# Crawler config
|
# Crawler config
|
||||||
# crawl_filter = "https://ftpgeoinfo.msl.mt.gov/Data/Spatial/MSDI/Imagery/2023_NAIP/UTM_County_Mosaics/"
|
crawl_filter = "https://ftpgeoinfo.msl.mt.gov/Data/Spatial/MSDI"
|
||||||
crawl_filter = "https://oliveratkinson.net"
|
start_url = "https://ftpgeoinfo.msl.mt.gov/Data/Spatial/MSDI"
|
||||||
# start_url = "https://ftpgeoinfo.msl.mt.gov/Data/Spatial/MSDI/Imagery/2023_NAIP/UTM_County_Mosaics/"
|
budget = 10000
|
||||||
start_url = "https://oliveratkinson.net"
|
batch_size = 50
|
||||||
budget = 1000
|
|
||||||
batch_size = 500
|
|
||||||
|
@ -7,7 +7,7 @@ scrape_configs:
|
|||||||
static_configs:
|
static_configs:
|
||||||
# change this to your machine's IP, localhost won't work
|
# change this to your machine's IP, localhost won't work
|
||||||
# because localhost refers to the docker container.
|
# because localhost refers to the docker container.
|
||||||
- targets: ['172.20.239.48:2500']
|
- targets: ['192.168.1.200:2500']
|
||||||
#- targets: ['192.168.8.209:2500']
|
#- targets: ['192.168.8.209:2500']
|
||||||
- job_name: loki
|
- job_name: loki
|
||||||
static_configs:
|
static_configs:
|
||||||
|
@ -275,7 +275,7 @@ async fn get_uncrawled_links(
|
|||||||
count = config.batch_size;
|
count = config.batch_size;
|
||||||
}
|
}
|
||||||
|
|
||||||
debug!("Getting uncrawled links");
|
debug!("Getting {} uncrawled links", count);
|
||||||
|
|
||||||
let mut response = db
|
let mut response = db
|
||||||
.query("SELECT * FROM website WHERE crawled = false AND site ~ type::string($format) LIMIT $count;")
|
.query("SELECT * FROM website WHERE crawled = false AND site ~ type::string($format) LIMIT $count;")
|
||||||
|
Loading…
x
Reference in New Issue
Block a user