2024-08-25 21:50:59 +00:00
|
|
|
from surrealdb import Surreal
|
|
|
|
import fileinput
|
|
|
|
import asyncio
|
|
|
|
|
|
|
|
|
|
|
|
# Separator between the source URL and the target URL on each input line.
_LINK_SEPARATOR = ";->;"


def _split_link(line):
    """Return ``(source_url, target_url)`` parsed from *line*.

    Returns ``None`` when the line does not contain the ``;->;`` separator
    (blank or malformed input), so the caller can skip it instead of
    crashing with an ``IndexError``.
    """
    parts = line.split(_LINK_SEPARATOR)
    if len(parts) < 2:
        return None
    return parts[0].strip(), parts[1].strip()


async def _find_website_id(db, url):
    """Return the id of the ``website`` record whose url equals *url*, or ``None``.

    The URL is passed as a bound query variable rather than interpolated into
    the query text, so quotes or SurrealQL fragments in the input cannot alter
    the statement.
    """
    result = await db.query(
        "SELECT id FROM website WHERE url = $url",
        {"url": url},
    )
    rows = result[0]["result"]
    return rows[0]["id"] if rows else None


async def main():
    """Import ``source -> target`` links into SurrealDB as a website graph.

    Lines are read with ``fileinput.input()``. Each line is expected to have
    the form ``<source-url>;->;<target-url>``, for example
    ``https://oliveratkinson.net;->;http://google.com``.

    For every well-formed line:

    * the source ``website`` record is looked up by url; an existing record
      is merged with ``crawled: True``, otherwise a new record is created
      with ``crawled`` set to ``True``;
    * the target ``website`` record is looked up by url and created (without
      a ``crawled`` field) when it does not exist yet;
    * a ``links_to`` relation is created from the source to the target.

    Lines whose source and target are identical are skipped, as are lines
    that lack the separator.
    """
    async with Surreal("ws://localhost:8000/rpc") as db:
        await db.signin({"user": "root", "pass": "root"})
        await db.use("test", "test")

        for line in fileinput.input():
            link = _split_link(line)
            if link is None:
                # Blank/malformed lines previously raised IndexError and
                # aborted the whole import.
                print(f"Skipping malformed line: {line!r}")
                continue
            ffrom, to = link

            if ffrom == to:
                print('Site has self-reference, ignoring')
                continue

            # FROM: the page the link was found on, so it has been crawled.
            from_id = await _find_website_id(db, ffrom)
            if from_id is not None:
                # from_id is a record id returned by the database itself,
                # not raw user input.
                await db.query(f"UPDATE {from_id} MERGE {{ crawled: True }}")
            else:
                from_response = await db.create(
                    "website",
                    {"url": ffrom, "crawled": True},
                )
                from_id = from_response[0]["id"]

            # TO: the link target; only create it if it is not known yet.
            to_id = await _find_website_id(db, to)
            if to_id is None:
                to_response = await db.create("website", {"url": to})
                to_id = to_response[0]["id"]

            await db.query(f"RELATE {from_id} -> links_to -> {to_id}")
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    # Executed as a script (not imported): run the main() coroutine to
    # completion on a fresh event loop.
    asyncio.run(main())
|
|
|
|
|