internet_mapper/driver.py

50 lines
1.6 KiB
Python
Raw Normal View History

2024-08-25 21:50:59 +00:00
from surrealdb import Surreal
import fileinput
import asyncio
_LINK_SEPARATOR = ";->;"


def _parse_link(line):
    """Split one input line into a ``(source_url, target_url)`` pair.

    Returns ``None`` when the line has no ``;->;`` separator or when either
    side is empty once surrounding whitespace is stripped.
    """
    parts = line.split(_LINK_SEPARATOR)
    if len(parts) < 2:
        return None
    source, target = parts[0].strip(), parts[1].strip()
    if not source or not target:
        return None
    return source, target


async def _find_website_id(db, url):
    """Return the id of the ``website`` record whose ``url`` equals *url*, or ``None``."""
    # The URL comes straight from the input, so it is bound as a query
    # variable instead of being spliced into the SurrealQL text; a quote
    # inside the URL can then neither break nor extend the statement.
    response = await db.query(
        "SELECT id FROM website WHERE url = $url",
        {"url": url},
    )
    rows = response[0]["result"]
    return rows[0]["id"] if rows else None


async def main():
    """Load "source -> target" link pairs into SurrealDB as a graph.

    Lines are read through ``fileinput.input()``; each one is expected to
    look like ``https://oliveratkinson.net;->;http://google.com``.  For every
    valid pair:

    * the source URL is stored as a ``website`` record with ``crawled`` set
      (an existing record is updated, otherwise one is created);
    * the target URL is stored as a ``website`` record if it is not already
      present;
    * a ``links_to`` relation is created from the source record to the
      target record.

    Blank lines are skipped silently, malformed lines are reported and
    skipped, and pairs whose source and target are identical are ignored.
    """
    async with Surreal("ws://localhost:8000/rpc") as db:
        await db.signin({"user": "root", "pass": "root"})
        await db.use("test", "test")

        for line in fileinput.input():
            if not line.strip():
                continue

            link = _parse_link(line)
            if link is None:
                # Previously a line without the separator raised IndexError
                # and aborted the whole import.
                print(f"Skipping malformed line: {line!r}")
                continue

            ffrom, to = link
            if ffrom == to:
                print('Site has self-reference, ignoring')
                continue

            # FROM: the page the link was found on, so it is marked crawled.
            from_id = await _find_website_id(db, ffrom)
            if from_id is not None:
                # Record ids are produced by the database, not by the input,
                # so they are placed in the statement directly.
                await db.query(f"UPDATE {from_id} MERGE {{ crawled: True }}")
            else:
                from_response = await db.create(
                    "website",
                    {"url": ffrom, "crawled": True},
                )
                from_id = from_response[0]["id"]

            # TO: the linked page; created without a crawled flag if missing.
            to_id = await _find_website_id(db, to)
            if to_id is None:
                to_response = await db.create("website", {"url": to})
                to_id = to_response[0]["id"]

            await db.query(f"RELATE {from_id} -> links_to -> {to_id}")
if __name__ == "__main__":
    # Run the importer only when this file is executed as a script, so that
    # importing the module does not open a database connection.
    asyncio.run(main())