use sqlite to save index, major thread pool refactor

This commit is contained in:
simon987 2023-04-03 21:39:50 -04:00
parent ca973d63a4
commit fc36f33d52
62 changed files with 3630 additions and 4673 deletions

View file

@ -1,10 +1,13 @@
#!/usr/bin/env bash
rm -rf index.sist2/
(
cd ..
rm -rf index.sist2
python3 scripts/mime.py > src/parsing/mime_generated.c
python3 scripts/serve_static.py > src/web/static_generated.c
python3 scripts/index_static.py > src/index/static_generated.c
python3 scripts/magic_static.py > src/magic_generated.c
python3 scripts/mime.py > src/parsing/mime_generated.c
python3 scripts/serve_static.py > src/web/static_generated.c
python3 scripts/index_static.py > src/index/static_generated.c
python3 scripts/magic_static.py > src/magic_generated.c
printf "static const char *const Sist2CommitHash = \"%s\";\n" $(git rev-parse HEAD) > src/git_hash.h
printf "static const char *const Sist2CommitHash = \"%s\";\n" $(git rev-parse HEAD) > src/git_hash.h
)

View file

@ -29,7 +29,7 @@ application/mime, aps
application/mspowerpoint, ppz
application/msword, doc|dot|w6w|wiz|word
application/netmc, mcp
application/octet-stream, bin|dump|gpg
application/octet-stream, bin|dump|gpg|pack|idx
application/oda, oda
application/ogg, ogv
application/pdf, pdf
@ -243,7 +243,7 @@ audio/make, funk|my|pfunk
audio/midi, kar
audio/mid, rmi
audio/mp4, m4b
audio/mpeg, m2a|mpa
audio/mpeg, m2a|mpa|mpga
audio/ogg, ogg
audio/s3m, s3m
audio/tsp-audio, tsi
@ -382,7 +382,7 @@ text/x-pascal, p
text/x-perl, pl
text/x-php, php
text/x-po, po
text/x-python, py
text/x-python, py|pyi
text/x-ruby, rb
text/x-sass, sass
text/x-scss, scss

1 application/arj arj
29 application/mspowerpoint ppz
30 application/msword doc|dot|w6w|wiz|word
31 application/netmc mcp
32 application/octet-stream bin|dump|gpg bin|dump|gpg|pack|idx
33 application/oda oda
34 application/ogg ogv
35 application/pdf pdf
243 audio/midi kar
244 audio/mid rmi
245 audio/mp4 m4b
246 audio/mpeg m2a|mpa m2a|mpa|mpga
247 audio/ogg ogg
248 audio/s3m s3m
249 audio/tsp-audio tsi
382 text/x-perl pl
383 text/x-php php
384 text/x-po po
385 text/x-python py py|pyi
386 text/x-ruby rb
387 text/x-sass sass
388 text/x-scss scss

View file

@ -1,3 +1,5 @@
import zlib
mimes = {}
noparse = set()
ext_in_hash = set()
@ -135,24 +137,40 @@ def clean(t):
return t.replace("/", "_").replace(".", "_").replace("+", "_").replace("-", "_")
def crc(s):
return zlib.crc32(s.encode()) & 0xffffffff
with open("scripts/mime.csv") as f:
for l in f:
mime, ext_list = l.split(",")
if l.startswith("!"):
mime = mime[1:]
noparse.add(mime)
ext = [x.strip() for x in ext_list.split("|")]
ext = [x.strip() for x in ext_list.split("|") if x.strip() != ""]
mimes[mime] = ext
seen_crc = set()
for ext in mimes.values():
for e in ext:
if crc(e) in seen_crc:
raise Exception("CRC32 collision")
seen_crc.add(crc(e))
seen_crc = set()
for mime in mimes.keys():
if crc(mime) in seen_crc:
raise Exception("CRC32 collision")
seen_crc.add(crc(mime))
print("// **Generated by mime.py**")
print("#ifndef MIME_GENERATED_C")
print("#define MIME_GENERATED_C")
print("#include <glib.h>\n")
print("#include <stdlib.h>\n")
# Enum
print("enum mime {")
for mime, ext in sorted(mimes.items()):
print(" " + clean(mime) + "=" + mime_id(mime) + ",")
print(f"{clean(mime)}={mime_id(mime)},")
print("};")
# Enum -> string
@ -163,20 +181,20 @@ with open("scripts/mime.csv") as f:
print("default: return NULL;}}")
# Ext -> Enum
print("GHashTable *mime_get_ext_table() {"
"GHashTable *ext_table = g_hash_table_new(g_str_hash, g_str_equal);")
print("unsigned int mime_extension_lookup(unsigned long extension_crc32) {"
"switch (extension_crc32) {")
for mime, ext in mimes.items():
for e in [e for e in ext if e]:
print("g_hash_table_insert(ext_table, \"" + e + "\", (gpointer)" + clean(mime) + ");")
if e in ext_in_hash:
raise Exception("extension already in hash: " + e)
ext_in_hash.add(e)
print("return ext_table;}")
if len(ext) > 0:
for e in ext:
print(f"case {crc(e)}:", end="")
print(f"return {clean(mime)};")
print("default: return 0;}}")
# string -> Enum
print("GHashTable *mime_get_mime_table() {"
"GHashTable *mime_table = g_hash_table_new(g_str_hash, g_str_equal);")
for mime, ext in mimes.items():
print("g_hash_table_insert(mime_table, \"" + mime + "\", (gpointer)" + clean(mime) + ");")
print("return mime_table;}")
print("unsigned int mime_name_lookup(unsigned long mime_crc32) {"
"switch (mime_crc32) {")
for mime in mimes.keys():
print(f"case {crc(mime)}: return {clean(mime)};")
print("default: return 0;}}")
print("#endif")