mirror of
https://github.com/labmlai/annotated_deep_learning_paper_implementations.git
synced 2025-08-06 15:22:21 +08:00
94 lines
2.5 KiB
Python
94 lines
2.5 KiB
Python
import json
|
|
import re
|
|
from pathlib import Path
|
|
|
|
from labml import logger
|
|
from labml.logger import Text
|
|
|
|
# Root of the annotated-implementations package; generated doc URLs are
# computed relative to this directory.
HOME = Path('./labml_nn').absolute()

# Matches inline markdown links of the form (https://arxiv.org/abs/<id>)
# and captures the arxiv paper id.
REGEX = re.compile(r"""
\(
https://arxiv\.org/abs/  # arxiv abstract URL prefix
(?P<id>[0-9\.]+)         # Paper ID
\)
""", re.VERBOSE)

# Generated documentation pages to skip when collecting paper links
# (index/aggregate pages that would duplicate the per-paper entries).
IGNORE = {
    'neox/model.html',
    'transformers/index.html',
    'transformers/configs.html',
    'optimizers/noam.html',
    'transformers/basic/autoregressive_experiment.html',
    'transformers/xl/relative_mha.html',
    'capsule_networks/mnist.html',
    'transformers/rope/value_pe/index.html',
}

# Papers that are only cited in passing and should not be indexed.
IGNORE_PAPERS = {
    '2002.04745',  # On Layer Normalization in the Transformer Architecture
    '1606.08415',  # Gaussian Error Linear Units (GELUs)
    '1710.10196',  # Progressive Growing of GANs for Improved Quality, Stability, and Variation
    '1904.11486',  # Making Convolutional Networks Shift-Invariant Again
    '1801.04406',  # Which Training Methods for GANs do actually Converge?
    '1812.04948',  # A Style-Based Generator Architecture for Generative Adversarial Networks
    '1705.10528',  # Constrained Policy Optimization
}
|
|
|
|
|
|
def collect(path: Path):
    """Recursively collect arxiv paper references from Python sources under *path*.

    For each ``.py`` file, the corresponding generated documentation page URL is
    derived (``__init__.py`` -> ``index.html``, ``foo.py`` -> ``foo.html``) and
    every arxiv id linked in the source is collected.

    Returns a list of ``{'url': <doc page>, 'arxiv_id': <paper id>}`` dicts,
    one per unique paper per file.
    """
    if path.is_file():
        html = path.relative_to(HOME)
        # Only Python sources are rendered into documentation pages.
        if html.suffix != '.py':
            return []

        # Map the source path to its generated documentation page.
        if html.stem == '__init__':
            html = html.parent / 'index.html'
        else:
            html = html.parent / f'{html.stem}.html'

        if str(html) in IGNORE:
            return []

        # Read explicitly as UTF-8 so decoding doesn't depend on the locale.
        contents = path.read_text(encoding='utf-8')

        # Unique paper ids referenced in this file, minus the blacklist.
        papers = {m.group('id') for m in REGEX.finditer(contents)} - IGNORE_PAPERS

        # Log files that reference several papers; useful for spotting pages
        # that should perhaps be added to IGNORE.
        if len(papers) > 1:
            logger.log([(str(html), Text.key), ': ', str(papers)])
        return [{'url': str(html), 'arxiv_id': p} for p in papers]

    # Directory: recurse into children and flatten the results.
    urls = []
    for child in path.iterdir():
        urls += collect(child)
    return urls
|
|
|
|
|
|
def main():
    """Build ``docs/papers.json`` mapping each arxiv id to the doc pages citing it."""
    papers = []
    for f in HOME.iterdir():
        papers += collect(f)

    # Sort so the grouped output (and the JSON file) is deterministic.
    papers.sort(key=lambda p: p['arxiv_id'])

    # Group documentation page URLs by arxiv id.
    by_id = {}
    for p in papers:
        by_id.setdefault(p['arxiv_id'], []).append(f"https://nn.labml.ai/{p['url']}")

    logger.log([('Papers', Text.key), ': ', f'{len(by_id):,}'])

    # Write explicitly as UTF-8 so the output doesn't depend on the locale.
    with open(str(HOME.parent / 'docs' / 'papers.json'), 'w', encoding='utf-8') as f:
        f.write(json.dumps(by_id, indent=1))
|
|
|
|
|
|
# Script entry point: regenerate the papers index.
if __name__ == '__main__':
    main()
|