first commit

2013-08-09 16:19:44 -06:00 · 2013-08-09 16:19:44 -06:00 · 2d9a2a7c04
--- a/.gitignore
+++ b/.gitignore
@ -0,0 +1,5 @@
+website-archive.mozilla.org/
+www.mozilla.org/
+.DS_Store
+.swp
+venv
--- a/README.txt
+++ b/README.txt
@ -0,0 +1 @@
+wget -e robots=off -w 1 --mirror -p --adjust-extension --no-parent --convert-links --no-host-directories -P www.mozilla.org/devpreview_releasenotes http://www.mozilla.org/projects/devpreview/releasenotes/
--- a/process_files.py
+++ b/process_files.py
@ -0,0 +1,72 @@
+import sys
+import os
+import argparse
+import fnmatch
+from datetime import datetime
+from bs4 import BeautifulSoup
+from bs4 import Comment
+
+# recursive directory pattern search
+def locate(pattern, root):
+    for path, dirs, files in os.walk(os.path.abspath(root)):
+        for filename in fnmatch.filter(files, pattern):
+            yield os.path.join(path, filename)
+
+# verify the argument is a valid dir
+def readable_dir(prospective_dir):
+  if not os.path.isdir(prospective_dir):
+    raise argparse.ArgumentTypeError(
+        "{0} is not a valid path".format(prospective_dir)
+    )
+  if os.access(prospective_dir, os.R_OK):
+    return prospective_dir
+  else:
+    raise argparse.ArgumentTypeError(
+        "{0} is not a readable dir".format(prospective_dir)
+    )
+
+# parse arguments
+parser = argparse.ArgumentParser(description='Process a directory of content.')
+parser.add_argument('directory',
+                    help='directory to process',
+                    type=readable_dir,
+                    action='store')
+args = parser.parse_args()
+
+# define strings to replace, strings to insert
+css = '''
+    body { padding-top: 25px; }
+    #archived { margin: 0; padding: 5px; position: absolute; top: 0; left: 0; height: 25px; width: 100%; z-index: 1000; text-align: center; font: bold 1.143em/1 Arial, Calibri, Helvetica, "Helvetica Neue"; color: #f5f3ed; background-color: #4d5151; }
+    #archived a { color: #fff; }
+    #archived a:hover { color: #fff; text-decoration: underline; }
+'''
+
+text = 'You are viewing information archived from Mozilla.org on %s.' % (
+            datetime.utcnow().strftime("%Y-%m-%d")
+        )
+
+# process every file
+for filename in locate("*.html", args.directory):
+
+    with open(filename, "r") as f:
+        soup = BeautifulSoup(f)
+
+    if (len(soup.select('#archived')) == 0):
+        print 'Processing %s' % (filename)
+
+        # get rid of search form
+        for s in soup.select('#quick-search'):
+            s.replace_with(Comment('search removed'))
+
+        # add styles for notification block
+        style = soup.new_tag('style', type='text/css')
+        style.append(css)
+        soup.head.append(style)
+
+        # add notification block
+        div = soup.new_tag('div', id='archived')
+        div.append(text)
+        soup.body.insert(0, div)
+
+        with open(filename, "w") as f:
+            f.write(str(soup))
--- a/requirements.txt
+++ b/requirements.txt
@ -0,0 +1,2 @@
+beautifulsoup4==4.2.1
+wsgiref==0.1.2
				`@ -0,0 +1 @@`
				`wget -e robots=off -w 1 --mirror -p --adjust-extension --no-parent --convert-links --no-host-directories -P www.mozilla.org/devpreview_releasenotes http://www.mozilla.org/projects/devpreview/releasenotes/`