From c4dd3abb55c0c1602ebd07ad44fbfceec77bfd79 Mon Sep 17 00:00:00 2001 From: Mike Kasianowicz Date: Thu, 25 Sep 2014 23:05:30 -0500 Subject: [PATCH] initial commit --- Argument.cs | 72 ++ CppNet.csproj | 74 ++ CppReader.cs | 153 +++ CppTask.cs | 113 ++ Feature.cs | 49 + FileLexerSource.cs | 67 ++ FixedTokenSource.cs | 57 + InputLexerSource.cs | 68 ++ JavaCompat/JavaCompat.cs | 96 ++ JavaFile.cs | 49 + JavaFileSystem.cs | 40 + JoinReader.cs | 221 ++++ LexerException.cs | 32 + LexerSource.cs | 809 +++++++++++++ Macro.cs | 208 ++++ MacroTokenSource.cs | 197 ++++ Preprocessor.cs | 2248 ++++++++++++++++++++++++++++++++++++ PreprocessorListener.cs | 86 ++ Properties/AssemblyInfo.cs | 30 + Source.cs | 298 +++++ SourceIterator.cs | 98 ++ State.cs | 89 ++ StringLexerSource.cs | 55 + Token.cs | 353 ++++++ TokenSnifferSource.cs | 54 + VirtualFile.cs | 33 + VirtualFileSystem.cs | 30 + Warning.cs | 38 + 28 files changed, 5717 insertions(+) create mode 100644 Argument.cs create mode 100644 CppNet.csproj create mode 100644 CppReader.cs create mode 100644 CppTask.cs create mode 100644 Feature.cs create mode 100644 FileLexerSource.cs create mode 100644 FixedTokenSource.cs create mode 100644 InputLexerSource.cs create mode 100644 JavaCompat/JavaCompat.cs create mode 100644 JavaFile.cs create mode 100644 JavaFileSystem.cs create mode 100644 JoinReader.cs create mode 100644 LexerException.cs create mode 100644 LexerSource.cs create mode 100644 Macro.cs create mode 100644 MacroTokenSource.cs create mode 100644 Preprocessor.cs create mode 100644 PreprocessorListener.cs create mode 100644 Properties/AssemblyInfo.cs create mode 100644 Source.cs create mode 100644 SourceIterator.cs create mode 100644 State.cs create mode 100644 StringLexerSource.cs create mode 100644 Token.cs create mode 100644 TokenSnifferSource.cs create mode 100644 VirtualFile.cs create mode 100644 VirtualFileSystem.cs create mode 100644 Warning.cs diff --git a/Argument.cs b/Argument.cs new file mode 100644 index 
0000000..340ed77 --- /dev/null +++ b/Argument.cs @@ -0,0 +1,72 @@ +/* + * Anarres C Preprocessor + * Copyright (c) 2007-2008, Shevek + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express + * or implied. See the License for the specific language governing + * permissions and limitations under the License. + */ +using System; +using System.Collections.Generic; +using System.Text; + +namespace CppNet { +/** + * A macro argument. + * + * This encapsulates a raw token stream (the list itself) and its cached, preprocessed expansion. + */ +internal class Argument : List { + public const int NO_ARGS = -1; + + private List _expansion; /* filled by expand(); null until then */ + + public Argument() { + this._expansion = null; + } + + public void addToken(Token tok) { /* appends tok to the raw token list */ + Add(tok); + } + + internal void expand(Preprocessor p) { + /* Cache expansion: p.expand(this) is only called while _expansion is still null. 
*/ + if(_expansion == null) { + this._expansion = p.expand(this); + // System.out.println("Expanded arg " + this); + } + } + + public Iterator expansion() /* NOTE(review): dereferences _expansion, which is null unless expand() ran first */ + { + return _expansion.iterator(); + } + + override public String ToString() { + StringBuilder buf = new StringBuilder(); + buf.Append("Argument("); + // buf.Append(super.toString()); + buf.Append("raw=[ "); + for (int i = 0; i < this.Count; i++) + buf.Append(this[i].getText()); + buf.Append(" ];expansion=[ "); + if(_expansion == null) + buf.Append("null"); + else + for(int i = 0; i < _expansion.Count; i++) + buf.Append(_expansion[i].getText()); + buf.Append(" ])"); + return buf.ToString(); + } + +} + +} \ No newline at end of file diff --git a/CppNet.csproj b/CppNet.csproj new file mode 100644 index 0000000..921869a --- /dev/null +++ b/CppNet.csproj @@ -0,0 +1,74 @@ + + + + + Debug + AnyCPU + {C2FD9262-69F8-4B75-9AB1-FF359C9143E9} + Library + Properties + CppNet + CppNet + v4.5 + 512 + + + true + full + false + bin\Debug\ + DEBUG;TRACE + prompt + 4 + + + pdbonly + true + bin\Release\ + TRACE + prompt + 4 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/CppReader.cs b/CppReader.cs new file mode 100644 index 0000000..fbe3545 --- /dev/null +++ b/CppReader.cs @@ -0,0 +1,153 @@ +/* + * Anarres C Preprocessor + * Copyright (c) 2007-2008, Shevek + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express + * or implied. See the License for the specific language governing + * permissions and limitations under the License. 
+ */ + +package org.anarres.cpp; + +import java.io.File; +import java.io.IOException; +import java.io.Reader; + +import static org.anarres.cpp.Token.*; + +/** + * A Reader wrapper around the Preprocessor. + * + * This is a utility class to provide a transparent {@link Reader} + * which preprocesses the input text. NOTE(review): this file is still Java source (package and import declarations, extends Reader) even though the patch adds it as CppReader.cs. + * + * @see Preprocessor + * @see Reader + */ +public class CppReader extends Reader { + + private Preprocessor cpp; + private String token; + private int idx; + + public CppReader(final Reader r) { + cpp = new Preprocessor(new LexerSource(r, true) { + @Override + public String getName() { + return ""; + } + }); + token = ""; + idx = 0; + } + + public CppReader(Preprocessor p) { + cpp = p; + token = ""; + idx = 0; + } + + /** + * Returns the Preprocessor used by this CppReader. + */ + public Preprocessor getPreprocessor() { + return cpp; + } + + /** + * Defines the given name as a macro. + * + * This is a convenience method that forwards to the wrapped Preprocessor. + */ + public void addMacro(String name) + throws LexerException { + cpp.addMacro(name); + } + + /** + * Defines the given name as a macro. + * + * This is a convenience method that forwards to the wrapped Preprocessor. + */ + public void addMacro(String name, String value) + throws LexerException { + cpp.addMacro(name, value); + } + + private boolean refill() /* makes token and idx point at an unread character; returns false once EOF has been seen */ + throws IOException { + try { + assert cpp != null : "cpp is null : was it closed?"; + if (token == null) + return false; + while (idx >= token.length()) { + Token tok = cpp.token(); + switch (tok.getType()) { + case EOF: + token = null; + return false; + case CCOMMENT: + case CPPCOMMENT: + if (!cpp.getFeature(Feature.KEEPCOMMENTS)) { + token = " "; + break; + } /* comments are kept: fall through to default */ + default: + token = tok.getText(); + break; + } + idx = 0; + } + return true; + } + catch (LexerException e) { + /* Never happens. 
+ if (e.getCause() instanceof IOException) + throw (IOException)e.getCause(); + */ + IOException ie = new IOException(String.valueOf(e)); + ie.initCause(e); + throw ie; + } + } + + public int read() + throws IOException { + if (!refill()) + return -1; + return token.charAt(idx++); + } + + /* XXX Very slow and inefficient: every character goes through a separate read() call. */ + public int read(char cbuf[], int off, int len) + throws IOException { + if (token == null) + return -1; + for (int i = 0; i < len; i++) { + int ch = read(); + if (ch == -1) + return i; + cbuf[off + i] = (char)ch; + } + return len; + } + + public void close() + throws IOException { + if (cpp != null) { + cpp.close(); + cpp = null; + } + token = null; + } + +} diff --git a/CppTask.cs b/CppTask.cs new file mode 100644 index 0000000..f86c2d1 --- /dev/null +++ b/CppTask.cs @@ -0,0 +1,113 @@ +/* + * Anarres C Preprocessor + * Copyright (c) 2007-2008, Shevek + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express + * or implied. See the License for the specific language governing + * permissions and limitations under the License. + */ + +package org.anarres.cpp; + +import java.io.File; +import java.io.FileWriter; +import java.io.IOException; + +import org.apache.tools.ant.BuildException; +import org.apache.tools.ant.Task; + +import org.anarres.cpp.LexerException; +import org.anarres.cpp.Preprocessor; +import org.anarres.cpp.Token; + +/** + * An ant task for jcpp. 
+ */ +public class CppTask extends Task { /* NOTE(review): still Java source (extends the Ant Task class) although the patch adds it as CppTask.cs */ + + private static class Macro { /* name and value pair handed to addMacro(Macro) */ + private String name; + private String value; + + public void setName(String name) { + this.name = name; + } + + public String getName() { + return name; + } + + public void setValue(String value) { + this.value = value; + } + + public String getValue() { + return value; + } + } + + private File input; + private File output; + private Preprocessor cpp; + + public CppTask() { + super(); + cpp = new Preprocessor(); + } + + public void setInput(File input) { + this.input = input; + } + + public void setOutput(File output) { + this.output = output; + } + + public void addMacro(Macro macro) { + try { + cpp.addMacro(macro.getName(), macro.getValue()); + } + catch (LexerException e) { + throw new BuildException(e); + } + } + + public void execute() { /* preprocesses input and writes the text of every token to output until EOF; any failure is rethrown as BuildException */ + FileWriter writer = null; + try { + if (input == null) + throw new BuildException("Input not specified"); + if (output == null) + throw new BuildException("Output not specified"); + cpp.addInput(this.input); + writer = new FileWriter(this.output); + for (;;) { + Token tok = cpp.token(); + if (tok != null && tok.getType() == Token.EOF) /* NOTE(review): a null tok skips this break and is then dereferenced by tok.getText() below */ + break; + writer.write(tok.getText()); + } + } + catch (Exception e) { + throw new BuildException(e); + } + finally { + if (writer != null) { + try { + writer.close(); + } + catch (IOException e) { /* a failure to close the writer is ignored */ + } + } + } + } + +} diff --git a/Feature.cs b/Feature.cs new file mode 100644 index 0000000..e66afad --- /dev/null +++ b/Feature.cs @@ -0,0 +1,49 @@ +/* + * Anarres C Preprocessor + * Copyright (c) 2007-2008, Shevek + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express + * or implied. See the License for the specific language governing + * permissions and limitations under the License. + */ + +using System; + +namespace CppNet +{ + /** + * Features of the Preprocessor, which may be enabled or disabled. Every member other than NONE is a distinct single bit, so values can be OR-ed together. + */ + [Flags] + public enum Feature + { + NONE = 0, /* no feature bits set */ + /** Supports ANSI digraphs. */ + DIGRAPHS = 1 << 0, + /** Supports ANSI trigraphs. */ + TRIGRAPHS = 1 << 1, + /** Outputs linemarker tokens. */ + LINEMARKERS = 1 << 2, + /** Reports tokens of type INVALID as errors. */ + CSYNTAX = 1 << 3, + /** Preserves comments in the lexed output. */ + KEEPCOMMENTS = 1 << 4, + /** Preserves comments in the lexed output, even when inactive. */ + KEEPALLCOMMENTS = 1 << 5, + VERBOSE = 1 << 6, + DEBUG = 1 << 7, + + /** Supports lexing of Objective-C. */ + OBJCSYNTAX = 1 << 8, + INCLUDENEXT = 1 << 9 + } + +} \ No newline at end of file diff --git a/FileLexerSource.cs b/FileLexerSource.cs new file mode 100644 index 0000000..9ae9704 --- /dev/null +++ b/FileLexerSource.cs @@ -0,0 +1,67 @@ +/* + * Anarres C Preprocessor + * Copyright (c) 2007-2008, Shevek + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express + * or implied. See the License for the specific language governing + * permissions and limitations under the License. 
+ */ +using System; +using System.IO; + +namespace CppNet { + +/** + * A {@link Source} which lexes a file. + * + * The input is buffered (it is read through a StreamReader over file.OpenRead()). + * + * @see Source + */ +public class FileLexerSource : LexerSource { + // private File file; + private String path; + + /** + * Creates a new Source for lexing the given File. + * + * Preprocessor directives are honoured within the file. The path argument is only stored; it is what getPath(), getName() and ToString() report. + */ + public FileLexerSource(FileInfo file, String path) : base(new StreamReader(file.OpenRead()), true) + { + + // this.file = file; + this.path = path; + } + + public FileLexerSource(FileInfo file) : /* reports file.FullName as the path */ + this(file, file.FullName) { + } + + public FileLexerSource(String path) : /* wraps path in a FileInfo and chains to the overload above */ + this(new FileInfo(path)) { + } + + override internal String getPath() { + return path; + } + + override internal String getName() /* same value as getPath() */ + { + return getPath(); + } + + override public string ToString() { + return "file " + path; + } +} + +} \ No newline at end of file diff --git a/FixedTokenSource.cs b/FixedTokenSource.cs new file mode 100644 index 0000000..3a2c076 --- /dev/null +++ b/FixedTokenSource.cs @@ -0,0 +1,57 @@ +/* + * Anarres C Preprocessor + * Copyright (c) 2007-2008, Shevek + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express + * or implied. See the License for the specific language governing + * permissions and limitations under the License. 
+ */ + +using System; +using System.Text; +using System.Collections.Generic; + +namespace CppNet { + +internal class FixedTokenSource : Source { /* a Source that replays a fixed token list in order and then returns the shared EOF token on every further call */ + private static readonly Token EOF = + new Token(Token.EOF, ""); + + private List tokens; + private int idx; /* index of the next token to hand out */ + + internal FixedTokenSource(params Token[] tokens) { + this.tokens = new List(tokens); + this.idx = 0; + } + + internal FixedTokenSource(List tokens) { /* stores the given list itself, whereas the params overload builds a new list */ + this.tokens = tokens; + this.idx = 0; + } + + public override Token token() { + if (idx >= tokens.Count) + return EOF; + return tokens[idx++]; + } + + override public String ToString() { + StringBuilder buf = new StringBuilder(); + buf.Append("constant token stream " + tokens); /* NOTE(review): this concatenates the list object; a System.Collections.Generic list prints its type name here, not its elements - confirm that is intended */ + Source parent = getParent(); + if (parent != null) + buf.Append(" in ").Append(parent); + return buf.ToString(); + } +} + +} \ No newline at end of file diff --git a/InputLexerSource.cs b/InputLexerSource.cs new file mode 100644 index 0000000..0931dc4 --- /dev/null +++ b/InputLexerSource.cs @@ -0,0 +1,68 @@ +/* + * Anarres C Preprocessor + * Copyright (c) 2007-2008, Shevek + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express + * or implied. See the License for the specific language governing + * permissions and limitations under the License. + */ + +package org.anarres.cpp; + +import java.io.BufferedReader; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.io.IOException; + +import java.util.List; +import java.util.Iterator; + +import static org.anarres.cpp.Token.*; + +/** + * A {@link Source} which lexes an InputStream. + * + * The input is buffered. 
+ * + * @see Source + */ +public class InputLexerSource extends LexerSource { + /** + * Creates a new Source for lexing the given InputStream, which is wrapped in an InputStreamReader and a BufferedReader. + * + * Preprocessor directives are honoured within the file. + */ + public InputLexerSource(InputStream input) + throws IOException { + super( + new BufferedReader( + new InputStreamReader( + input + ) + ), + true + ); + } + + @Override + /* pp */ String getPath() { + return ""; + } + + @Override + /* pp */ String getName() { + return "standard input"; + } + + public String toString() { + return getPath(); + } +} diff --git a/JavaCompat/JavaCompat.cs b/JavaCompat/JavaCompat.cs new file mode 100644 index 0000000..678b7e4 --- /dev/null +++ b/JavaCompat/JavaCompat.cs @@ -0,0 +1,96 @@ +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Threading.Tasks; + +using boolean = System.Boolean; +//using Set = System.Collections.Generic.HashSet; +//using ArrayList = System.Collections.Generic.List; +//using Map = System.Collections.Generic.Dictionary; + +namespace CppNet +{ + static class JavaCompat /* extension methods that give StringBuilder, string, List and object the Java-style member names append, length, charAt, get, iterator and toString */ + { + public static StringBuilder append(this StringBuilder bld, object value) + { + return bld.Append(value); + } + + public static int length(this string str) + { + return str.Length; + } + + public static char charAt(this string str, int i) + { + return str[i]; + } + + public static T get(this List list, int i) + { + return list[i]; + } + + public static Iterator iterator(this List list) + { + return new ListIterator(list); + } + + public static string toString(this object o) + { + return o.ToString(); + } + } + + class ListIterator : Iterator /* forward-only Iterator over a List; _index is the position of the element the next call to next() returns */ + { + List _list; + int _index; + + public ListIterator(List list) + { + _list = list; + } + + public boolean hasNext() + { + return _index < _list.Count; + } + + public T next() + { + return _list[_index++]; + } + + public void remove() + { + throw new NotImplementedException(); /* removal is not implemented for this iterator */ + } + } + + public interface Closeable + { + void close(); + } + + public 
interface Iterable + { + Iterator iterator(); + } + + public interface Iterator + { + boolean hasNext(); + T next(); + void remove(); + } + + public class IllegalStateException : Exception + { + public IllegalStateException(Exception ex) : base("Illegal State", ex) { } + } + + +} diff --git a/JavaFile.cs b/JavaFile.cs new file mode 100644 index 0000000..72c56e3 --- /dev/null +++ b/JavaFile.cs @@ -0,0 +1,49 @@ +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Threading.Tasks; +using System.IO; + +namespace CppNet +{ + public class JavaFile : VirtualFile /* VirtualFile backed by a local path; the constructor normalises the path with Path.GetFullPath */ + { + string _path; + + public JavaFile(string path) + { + _path = Path.GetFullPath(path); + } + + public bool isFile() + { + return File.Exists(_path) && !File.GetAttributes(_path).HasFlag(FileAttributes.Directory); + } + + public string getPath() + { + return _path; + } + + public string getName() + { + return Path.GetFileName(_path); + } + + public VirtualFile getParentFile() /* NOTE(review): Path.GetDirectoryName can return null (for example for a root directory) and the constructor passes it straight to Path.GetFullPath - confirm callers never reach that case */ + { + return new JavaFile(Path.GetDirectoryName(_path)); + } + + public VirtualFile getChildFile(string name) + { + return new JavaFile(Path.Combine(_path, name)); + } + + public Source getSource() /* opens a new FileLexerSource on this path each time it is called */ + { + return new FileLexerSource(_path); + } + } +} diff --git a/JavaFileSystem.cs b/JavaFileSystem.cs new file mode 100644 index 0000000..006683a --- /dev/null +++ b/JavaFileSystem.cs @@ -0,0 +1,40 @@ +/* + * Anarres C Preprocessor + * Copyright (c) 2007-2008, Shevek + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express + * or implied. 
See the License for the specific language governing + * permissions and limitations under the License. + */ +using System; +using System.IO; + +namespace CppNet +{ + + /** + * A virtual filesystem implementation using java.io. NOTE(review): stale wording - this version returns JavaFile objects and uses System.IO.Path. + */ + public class JavaFileSystem : VirtualFileSystem + { + + + public VirtualFile getFile(string path) + { + return new JavaFile(path); + } + + public VirtualFile getFile(string dir, string name) /* joins dir and name with Path.Combine before wrapping the result */ + { + return new JavaFile(Path.Combine(dir, name)); + } + } +} \ No newline at end of file diff --git a/JoinReader.cs b/JoinReader.cs new file mode 100644 index 0000000..5efb987 --- /dev/null +++ b/JoinReader.cs @@ -0,0 +1,221 @@ +/* + * Anarres C Preprocessor + * Copyright (c) 2007-2008, Shevek + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express + * or implied. See the License for the specific language governing + * permissions and limitations under the License. 
+ */ +using System; + +using System.IO; +namespace CppNet +{ + + internal class JoinReader /* extends Reader; wraps a TextReader, removes backslash-newline line continuations and optionally translates trigraphs */ { + private TextReader _in; + + private PreprocessorListener listener; + private LexerSource source; + private bool trigraphs; + private bool warnings; + + private int newlines; /* line continuations swallowed so far and not yet re-emitted */ + private bool flushnl; + private int[] unget; + private int uptr; + + public JoinReader(TextReader ain, bool trigraphs) + { + this._in = ain; + this.trigraphs = trigraphs; + this.newlines = 0; + this.flushnl = false; + this.unget = new int[2]; /* pushback stack with room for two characters */ + this.uptr = 0; + } + + public JoinReader(TextReader ain) : + this(ain, false) + { + } + + public void setTrigraphs(bool enable, bool warnings) + { + this.trigraphs = enable; + this.warnings = warnings; + } + + internal void init(Preprocessor pp, LexerSource s) + { + this.listener = pp.getListener(); + this.source = s; + setTrigraphs(pp.getFeature(Feature.TRIGRAPHS), + pp.getWarning(Warning.TRIGRAPHS)); + } + + private int __read() /* pops the pushback stack first, then falls back to the underlying reader */ + { + if(uptr > 0) + return unget[--uptr]; + return _in.Read(); + } + + private void _unread(int c) /* NOTE(review): the assertion runs after the array write, so a third pushback would fail on the index before the assert can report it */ + { + if(c != -1) + unget[uptr++] = c; + System.Diagnostics.Debug.Assert(uptr <= unget.Length, + "JoinReader ungets too many characters"); + } + + protected void warning(String msg) /* with no LexerSource attached the warning is thrown as a LexerException instead */ + { + if(source != null) + source.warning(msg); + else + throw new LexerException(msg); + } + + private char trigraph(char raw, char repl) /* returns repl when trigraphs are enabled; otherwise pushes back raw and one question mark and returns a question mark */ + { + if(trigraphs) { + if(warnings) + warning("trigraph ??" + raw + " converted to " + repl); + return repl; + } else { + if(warnings) + warning("trigraph ??" + raw + " ignored"); + _unread(raw); + _unread('?'); + return '?'; + } + } + + private int _read() + { + int c = __read(); + if(c == '?' 
&& (trigraphs || warnings)) { + int d = __read(); + if(d == '?') { + int e = __read(); + switch(e) { + case '(': return trigraph('(', '['); + case ')': return trigraph(')', ']'); + case '<': return trigraph('<', '{'); + case '>': return trigraph('>', '}'); + case '=': return trigraph('=', '#'); + case '/': return trigraph('/', '\\'); + case '\'': return trigraph('\'', '^'); + case '!': return trigraph('!', '|'); + case '-': return trigraph('-', '~'); + } + _unread(e); + } + _unread(d); + } + return c; + } + + public int read() /* flushnl is set once a line terminator has been returned; the following calls then emit one \n per swallowed continuation before reading on */ + { + if(flushnl) { + if(newlines > 0) { + newlines--; + return '\n'; + } + flushnl = false; + } + + for(; ; ) { + int c = _read(); + switch(c) { + case '\\': + int d = _read(); + switch(d) { + case '\n': + newlines++; + continue; + case '\r': + newlines++; + int e = _read(); + if(e != '\n') + _unread(e); + continue; + default: + _unread(d); + return c; + } + case '\r': + case '\n': + case '\u2028': + case '\u2029': + case '\u000B': + case '\u000C': + case '\u0085': + flushnl = true; + return c; + case -1: /* at end of input the owed newlines are still emitted first */ + if(newlines > 0) { + newlines--; + return '\n'; + } + goto default; + default: + return c; + } + } + } + + public int read(char[] cbuf, int off, int len) /* fills cbuf one read() call at a time; returns the count stored, which is below len only when end of input was reached */ + { + for(int i = 0; i < len; i++) { + int ch = read(); + if(ch == -1) + return i; + cbuf[off + i] = (char)ch; + } + return len; + } + + public void close() + { + if(_in == null) { + return; + } + _in.Close(); + } + + + + override public String ToString() + { + return "JoinReader(nl=" + newlines + ")"; + } + + /* + public static void main(String[] args) throws IOException { + FileReader f = new FileReader(new File(args[0])); + BufferedReader b = new BufferedReader(f); + JoinReader r = new JoinReader(b); + BufferedWriter w = new BufferedWriter( + new java.io.OutputStreamWriter(System.out) + ); + int c; + while ((c = r.read()) != -1) { + w.write((char)c); + } + w.close(); + } + */ + + } +} \ No newline at end of file diff --git a/LexerException.cs b/LexerException.cs new file mode 100644 
index 0000000..c997bcc --- /dev/null +++ b/LexerException.cs @@ -0,0 +1,32 @@ +/* + * Anarres C Preprocessor + * Copyright (c) 2007-2008, Shevek + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express + * or implied. See the License for the specific language governing + * permissions and limitations under the License. + */ + +using System; + +namespace CppNet { +/** + * A preprocessor exception, built either from a message or by wrapping a causing exception whose Message is reused. + * + * Note to users: I don't really like the name of this class. S. + */ +public class LexerException : Exception { + public LexerException(String msg) : base(msg) { } + + public LexerException(Exception cause) : base(cause.Message, cause) {} /* NOTE(review): reads cause.Message, so cause must not be null */ +} + +} \ No newline at end of file diff --git a/LexerSource.cs b/LexerSource.cs new file mode 100644 index 0000000..b55e26e --- /dev/null +++ b/LexerSource.cs @@ -0,0 +1,809 @@ +/* + * Anarres C Preprocessor + * Copyright (c) 2007-2008, Shevek + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express + * or implied. See the License for the specific language governing + * permissions and limitations under the License. + */ +using System; +using System.IO; +using System.Text; + +namespace CppNet { + +/** Does not handle digraphs. 
*/ +public class LexerSource : Source { + static bool isJavaIdentifierStart(int c) { + return char.IsLetter((char)c) || c == '$' || c == '_'; + } + static bool isJavaIdentifierPart(int c) + { + return char.IsLetter((char)c) || c == '$' || c == '_' || char.IsDigit((char)c); + } + static bool isIdentifierIgnorable(int c) { + return c >= 0 && c <= 8 || + c >= 0xE && c <= 0x1B || + c >= 0x7F && c <= 0x9F || + char.GetUnicodeCategory((char)c) == System.Globalization.UnicodeCategory.Format; + } + + static int digit(char ch, int radix) + { + try { + return Convert.ToInt32(ch.ToString(), radix); + } catch { + return -1; + } + } + private static readonly bool DEBUG = false; + + private JoinReader reader; + private bool ppvalid; + private bool bol; + private bool include; + + private bool digraphs; + + /* Unread. */ + private int u0, u1; + private int ucount; + + private int line; + private int column; + private int lastcolumn; + private bool cr; + + /* ppvalid is: + * false in StringLexerSource, + * true in FileLexerSource */ + public LexerSource(TextReader r, bool ppvalid) { + this.reader = new JoinReader(r); + this.ppvalid = ppvalid; + this.bol = true; + this.include = false; + + this.digraphs = true; + + this.ucount = 0; + + this.line = 1; + this.column = 0; + this.lastcolumn = -1; + this.cr = false; + } + + override internal void init(Preprocessor pp) { + base.init(pp); + this.digraphs = pp.getFeature(Feature.DIGRAPHS); + this.reader.init(pp, this); + } + + + override public int getLine() { + return line; + } + + + override public int getColumn() { + return column; + } + + + override internal bool isNumbered() { + return true; + } + +/* Error handling. */ + + private void _error(String msg, bool error) { + int _l = line; + int _c = column; + if (_c == 0) { + _c = lastcolumn; + _l--; + } + else { + _c--; + } + if (error) + base.error(_l, _c, msg); + else + base.warning(_l, _c, msg); + } + + /* Allow JoinReader to call this. 
*/ + internal void error(String msg) + { + _error(msg, true); + } + + /* Allow JoinReader to call this. */ + internal void warning(String msg) { + _error(msg, false); + } + +/* A flag for string handling. */ + + internal void setInclude(bool b) + { + this.include = b; + } + +/* + private bool _isLineSeparator(int c) { + return Character.getType(c) == Character.LINE_SEPARATOR + || c == -1; + } +*/ + + /* XXX Move to JoinReader and canonicalise newlines. */ + private static bool isLineSeparator(int c) { + switch ((char)c) { + case '\r': + case '\n': + case '\u2028': + case '\u2029': + case '\u000B': + case '\u000C': + case '\u0085': + return true; + default: + return (c == -1); + } + } + + + private int read() { + System.Diagnostics.Debug.Assert(ucount <= 2, "Illegal ucount: " + ucount); + switch (ucount) { + case 2: + ucount = 1; + return u1; + case 1: + ucount = 0; + return u0; + } + + if (reader == null) + return -1; + + int c = reader.read(); + switch (c) { + case '\r': + cr = true; + line++; + lastcolumn = column; + column = 0; + break; + case '\n': + if (cr) { + cr = false; + break; + } + goto case '\u2028'; + /* fallthrough */ + case '\u2028': + case '\u2029': + case '\u000B': + case '\u000C': + case '\u0085': + cr = false; + line++; + lastcolumn = column; + column = 0; + break; + default: + cr = false; + column++; + break; + } + +/* + if (isLineSeparator(c)) { + line++; + lastcolumn = column; + column = 0; + } + else { + column++; + } +*/ + + return c; + } + + /* You can unget AT MOST one newline. */ + private void unread(int c) { + /* XXX Must unread newlines. */ + if (c != -1) { + if (isLineSeparator(c)) { + line--; + column = lastcolumn; + cr = false; + } + else { + column--; + } + switch (ucount) { + case 0: + u0 = c; + ucount = 1; + break; + case 1: + u1 = c; + ucount = 2; + break; + default: + throw new InvalidOperationException( + "Cannot unget another character!" 
+ ); + } + // reader.unread(c); + } + } + + /* Consumes the rest of the current line into an invalid. */ + private Token invalid(StringBuilder text, String reason) { + int d = read(); + while (!isLineSeparator(d)) { + text.Append((char)d); + d = read(); + } + unread(d); + return new Token(Token.INVALID, text.ToString(), reason); + } + + private Token ccomment() { + StringBuilder text = new StringBuilder("/*"); + int d; + do { + do { + d = read(); + text.Append((char)d); + } while (d != '*'); + do { + d = read(); + text.Append((char)d); + } while (d == '*'); + } while (d != '/'); + return new Token(Token.CCOMMENT, text.ToString()); + } + + private Token cppcomment() { + StringBuilder text = new StringBuilder("//"); + int d = read(); + while (!isLineSeparator(d)) { + text.Append((char)d); + d = read(); + } + unread(d); + return new Token(Token.CPPCOMMENT, text.ToString()); + } + + private int escape(StringBuilder text) { + int d = read(); + switch (d) { + case 'a': text.Append('a'); return 0x07; + case 'b': text.Append('b'); return '\b'; + case 'f': text.Append('f'); return '\f'; + case 'n': text.Append('n'); return '\n'; + case 'r': text.Append('r'); return '\r'; + case 't': text.Append('t'); return '\t'; + case 'v': text.Append('v'); return 0x0b; + case '\\': text.Append('\\'); return '\\'; + + case '0': case '1': case '2': case '3': + case '4': case '5': case '6': case '7': + int len = 0; + int val = 0; + do { + val = (val << 3) + digit((char)d, 8); + text.Append((char)d); + d = read(); + } while(++len < 3 && digit((char)d, 8) != -1); + unread(d); + return val; + + case 'x': + len = 0; + val = 0; + do { + val = (val << 4) + digit((char)d, 16); + text.Append((char)d); + d = read(); + } while(++len < 2 && digit((char)d, 16) != -1); + unread(d); + return val; + + /* Exclude two cases from the warning. 
*/ + case '"': text.Append('"'); return '"'; + case '\'': text.Append('\''); return '\''; + + default: + warning("Unnecessary escape character " + (char)d); + text.Append((char)d); + return d; + } + } + + private Token character() { + StringBuilder text = new StringBuilder("'"); + int d = read(); + if (d == '\\') { + text.Append('\\'); + d = escape(text); + } + else if (isLineSeparator(d)) { + unread(d); + return new Token(Token.INVALID, text.ToString(), + "Unterminated character literal"); + } + else if (d == '\'') { + text.Append('\''); + return new Token(Token.INVALID, text.ToString(), + "Empty character literal"); + } + else if (char.IsControl((char)d)) { + text.Append('?'); + return invalid(text, "Illegal unicode character literal"); + } + else { + text.Append((char)d); + } + + int e = read(); + if (e != '\'') { + // error("Illegal character constant"); + /* We consume up to the next ' or the rest of the line. */ + for (;;) { + if (isLineSeparator(e)) { + unread(e); + break; + } + text.Append((char)e); + if (e == '\'') + break; + e = read(); + } + return new Token(Token.INVALID, text.ToString(), + "Illegal character constant " + text); + } + text.Append('\''); + /* XXX It this a bad cast? 
*/ + return new Token(Token.CHARACTER, + text.ToString(), (char)d); + } + + private Token String(char open, char close) { + StringBuilder text = new StringBuilder(); + text.Append(open); + + StringBuilder buf = new StringBuilder(); + + for (;;) { + int c = read(); + if (c == close) { + break; + } + else if (c == '\\') { + text.Append('\\'); + if (!include) { + char d = (char)escape(text); + buf.Append(d); + } + } + else if (c == -1) { + unread(c); + // error("End of file in string literal after " + buf); + return new Token(Token.INVALID, text.ToString(), + "End of file in string literal after " + buf); + } + else if (isLineSeparator(c)) { + unread(c); + // error("Unterminated string literal after " + buf); + return new Token(Token.INVALID, text.ToString(), + "Unterminated string literal after " + buf); + } + else { + text.Append((char)c); + buf.Append((char)c); + } + } + text.Append(close); + return new Token(close == '>' ? Token.HEADER : Token.STRING, + text.ToString(), buf.ToString()); + } + + private Token _number(StringBuilder text, long val, int d) { + int bits = 0; + for (;;) { + /* XXX Error check duplicate bits. */ + if (d == 'U' || d == 'u') { + bits |= 1; + text.Append((char)d); + d = read(); + } + else if (d == 'L' || d == 'l') { + if ((bits & 4) != 0) + /* XXX warn */ ; + bits |= 2; + text.Append((char)d); + d = read(); + } + else if (d == 'I' || d == 'i') { + if ((bits & 2) != 0) + /* XXX warn */ ; + bits |= 4; + text.Append((char)d); + d = read(); + } + else if (char.IsLetter((char)d)) { + unread(d); + return new Token(Token.INVALID, text.ToString(), + "Invalid suffix \"" + (char)d + + "\" on numeric constant"); + } + else { + unread(d); + return new Token(Token.INTEGER, + text.ToString(), (long)val); + } + } + } + + /* We already chewed a zero, so empty is fine. 
*/ + private Token number_octal() { + StringBuilder text = new StringBuilder("0"); + int d = read(); + long val = 0; + while (digit((char)d, 8) != -1) { + val = (val << 3) + digit((char)d, 8); + text.Append((char)d); + d = read(); + } + return _number(text, val, d); + } + + /* We do not know whether know the first digit is valid. */ + private Token number_hex(char x) { + StringBuilder text = new StringBuilder("0"); + text.Append(x); + int d = read(); + if (digit((char)d, 16) == -1) { + unread(d); + // error("Illegal hexadecimal constant " + (char)d); + return new Token(Token.INVALID, text.ToString(), + "Illegal hexadecimal digit " + (char)d + + " after "+ text); + } + long val = 0; + do { + val = (val << 4) + digit((char)d, 16); + text.Append((char)d); + d = read(); + } while (digit((char)d, 16) != -1); + return _number(text, val, d); + } + + /* We know we have at least one valid digit, but empty is not + * fine. */ + /* XXX This needs a complete rewrite. */ + private Token number_decimal(int c) { + StringBuilder text = new StringBuilder((char)c); + int d = c; + long val = 0; + do { + val = val * 10 + digit((char)d, 10); + text.Append((char)d); + d = read(); + } while (digit((char)d, 10) != -1); + return _number(text, val, d); + } + + private Token identifier(int c) { + StringBuilder text = new StringBuilder(); + int d; + text.Append((char)c); + for (;;) { + d = read(); + if (isIdentifierIgnorable(d)) + ; + else if (isJavaIdentifierPart(d)) + text.Append((char)d); + else + break; + } + unread(d); + return new Token(Token.IDENTIFIER, text.ToString()); + } + + private Token whitespace(int c) { + StringBuilder text = new StringBuilder(); + int d; + text.Append((char)c); + for (;;) { + d = read(); + if (ppvalid && isLineSeparator(d)) /* XXX Ugly. */ + break; + if (char.IsWhiteSpace((char)d)) + text.Append((char)d); + else + break; + } + unread(d); + return new Token(Token.WHITESPACE, text.ToString()); + } + + /* No token processed by cond() contains a newline. 
*/ + private Token cond(char c, int yes, int no) { + int d = read(); + if (c == d) + return new Token(yes); + unread(d); + return new Token(no); + } + + public override Token token() { + Token tok = null; + + int _l = line; + int _c = column; + + int c = read(); + int d; + + switch (c) { + case '\n': + if (ppvalid) { + bol = true; + if (include) { + tok = new Token(Token.NL, _l, _c, "\n"); + } + else { + int nls = 0; + do { + nls++; + d = read(); + } while (d == '\n'); + unread(d); + char[] text = new char[nls]; + for (int i = 0; i < text.Length; i++) + text[i] = '\n'; + // Skip the bol = false below. + tok = new Token(Token.NL, _l, _c, new String(text)); + } + if (DEBUG) + System.Console.Error.WriteLine("lx: Returning NL: " + tok); + return tok; + } + /* Let it be handled as whitespace. */ + break; + + case '!': + tok = cond('=', Token.NE, '!'); + break; + + case '#': + if (bol) + tok = new Token(Token.HASH); + else + tok = cond('#', Token.PASTE, '#'); + break; + + case '+': + d = read(); + if (d == '+') + tok = new Token(Token.INC); + else if (d == '=') + tok = new Token(Token.PLUS_EQ); + else + unread(d); + break; + case '-': + d = read(); + if (d == '-') + tok = new Token(Token.DEC); + else if (d == '=') + tok = new Token(Token.SUB_EQ); + else if (d == '>') + tok = new Token(Token.ARROW); + else + unread(d); + break; + + case '*': + tok = cond('=', Token.MULT_EQ, '*'); + break; + case '/': + d = read(); + if (d == '*') + tok = ccomment(); + else if (d == '/') + tok = cppcomment(); + else if (d == '=') + tok = new Token(Token.DIV_EQ); + else + unread(d); + break; + + case '%': + d = read(); + if (d == '=') + tok = new Token(Token.MOD_EQ); + else if (digraphs && d == '>') + tok = new Token('}'); // digraph + else if (digraphs && d == ':') { + bool paste = true; + d = read(); + if (d != '%') { + unread(d); + tok = new Token('#'); // digraph + paste = false; + } + d = read(); + if (d != ':') { + unread(d); // Unread 2 chars here. 
+ unread('%'); + tok = new Token('#'); // digraph + paste = false; + } + if(paste) { + tok = new Token(Token.PASTE); // digraph + } + } + else + unread(d); + break; + + case ':': + /* :: */ + d = read(); + if (digraphs && d == '>') + tok = new Token(']'); // digraph + else + unread(d); + break; + + case '<': + if (include) { + tok = String('<', '>'); + } + else { + d = read(); + if (d == '=') + tok = new Token(Token.LE); + else if (d == '<') + tok = cond('=', Token.LSH_EQ, Token.LSH); + else if (digraphs && d == ':') + tok = new Token('['); // digraph + else if (digraphs && d == '%') + tok = new Token('{'); // digraph + else + unread(d); + } + break; + + case '=': + tok = cond('=', Token.EQ, '='); + break; + + case '>': + d = read(); + if (d == '=') + tok = new Token(Token.GE); + else if (d == '>') + tok = cond('=', Token.RSH_EQ, Token.RSH); + else + unread(d); + break; + + case '^': + tok = cond('=', Token.XOR_EQ, '^'); + break; + + case '|': + d = read(); + if (d == '=') + tok = new Token(Token.OR_EQ); + else if (d == '|') + tok = cond('=', Token.LOR_EQ, Token.LOR); + else + unread(d); + break; + case '&': + d = read(); + if (d == '&') + tok = cond('=', Token.LAND_EQ, Token.LAND); + else if (d == '=') + tok = new Token(Token.AND_EQ); + else + unread(d); + break; + + case '.': + d = read(); + if (d == '.') + tok = cond('.', Token.ELLIPSIS, Token.RANGE); + else + unread(d); + /* XXX decimal fraction */ + break; + + case '0': + /* octal or hex */ + d = read(); + if (d == 'x' || d == 'X') + tok = number_hex((char)d); + else { + unread(d); + tok = number_octal(); + } + break; + + case '\'': + tok = character(); + break; + + case '"': + tok = String('"', '"'); + break; + + case -1: + close(); + tok = new Token(Token.EOF, _l, _c, ""); + break; + } + + if (tok == null) { + if (char.IsWhiteSpace((char)c)) { + tok = whitespace(c); + } + else if (char.IsDigit((char)c)) { + tok = number_decimal(c); + } + else if (isJavaIdentifierStart(c)) { + tok = identifier(c); + } + else 
{ + tok = new Token(c); + } + } + + if (bol) { + switch (tok.getType()) { + case Token.WHITESPACE: + case Token.CCOMMENT: + break; + default: + bol = false; + break; + } + } + + tok.setLocation(_l, _c); + if (DEBUG) + System.Console.WriteLine("lx: Returning " + tok); + // (new Exception("here")).printStackTrace(System.out); + return tok; + } + + public override void close() + { + if(reader != null) { + reader.close(); + reader = null; + } + base.close(); + } +} + +} \ No newline at end of file diff --git a/Macro.cs b/Macro.cs new file mode 100644 index 0000000..f4b0765 --- /dev/null +++ b/Macro.cs @@ -0,0 +1,208 @@ +/* + * Anarres C Preprocessor + * Copyright (c) 2007-2008, Shevek + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express + * or implied. See the License for the specific language governing + * permissions and limitations under the License. + */ +using System; +using System.Collections.Generic; +using System.Text; + +namespace CppNet +{ + /** + * A macro object. + * + * This encapsulates a name, an argument count, and a token stream + * for replacement. The replacement token stream may contain the + * extra tokens {@link Token#M_ARG} and {@link Token#M_STRING}. + */ + public class Macro + { + private Source source; + private String name; + /* It's an explicit decision to keep these around here. We don't + * need to; the argument token type is M_ARG and the value + * is the index. The strings themselves are only used in + * stringification of the macro, for debugging. 
*/ + private List args; + private bool variadic; + private List tokens; + + public Macro(Source source, String name) + { + this.source = source; + this.name = name; + this.args = null; + this.variadic = false; + this.tokens = new List(); + } + + public Macro(String name) + : this(null, name) + { + } + + /** + * Sets the Source from which this macro was parsed. + */ + public void setSource(Source s) + { + this.source = s; + } + + /** + * Returns the Source from which this macro was parsed. + * + * This method may return null if the macro was not parsed + * from a regular file. + */ + public Source getSource() + { + return source; + } + + /** + * Returns the name of this macro. + */ + public String getName() + { + return name; + } + + /** + * Sets the arguments to this macro. + */ + public void setArgs(List args) + { + this.args = args; + } + + /** + * Returns true if this is a function-like macro. + */ + public bool isFunctionLike() + { + return args != null; + } + + /** + * Returns the number of arguments to this macro. + */ + public int getArgs() + { + return args.Count; + } + + /** + * Sets the variadic flag on this Macro. + */ + public void setVariadic(bool b) + { + this.variadic = b; + } + + /** + * Returns true if this is a variadic function-like macro. + */ + public bool isVariadic() + { + return variadic; + } + + /** + * Adds a token to the expansion of this macro. + */ + public void addToken(Token tok) + { + this.tokens.Add(tok); + } + + /** + * Adds a "paste" operator to the expansion of this macro. + * + * A paste operator causes the next token added to be pasted + * to the previous token when the macro is expanded. + * It is an error for a macro to end with a paste token. 
+ */ + public void addPaste(Token tok) + { + /* + * Given: tok0 ## tok1 + * We generate: M_PASTE, tok0, tok1 + * This extends as per a stack language: + * tok0 ## tok1 ## tok2 -> + * M_PASTE, tok0, M_PASTE, tok1, tok2 + */ + this.tokens.Insert(tokens.Count - 1, tok); + } + + internal List getTokens() + { + return tokens; + } + + /* Paste tokens are inserted before the first of the two pasted + * tokens, so it's a kind of bytecode notation. This method + * swaps them around again. We know that there will never be two + * sequential paste tokens, so a bool is sufficient. */ + public String getText() { + StringBuilder buf = new StringBuilder(); + bool paste = false; + for (int i = 0; i < tokens.Count; i++) { + Token tok = tokens[i]; + if (tok.getType() == Token.M_PASTE) { + System.Diagnostics.Debug.Assert(paste == false, "Two sequential pastes."); + paste = true; + continue; + } + else { + buf.Append(tok.getText()); + } + if (paste) { + buf.Append(" #" + "# "); + paste = false; + } + // buf.Append(tokens.get(i)); + } + return buf.ToString(); + } + + override public String ToString() + { + StringBuilder buf = new StringBuilder(name); + if(args != null) { + buf.Append('('); + bool first = true; + foreach(String str in args) { + if(!first) { + buf.Append(", "); + } else { + first = false; + } + buf.Append(str); + } + if(isVariadic()) { + buf.Append("..."); + } + + buf.Append(')'); + } + if(tokens.Count != 0) { + buf.Append(" => ").Append(getText()); + } + return buf.ToString(); + } + + } +} diff --git a/MacroTokenSource.cs b/MacroTokenSource.cs new file mode 100644 index 0000000..dc77eff --- /dev/null +++ b/MacroTokenSource.cs @@ -0,0 +1,197 @@ +/* + * Anarres C Preprocessor + * Copyright (c) 2007-2008, Shevek + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express + * or implied. See the License for the specific language governing + * permissions and limitations under the License. + */ +using System; +using System.Text; +using System.Collections.Generic; +using boolean = System.Boolean; +using Debug = System.Diagnostics.Debug; + +namespace CppNet { + +/* This source should always be active, since we don't expand macros + * in any inactive context. */ +internal class MacroTokenSource : Source { + private Macro macro; + private Iterator tokens; /* Pointer into the macro. */ + private List args; /* { unexpanded, expanded } */ + private Iterator arg; /* "current expansion" */ + + internal MacroTokenSource(Macro m, List args) { + this.macro = m; + this.tokens = m.getTokens().iterator(); + this.args = args; + this.arg = null; + } + + override internal boolean isExpanding(Macro m) { + /* When we are expanding an arg, 'this' macro is not + * being expanded, and thus we may re-expand it. */ + if (/* XXX this.arg == null && */ this.macro == m) + return true; + return base.isExpanding(m); + } + + /* XXX Called from Preprocessor [ugly]. 
*/ + internal static void escape(StringBuilder buf, string cs) { + for (int i = 0; i < cs.length(); i++) { + char c = cs.charAt(i); + switch (c) { + case '\\': + buf.append("\\\\"); + break; + case '"': + buf.append("\\\""); + break; + case '\n': + buf.append("\\n"); + break; + case '\r': + buf.append("\\r"); + break; + default: + buf.append(c); + break; + } + } + } + + private void concat(StringBuilder buf, Argument arg) { + Iterator it = arg.iterator(); + while (it.hasNext()) { + Token tok = it.next(); + buf.append(tok.getText()); + } + } + + private Token stringify(Token pos, Argument arg) { + StringBuilder buf = new StringBuilder(); + concat(buf, arg); + // System.out.println("Concat: " + arg + " -> " + buf); + StringBuilder str = new StringBuilder("\""); + escape(str, buf.ToString()); + str.append("\""); + // System.out.println("Escape: " + buf + " -> " + str); + return new Token(Token.STRING, + pos.getLine(), pos.getColumn(), + str.toString(), buf.toString()); + } + + + /* At this point, we have consumed the first M_PASTE. + * @see Macro#addPaste(Token) */ + private void paste(Token ptok) { + StringBuilder buf = new StringBuilder(); + Token err = null; + /* We know here that arg is null or expired, + * since we cannot paste an expanded arg. */ + + int count = 2; + for (int i = 0; i < count; i++) { + if (!tokens.hasNext()) { + /* XXX This one really should throw. */ + error(ptok.getLine(), ptok.getColumn(), + "Paste at end of expansion"); + buf.append(' ').append(ptok.getText()); + break; + } + Token tok = tokens.next(); + // System.out.println("Paste " + tok); + switch (tok.getType()) { + case Token.M_PASTE: + /* One extra to paste, plus one because the + * paste token didn't count. */ + count += 2; + ptok = tok; + break; + case Token.M_ARG: + int idx = (int)tok.getValue(); + concat(buf, args.get(idx)); + break; + /* XXX Test this. 
*/ + case Token.CCOMMENT: + case Token.CPPCOMMENT: + break; + default: + buf.append(tok.getText()); + break; + } + } + + /* Push and re-lex. */ + /* + StringBuilder src = new StringBuilder(); + escape(src, buf); + StringLexerSource sl = new StringLexerSource(src.toString()); + */ + StringLexerSource sl = new StringLexerSource(buf.toString()); + + /* XXX Check that concatenation produces a valid token. */ + + arg = new SourceIterator(sl); + } + + override public Token token() { + for (;;) { + /* Deal with lexed tokens first. */ + + if (arg != null) { + if (arg.hasNext()) { + Token tok2 = arg.next(); + /* XXX PASTE -> INVALID. */ + Debug.Assert(tok2.getType() != Token.M_PASTE, + "Unexpected paste token"); + return tok2; + } + arg = null; + } + + if (!tokens.hasNext()) + return new Token(Token.EOF, -1, -1, ""); /* End of macro. */ + Token tok = tokens.next(); + int idx; + switch (tok.getType()) { + case Token.M_STRING: + /* Use the nonexpanded arg. */ + idx = (int)tok.getValue(); + return stringify(tok, args.get(idx)); + case Token.M_ARG: + /* Expand the arg. */ + idx = (int)tok.getValue(); + // System.out.println("Pushing arg " + args.get(idx)); + arg = args.get(idx).expansion(); + break; + case Token.M_PASTE: + paste(tok); + break; + default: + return tok; + } + } /* for */ + } + + + override public String ToString() { + StringBuilder buf = new StringBuilder(); + buf.Append("expansion of ").Append(macro.getName()); + Source parent = getParent(); + if (parent != null) + buf.Append(" in ").Append(parent); + return buf.ToString(); + } +} + +} \ No newline at end of file diff --git a/Preprocessor.cs b/Preprocessor.cs new file mode 100644 index 0000000..7c9f794 --- /dev/null +++ b/Preprocessor.cs @@ -0,0 +1,2248 @@ +/* + * Anarres C Preprocessor + * Copyright (c) 2007-2008, Shevek + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express + * or implied. See the License for the specific language governing + * permissions and limitations under the License. + */ + +using System; +using System.Text; +using System.Collections.Generic; +using System.IO; + +namespace CppNet { + + +/** + * A C Preprocessor. + * The Preprocessor outputs a token stream which does not need + * re-lexing for C or C++. Alternatively, the output text may be + * reconstructed by concatenating the {@link Token#getText() text} + * values of the returned {@link Token Tokens}. (See + * {@link CppReader}, which does this.) + */ + + +/* +Source file name and line number information is conveyed by lines of the form + + # linenum filename flags + +These are called linemarkers. They are inserted as needed into +the output (but never within a string or character constant). They +mean that the following line originated in file filename at line +linenum. filename will never contain any non-printing characters; +they are replaced with octal escape sequences. + +After the file name comes zero or more flags, which are `1', `2', +`3', or `4'. If there are multiple flags, spaces separate them. Here +is what the flags mean: + +`1' + This indicates the start of a new file. +`2' + This indicates returning to a file (after having included another + file). +`3' + This indicates that the following text comes from a system header + file, so certain warnings should be suppressed. +`4' + This indicates that the following text should be treated as being + wrapped in an implicit extern "C" block. 
+*/ + +public class Preprocessor : IDisposable { + private class InternalSource : Source { + public override Token token() + { + throw new LexerException("Cannot read from " + getName()); + } + + internal override String getPath() + { + return ""; + } + + internal override String getName() { + return "internal data"; + } + } + + private static readonly Source INTERNAL = new InternalSource(); + private static readonly Macro __LINE__ = new Macro(INTERNAL, "__LINE__"); + private static readonly Macro __FILE__ = new Macro(INTERNAL, "__FILE__"); + private static readonly Macro __COUNTER__ = new Macro(INTERNAL, "__COUNTER__"); + + private List inputs; + + /* The fundamental engine. */ + private Dictionary macros; + private Stack states; + private Source source; + + /* Miscellaneous support. */ + private int counter; + + /* Support junk to make it work like cpp */ + private List quoteincludepath; /* -iquote */ + private List sysincludepath; /* -I */ + private List frameworkspath; + private Feature features; + private Warning warnings; + private VirtualFileSystem filesystem; + private PreprocessorListener listener; + + private List _importedPaths = new List(); + + public Preprocessor() { + this.inputs = new List(); + + this.macros = new Dictionary(); + macros.Add(__LINE__.getName(), __LINE__); + macros.Add(__FILE__.getName(), __FILE__); + macros.Add(__COUNTER__.getName(), __COUNTER__); + this.states = new Stack(); + states.Push(new State()); + this.source = null; + + this.counter = 0; + + this.quoteincludepath = new List(); + this.sysincludepath = new List(); + this.frameworkspath = new List(); + this.features = Feature.NONE; + this.warnings = Warning.NONE; + this.filesystem = new JavaFileSystem(); + this.listener = null; + } + + public Preprocessor(Source initial) : + this() { + addInput(initial); + } + + /** Equivalent to + * 'new Preprocessor(new {@link FileLexerSource}(file))' + */ + public Preprocessor(FileInfo file) : + this(new FileLexerSource(file)) { + } + + /** + 
* Sets the VirtualFileSystem used by this Preprocessor. + */ + public void setFileSystem(VirtualFileSystem filesystem) { + this.filesystem = filesystem; + } + + /** + * Returns the VirtualFileSystem used by this Preprocessor. + */ + public VirtualFileSystem getFileSystem() { + return filesystem; + } + + /** + * Sets the PreprocessorListener which handles events for + * this Preprocessor. + * + * The listener is notified of warnings, errors and source + * changes, amongst other things. + */ + public void setListener(PreprocessorListener listener) { + this.listener = listener; + Source s = source; + while (s != null) { + // s.setListener(listener); + s.init(this); + s = s.getParent(); + } + } + + /** + * Returns the PreprocessorListener which handles events for + * this Preprocessor. + */ + public PreprocessorListener getListener() { + return listener; + } + + /** + * Returns the feature-set for this Preprocessor. + * + * This set may be freely modified by user code. + */ + public Feature getFeatures() { + return features; + } + + /** + * Adds a feature to the feature-set of this Preprocessor. + */ + public void addFeature(Feature f) { + features |= f; + } + + /** + * Adds features to the feature-set of this Preprocessor. + */ + public void addFeatures(Feature f) { + features |= f; + } + + /** + * Returns true if the given feature is in + * the feature-set of this Preprocessor. + */ + public bool getFeature(Feature f) { + return (features & f) != Feature.NONE; + } + + /** + * Returns the warning-set for this Preprocessor. + * + * This set may be freely modified by user code. + */ + public Warning getWarnings() { + return warnings; + } + + /** + * Adds a warning to the warning-set of this Preprocessor. + */ + public void addWarning(Warning w) { + warnings |= w; + } + + /** + * Adds warnings to the warning-set of this Preprocessor. 
+ */ + public void addWarnings(Warning w) { + warnings |= w; + } + + /** + * Returns true if the given warning is in + * the warning-set of this Preprocessor. + */ + public bool getWarning(Warning w) { + return (warnings & w) != Warning.NONE; + } + + /** + * Adds input for the Preprocessor. + * + * Inputs are processed in the order in which they are added. + */ + public void addInput(Source source) { + source.init(this); + inputs.Add(source); + } + + /** + * Adds input for the Preprocessor. + * + * @see #addInput(Source) + */ + public void addInput(FileInfo file) { + addInput(new FileLexerSource(file)); + } + + + /** + * Handles an error. + * + * If a PreprocessorListener is installed, it receives the + * error. Otherwise, an exception is thrown. + */ + protected void error(int line, int column, String msg) { + if (listener != null) + listener.handleError(source, line, column, msg); + else + throw new LexerException("Error at " + line + ":" + column + ": " + msg); + } + + /** + * Handles an error. + * + * If a PreprocessorListener is installed, it receives the + * error. Otherwise, an exception is thrown. + * + * @see #error(int, int, String) + */ + protected void error(Token tok, String msg) { + error(tok.getLine(), tok.getColumn(), msg); + } + + /** + * Handles a warning. + * + * If a PreprocessorListener is installed, it receives the + * warning. Otherwise, an exception is thrown. + */ + protected void warning(int line, int column, String msg) { + if (warnings.HasFlag(Warning.ERROR)) + error(line, column, msg); + else if (listener != null) + listener.handleWarning(source, line, column, msg); + else + throw new LexerException("Warning at " + line + ":" + column + ": " + msg); + } + + /** + * Handles a warning. + * + * If a PreprocessorListener is installed, it receives the + * warning. Otherwise, an exception is thrown. 
+ * + * @see #warning(int, int, String) + */ + protected void warning(Token tok, String msg) { + warning(tok.getLine(), tok.getColumn(), msg); + } + + /** + * Adds a Macro to this Preprocessor. + * + * The given {@link Macro} object encapsulates both the name + * and the expansion. + */ + public void addMacro(Macro m) { + // System.out.println("Macro " + m); + String name = m.getName(); + /* Already handled as a source error in macro(). */ + if ("defined" == name) + throw new LexerException("Cannot redefine name 'defined'"); + macros[m.getName()] = m; + } + + /** + * Defines the given name as a macro. + * + * The String value is lexed into a token stream, which is + * used as the macro expansion. + */ + public void addMacro(String name, String value) { + try { + Macro m = new Macro(name); + StringLexerSource s = new StringLexerSource(value); + for (;;) { + Token tok = s.token(); + if(tok.getType() == Token.EOF) + break; + m.addToken(tok); + } + addMacro(m); + } + catch (IOException e) { + throw new LexerException(e); + } + } + + /** + * Defines the given name as a macro, with the value 1. + * + * This is a convnience method, and is equivalent to + * addMacro(name, "1"). + */ + public void addMacro(String name) { + addMacro(name, "1"); + } + + /** + * Sets the user include path used by this Preprocessor. + */ + /* Note for future: Create an IncludeHandler? */ + public void setQuoteIncludePath(List path) { + this.quoteincludepath = path; + } + + /** + * Returns the user include-path of this Preprocessor. + * + * This list may be freely modified by user code. + */ + public List getQuoteIncludePath() { + return quoteincludepath; + } + + /** + * Sets the system include path used by this Preprocessor. + */ + /* Note for future: Create an IncludeHandler? */ + public void setSystemIncludePath(List path) { + this.sysincludepath = path; + } + + /** + * Returns the system include-path of this Preprocessor. + * + * This list may be freely modified by user code. 
+ */ + public List getSystemIncludePath() { + return sysincludepath; + } + + /** + * Sets the Objective-C frameworks path used by this Preprocessor. + */ + /* Note for future: Create an IncludeHandler? */ + public void setFrameworksPath(List path) { + this.frameworkspath = path; + } + + /** + * Returns the Objective-C frameworks path used by this + * Preprocessor. + * + * This list may be freely modified by user code. + */ + public List getFrameworksPath() { + return frameworkspath; + } + + /** + * Returns the Map of Macros parsed during the run of this + * Preprocessor. + */ + public Dictionary getMacros() { + return macros; + } + + /** + * Returns the named macro. + * + * While you can modify the returned object, unexpected things + * might happen if you do. + */ + public Macro getMacro(String name) { + Macro retval; + macros.TryGetValue(name, out retval); + return retval; + } + +/* States */ + + private void push_state() { + State top = states.Peek(); + states.Push(new State(top)); + } + + private void pop_state() { + State s = states.Pop(); + if (states.Count == 0) { + error(0, 0, "#" + "endif without #" + "if"); + states.Push(s); + } + } + + private bool isActive() { + State state = states.Peek(); + return state.isParentActive() && state.isActive(); + } + + +/* Sources */ + + /** + * Returns the top Source on the input stack. + * + * @see Source + * @see #push_source(Source,bool) + * @see #pop_source() + */ + public Source getSource() { + return source; + } + + /** + * Pushes a Source onto the input stack. + * + * @see #getSource() + * @see #pop_source() + */ + protected void push_source(Source source, bool autopop) { + source.init(this); + source.setParent(this.source, autopop); + // source.setListener(listener); + if (listener != null) + listener.handleSourceChange(this.source, "suspend"); + this.source = source; + if (listener != null) + listener.handleSourceChange(this.source, "push"); + } + + /** + * Pops a Source from the input stack. 
*
     * @see #getSource()
     * @see #push_source(Source,bool)
     */
    protected void pop_source() {
        if (listener != null) {
            listener.handleSourceChange(this.source, "pop");
        }

        Source finished = this.source;
        this.source = finished.getParent();
        /* Always a noop unless called externally. */
        finished.close();

        /* Only announce a resume when there is something to resume. */
        if (listener != null && this.source != null) {
            listener.handleSourceChange(this.source, "resume");
        }
    }


/* Source tokens */

    /* One-token pushback buffer filled by source_untoken(). */
    private Token _source_token;

    /* Builds a P_LINE token whose text is
     *   #line <line> "<escaped name>"<extra>\n
     */
    /* XXX Make this include the Token.NL, and make all cpp directives eat
     * their own Token.NL. */
    private Token line_token(int line, String name, String extra) {
        StringBuilder text = new StringBuilder("#line ");
        text.Append(line);
        text.Append(" \"");
        /* XXX This call to escape(name) is correct but ugly. */
        MacroTokenSource.escape(text, name);
        text.Append("\"");
        text.Append(extra);
        text.Append("\n");
        return new Token(Token.P_LINE, line, 0, text.ToString(), null);
    }

    /* Shared tail of source_token() and source_skipline(): pops the
     * exhausted auto-popping Source 'finished'. Returns a " 2" line
     * marker for the Source that is resumed, or null when no marker
     * is due (LINEMARKERS off, 'finished' not numbered, or nothing
     * left on the stack). */
    private Token autopop_source(Source finished) {
        // System.out.println("Autopop " + finished);
        pop_source();
        Source resumed = getSource();
        if (getFeature(Feature.LINEMARKERS)
                && finished.isNumbered()
                && resumed != null) {
            /* We actually want 'did the nested source
             * contain a newline token', which isNumbered()
             * approximates. This is not perfect, but works. */
            return line_token(resumed.getLine() + 1, resumed.getName(), " 2");
        }
        return null;
    }

    /* Returns the next raw token: the pushed-back token if there is
     * one, otherwise a token from the top Source. When no Source is
     * active the next entry of 'inputs' is pushed; when 'inputs' is
     * empty too, an EOF token is returned. */
    private Token source_token() {
        Token pending = _source_token;
        if (pending != null) {
            _source_token = null;
            if (getFeature(Feature.DEBUG))
                System.Console.Error.WriteLine("Returning unget token " + pending);
            return pending;
        }

        while (true) {
            Source current = getSource();
            if (current == null) {
                if (inputs.Count == 0)
                    return new Token(Token.EOF);

                Source next = inputs[0];
                inputs.RemoveAt(0);
                push_source(next, true);
                if (getFeature(Feature.LINEMARKERS))
                    return line_token(next.getLine(), next.getName(), " 1");
                continue;
            }

            Token tok = current.token();
            if (tok.getType() == Token.EOF && current.isAutopop()) {
                Token marker = autopop_source(current);
                if (marker != null)
                    return marker;
                continue;
            }

            if (getFeature(Feature.DEBUG))
                System.Console.Error.WriteLine("Returning fresh token " + tok);
            return tok;
        }
    }

    /* Pushes one token back so the next source_token() returns it.
     * Only a single token may be pending at a time. */
    private void source_untoken(Token tok) {
        if (this._source_token != null)
            throw new InvalidOperationException("Cannot return two tokens");
        this._source_token = tok;
    }

    /* True for whitespace and for both comment token types. */
    private bool isWhite(Token tok) {
        switch (tok.getType()) {
            case Token.WHITESPACE:
            case Token.CCOMMENT:
            case Token.CPPCOMMENT:
                return true;
            default:
                return false;
        }
    }

    /* Returns the next raw token for which isWhite() is false. */
    private Token source_token_nonwhite() {
        Token tok = source_token();
        while (isWhite(tok)) {
            tok = source_token();
        }
        return tok;
    }

    /**
     * Returns an Token.NL or an Token.EOF token.
     *
     * The metadata on the token will be correct, which is better
     * than generating a new one.
     *
     * This method can, as of recent patches, return a P_LINE token.
     */
    private Token source_skipline(bool white) {
        // (new Exception("skipping line")).printStackTrace(System.out);
        Source current = getSource();
        Token tok = current.skipline(white);

        if (tok.getType() != Token.EOF || !current.isAutopop())
            return tok;

        Token marker = autopop_source(current);
        return (marker != null) ? marker : tok;
    }

    /* processes and expands a macro.
     *
     * Returns true when a replacement Source was pushed. Returns false
     * when 'm' is function-like but is not followed by '(' (the
     * offending token is pushed back), on EOF inside the argument
     * list, or when the argument count does not match. */
    private bool macro(Macro m, Token orig) {
        Token tok;
        List args = null;

        // System.out.println("pp: expanding " + m);

        if (m.isFunctionLike()) {
            /* Find the opening paren, skipping blanks, comments and
             * newlines on the way. */
            bool opened = false;
            while (!opened) {
                tok = source_token();
                // System.out.println("pp: open: token is " + tok);
                switch (tok.getType()) {
                    case Token.WHITESPACE:    /* XXX Really? */
                    case Token.CCOMMENT:
                    case Token.CPPCOMMENT:
                    case Token.NL:
                        break;
                    case '(':
                        opened = true;
                        break;
                    default:
                        source_untoken(tok);
                        return false;
                }
            }

            // tok = expanded_token_nonwhite();
            tok = source_token_nonwhite();

            /* We either have, or we should have args.
             * This deals elegantly with the case that we have
             * one empty arg. */
            if (tok.getType() != ')' || m.getArgs() > 0) {
                args = new List();

                Argument arg = new Argument();
                int depth = 0;
                bool space = false;
                bool closed = false;

                while (!closed) {
                    // System.out.println("pp: arg: token is " + tok);
                    switch (tok.getType()) {
                        case Token.EOF:
                            error(tok, "EOF in macro args");
                            return false;

                        case ',':
                            if (depth != 0) {
                                /* Comma nested inside parens. */
                                arg.addToken(tok);
                            }
                            else if (m.isVariadic() &&
                                    /* We are building the last arg. */
                                    args.Count == m.getArgs() - 1) {
                                /* Just add the comma. */
                                arg.addToken(tok);
                            }
                            else {
                                args.Add(arg);
                                arg = new Argument();
                            }
                            space = false;
                            break;

                        case ')':
                            if (depth == 0) {
                                /* End of the argument list; 'tok'
                                 * stays on this ')' for later use. */
                                args.Add(arg);
                                closed = true;
                            }
                            else {
                                depth--;
                                arg.addToken(tok);
                                space = false;
                            }
                            break;

                        case '(':
                            depth++;
                            arg.addToken(tok);
                            space = false;
                            break;

                        case Token.WHITESPACE:
                        case Token.CCOMMENT:
                        case Token.CPPCOMMENT:
                            /* Avoid duplicating spaces. */
                            space = true;
                            break;

                        default:
                            /* Do not put space on the beginning of
                             * an argument token. */
                            if (space && arg.Count != 0)
                                arg.addToken(Token.space);
                            arg.addToken(tok);
                            space = false;
                            break;
                    }

                    if (!closed) {
                        // tok = expanded_token();
                        tok = source_token();
                    }
                }

                if (m.isVariadic() && args.Count < m.getArgs()) {
                    args.Add(new Argument());
                }
                /* space may still be true here, thus trailing space
                 * is stripped from arguments. */

                if (args.Count != m.getArgs()) {
                    error(tok,
                            "macro " + m.getName() +
                            " has " + m.getArgs() + " parameters " +
                            "but given " + args.Count + " args");
                    /* We could replay the arg tokens, but I
                     * note that GNU cpp does exactly what we do,
                     * i.e. output the macro name and chew the args.
                     */
                    return false;
                }

                for (int i = 0; i < args.Count; i++) {
                    args[i].expand(this);
                }

                // System.out.println("Macro " + m + " args " + args);
            }
            /* else: nargs == 0 and we (correctly) got (); args stays null. */
        }
        /* else: macro without args; args stays null. */

        if (m == __LINE__) {
            Token lineTok = new Token(Token.INTEGER,
                    orig.getLine(), orig.getColumn(),
                    orig.getLine().ToString(),
                    orig.getLine());
            push_source(new FixedTokenSource(new Token[] { lineTok }), true);
        }
        else if (m == __FILE__) {
            StringBuilder buf = new StringBuilder("\"");
            String name = getSource().getName();
            if (name == null)
                name = "";
            /* Quote the name as a string literal: backslash and
             * double quote each get a backslash in front. */
            foreach (char c in name) {
                if (c == '\\' || c == '"')
                    buf.Append('\\');
                buf.Append(c);
            }
            buf.Append("\"");
            String text = buf.ToString();
            Token fileTok = new Token(Token.STRING,
                    orig.getLine(), orig.getColumn(),
                    text, text);
            push_source(new FixedTokenSource(new Token[] { fileTok }), true);
        }
        else if (m == __COUNTER__) {
            /* This could equivalently have been done by adding
             * a special Macro subclass which overrides getTokens(). */
            int value = this.counter++;
            Token counterTok = new Token(Token.INTEGER,
                    orig.getLine(), orig.getColumn(),
                    value.ToString(),
                    value);
            push_source(new FixedTokenSource(new Token[] { counterTok }), true);
        }
        else {
            push_source(new MacroTokenSource(m, args), true);
        }

        return true;
    }

    /**
     * Expands an argument.
     */
    /* I'd rather this were done lazily, but doing so breaks spec.
*/
    internal List expand(List arg) {
        List expansion = new List();
        bool space = false;

        /* Not auto-popping: the EOF of this source must reach the loop
         * below so we know where the argument ends. */
        push_source(new FixedTokenSource(arg), false);

        for (;;) {
            Token tok = expanded_token();
            if (tok.getType() == Token.EOF)
                break;

            switch (tok.getType()) {
                case Token.WHITESPACE:
                case Token.CCOMMENT:
                case Token.CPPCOMMENT:
                    /* Collapse any run of blanks/comments. */
                    space = true;
                    break;

                default:
                    /* Never emit a leading space. */
                    if (space && expansion.Count != 0)
                        expansion.Add(Token.space);
                    expansion.Add(tok);
                    space = false;
                    break;
            }
        }

        pop_source();

        return expansion;
    }

    /* processes a #define directive
     *
     * Reads the macro name, an optional parameter list and the
     * replacement list, then registers the macro with addMacro().
     * Returns the token that ended the directive: Token.NL or Token.EOF
     * on success, or whatever source_skipline() returns after a
     * reported error. Malformed input is always reported through
     * error(); no exception is thrown from here directly. */
    private Token define() {
        Token tok = source_token_nonwhite();
        if (tok.getType() != Token.IDENTIFIER) {
            error(tok, "Expected Token.IDENTIFIER");
            return source_skipline(false);
        }
        /* if predefined */

        String name = tok.getText();
        if ("defined" == name) {
            error(tok, "Cannot redefine name 'defined'");
            return source_skipline(false);
        }

        Macro m = new Macro(getSource(), name);
        List args;

        /* A '(' counts as a parameter list only when it follows the
         * name immediately, hence the raw source_token() here. */
        tok = source_token();
        if (tok.getType() == '(') {
            tok = source_token_nonwhite();
            if (tok.getType() != ')') {
                args = new List();
                for (;;) {
                    /* Expect a parameter name or "...". */
                    switch (tok.getType()) {
                        case Token.IDENTIFIER:
                            if (m.isVariadic()) {
                                /* A named parameter after "...", e.g.
                                 * F(..., x). Report it like every other
                                 * malformed parameter list instead of
                                 * throwing a bare Exception. */
                                error(tok,
                                    "ellipsis must be on last argument");
                                return source_skipline(false);
                            }
                            args.Add(tok.getText());
                            break;
                        case Token.ELLIPSIS:
                            m.setVariadic(true);
                            args.Add("__VA_ARGS__");
                            break;
                        case Token.NL:
                        case Token.EOF:
                            error(tok,
                                "Unterminated macro parameter list");
                            return tok;
                        default:
                            error(tok,
                                "error in macro parameters: " +
                                tok.getText());
                            return source_skipline(false);
                    }

                    /* Expect ',', ')' or a trailing "..." (named
                     * variadic parameter). */
                    tok = source_token_nonwhite();
                    switch (tok.getType()) {
                        case ',':
                            break;
                        case Token.ELLIPSIS:
                            tok = source_token_nonwhite();
                            if (tok.getType() != ')')
                                error(tok,
                                    "ellipsis must be on last argument");
                            m.setVariadic(true);
                            goto BREAK_ARGS;
                        case ')':
                            goto BREAK_ARGS;

                        case Token.NL:
                        case Token.EOF:
                            /* Do not skip line. */
                            error(tok,
                                "Unterminated macro parameters");
                            return tok;
                        default:
                            error(tok,
                                "Bad token in macro parameters: " +
                                tok.getText());
                            return source_skipline(false);
                    }
                    tok = source_token_nonwhite();
                }
                BREAK_ARGS:;
            }
            else {
                System.Diagnostics.Debug.Assert(tok.getType() == ')', "Expected ')'");
                args = new List();
            }

            m.setArgs(args);
        }
        else {
            /* For searching. */
            args = new List();
            source_untoken(tok);
        }

        /* Get an expansion for the macro, using IndexOf. */
        bool space = false;
        bool paste = false;
        int idx;

        /* Ensure no space at start. */
        tok = source_token_nonwhite();
        for (;;) {
            switch (tok.getType()) {
                case Token.EOF:
                    goto BREAK_EXPANSION;
                case Token.NL:
                    goto BREAK_EXPANSION;

                case Token.CCOMMENT:
                case Token.CPPCOMMENT:
                    /* XXX This is where we implement GNU's cpp -CC. */
                    // break;
                case Token.WHITESPACE:
                    /* Blanks right after a paste operator are dropped. */
                    if (!paste)
                        space = true;
                    break;

                /* Paste. */
                case Token.PASTE:
                    space = false;
                    paste = true;
                    m.addPaste(new Token(Token.M_PASTE,
                            tok.getLine(), tok.getColumn(),
                            "#" + "#", null));
                    break;

                /* Stringify. */
                case '#':
                    if (space)
                        m.addToken(Token.space);
                    space = false;
                    Token la = source_token_nonwhite();
                    if (la.getType() == Token.IDENTIFIER &&
                        ((idx = args.IndexOf(la.getText())) != -1)) {
                        /* '#' followed by a parameter name: record the
                         * parameter index on an M_STRING token. */
                        m.addToken(new Token(Token.M_STRING,
                            la.getLine(), la.getColumn(),
                            "#" + la.getText(),
                            idx));
                    }
                    else {
                        m.addToken(tok);
                        /* Allow for special processing. */
                        source_untoken(la);
                    }
                    break;

                case Token.IDENTIFIER:
                    if (space)
                        m.addToken(Token.space);
                    space = false;
                    paste = false;
                    idx = args.IndexOf(tok.getText());
                    if (idx == -1)
                        m.addToken(tok);
                    else
                        /* Parameter reference: record its index. */
                        m.addToken(new Token(Token.M_ARG,
                            tok.getLine(), tok.getColumn(),
                            tok.getText(),
                            idx));
                    break;

                default:
                    if (space)
                        m.addToken(Token.space);
                    space = false;
                    paste = false;
                    m.addToken(tok);
                    break;
            }
            tok = source_token();
        }
        BREAK_EXPANSION:

        if (getFeature(Feature.DEBUG))
            System.Console.Error.WriteLine("Defined macro " + m);
        addMacro(m);

        return tok;    /* Token.NL or Token.EOF. */
    }

    /* processes a #undef directive
     *
     * Removes the named macro from the macro table if it is present.
     * A non-identifier operand is reported as an error. Returns the
     * token ending the line: the offending NL/EOF itself, or the
     * result of source_skipline(). */
    private Token undef() {
        Token tok = source_token_nonwhite();
        if (tok.getType() != Token.IDENTIFIER) {
            error(tok,
                "Expected identifier, not " + tok.getText());
            /* Already at end of line: nothing left to skip. */
            if (tok.getType() == Token.NL || tok.getType() == Token.EOF)
                return tok;
        }
        else {
            Macro m;
            macros.TryGetValue(tok.getText(), out m);
            if (m != null) {
                /* XXX error if predefined */
                macros.Remove(m.getName());
            }
        }
        return source_skipline(true);
    }

    /**
     * Attempts to include the given file.
     *
     * Returns false when the VirtualFile is not a file. Otherwise
     * returns true and, unless checkOnly is set, pushes the file's
     * Source onto the input stack. With isImport set, a path that
     * has already been imported is not pushed again.
     *
     * Note: this method is private here, so it cannot be overridden
     * by user code.
     */
    private bool include(VirtualFile file, bool isImport, bool checkOnly) {
        // System.out.println("Try to include " + file);
        if (!file.isFile())
            return false;

        if (!checkOnly) {
            if (isImport) {
                if (_importedPaths.Contains(file.getPath())) {
                    return true;
                }

                _importedPaths.Add(file.getPath());
            }

            /* Diagnostics go to stderr like every other DEBUG trace,
             * keeping them out of stdout. */
            if (getFeature(Feature.DEBUG))
                System.Console.Error.WriteLine("pp: including " + file);

            push_source(file.getSource(), true);
        }
        return true;
    }

    /**
     * Includes a file from an include path, by name.
+ */ + private bool include(IEnumerable path, String name, bool isImport, bool checkOnly) { + foreach (String dir in path) { + VirtualFile file = filesystem.getFile(dir, name); + if (include(file, isImport, checkOnly)) + return true; + } + return false; + } + + private bool includeFramework(IEnumerable path, string name, bool isImport, bool checkOnly) + { + string[] framework = name.Split(new char[] { '/' }, 2); + if(framework.Length < 2) { + return false; + } + name = Path.Combine(Path.Combine(framework[0] + ".framework", "Headers"), framework[1]); + + foreach(String dir in path) { + VirtualFile file = filesystem.getFile(dir, name); + if(include(file, isImport, checkOnly)) + return true; + } + return false; + + } + + /** + * Handles an include directive. + */ + private bool include(String parent, int line, String name, bool quoted, bool isImport, bool checkOnly) { + VirtualFile pdir = null; + if (quoted) { + VirtualFile pfile = filesystem.getFile(parent); + pdir = pfile.getParentFile(); + VirtualFile ifile = pdir.getChildFile(name); + if(include(ifile, isImport, checkOnly)) + return true; + if(include(quoteincludepath, name, isImport, checkOnly)) + return true; + } + + if(include(sysincludepath, name, isImport, checkOnly)) + return true; + + if(includeFramework(frameworkspath, name, isImport, checkOnly)) { + return true; + } + if(checkOnly) { + return false; + } + + StringBuilder buf = new StringBuilder(); + buf.Append("File not found: ").Append(name); + buf.Append(" in"); + if (quoted) { + buf.Append(" .").Append('(').Append(pdir).Append(')'); + foreach (String dir in quoteincludepath) + buf.Append(" ").Append(dir); + } + foreach (String dir in sysincludepath) + buf.Append(" ").Append(dir); + error(line, 0, buf.ToString()); + return false; + } + + private bool has_feature() { + Token tok; + tok = token_nonwhite(); + if(tok.getType() != '(') { + throw new Exception(); + } + tok = token_nonwhite(); + string feature = tok.getText(); + + tok = token_nonwhite(); + 
if(tok.getType() != ')') { + throw new Exception(); + } + switch(feature) { + + case "address_sanitizer": return true; //, LangOpts.Sanitize.Address) + case "attribute_analyzer_noreturn": return true; + case "attribute_availability": return true; + case "attribute_availability_with_message": return true; + case "attribute_cf_returns_not_retained": return true; + case "attribute_cf_returns_retained": return true; + case "attribute_deprecated_with_message": return true; + case "attribute_ext_vector_type": return true; + case "attribute_ns_returns_not_retained": return true; + case "attribute_ns_returns_retained": return true; + case "attribute_ns_consumes_self": return true; + case "attribute_ns_consumed": return true; + case "attribute_cf_consumed": return true; + case "attribute_objc_ivar_unused": return true; + case "attribute_objc_method_family": return true; + case "attribute_overloadable": return true; + case "attribute_unavailable_with_message": return true; + case "attribute_unused_on_fields": return true; + case "blocks": return true; //, LangOpts.Blocks) + case "c_thread_safety_attributes": return true; + case "cxx_exceptions": return true; //, LangOpts.CXXExceptions) + case "cxx_rtti": return true; //, LangOpts.RTTI) + case "enumerator_attributes": return true; + case "memory_sanitizer": return true; //, LangOpts.Sanitize.Memory) + case "thread_sanitizer": return true; //, LangOpts.Sanitize.Thread) + case "dataflow_sanitizer": return true; //, LangOpts.Sanitize.DataFlow) + + case "objc_arr": return true; //, LangOpts.ObjCAutoRefCount) // FIXME: REMOVE? 
+ case "objc_arc": return true; //, LangOpts.ObjCAutoRefCount) + case "objc_arc_weak": return true; //, LangOpts.ObjCARCWeak) + case "objc_default_synthesize_properties": return true; //, LangOpts.ObjC2) + case "objc_fixed_enum": return true; //, LangOpts.ObjC2) + case "objc_instancetype": return true; //, LangOpts.ObjC2) + case "objc_modules": return true; //, LangOpts.ObjC2 && LangOpts.Modules) + case "objc_nonfragile_abi": return true; //, LangOpts.ObjCRuntime.isNonFragile()) + case "objc_property_explicit_atomic": return true; // Does clang support explicit "atomic" keyword? + case "objc_protocol_qualifier_mangling": return true; + case "objc_weak_class": return true; //, LangOpts.ObjCRuntime.hasWeakClassImport()) + case "ownership_holds": return true; + case "ownership_returns": return true; + case "ownership_takes": return true; + case "objc_bool": return true; + case "objc_subscripting": return true; //, LangOpts.ObjCRuntime.isNonFragile()) + case "objc_array_literals": return true; //, LangOpts.ObjC2) + case "objc_dictionary_literals": return true; //, LangOpts.ObjC2) + case "objc_boxed_expressions": return true; //, LangOpts.ObjC2) + case "arc_cf_code_audited": return true; + // C11 features + case "c_alignas": return true; //, LangOpts.C11) + case "c_atomic": return true; //, LangOpts.C11) + case "c_generic_selections": return true; //, LangOpts.C11) + case "c_static_assert": return true; //, LangOpts.C11) + case "c_thread_local": return true; // LangOpts.C11 && PP.getTargetInfo().isTLSSupported()) + // C++11 features + case "cxx_access_control_sfinae": return true; //, LangOpts.CPlusPlus11) + case "cxx_alias_templates": return true; //, LangOpts.CPlusPlus11) + case "cxx_alignas": return true; //, LangOpts.CPlusPlus11) + case "cxx_atomic": return true; //, LangOpts.CPlusPlus11) + case "cxx_attributes": return true; //, LangOpts.CPlusPlus11) + case "cxx_auto_type": return true; //, LangOpts.CPlusPlus11) + case "cxx_constexpr": return true; //, 
LangOpts.CPlusPlus11) + case "cxx_decltype": return true; //, LangOpts.CPlusPlus11) + case "cxx_decltype_incomplete_return_types": return true; //, LangOpts.CPlusPlus11) + case "cxx_default_function_template_args": return true; //, LangOpts.CPlusPlus11) + case "cxx_defaulted_functions": return true; //, LangOpts.CPlusPlus11) + case "cxx_delegating_constructors": return true; //, LangOpts.CPlusPlus11) + case "cxx_deleted_functions": return true; //, LangOpts.CPlusPlus11) + case "cxx_explicit_conversions": return true; //, LangOpts.CPlusPlus11) + case "cxx_generalized_initializers": return true; //, LangOpts.CPlusPlus11) + case "cxx_implicit_moves": return true; //, LangOpts.CPlusPlus11) + case "cxx_inheriting_constructors": return true; //, LangOpts.CPlusPlus11) + case "cxx_inline_namespaces": return true; //, LangOpts.CPlusPlus11) + case "cxx_lambdas": return true; //, LangOpts.CPlusPlus11) + case "cxx_local_type_template_args": return true; //, LangOpts.CPlusPlus11) + case "cxx_nonstatic_member_init": return true; //, LangOpts.CPlusPlus11) + case "cxx_noexcept": return true; //, LangOpts.CPlusPlus11) + case "cxx_nullptr": return true; //, LangOpts.CPlusPlus11) + case "cxx_override_control": return true; //, LangOpts.CPlusPlus11) + case "cxx_range_for": return true; //, LangOpts.CPlusPlus11) + case "cxx_raw_string_literals": return true; //, LangOpts.CPlusPlus11) + case "cxx_reference_qualified_functions": return true; //, LangOpts.CPlusPlus11) + case "cxx_rvalue_references": return true; //, LangOpts.CPlusPlus11) + case "cxx_strong_enums": return true; //, LangOpts.CPlusPlus11) + case "cxx_static_assert": return true; //, LangOpts.CPlusPlus11) + case "cxx_thread_local": return true; //LangOpts.CPlusPlus11 && PP.getTargetInfo().isTLSSupported()) + case "cxx_trailing_return": return true; //, LangOpts.CPlusPlus11) + case "cxx_unicode_literals": return true; //, LangOpts.CPlusPlus11) + case "cxx_unrestricted_unions": return true; //, LangOpts.CPlusPlus11) + case 
"cxx_user_literals": return true; //, LangOpts.CPlusPlus11) + case "cxx_variadic_templates": return true; //, LangOpts.CPlusPlus11) + // C++1y features + case "cxx_aggregate_nsdmi": return true; //, LangOpts.CPlusPlus1y) + case "cxx_binary_literals": return true; //, LangOpts.CPlusPlus1y) + case "cxx_contextual_conversions": return true; //, LangOpts.CPlusPlus1y) + case "cxx_decltype_auto": return true; //, LangOpts.CPlusPlus1y) + case "cxx_generic_lambdas": return true; //, LangOpts.CPlusPlus1y) + case "cxx_init_captures": return true; //, LangOpts.CPlusPlus1y) + case "cxx_relaxed_constexpr": return true; //, LangOpts.CPlusPlus1y) + case "cxx_return_type_deduction": return true; //, LangOpts.CPlusPlus1y) + case "cxx_variable_templates": return true; //, LangOpts.CPlusPlus1y) + case "has_nothrow_assign": return true; //, LangOpts.CPlusPlus) + case "has_nothrow_copy": return true; //, LangOpts.CPlusPlus) + case "has_nothrow_constructor": return true; //, LangOpts.CPlusPlus) + case "has_trivial_assign": return true; //, LangOpts.CPlusPlus) + case "has_trivial_copy": return true; //, LangOpts.CPlusPlus) + case "has_trivial_constructor": return true; //, LangOpts.CPlusPlus) + case "has_trivial_destructor": return true; //, LangOpts.CPlusPlus) + case "has_virtual_destructor": return true; //, LangOpts.CPlusPlus) + case "is_abstract": return true; //, LangOpts.CPlusPlus) + case "is_base_of": return true; //, LangOpts.CPlusPlus) + case "is_class": return true; //, LangOpts.CPlusPlus) + case "is_constructible": return true; //, LangOpts.CPlusPlus) + case "is_convertible_to": return true; //, LangOpts.CPlusPlus) + case "is_empty": return true; //, LangOpts.CPlusPlus) + case "is_enum": return true; //, LangOpts.CPlusPlus) + case "is_final": return true; //, LangOpts.CPlusPlus) + case "is_literal": return true; //, LangOpts.CPlusPlus) + case "is_standard_layout": return true; //, LangOpts.CPlusPlus) + case "is_pod": return true; //, LangOpts.CPlusPlus) + case 
"is_polymorphic": return true; //, LangOpts.CPlusPlus) + case "is_sealed": return true; //, LangOpts.MicrosoftExt) + case "is_trivial": return true; //, LangOpts.CPlusPlus) + case "is_trivially_assignable": return true; //, LangOpts.CPlusPlus) + case "is_trivially_constructible": return true; //, LangOpts.CPlusPlus) + case "is_trivially_copyable": return true; //, LangOpts.CPlusPlus) + case "is_union": return true; //, LangOpts.CPlusPlus) + case "modules": return true; //, LangOpts.Modules) + case "tls": return true; // PP.getTargetInfo().isTLSSupported()) + case "underlying_type": return true; //, LangOpts.CPlusPlus) + default: + return false; + } + + + + } + + private bool has_include(bool next) + { + LexerSource lexer = (LexerSource)source; + string name; + bool quoted; + + Token tok; + tok = token_nonwhite(); + if(tok.getType() != '(') { + throw new Exception(); + } + + lexer.setInclude(true); + tok = token_nonwhite(); + + if(tok.getType() == Token.STRING) { + /* XXX Use the original text, not the value. + * Backslashes must not be treated as escapes here. 
*/ + StringBuilder buf = new StringBuilder((String)tok.getValue()); + for(; ; ) { + tok = token_nonwhite(); + switch(tok.getType()) { + case Token.STRING: + buf.Append((String)tok.getValue()); + break; + case Token.NL: + case Token.EOF: + goto BREAK_HEADER; + default: + warning(tok, + "Unexpected token on #" + "include line"); + return false; + } + } + BREAK_HEADER: + name = buf.ToString(); + quoted = true; + } else if(tok.getType() == Token.HEADER) { + name = (String)tok.getValue(); + quoted = false; + } else { + throw new Exception(); + } + + tok = token_nonwhite(); + if(tok.getType() != ')') { + throw new Exception(); + } + + return include(source.getPath(), tok.getLine(), name, quoted, false, true); + } + + private Token include(bool next, bool isImport) { + LexerSource lexer = (LexerSource)source; + try { + lexer.setInclude(true); + Token tok = token_nonwhite(); + + String name; + bool quoted; + + if(tok.getType() == Token.STRING) { + /* XXX Use the original text, not the value. + * Backslashes must not be treated as escapes here. */ + StringBuilder buf = new StringBuilder((String)tok.getValue()); + for (;;) { + tok = token_nonwhite(); + switch (tok.getType()) { + case Token.STRING: + buf.Append((String)tok.getValue()); + break; + case Token.NL: + case Token.EOF: + goto BREAK_HEADER; + default: + warning(tok, + "Unexpected token on #"+"include line"); + return source_skipline(false); + } + } + BREAK_HEADER: + name = buf.ToString(); + quoted = true; + } else if(tok.getType() == Token.HEADER) { + name = (String)tok.getValue(); + quoted = false; + tok = source_skipline(true); + } + else { + error(tok, + "Expected string or header, not " + tok.getText()); + switch (tok.getType()) { + case Token.NL: + case Token.EOF: + return tok; + default: + /* Only if not a Token.NL or Token.EOF already. */ + return source_skipline(false); + } + } + + /* Do the inclusion. 
*/ + include(source.getPath(), tok.getLine(), name, quoted, isImport, false); + + /* 'tok' is the 'nl' after the include. We use it after the + * #line directive. */ + if (getFeature(Feature.LINEMARKERS)) + return line_token(1, source.getName(), " 1"); + return tok; + } + finally { + lexer.setInclude(false); + } + } + + protected void pragma(Token name, List value) { + warning(name, "Unknown #" + "pragma: " + name.getText()); + } + + private Token pragma() { + Token name; + + for (;;) { + Token tok = token(); + switch (tok.getType()) { + case Token.EOF: + /* There ought to be a newline before Token.EOF. + * At least, in any skipline context. */ + /* XXX Are we sure about this? */ + warning(tok, + "End of file in #" + "pragma"); + return tok; + case Token.NL: + /* This may contain one or more newlines. */ + warning(tok, + "Empty #" + "pragma"); + return tok; + case Token.CCOMMENT: + case Token.CPPCOMMENT: + case Token.WHITESPACE: + continue; + case Token.IDENTIFIER: + name = tok; + goto BREAK_NAME; + default: + return source_skipline(false); + } + } + BREAK_NAME: + + Token tok2; + List value = new List(); + for (;;) { + tok2 = token(); + switch (tok2.getType()) { + case Token.EOF: + /* There ought to be a newline before Token.EOF. + * At least, in any skipline context. */ + /* XXX Are we sure about this? */ + warning(tok2, + "End of file in #" + "pragma"); + goto BREAK_VALUE; + case Token.NL: + /* This may contain one or more newlines. */ + goto BREAK_VALUE; + case Token.CCOMMENT: + case Token.CPPCOMMENT: + break; + case Token.WHITESPACE: + value.Add(tok2); + break; + default: + value.Add(tok2); + break; + } + } + BREAK_VALUE: + + pragma(name, value); + + return tok2; /* The Token.NL. */ + } + + /* For #error and #warning. */ + private void error(Token pptok, bool is_error) { + StringBuilder buf = new StringBuilder(); + buf.Append('#').Append(pptok.getText()).Append(' '); + /* Peculiar construction to ditch first whitespace. 
*/ + Token tok = source_token_nonwhite(); + for (;;) { + switch (tok.getType()) { + case Token.NL: + case Token.EOF: + goto BREAK_ERROR; + default: + buf.Append(tok.getText()); + break; + } + tok = source_token(); + } + BREAK_ERROR: + if (is_error) + error(pptok, buf.ToString()); + else + warning(pptok, buf.ToString()); + } + + + + + /* This bypasses token() for #elif expressions. + * If we don't do this, then isActive() == false + * causes token() to simply chew the entire input line. */ + private Token expanded_token() { + for (;;) { + Token tok = source_token(); + // System.out.println("Source token is " + tok); + if (tok.getType() == Token.IDENTIFIER) { + Macro m; + macros.TryGetValue(tok.getText(), out m); + if (m == null) + return tok; + if (source.isExpanding(m)) + return tok; + if (macro(m, tok)) + continue; + } + return tok; + } + } + + private Token expanded_token_nonwhite() { + Token tok; + do { + tok = expanded_token(); + // System.out.println("expanded token is " + tok); + } while (isWhite(tok)); + return tok; + } + + + private Token _expr_token = null; + + private Token expr_token() { + Token tok = _expr_token; + + if (tok != null) { + // System.out.println("ungetting"); + _expr_token = null; + } + else { + tok = expanded_token_nonwhite(); + // System.out.println("expt is " + tok); + + if (tok.getType() == Token.IDENTIFIER && + tok.getText() == "defined") { + Token la = source_token_nonwhite(); + bool paren = false; + if (la.getType() == '(') { + paren = true; + la = source_token_nonwhite(); + } + + // System.out.println("Core token is " + la); + + if (la.getType() != Token.IDENTIFIER) { + error(la, + "defined() needs identifier, not " + + la.getText()); + tok = new Token(Token.INTEGER, + la.getLine(), la.getColumn(), + "0", 0); + } + else if (macros.ContainsKey(la.getText())) { + // System.out.println("Found macro"); + tok = new Token(Token.INTEGER, + la.getLine(), la.getColumn(), + "1", 1); + } else if(la.getText() == "__has_include_next" || 
la.getText() == "__has_include" || la.getText() == "__has_feature") { + tok = new Token(Token.INTEGER, + la.getLine(), la.getColumn(), + "1", 1); + } else { + // System.out.println("Not found macro"); + tok = new Token(Token.INTEGER, + la.getLine(), la.getColumn(), + "0", 0); + } + + if (paren) { + la = source_token_nonwhite(); + if (la.getType() != ')') { + expr_untoken(la); + error(la, "Missing ) in defined()"); + } + } + } + } + + // System.out.println("expr_token returns " + tok); + + return tok; + } + + private void expr_untoken(Token tok) { + if(_expr_token != null) + throw new Exception ( + "Cannot unget two expression tokens." + ); + _expr_token = tok; + } + + private int expr_priority(Token op) { + switch (op.getType()) { + case '/': return 11; + case '%': return 11; + case '*': return 11; + case '+': return 10; + case '-': return 10; + case Token.LSH: return 9; + case Token.RSH: return 9; + case '<': return 8; + case '>': return 8; + case Token.LE: return 8; + case Token.GE: return 8; + case Token.EQ: return 7; + case Token.NE: return 7; + case '&': return 6; + case '^': return 5; + case '|': return 4; + case Token.LAND: return 3; + case Token.LOR: return 2; + case '?': return 1; + default: + // System.out.println("Unrecognised operator " + op); + return 0; + } + } + + private long expr(int priority) { + /* + System.out.flush(); + (new Exception("expr(" + priority + ") called")).printStackTrace(); + System.err.flush(); + */ + + Token tok = expr_token(); + long lhs, rhs; + + // System.out.println("Expr lhs token is " + tok); + + switch (tok.getType()) { + case '(': + lhs = expr(0); + tok = expr_token(); + if (tok.getType() != ')') { + expr_untoken(tok); + error(tok, "missing ) in expression"); + return 0; + } + break; + + case '~': lhs = ~expr(11); break; + case '!': lhs = expr(11) == 0 ? 
1 : 0; break;
+            case '-': lhs = -expr(11); break;
+            case Token.INTEGER:
+                lhs = Convert.ToInt64(tok.getValue());
+                break;
+            case Token.CHARACTER:
+                lhs = (long)((char)tok.getValue());
+                break;
+            case Token.IDENTIFIER:
+                if(tok.getText() == "__has_include_next") {
+                    lhs = has_include(true) ? 1 : 0;
+                } else if(tok.getText() == "__has_include") {
+                    lhs = has_include(false) ? 1 : 0;
+                } else if(tok.getText() == "__has_feature") {
+                    lhs = has_feature() ? 1 : 0;
+
+
+                } else {
+                    if(warnings.HasFlag(Warning.UNDEF)) {
+                        warning(tok, "Undefined token '" + tok.getText() +
+                                "' encountered in conditional.");
+                    }
+                    lhs = 0;
+                }
+                break;
+
+            default:
+                expr_untoken(tok);
+                error(tok,
+                    "Bad token in expression: " + tok.getText());
+                return 0;
+        }
+
+        for (;;) {
+            // System.out.println("expr: lhs is " + lhs + ", pri = " + priority);
+            Token op = expr_token();
+            int pri = expr_priority(op); /* 0 if not a binop. */
+            if (pri == 0 || priority >= pri) {
+                expr_untoken(op);
+                goto BREAK_EXPR;
+            }
+            rhs = expr(pri);
+            // System.out.println("rhs token is " + rhs);
+            switch (op.getType()) {
+                case '/':
+                    if (rhs == 0) {
+                        error(op, "Division by zero");
+                        lhs = 0;
+                    }
+                    else {
+                        lhs = lhs / rhs;
+                    }
+                    break;
+                case '%':
+                    if (rhs == 0) {
+                        error(op, "Modulus by zero");
+                        lhs = 0;
+                    }
+                    else {
+                        lhs = lhs % rhs;
+                    }
+                    break;
+                case '*': lhs = lhs * rhs; break;
+                case '+': lhs = lhs + rhs; break;
+                case '-': lhs = lhs - rhs; break;
+                case '<': lhs = lhs < rhs ? 1 : 0; break;
+                case '>': lhs = lhs > rhs ? 1 : 0; break;
+                case '&': lhs = lhs & rhs; break;
+                case '^': lhs = lhs ^ rhs; break;
+                case '|': lhs = lhs | rhs; break;
+
+                case Token.LSH: lhs = lhs << (int)rhs; break;
+                case Token.RSH: lhs = lhs >> (int)rhs; break;
+                case Token.LE: lhs = lhs <= rhs ? 1 : 0; break;
+                case Token.GE: lhs = lhs >= rhs ? 1 : 0; break;
+                case Token.EQ: lhs = lhs == rhs ? 1 : 0; break;
+                case Token.NE: lhs = lhs != rhs ? 1 : 0; break;
+                case Token.LAND: lhs = (lhs != 0) && (rhs != 0) ? 1 : 0; break;
+                case Token.LOR: lhs = (lhs != 0) || (rhs != 0) ? 1 : 0; break;
+
+                case '?':
+                    Token colon = expr_token();
+                    if(colon.getText() != ":") {
+                        throw new Exception();
+                    }
+                    long rrhs = expr(0);
+                    /* C semantics: any non-zero condition selects the
+                     * first branch, not only the value 1. */
+                    if(lhs != 0) {
+                        lhs = rhs;
+                    } else {
+                        lhs = rrhs;
+                    }
+                    break;
+
+                default:
+                    error(op,
+                        "Unexpected operator " + op.getText());
+                    return 0;
+
+            }
+        }
+        BREAK_EXPR:
+        /*
+        System.out.flush();
+        (new Exception("expr returning " + lhs)).printStackTrace();
+        System.err.flush();
+        */
+        // System.out.println("expr returning " + lhs);
+
+        return lhs;
+    }
+
+    /**
+     * Builds a WHITESPACE token that stands in for the given token.
+     *
+     * The replacement text consists of one '\n' per line break found
+     * in the text of tok (a "\r\n" pair counts once), and carries the
+     * line and column of tok.
+     */
+    private Token toWhitespace(Token tok) {
+        String text = tok.getText();
+        int len = text.Length;
+        bool cr = false;
+        int nls = 0;
+
+        for (int i = 0; i < len; i++) {
+            char c = text[i];
+
+            switch (c) {
+                case '\r':
+                    cr = true;
+                    nls++;
+                    break;
+                case '\n':
+                    if (cr) {
+                        /* Second half of a "\r\n" pair: already counted. */
+                        cr = false;
+                        break;
+                    }
+                    goto case '\u2028';
+                    /* fallthrough */
+                case '\u2028':
+                case '\u2029':
+                case '\u000B':
+                case '\u000C':
+                case '\u0085':
+                    cr = false;
+                    nls++;
+                    break;
+                default:
+                    /* Only a '\n' directly after '\r' belongs to that
+                     * line break; any other character ends the pair. */
+                    cr = false;
+                    break;
+            }
+        }
+
+        char[] cbuf = new char[nls];
+        for(int i = 0; i < nls; i++) { cbuf[i] = '\n'; }
+
+        return new Token(Token.WHITESPACE,
+                tok.getLine(), tok.getColumn(),
+                new String(cbuf));
+    }
+
+    /**
+     * Reads source tokens until one can be handed to the caller.
+     *
+     * While isActive() is false, tokens are read with the source
+     * marked inactive; whitespace is returned as-is, comments are
+     * returned or turned into whitespace depending on the comment
+     * features, and any other token causes the rest of the line to
+     * be skipped. HASH, NL and EOF always reach the main switch.
+     *
+     * In the main switch, identifiers bound to a macro are handed to
+     * macro() unless the source is already expanding that macro, and
+     * a HASH followed by an identifier is looked up in ppcmds and
+     * dispatched to the matching directive handler.
+     */
+    private Token _token() {
+
+        SKIP_TOKEN:
+        for (;;) {
+            Token tok;
+            if (!isActive()) {
+                try {
+                    /* XXX Tell lexer to ignore warnings. */
+                    source.setActive(false);
+                    tok = source_token();
+                }
+                finally {
+                    /* XXX Tell lexer to stop ignoring warnings. */
+                    source.setActive(true);
+                }
+                switch (tok.getType()) {
+                    case Token.HASH:
+                    case Token.NL:
+                    case Token.EOF:
+                        /* The preprocessor has to take action here. */
+                        break;
+                    case Token.WHITESPACE:
+                        return tok;
+                    case Token.CCOMMENT:
+                    case Token.CPPCOMMENT:
+                        // Patch up to preserve whitespace.
+                        if (getFeature(Feature.KEEPALLCOMMENTS))
+                            return tok;
+                        if (!isActive())
+                            return toWhitespace(tok);
+                        if (getFeature(Feature.KEEPCOMMENTS))
+                            return tok;
+                        return toWhitespace(tok);
+                    default:
+                        // Return Token.NL to preserve whitespace.
+                        /* XXX This might lose a comment. */
+                        return source_skipline(false);
+                }
+            }
+            else {
+                tok = source_token();
+            }
+
+            LEX: switch (tok.getType()) {
+                case Token.EOF:
+                    /* Pop the stacks. */
+                    return tok;
+
+                case Token.WHITESPACE:
+                case Token.NL:
+                    //goto SKIP_TOKEN;
+                    return tok;
+
+                case Token.CCOMMENT:
+                case Token.CPPCOMMENT:
+
+                    //if(!getFeature(Feature.KEEPALLCOMMENTS)) {
+                    //    goto SKIP_TOKEN;
+                    //}
+                    return tok;
+
+                case '!': case '%': case '&':
+                case '(': case ')': case '*':
+                case '+': case ',': case '-':
+                case '/': case ':': case ';':
+                case '<': case '=': case '>':
+                case '?': case '[': case ']':
+                case '^': case '{': case '|':
+                case '}': case '~': case '.':
+
+                /* From Olivier Chafik for Objective C? */
+                case '@':
+                /* The one remaining ASCII, might as well. */
+                case '`':
+
+                // case '#':
+
+                case Token.AND_EQ:
+                case Token.ARROW:
+                case Token.CHARACTER:
+                case Token.DEC:
+                case Token.DIV_EQ:
+                case Token.ELLIPSIS:
+                case Token.EQ:
+                case Token.GE:
+                case Token.HEADER: /* Should only arise from include() */
+                case Token.INC:
+                case Token.LAND:
+                case Token.LE:
+                case Token.LOR:
+                case Token.LSH:
+                case Token.LSH_EQ:
+                case Token.SUB_EQ:
+                case Token.MOD_EQ:
+                case Token.MULT_EQ:
+                case Token.NE:
+                case Token.OR_EQ:
+                case Token.PLUS_EQ:
+                case Token.RANGE:
+                case Token.RSH:
+                case Token.RSH_EQ:
+                case Token.STRING:
+                case Token.XOR_EQ:
+                    return tok;
+
+                case Token.INTEGER:
+                    return tok;
+
+                case Token.IDENTIFIER:
+                    Macro m;
+                    macros.TryGetValue(tok.getText(), out m);
+                    if (m == null)
+                        return tok;
+                    if (source.isExpanding(m))
+                        return tok;
+                    if (macro(m, tok))
+                        break;
+                    return tok;
+
+                case Token.P_LINE:
+                    if (getFeature(Feature.LINEMARKERS))
+                        return tok;
+                    break;
+
+                case Token.INVALID:
+                    if (getFeature(Feature.CSYNTAX))
+                        error(tok, (String)tok.getValue());
+                    return tok;
+
+                default:
+                    throw new Exception("Bad token " + tok);
+                    // break;
+
+                case Token.HASH:
+                    tok = source_token_nonwhite();
+                    // (new Exception("here")).printStackTrace();
+                    switch (tok.getType()) {
+                        case Token.NL:
+                            goto BREAK_LEX; /* Some code has #\n */
+                        case Token.IDENTIFIER:
+                            break;
+                        default:
+                            error(tok,
+                                "Preprocessor directive not a word " +
+                                tok.getText());
+                            return source_skipline(false);
+                    }
+                    /* The indexer would throw KeyNotFoundException for a
+                     * directive that is not in the table, so look it up
+                     * with TryGetValue and report the error instead. */
+                    int ppcmd;
+                    if (!ppcmds.TryGetValue(tok.getText(), out ppcmd)) {
+                        error(tok,
+                            "Unknown preprocessor directive " +
+                            tok.getText());
+                        return source_skipline(false);
+                    }
+
+                    PP: switch(ppcmd) {
+
+                        case PP_DEFINE:
+                            if(!isActive())
+                                return source_skipline(false);
+                            else
+                                return define();
+                            // break;
+
+                        case PP_UNDEF:
+                            if(!isActive())
+                                return source_skipline(false);
+                            else
+                                return undef();
+                            // break;
+
+                        case PP_INCLUDE:
+                            if(!isActive())
+                                return source_skipline(false);
+                            else
+                                return include(false, false);
+                            // break;
+                        case PP_INCLUDE_NEXT:
+                            if(!isActive())
+                                return source_skipline(false);
+                            if(!getFeature(Feature.INCLUDENEXT)) {
+                                error(tok,
+                                    "Directive include_next not enabled"
+                                    );
+                                return source_skipline(false);
+                            }
+                            return include(true, false);
+                            // break;
+
+                        case PP_WARNING:
+                        case PP_ERROR:
+                            if(!isActive())
+                                return source_skipline(false);
+                            else
+                                error(tok, ppcmd == PP_ERROR);
+                            break;
+
+                        case PP_IF:
+                            push_state();
+                            if(!isActive()) {
+                                return source_skipline(false);
+                            }
+                            _expr_token = null;
+                            states.Peek().setActive(expr(0) != 0);
+                            tok = expr_token(); /* unget */
+                            if(tok.getType() == Token.NL)
+                                return tok;
+                            return source_skipline(true);
+                            // break;
+
+                        case PP_ELIF:
+                            State state = states.Peek();
+                            if(false) {
+                                /* Check for 'if' */
+                                ;
+                            } else if(state.sawElse()) {
+                                error(tok,
+                                    "#elif after #" + "else");
+                                return source_skipline(false);
+                            } else if(!state.isParentActive()) {
+                                /* Nested in skipped 'if' */
+                                return source_skipline(false);
+                            } else if(state.isActive()) {
+                                /* The 'if' part got executed. */
+                                state.setParentActive(false);
+                                /* This is like # else # if but with
+                                 * only one # end. */
+                                state.setActive(false);
+                                return source_skipline(false);
+                            } else {
+                                _expr_token = null;
+                                state.setActive(expr(0) != 0);
+                                tok = expr_token(); /* unget */
+                                if(tok.getType() == Token.NL)
+                                    return tok;
+                                return source_skipline(true);
+                            }
+                            // break;
+
+                        case PP_ELSE:
+                            state = states.Peek();
+                            if(false)
+                                /* Check for 'if' */
+                                ;
+                            else if(state.sawElse()) {
+                                error(tok,
+                                    "#" + "else after #" + "else");
+                                return source_skipline(false);
+                            } else {
+                                state.setSawElse();
+                                state.setActive(!state.isActive());
+                                return source_skipline(warnings.HasFlag(Warning.ENDIF_LABELS));
+                            }
+                            // break;
+
+                        case PP_IFDEF:
+                            push_state();
+                            if(!isActive()) {
+                                return source_skipline(false);
+                            } else {
+                                tok = source_token_nonwhite();
+                                // System.out.println("ifdef " + tok);
+                                if(tok.getType() != Token.IDENTIFIER) {
+                                    error(tok,
+                                        "Expected identifier, not " +
+                                        tok.getText());
+                                    return source_skipline(false);
+                                } else {
+                                    String text = tok.getText();
+                                    bool exists =
+                                        macros.ContainsKey(text);
+                                    states.Peek().setActive(exists);
+                                    return source_skipline(true);
+                                }
+                            }
+                            // break;
+
+                        case PP_IFNDEF:
+                            push_state();
+                            if(!isActive()) {
+                                return source_skipline(false);
+                            } else {
+                                tok = source_token_nonwhite();
+                                if(tok.getType() != Token.IDENTIFIER) {
+                                    error(tok,
+                                        "Expected identifier, not " +
+                                        tok.getText());
+                                    return source_skipline(false);
+                                } else {
+                                    String text = tok.getText();
+                                    bool exists =
+                                        macros.ContainsKey(text);
+                                    states.Peek().setActive(!exists);
+                                    return source_skipline(true);
+                                }
+                            }
+                            // break;
+
+                        case PP_ENDIF:
+                            pop_state();
+                            return source_skipline(warnings.HasFlag(Warning.ENDIF_LABELS));
+                            // break;
+
+                        case PP_LINE:
+                            return source_skipline(false);
+                            // break;
+
+                        case PP_PRAGMA:
+                            if(!isActive())
+                                return source_skipline(false);
+                            return pragma();
+                            // break;
+
+                        case PP_IMPORT:
+                            if(!isActive())
+                                return source_skipline(false);
+                            else
+                                return import();
+
+                        default:
+                            /* Actual unknown directives are
+                             * processed above. If we get here,
+                             * we succeeded the map lookup but
+                             * failed to handle it. Therefore,
+                             * this is (unconditionally?) fatal. */
+                            // if (isActive()) /* XXX Could be warning. */
+                            throw new Exception(
+                                "Internal error: Unknown directive " +
+                                tok);
+                            // return source_skipline(false);
+                    }
+                    BREAK_PP: ;
+                    break;
+
+
+            }
+            BREAK_LEX: ;
+        }
+    }
+
+    /**
+     * Handles the import directive by delegating to
+     * include(false, true).
+     */
+    private Token import()
+    {
+        return include(false, true);
+    }
+
+    /**
+     * Returns the next token for which isWhite() is false.
+     */
+    public Token token_nonwhite() {
+        Token tok;
+        do {
+            tok = _token();
+        } while (isWhite(tok));
+        return tok;
+    }
+
+    /**
+     * Returns the next preprocessor token.
+     *
+     * When Feature.DEBUG is set, the token is also written to
+     * standard error.
+     *
+     * @see Token
+     * @throws LexerException if a preprocessing error occurs.
+     * @throws InternalException if an unexpected error condition arises.
+     */
+    public Token token() {
+        Token tok = _token();
+        if (getFeature(Feature.DEBUG))
+            System.Console.Error.WriteLine("pp: Returning " + tok);
+        return tok;
+    }
+
+    /* First ppcmd is 1, not 0. */
+    public const int PP_DEFINE = 1;
+    public const int PP_ELIF = 2;
+    public const int PP_ELSE = 3;
+    public const int PP_ENDIF = 4;
+    public const int PP_ERROR = 5;
+    public const int PP_IF = 6;
+    public const int PP_IFDEF = 7;
+    public const int PP_IFNDEF = 8;
+    public const int PP_INCLUDE = 9;
+    public const int PP_LINE = 10;
+    public const int PP_PRAGMA = 11;
+    public const int PP_UNDEF = 12;
+    public const int PP_WARNING = 13;
+    public const int PP_INCLUDE_NEXT = 14;
+    public const int PP_IMPORT = 15;
+
+    /* Maps a directive name (the word after '#') to its PP_* code. */
+    private static readonly Dictionary<String, int> ppcmds =
+        new Dictionary<String, int>();
+
+    static Preprocessor() {
+        ppcmds.Add("define", PP_DEFINE);
+        ppcmds.Add("elif", PP_ELIF);
+        ppcmds.Add("else", PP_ELSE);
+        ppcmds.Add("endif", PP_ENDIF);
+        ppcmds.Add("error", PP_ERROR);
+        ppcmds.Add("if", PP_IF);
+        ppcmds.Add("ifdef", PP_IFDEF);
+        ppcmds.Add("ifndef", PP_IFNDEF);
+        ppcmds.Add("include", PP_INCLUDE);
+        ppcmds.Add("line", PP_LINE);
+        ppcmds.Add("pragma", PP_PRAGMA);
+        ppcmds.Add("undef", PP_UNDEF);
+        ppcmds.Add("warning", PP_WARNING);
+        ppcmds.Add("include_next", PP_INCLUDE_NEXT);
+        ppcmds.Add("import", PP_IMPORT);
+    }
+
+
+    /**
+     * Describes the preprocessor for debugging: one " -> source" line
+     * per entry of the current source chain, followed by one
+     * "#macro ..." line per defined macro, sorted by macro name.
+     */
+    override public String ToString() {
+        StringBuilder buf = new StringBuilder();
+
+        Source s = getSource();
+        while (s != null) {
+            buf.Append(" -> ").Append(s).Append("\n");
+            s = s.getParent();
+        }
+
+        Dictionary<String, Macro> macros = getMacros();
+        List<String> keys = new List<String>(
+            macros.Keys
+        );
+        keys.Sort();
+        foreach(string key in keys) {
+            Macro macro = macros[key];
+            buf.Append("#").Append("macro ").Append(macro).Append("\n");
+        }
+
+        return buf.ToString();
+    }
+
+    /**
+     * Closes every source on the current source chain and then every
+     * source in the inputs collection.
+     */
+    public void Dispose() {
+        {
+            Source s = source;
+            while (s != null) {
+                s.close();
+                s = s.getParent();
+            }
+        }
+        foreach (Source s in inputs) {
+            s.close();
+        }
+    }
+
+}
+
+}
\ No newline at end of file
diff --git a/PreprocessorListener.cs b/PreprocessorListener.cs
new file mode 100644
index 0000000..1a9b397
--- /dev/null
+++ b/PreprocessorListener.cs
@@ -0,0 +1,86 @@
+/*
+ * Anarres C Preprocessor
+ * Copyright (c) 2007-2008, Shevek
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+ * or implied.  See the License for the specific language governing
+ * permissions and limitations under the License.
+ */
+using System;
+
+namespace CppNet {
+
+/**
+ * A handler for preprocessor events, primarily errors and warnings.
+ *
+ * If no PreprocessorListener is installed in a Preprocessor, all
+ * error and warning events will throw an exception. Installing a
+ * listener allows more intelligent handling of these events.
+ */
+public class PreprocessorListener {
+
+    private int errors;
+    private int warnings;
+
+    public PreprocessorListener() {
+        clear();
+    }
+
+    /**
+     * Resets the error and warning counters to zero.
+     */
+    public void clear() {
+        errors = 0;
+        warnings = 0;
+    }
+
+    /**
+     * Returns the number of errors handled since the last clear().
+     */
+    public int getErrors() {
+        return errors;
+    }
+
+    /**
+     * Returns the number of warnings handled since the last clear().
+     */
+    public int getWarnings() {
+        return warnings;
+    }
+
+    /**
+     * Writes a message to standard error.
+     *
+     * Virtual so that a subclass can send messages elsewhere.
+     */
+    protected virtual void print(String msg) {
+        System.Console.Error.WriteLine(msg);
+    }
+
+    /**
+     * Handles a warning.
+     *
+     * The behaviour of this method is defined by the
+     * implementation. It may simply record the error message, or
+     * it may throw an exception. This implementation counts the
+     * warning and prints "name:line:column: warning: msg".
+     */
+    public virtual void handleWarning(Source source, int line, int column,
+                    String msg) {
+        warnings++;
+        /* Do not fail with a NullReferenceException while reporting
+         * if no source is available. */
+        String name = (source != null) ? source.getName() : null;
+        print(name + ":" + line + ":" + column +
+                ": warning: " + msg);
+    }
+
+    /**
+     * Handles an error.
+     *
+     * The behaviour of this method is defined by the
+     * implementation. It may simply record the error message, or
+     * it may throw an exception. This implementation counts the
+     * error and prints "name:line:column: error: msg".
+     */
+    public virtual void handleError(Source source, int line, int column,
+                    String msg) {
+        errors++;
+        String name = (source != null) ? source.getName() : null;
+        print(name + ":" + line + ":" + column +
+                ": error: " + msg);
+    }
+
+    /**
+     * Called with a source and an event string; this implementation
+     * does nothing.
+     */
+    public virtual void handleSourceChange(Source source, String ev) {
+    }
+
+}
+}
\ No newline at end of file
diff --git a/Properties/AssemblyInfo.cs b/Properties/AssemblyInfo.cs
new file mode 100644
index 0000000..e5be9e8
--- /dev/null
+++ b/Properties/AssemblyInfo.cs
@@ -0,0 +1,30 @@
+using System.Reflection;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+
+// General Information about an assembly is controlled through the following
+// set of attributes. Change these attribute values to modify the information
+// associated with an assembly.
+[assembly: AssemblyTitle("CppNet")]
+[assembly: AssemblyProduct("CppNet")]
+
+// Setting ComVisible to false makes the types in this assembly not visible
+// to COM components.  If you need to access a type in this assembly from
+// COM, set the ComVisible attribute to true on that type.
+[assembly: ComVisible(false)] + +// The following GUID is for the ID of the typelib if this project is exposed to COM +[assembly: Guid("a972fb74-7a43-4c22-a381-2b8f0f5d7d2c")] + +// Version information for an assembly consists of the following four values: +// +// Major Version +// Minor Version +// Build Number +// Revision +// +// You can specify all the values or you can default the Build and Revision Numbers +// by using the '*' as shown below: +// [assembly: AssemblyVersion("1.0.*")] +[assembly: AssemblyVersion("1.0.0.0")] +[assembly: AssemblyFileVersion("1.0.0.0")] diff --git a/Source.cs b/Source.cs new file mode 100644 index 0000000..484f491 --- /dev/null +++ b/Source.cs @@ -0,0 +1,298 @@ +/* + * Anarres C Preprocessor + * Copyright (c) 2007-2008, Shevek + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express + * or implied. See the License for the specific language governing + * permissions and limitations under the License. + */ +using System; +using System.Collections.Generic; +using boolean = System.Boolean; + + +namespace CppNet +{ + + + /** + * An input to the Preprocessor. + * + * Inputs may come from Files, Strings or other sources. The + * preprocessor maintains a stack of Sources. Operations such as + * file inclusion or token pasting will push a new source onto + * the Preprocessor stack. Sources pop from the stack when they + * are exhausted; this may be transparent or explicit. + * + * BUG: Error messages are not handled properly. 
+     */
+    public abstract class Source : Iterable<Token>, Closeable
+    {
+        private Source parent;
+        private boolean autopop;
+        private PreprocessorListener listener;
+        private boolean active;
+        private boolean werror;
+
+        /* LineNumberReader */
+
+        /*
+         * A private iterator class is deliberately not implemented
+         * here: we can't do this, since we would lose the
+         * LexerException. See iterator() and SourceIterator instead.
+         */
+
+        public Source()
+        {
+            this.parent = null;
+            this.autopop = false;
+            this.listener = null;
+            this.active = true;
+            this.werror = false;
+        }
+
+        /**
+         * Sets the parent source of this source.
+         *
+         * Sources form a singly linked list.
+         */
+        internal void setParent(Source parent, boolean autopop)
+        {
+            this.parent = parent;
+            this.autopop = autopop;
+        }
+
+        /**
+         * Returns the parent source of this source.
+         *
+         * Sources form a singly linked list.
+         */
+        internal Source getParent()
+        {
+            return parent;
+        }
+
+        /**
+         * Takes the listener and the "warnings are errors" setting
+         * from the given Preprocessor.
+         */
+        // @OverrideMustInvoke
+        internal virtual void init(Preprocessor pp)
+        {
+            setListener(pp.getListener());
+            this.werror = pp.getWarnings().HasFlag(Warning.ERROR);
+        }
+
+        /**
+         * Sets the listener for this Source.
+         *
+         * Normally this is set by the Preprocessor when a Source is
+         * used, but if you are using a Source as a standalone object,
+         * you may wish to call this.
+         */
+        public void setListener(PreprocessorListener pl)
+        {
+            this.listener = pl;
+        }
+
+        /**
+         * Returns the File currently being lexed.
+         *
+         * If this Source is not a {@link FileLexerSource}, then
+         * it will ask the parent Source, and so forth recursively.
+         * If no Source on the stack is a FileLexerSource, returns null.
+         */
+        internal virtual String getPath()
+        {
+            Source parent = getParent();
+            if(parent != null)
+                return parent.getPath();
+            return null;
+        }
+
+        /**
+         * Returns the human-readable name of the current Source.
+         *
+         * This base implementation asks the parent, and returns null
+         * when there is no parent.
+         */
+        internal virtual String getName()
+        {
+            Source parent = getParent();
+            if(parent != null)
+                return parent.getName();
+            return null;
+        }
+
+        /**
+         * Returns the current line number within this Source.
+         *
+         * This base implementation asks the parent, and returns 0
+         * when there is no parent.
+         */
+        public virtual int getLine()
+        {
+            Source parent = getParent();
+            if(parent == null)
+                return 0;
+            return parent.getLine();
+        }
+
+        /**
+         * Returns the current column number within this Source.
+         *
+         * This base implementation asks the parent, and returns 0
+         * when there is no parent.
+         */
+        public virtual int getColumn()
+        {
+            Source parent = getParent();
+            if(parent == null)
+                return 0;
+            return parent.getColumn();
+        }
+
+        /**
+         * Returns true if this Source is expanding the given macro.
+         *
+         * This is used to prevent macro recursion.
+         */
+        internal virtual boolean isExpanding(Macro m)
+        {
+            Source parent = getParent();
+            if(parent != null)
+                return parent.isExpanding(m);
+            return false;
+        }
+
+        /**
+         * Returns true if this Source should be transparently popped
+         * from the input stack.
+         *
+         * Examples of such sources are macro expansions.
+         */
+        internal boolean isAutopop()
+        {
+            return autopop;
+        }
+
+        /**
+         * Returns true if this source has line numbers.
+         */
+        internal virtual boolean isNumbered()
+        {
+            return false;
+        }
+
+        /* This is an incredibly lazy way of disabling warnings when
+         * the source is not active. */
+        internal void setActive(boolean b)
+        {
+            this.active = b;
+        }
+
+        internal boolean isActive()
+        {
+            return active;
+        }
+
+        /**
+         * Returns the next Token parsed from this input stream.
+         *
+         * @see Token
+         */
+        public abstract Token token();
+
+        /**
+         * Returns a token iterator for this Source.
+         */
+        public Iterator<Token> iterator()
+        {
+            return new SourceIterator(this);
+        }
+
+        /**
+         * Skips tokens until the end of line.
+         *
+         * If EOF is reached first, a warning is raised and a
+         * synthetic NL token is returned in its place.
+         *
+         * @param white true if only whitespace is permitted on the
+         *   remainder of the line.
+         * @return the NL token.
+         */
+        public Token skipline(boolean white)
+        {
+            for(; ; ) {
+                Token tok = token();
+                switch(tok.getType()) {
+                    case Token.EOF:
+                        /* There ought to be a newline before EOF.
+                         * At least, in any skipline context. */
+                        /* XXX Are we sure about this? */
+                        warning(tok.getLine(), tok.getColumn(),
+                            "No newline before end of file");
+                        return new Token(Token.NL,
+                                tok.getLine(), tok.getColumn(),
+                                "\n");
+                    // return tok;
+                    case Token.NL:
+                        /* This may contain one or more newlines. */
+                        return tok;
+                    case Token.CCOMMENT:
+                    case Token.CPPCOMMENT:
+                    case Token.WHITESPACE:
+                        break;
+                    default:
+                        /* XXX Check white, if required. */
+                        if(white)
+                            warning(tok.getLine(), tok.getColumn(),
+                                "Unexpected nonwhite token");
+                        break;
+                }
+            }
+        }
+
+        /**
+         * Reports an error to the listener, or throws a
+         * LexerException when no listener is installed.
+         */
+        protected void error(int line, int column, String msg)
+        {
+            if(listener != null)
+                listener.handleError(this, line, column, msg);
+            else
+                throw new LexerException("Error at " + line + ":" + column + ": " + msg);
+        }
+
+        /**
+         * Reports a warning. It is treated as an error when the
+         * werror flag is set; otherwise it goes to the listener, or
+         * throws a LexerException when no listener is installed.
+         */
+        protected void warning(int line, int column, String msg)
+        {
+            if(werror)
+                error(line, column, msg);
+            else if(listener != null)
+                listener.handleWarning(this, line, column, msg);
+            else
+                throw new LexerException("Warning at " + line + ":" + column + ": " + msg);
+        }
+
+        /**
+         * Closes this Source. This base implementation does nothing.
+         */
+        public virtual void close()
+        {
+        }
+    }
+
+}
\ No newline at end of file
diff --git a/SourceIterator.cs b/SourceIterator.cs
new file mode 100644
index 0000000..f0f6887
--- /dev/null
+++ b/SourceIterator.cs
@@ -0,0 +1,98 @@
+/*
+ * Anarres C Preprocessor
+ * Copyright (c) 2007-2008, Shevek
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+ * or implied.  See the License for the specific language governing
+ * permissions and limitations under the License.
+ */
+
+using System;
+using System.Collections.Generic;
+using System.IO;
+
+using boolean = System.Boolean;
+
+namespace CppNet
+{
+
+    /**
+     * An Iterator for {@link Source Sources},
+     * returning {@link Token Tokens}.
+     */
+    public class SourceIterator : Iterator<Token>
+    {
+        private Source source;
+        /* Token read ahead by advance(); null when none is pending. */
+        private Token tok;
+
+        public SourceIterator(Source s)
+        {
+            this.source = s;
+            this.tok = null;
+        }
+
+        /**
+         * Reads one token ahead if none is pending.
+         *
+         * A LexerException is rethrown inside an
+         * IllegalStateException; an IOException is rethrown inside
+         * an ApplicationException.
+         */
+        private void advance()
+        {
+            try {
+                if(tok == null)
+                    tok = source.token();
+            } catch(LexerException e) {
+                throw new IllegalStateException(e);
+            } catch(IOException e) {
+                throw new ApplicationException(
+                    "Failed to advance token iterator: " + e.Message, e);
+            }
+        }
+
+        /**
+         * Returns true if the enclosed Source has more tokens.
+         *
+         * The EOF token is never returned by the iterator.
+         * @throws IllegalStateException if the Source
+         * throws a LexerException
+         * @throws ApplicationException if the Source
+         * throws an IOException
+         */
+        public boolean hasNext()
+        {
+            advance();
+            return tok.getType() != Token.EOF;
+        }
+
+        /**
+         * Returns the next token from the enclosed Source.
+         *
+         * The EOF token is never returned by the iterator.
+         * @throws ArgumentOutOfRangeException if there are no
+         * more tokens
+         * @throws IllegalStateException if the Source
+         * throws a LexerException
+         * @throws ApplicationException if the Source
+         * throws an IOException
+         */
+        public Token next()
+        {
+            if(!hasNext())
+                throw new ArgumentOutOfRangeException();
+            Token t = this.tok;
+            this.tok = null;
+            return t;
+        }
+
+        /**
+         * Not supported.
+         *
+         * @throws NotSupportedException always.
+         */
+        public void remove()
+        {
+            throw new NotSupportedException();
+        }
+    }
+
+
+}
\ No newline at end of file
diff --git a/State.cs b/State.cs
new file mode 100644
index 0000000..324cc78
--- /dev/null
+++ b/State.cs
@@ -0,0 +1,89 @@
+/*
+ * Anarres C Preprocessor
+ * Copyright (c) 2007-2008, Shevek
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+ * or implied.  See the License for the specific language governing
+ * permissions and limitations under the License.
+ */
+using System;
+
+namespace CppNet
+{
+
+    /**
+     * Three flags describing one level of conditional nesting:
+     * whether the parent level is active, whether this level is
+     * active, and whether an else has been seen at this level.
+     */
+    /* pp */
+    class State
+    {
+        bool _parent;
+        bool _active;
+        bool _sawElse;
+
+        /** Creates a state with parent and active set, no else seen. */
+        /* pp */
+        internal State()
+        {
+            this._parent = true;
+            this._active = true;
+            this._sawElse = false;
+        }
+
+        /**
+         * Creates a nested state. Its parent flag is set only when
+         * the given state and that state's own parent are both active.
+         */
+        /* pp */
+        internal State(State parent)
+        {
+            this._parent = parent.isParentActive() && parent.isActive();
+            this._active = true;
+            this._sawElse = false;
+        }
+
+        /* Required for #elif */
+        /* pp */
+        internal void setParentActive(bool b)
+        {
+            this._parent = b;
+        }
+
+        /* pp */
+        internal bool isParentActive()
+        {
+            return _parent;
+        }
+
+        /* pp */
+        internal void setActive(bool b)
+        {
+            this._active = b;
+        }
+
+        /* pp */
+        internal bool isActive()
+        {
+            return _active;
+        }
+
+        /** Records that an else has been seen; it cannot be unset. */
+        /* pp */
+        internal void setSawElse()
+        {
+            _sawElse = true;
+        }
+
+        /* pp */
+        internal bool sawElse()
+        {
+            return _sawElse;
+        }
+
+        /** Returns the three flags as text, for debugging. */
+        public override String ToString()
+        {
+            return "parent=" + _parent +
+                ", active=" + _active +
+                ", sawelse=" + _sawElse;
+        }
+    }
+}
\ No newline at end of file
diff --git a/StringLexerSource.cs b/StringLexerSource.cs
new file mode 100644
index 0000000..0399598
--- 
/dev/null
+++ b/StringLexerSource.cs
@@ -0,0 +1,55 @@
+/*
+ * Anarres C Preprocessor
+ * Copyright (c) 2007-2008, Shevek
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+ * or implied.  See the License for the specific language governing
+ * permissions and limitations under the License.
+ */
+using System;
+using System.IO;
+
+namespace CppNet {
+
+/**
+ * A LexerSource whose input is an in-memory String.
+ *
+ * Token pasting uses this class; user code may use it as well.
+ */
+public class StringLexerSource : LexerSource {
+
+    /**
+     * Lexes the given String with preprocessor directives
+     * not honoured.
+     */
+    public StringLexerSource(String str)
+        : this(str, false)
+    {
+    }
+
+    /**
+     * Lexes the given String.
+     *
+     * @param ppvalid true if preprocessor directives are to be
+     *   honoured within the string.
+     */
+    public StringLexerSource(String str, bool ppvalid)
+        : base(new StringReader(str), ppvalid)
+    {
+    }
+
+    /** Always returns the fixed description "string literal". */
+    public override String ToString()
+    {
+        return "string literal";
+    }
+}
+
+}
\ No newline at end of file
diff --git a/Token.cs b/Token.cs
new file mode 100644
index 0000000..6ecf707
--- /dev/null
+++ b/Token.cs
@@ -0,0 +1,353 @@
+/*
+ * Anarres C Preprocessor
+ * Copyright (c) 2007-2008, Shevek
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+ * or implied.  See the License for the specific language governing
+ * permissions and limitations under the License.
+ */
+using System;
+using System.Text;
+
+namespace CppNet {
+
+/**
+ * A Preprocessor token.
+ *
+ * @see Preprocessor
+ */
+    public sealed class Token
+    {
+        // public const int EOF = -1;
+
+        private int type;
+        private int line;
+        private int column;
+        private Object value;
+        private String text;
+
+        public Token(int type, int line, int column,
+                        String text, Object value)
+        {
+            this.type = type;
+            this.line = line;
+            this.column = column;
+            this.text = text;
+            this.value = value;
+        }
+
+        public Token(int type, int line, int column, String text) :
+            this(type, line, column, text, null)
+        {
+        }
+
+        /* pp: position-less token; line and column are set to -1. */
+        internal Token(int type, String text, Object value) :
+            this(type, -1, -1, text, value)
+        {
+        }
+
+        /* pp */
+        internal Token(int type, String text) :
+            this(type, text, null)
+        {
+        }
+
+        /* pp: the text is taken from the texts table for types below
+         * _TOKENS, and is "TOK" + type otherwise. */
+        internal Token(int type) :
+            this(type, type < _TOKENS ? texts[type] : "TOK" + type)
+        {
+        }
+
+        /**
+         * Returns the semantic type of this token.
+         */
+        public int getType()
+        {
+            return type;
+        }
+
+        internal void setLocation(int line, int column)
+        {
+            this.line = line;
+            this.column = column;
+        }
+
+        /**
+         * Returns the line at which this token started.
+         *
+         * Lines are numbered from zero.
+         */
+        public int getLine()
+        {
+            return line;
+        }
+
+        /**
+         * Returns the column at which this token started.
+         *
+         * Columns are numbered from zero.
+         */
+        public int getColumn()
+        {
+            return column;
+        }
+
+        /**
+         * Returns the original or generated text of this token.
+         *
+         * This is distinct from the semantic value of the token.
+         *
+         * @see #getValue()
+         */
+        public String getText()
+        {
+            return text;
+        }
+
+        /**
+         * Returns the semantic value of this token.
+         *
+         * For strings, this is the parsed String.
+         * For integers, this is an Integer object.
+         * For other token types, as appropriate.
+         *
+         * @see #getText()
+         */
+        public Object getValue()
+        {
+            return value;
+        }
+
+        /**
+         * Returns a description of this token, for debugging purposes.
+         *
+         * The format is [NAME@line,column]:"text"=value, where the
+         * position is omitted when it is -1 and "=value" is omitted
+         * when the value is null.
+         *
+         * This must override Object.ToString(): without the override
+         * modifier, string concatenation and other callers that see
+         * the token as an Object would get the type name instead.
+         */
+        public override String ToString()
+        {
+            StringBuilder buf = new StringBuilder();
+
+            buf.Append('[').Append(getTokenName(type));
+            if(line != -1) {
+                buf.Append('@').Append(line);
+                if(column != -1)
+                    buf.Append(',').Append(column);
+            }
+            buf.Append("]:");
+            if(text != null)
+                buf.Append('"').Append(text).Append('"');
+            else if(type > 3 && type < 256)
+                buf.Append((char)type);
+            else
+                buf.Append('<').Append(type).Append('>');
+            if(value != null)
+                buf.Append('=').Append(value);
+            return buf.ToString();
+        }
+
+        /**
+         * Returns the descriptive name of the given token type.
+         *
+         * This is mostly used for stringification and debugging.
+         * Types outside the names table yield "Invalid" + type, and
+         * types with no entry yield "Unknown" + type.
+         */
+        public static String getTokenName(int type)
+        {
+            if(type < 0)
+                return "Invalid" + type;
+            if(type >= names.Length)
+                return "Invalid" + type;
+            if(names[type] == null)
+                return "Unknown" + type;
+            return names[type];
+        }
+
+        /** The token type AND_EQ. */
+        public const int AND_EQ = 257;
+        /** The token type ARROW. */
+        public const int ARROW = 258;
+        /** The token type CHARACTER. */
+        public const int CHARACTER = 259;
+        /** The token type CCOMMENT. */
+        public const int CCOMMENT = 260;
+        /** The token type CPPCOMMENT. */
+        public const int CPPCOMMENT = 261;
+        /** The token type DEC. */
+        public const int DEC = 262;
+        /** The token type DIV_EQ. */
+        public const int DIV_EQ = 263;
+        /** The token type ELLIPSIS. */
+        public const int ELLIPSIS = 264;
+        /** The token type EOF. */
+        public const int EOF = 265;
+        /** The token type EQ. */
+        public const int EQ = 266;
+        /** The token type GE. */
+        public const int GE = 267;
+        /** The token type HASH. */
+        public const int HASH = 268;
+        /** The token type HEADER. */
+        public const int HEADER = 269;
+        /** The token type IDENTIFIER. */
+        public const int IDENTIFIER = 270;
+        /** The token type INC. */
+        public const int INC = 271;
+        /** The token type INTEGER. */
+        public const int INTEGER = 272;
+        /** The token type LAND. */
+        public const int LAND = 273;
+        /** The token type LAND_EQ. */
+        public const int LAND_EQ = 274;
+        /** The token type LE. */
+        public const int LE = 275;
+        /** The token type LITERAL. */
+        public const int LITERAL = 276;
+        /** The token type LOR. */
+        public const int LOR = 277;
+        /** The token type LOR_EQ. */
+        public const int LOR_EQ = 278;
+        /** The token type LSH. */
+        public const int LSH = 279;
+        /** The token type LSH_EQ. */
+        public const int LSH_EQ = 280;
+        /** The token type MOD_EQ. */
+        public const int MOD_EQ = 281;
+        /** The token type MULT_EQ. */
+        public const int MULT_EQ = 282;
+        /** The token type NE. */
+        public const int NE = 283;
+        /** The token type NL. */
+        public const int NL = 284;
+        /** The token type OR_EQ. */
+        public const int OR_EQ = 285;
+        /** The token type PASTE. */
+        public const int PASTE = 286;
+        /** The token type PLUS_EQ. */
+        public const int PLUS_EQ = 287;
+        /** The token type RANGE. */
+        public const int RANGE = 288;
+        /** The token type RSH. */
+        public const int RSH = 289;
+        /** The token type RSH_EQ. */
+        public const int RSH_EQ = 290;
+        /** The token type STRING. */
+        public const int STRING = 291;
+        /** The token type SUB_EQ. */
+        public const int SUB_EQ = 292;
+        /** The token type WHITESPACE. */
+        public const int WHITESPACE = 293;
+        /** The token type XOR_EQ. */
+        public const int XOR_EQ = 294;
+        /** The token type M_ARG. */
+        public const int M_ARG = 295;
+        /** The token type M_PASTE. */
+        public const int M_PASTE = 296;
+        /** The token type M_STRING. */
+        public const int M_STRING = 297;
+        /** The token type P_LINE. */
+        public const int P_LINE = 298;
+        /** The token type INVALID. */
+        public const int INVALID = 299;
+        /**
+         * The number of possible semantic token types.
+         *
+         * Please note that not all token types below 255 are used.
+         */
+        public const int _TOKENS = 300;
+
+        /** The position-less space token. */
+        /* pp */
+        public static readonly Token space = new Token(WHITESPACE, -1, -1, " ");
+
+        /* Indexed by token type: descriptive name and default text. */
+        private static readonly String[] names = new String[_TOKENS];
+        private static readonly String[] texts = new String[_TOKENS];
+        static Token()
+        {
+            /* Types below 255 use the character itself as name and text. */
+            for(int i = 0; i < 255; i++) {
+                texts[i] = ((char)i).ToString();
+                names[i] = texts[i];
+            }
+
+            texts[AND_EQ] = "&=";
+            texts[ARROW] = "->";
+            texts[DEC] = "--";
+            texts[DIV_EQ] = "/=";
+            texts[ELLIPSIS] = "...";
+            texts[EQ] = "==";
+            texts[GE] = ">=";
+            texts[HASH] = "#";
+            texts[INC] = "++";
+            texts[LAND] = "&&";
+            texts[LAND_EQ] = "&&=";
+            texts[LE] = "<=";
+            texts[LOR] = "||";
+            texts[LOR_EQ] = "||=";
+            texts[LSH] = "<<";
+            texts[LSH_EQ] = "<<=";
+            texts[MOD_EQ] = "%=";
+            texts[MULT_EQ] = "*=";
+            texts[NE] = "!=";
+            texts[NL] = "\n";
+            texts[OR_EQ] = "|=";
+            /* We have to split the two hashes or Velocity eats them. */
+            texts[PASTE] = "#" + "#";
+            texts[PLUS_EQ] = "+=";
+            texts[RANGE] = "..";
+            texts[RSH] = ">>";
+            texts[RSH_EQ] = ">>=";
+            texts[SUB_EQ] = "-=";
+            texts[XOR_EQ] = "^=";
+
+            names[AND_EQ] = "AND_EQ";
+            names[ARROW] = "ARROW";
+            names[CHARACTER] = "CHARACTER";
+            names[CCOMMENT] = "CCOMMENT";
+            names[CPPCOMMENT] = "CPPCOMMENT";
+            names[DEC] = "DEC";
+            names[DIV_EQ] = "DIV_EQ";
+            names[ELLIPSIS] = "ELLIPSIS";
+            names[EOF] = "EOF";
+            names[EQ] = "EQ";
+            names[GE] = "GE";
+            names[HASH] = "HASH";
+            names[HEADER] = "HEADER";
+            names[IDENTIFIER] = "IDENTIFIER";
+            names[INC] = "INC";
+            names[INTEGER] = "INTEGER";
+            names[LAND] = "LAND";
+            names[LAND_EQ] = "LAND_EQ";
+            names[LE] = "LE";
+            names[LITERAL] = "LITERAL";
+            names[LOR] = "LOR";
+            names[LOR_EQ] = "LOR_EQ";
+            names[LSH] = "LSH";
+            names[LSH_EQ] = "LSH_EQ";
+            names[MOD_EQ] = "MOD_EQ";
+            names[MULT_EQ] = "MULT_EQ";
+            names[NE] = "NE";
+            names[NL] = "NL";
+            names[OR_EQ] = "OR_EQ";
+            names[PASTE] = "PASTE";
+            names[PLUS_EQ] = "PLUS_EQ";
+            names[RANGE] = "RANGE";
+            names[RSH] = "RSH";
+            names[RSH_EQ] = "RSH_EQ";
+            names[STRING] = "STRING";
+            names[SUB_EQ] = "SUB_EQ";
+            names[WHITESPACE] = "WHITESPACE";
+            names[XOR_EQ] = "XOR_EQ";
+            names[M_ARG] = "M_ARG";
+            names[M_PASTE] = "M_PASTE";
+            names[M_STRING] = "M_STRING";
+            names[P_LINE] = "P_LINE";
+            names[INVALID] = "INVALID";
+        }
+
+    }
+}
\ No newline at end of file
diff --git a/TokenSnifferSource.cs b/TokenSnifferSource.cs
new file mode 100644
index 0000000..1512b2e
--- /dev/null
+++ b/TokenSnifferSource.cs
@@ -0,0 +1,54 @@
+/*
+ * Anarres C Preprocessor
+ * Copyright (c) 2007-2008, Shevek
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express + * or implied. See the License for the specific language governing + * permissions and limitations under the License. + */ + +package org.anarres.cpp; + +import java.io.BufferedReader; +import java.io.File; +import java.io.FileReader; +import java.io.IOException; +import java.io.PushbackReader; +import java.io.Reader; +import java.io.StringReader; + +import java.util.ArrayList; +import java.util.List; +import java.util.Iterator; + +import static org.anarres.cpp.Token.*; + +@Deprecated +/* pp */ class TokenSnifferSource extends Source { + private List target; + + /* pp */ TokenSnifferSource(List target) { + this.target = target; + } + + public Token token() + throws IOException, + LexerException { + Token tok = getParent().token(); + if (tok.getType() != EOF) + target.add(tok); + return tok; + } + + public String toString() { + return getParent().toString(); + } +} diff --git a/VirtualFile.cs b/VirtualFile.cs new file mode 100644 index 0000000..2995a02 --- /dev/null +++ b/VirtualFile.cs @@ -0,0 +1,33 @@ +/* + * Anarres C Preprocessor + * Copyright (c) 2007-2008, Shevek + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express + * or implied. See the License for the specific language governing + * permissions and limitations under the License. 
+ */
+
+namespace CppNet {
+
+/** An extremely lightweight virtual file interface.
+ *  Each node reports a path and a name, hands out its parent and named children as further VirtualFile objects, and supplies a Source.
+ *  NOTE(review): the member notes below are inferred from names and signatures only - confirm against the implementations. */
+public interface VirtualFile {
+    // public String getParent();
+    bool isFile();                          // presumably true for a regular file rather than a directory
+    string getPath();                       // path string of this node
+    string getName();                       // presumably the last component of the path - TODO confirm
+    VirtualFile getParentFile();            // containing node; whether it may be null is not visible here
+    VirtualFile getChildFile(string name);  // node called `name` below this one
+    Source getSource();                     // Source for this file; presumably reads its contents - confirm
+}
+
+}
\ No newline at end of file
diff --git a/VirtualFileSystem.cs b/VirtualFileSystem.cs
new file mode 100644
index 0000000..e2eadd3
--- /dev/null
+++ b/VirtualFileSystem.cs
@@ -0,0 +1,30 @@
+/*
+ * Anarres C Preprocessor
+ * Copyright (c) 2007-2008, Shevek
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+ * or implied. See the License for the specific language governing
+ * permissions and limitations under the License.
+ */
+using System;
+
+namespace CppNet
+{
+    /** An extremely lightweight virtual file system interface.
+     *  Hands out VirtualFile objects, addressed either by one path string or by a directory plus a name.
+     *  NOTE(review): how `dir` and `name` are combined, and what is returned for a missing file, is up to the implementation - confirm there. */
+    public interface VirtualFileSystem
+    {
+        VirtualFile getFile(String path);               // look up by a single path string
+        VirtualFile getFile(String dir, String name);   // look up by directory and file name
+    }
+
+}
\ No newline at end of file
diff --git a/Warning.cs b/Warning.cs
new file mode 100644
index 0000000..8d70132
--- /dev/null
+++ b/Warning.cs
@@ -0,0 +1,38 @@
+/*
+ * Anarres C Preprocessor
+ * Copyright (c) 2007-2008, Shevek
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+ * or implied. See the License for the specific language governing
+ * permissions and limitations under the License.
+ */
+using System;
+
+namespace CppNet
+{
+    /** Warning classes which may optionally be emitted by the Preprocessor.
+     *  Declared [Flags] with one distinct bit per member, so several classes can be combined with bitwise OR; NONE is the empty set.
+     *  NOTE(review): the names look like GCC's -W options (e.g. -Wundef); what each class actually triggers is decided in the Preprocessor - confirm there. */
+    [Flags]
+    public enum Warning
+    {
+        NONE = 0,                 // no bits set
+        TRIGRAPHS = 1 << 0,       // bit 0 (0x01)
+        // TRADITIONAL,           (commented out: occupies no bit)
+        IMPORT = 1 << 1,          // bit 1 (0x02)
+        UNDEF = 1 << 2,           // bit 2 (0x04)
+        UNUSED_MACROS = 1 << 3,   // bit 3 (0x08)
+        ENDIF_LABELS = 1 << 4,    // bit 4 (0x10)
+        ERROR = 1 << 5,           // bit 5 (0x20); presumably "treat warnings as errors" - confirm
+        // SYSTEM_HEADERS         (commented out: occupies no bit)
+    }
+
+}
\ No newline at end of file