diff --git a/Argument.cs b/Argument.cs
new file mode 100644
index 0000000..340ed77
--- /dev/null
+++ b/Argument.cs
@@ -0,0 +1,72 @@
+/*
+ * Anarres C Preprocessor
+ * Copyright (c) 2007-2008, Shevek
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+ * or implied. See the License for the specific language governing
+ * permissions and limitations under the License.
+ */
+using System;
+using System.Collections.Generic;
+using System.Text;
+
+namespace CppNet {
+/**
+ * A macro argument.
+ *
+ * This encapsulates a raw and preprocessed token stream: the list itself
+ * holds the raw tokens, and the expansion is computed once by expand()
+ * and cached.
+ */
+internal class Argument : List<Token> {
+    public const int NO_ARGS = -1;
+
+    /* Cached result of expand(); null until expand() has been called. */
+    private List<Token> _expansion;
+
+    public Argument() {
+        this._expansion = null;
+    }
+
+    /** Appends a raw token to this argument. */
+    public void addToken(Token tok) {
+        Add(tok);
+    }
+
+    /**
+     * Expands this argument with the given preprocessor.
+     *
+     * The result is cached, so only the first call does any work.
+     */
+    internal void expand(Preprocessor p) {
+        /* Cache expansion. */
+        if(_expansion == null) {
+            this._expansion = p.expand(this);
+            // System.out.println("Expanded arg " + this);
+        }
+    }
+
+    /**
+     * Returns an iterator over the cached expansion.
+     *
+     * expand() must have been called first; otherwise an
+     * InvalidOperationException is thrown instead of an opaque
+     * NullReferenceException.
+     */
+    public Iterator<Token> expansion()
+    {
+        if(_expansion == null)
+            throw new InvalidOperationException(
+                "Argument has not been expanded");
+        return _expansion.iterator();
+    }
+
+    /** Renders the raw and (if available) expanded token text for debugging. */
+    override public String ToString() {
+        StringBuilder buf = new StringBuilder();
+        buf.Append("Argument(");
+        // buf.Append(super.toString());
+        buf.Append("raw=[ ");
+        for (int i = 0; i < this.Count; i++)
+            buf.Append(this[i].getText());
+        buf.Append(" ];expansion=[ ");
+        if(_expansion == null)
+            buf.Append("null");
+        else
+            for(int i = 0; i < _expansion.Count; i++)
+                buf.Append(_expansion[i].getText());
+        buf.Append(" ])");
+        return buf.ToString();
+    }
+
+}
+
+}
\ No newline at end of file
diff --git a/CppNet.csproj b/CppNet.csproj
new file mode 100644
index 0000000..921869a
--- /dev/null
+++ b/CppNet.csproj
@@ -0,0 +1,74 @@
+ + + + + Debug + AnyCPU + {C2FD9262-69F8-4B75-9AB1-FF359C9143E9} + Library + Properties + CppNet + CppNet + v4.5 + 512 + + + true + full + false + bin\Debug\ + DEBUG;TRACE + prompt + 4 + + + pdbonly + true + bin\Release\ + TRACE + prompt + 4 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
\ No newline at end of file
diff --git a/CppReader.cs b/CppReader.cs
new file mode 100644
index 0000000..fbe3545
--- /dev/null
+++ b/CppReader.cs
@@ -0,0 +1,153 @@
+/*
+ * Anarres C Preprocessor
+ * Copyright (c) 2007-2008, Shevek
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+ * or implied. See the License for the specific language governing
+ * permissions and limitations under the License.
+ */ + +package org.anarres.cpp; + +import java.io.File; +import java.io.IOException; +import java.io.Reader; + +import static org.anarres.cpp.Token.*; + +/** + * A Reader wrapper around the Preprocessor. + * + * This is a utility class to provide a transparent {@link Reader} + * which preprocesses the input text. + * + * @see Preprocessor + * @see Reader + */ +public class CppReader extends Reader { + + private Preprocessor cpp; + private String token; + private int idx; + + public CppReader(final Reader r) { + cpp = new Preprocessor(new LexerSource(r, true) { + @Override + public String getName() { + return ""; + } + }); + token = ""; + idx = 0; + } + + public CppReader(Preprocessor p) { + cpp = p; + token = ""; + idx = 0; + } + + /** + * Returns the Preprocessor used by this CppReader. + */ + public Preprocessor getPreprocessor() { + return cpp; + } + + /** + * Defines the given name as a macro. + * + * This is a convnience method. + */ + public void addMacro(String name) + throws LexerException { + cpp.addMacro(name); + } + + /** + * Defines the given name as a macro. + * + * This is a convnience method. + */ + public void addMacro(String name, String value) + throws LexerException { + cpp.addMacro(name, value); + } + + private boolean refill() + throws IOException { + try { + assert cpp != null : "cpp is null : was it closed?"; + if (token == null) + return false; + while (idx >= token.length()) { + Token tok = cpp.token(); + switch (tok.getType()) { + case EOF: + token = null; + return false; + case CCOMMENT: + case CPPCOMMENT: + if (!cpp.getFeature(Feature.KEEPCOMMENTS)) { + token = " "; + break; + } + default: + token = tok.getText(); + break; + } + idx = 0; + } + return true; + } + catch (LexerException e) { + /* Never happens. 
+ if (e.getCause() instanceof IOException) + throw (IOException)e.getCause(); + */ + IOException ie = new IOException(String.valueOf(e)); + ie.initCause(e); + throw ie; + } + } + + public int read() + throws IOException { + if (!refill()) + return -1; + return token.charAt(idx++); + } + + /* XXX Very slow and inefficient. */ + public int read(char cbuf[], int off, int len) + throws IOException { + if (token == null) + return -1; + for (int i = 0; i < len; i++) { + int ch = read(); + if (ch == -1) + return i; + cbuf[off + i] = (char)ch; + } + return len; + } + + public void close() + throws IOException { + if (cpp != null) { + cpp.close(); + cpp = null; + } + token = null; + } + +} diff --git a/CppTask.cs b/CppTask.cs new file mode 100644 index 0000000..f86c2d1 --- /dev/null +++ b/CppTask.cs @@ -0,0 +1,113 @@ +/* + * Anarres C Preprocessor + * Copyright (c) 2007-2008, Shevek + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express + * or implied. See the License for the specific language governing + * permissions and limitations under the License. + */ + +package org.anarres.cpp; + +import java.io.File; +import java.io.FileWriter; +import java.io.IOException; + +import org.apache.tools.ant.BuildException; +import org.apache.tools.ant.Task; + +import org.anarres.cpp.LexerException; +import org.anarres.cpp.Preprocessor; +import org.anarres.cpp.Token; + +/** + * An ant task for jcpp. 
+ */ +public class CppTask extends Task { + + private static class Macro { + private String name; + private String value; + + public void setName(String name) { + this.name = name; + } + + public String getName() { + return name; + } + + public void setValue(String value) { + this.value = value; + } + + public String getValue() { + return value; + } + } + + private File input; + private File output; + private Preprocessor cpp; + + public CppTask() { + super(); + cpp = new Preprocessor(); + } + + public void setInput(File input) { + this.input = input; + } + + public void setOutput(File output) { + this.output = output; + } + + public void addMacro(Macro macro) { + try { + cpp.addMacro(macro.getName(), macro.getValue()); + } + catch (LexerException e) { + throw new BuildException(e); + } + } + + public void execute() { + FileWriter writer = null; + try { + if (input == null) + throw new BuildException("Input not specified"); + if (output == null) + throw new BuildException("Output not specified"); + cpp.addInput(this.input); + writer = new FileWriter(this.output); + for (;;) { + Token tok = cpp.token(); + if (tok != null && tok.getType() == Token.EOF) + break; + writer.write(tok.getText()); + } + } + catch (Exception e) { + throw new BuildException(e); + } + finally { + if (writer != null) { + try { + writer.close(); + } + catch (IOException e) { + } + } + } + } + +} diff --git a/Feature.cs b/Feature.cs new file mode 100644 index 0000000..e66afad --- /dev/null +++ b/Feature.cs @@ -0,0 +1,49 @@ +/* + * Anarres C Preprocessor + * Copyright (c) 2007-2008, Shevek + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+ * or implied. See the License for the specific language governing
+ * permissions and limitations under the License.
+ */
+
+using System;
+
+namespace CppNet
+{
+    /**
+     * Features of the Preprocessor, which may be enabled or disabled.
+     *
+     * This is a [Flags] enum: every member other than NONE is a distinct
+     * single bit, so members can be combined with bitwise OR.
+     */
+    [Flags]
+    public enum Feature
+    {
+        NONE = 0, /* No feature bits set. */
+        /** Supports ANSI digraphs. */
+        DIGRAPHS = 1 << 0,
+        /** Supports ANSI trigraphs. */
+        TRIGRAPHS = 1 << 1,
+        /** Outputs linemarker tokens. */
+        LINEMARKERS = 1 << 2,
+        /** Reports tokens of type INVALID as errors. */
+        CSYNTAX = 1 << 3,
+        /** Preserves comments in the lexed output. */
+        KEEPCOMMENTS = 1 << 4,
+        /** Preserves comments in the lexed output, even when inactive. */
+        KEEPALLCOMMENTS = 1 << 5,
+        VERBOSE = 1 << 6, /* NOTE(review): undocumented; presumably verbose diagnostics - confirm against Preprocessor. */
+        DEBUG = 1 << 7, /* NOTE(review): undocumented; presumably debug tracing - confirm against Preprocessor. */
+
+        /** Supports lexing of objective-C. */
+        OBJCSYNTAX = 1 << 8,
+        INCLUDENEXT = 1 << 9 /* NOTE(review): undocumented; presumably enables #include_next - confirm. */
+    }
+
+}
\ No newline at end of file
diff --git a/FileLexerSource.cs b/FileLexerSource.cs
new file mode 100644
index 0000000..9ae9704
--- /dev/null
+++ b/FileLexerSource.cs
@@ -0,0 +1,67 @@
+/*
+ * Anarres C Preprocessor
+ * Copyright (c) 2007-2008, Shevek
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+ * or implied. See the License for the specific language governing
+ * permissions and limitations under the License.
+ */
+using System;
+using System.IO;
+
+namespace CppNet {
+
+/**
+ * A {@link Source} which lexes a file.
+ *
+ * The input is buffered.
+ *
+ * The file is opened for reading as soon as the source is constructed
+ * and is handed to the LexerSource base class wrapped in a StreamReader.
+ *
+ * @see Source
+ */
+public class FileLexerSource : LexerSource {
+    // private File file;
+    private String path; /* Path reported by getPath(), getName() and ToString(). */
+
+    /**
+     * Creates a new Source for lexing the given File.
+     *
+     * Preprocessor directives are honoured within the file.
+     *
+     * @param file the file whose contents are lexed.
+     * @param path the path to report for this source; it is stored
+     * as given and is not used to open the file.
+     */
+    public FileLexerSource(FileInfo file, String path) : base(new StreamReader(file.OpenRead()), true)
+    {
+
+        // this.file = file;
+        this.path = path;
+    }
+
+    public FileLexerSource(FileInfo file) : /* Reports the file's full name as the path. */
+        this(file, file.FullName) {
+    }
+
+    public FileLexerSource(String path) : /* Opens the file at the given path. */
+        this(new FileInfo(path)) {
+    }
+
+    override internal String getPath() {
+        return path;
+    }
+
+    override internal String getName() /* The name of a file source is its path. */
+    {
+        return getPath();
+    }
+
+    override public string ToString() {
+        return "file " + path;
+    }
+}
+
+}
\ No newline at end of file
diff --git a/FixedTokenSource.cs b/FixedTokenSource.cs
new file mode 100644
index 0000000..3a2c076
--- /dev/null
+++ b/FixedTokenSource.cs
@@ -0,0 +1,57 @@
+/*
+ * Anarres C Preprocessor
+ * Copyright (c) 2007-2008, Shevek
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+ * or implied. See the License for the specific language governing
+ * permissions and limitations under the License.
+ */
+
+using System;
+using System.Text;
+using System.Collections.Generic;
+
+namespace CppNet {
+
+/**
+ * A Source which replays a fixed list of tokens and then returns an
+ * EOF token on every further call.
+ */
+internal class FixedTokenSource : Source {
+    private static readonly Token EOF =
+            new Token(Token.EOF, "");
+
+    private List<Token> tokens;
+    /* Index of the next token to hand out. */
+    private int idx;
+
+    /** Creates a source over a private copy of the given tokens. */
+    internal FixedTokenSource(params Token[] tokens) {
+        this.tokens = new List<Token>(tokens);
+        this.idx = 0;
+    }
+
+    /** Creates a source over the given list; the list is not copied. */
+    internal FixedTokenSource(List<Token> tokens) {
+        this.tokens = tokens;
+        this.idx = 0;
+    }
+
+    public override Token token() {
+        if (idx >= tokens.Count)
+            return EOF;
+        return tokens[idx++];
+    }
+
+    override public String ToString() {
+        StringBuilder buf = new StringBuilder();
+        /* Concatenating the List itself only prints its type name in
+         * .NET, so the elements are joined explicitly. */
+        buf.Append("constant token stream [");
+        buf.Append(string.Join(", ", tokens));
+        buf.Append("]");
+        Source parent = getParent();
+        if (parent != null)
+            buf.Append(" in ").Append(parent);
+        return buf.ToString();
+    }
+}
+
+}
\ No newline at end of file
diff --git a/InputLexerSource.cs b/InputLexerSource.cs
new file mode 100644
index 0000000..0931dc4
--- /dev/null
+++ b/InputLexerSource.cs
@@ -0,0 +1,68 @@
+/*
+ * Anarres C Preprocessor
+ * Copyright (c) 2007-2008, Shevek
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+ * or implied. See the License for the specific language governing
+ * permissions and limitations under the License.
+ */
+
+package org.anarres.cpp;
+
+import java.io.BufferedReader;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.io.IOException;
+
+import java.util.List;
+import java.util.Iterator;
+
+import static org.anarres.cpp.Token.*;
+
+/**
+ * A {@link Source} which lexes a file.
+ *
+ * The input is buffered.
+ *
+ * @see Source
+ */
+public class InputLexerSource extends LexerSource {
+    /**
+     * Creates a new Source for lexing the given Reader.
+     *
+     * Preprocessor directives are honoured within the file.
+     */
+    public InputLexerSource(InputStream input)
+                        throws IOException {
+        super(
+            new BufferedReader(
+                new InputStreamReader(
+                    input
+                )
+            ),
+            true
+        );
+    }
+
+    @Override
+    /* pp */ String getPath() {
+        return "";
+    }
+
+    @Override
+    /* pp */ String getName() {
+        return "standard input";
+    }
+
+    public String toString() {
+        return getPath();
+    }
+}
diff --git a/JavaCompat/JavaCompat.cs b/JavaCompat/JavaCompat.cs
new file mode 100644
index 0000000..678b7e4
--- /dev/null
+++ b/JavaCompat/JavaCompat.cs
@@ -0,0 +1,96 @@
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using System.Threading.Tasks;
+
+using boolean = System.Boolean;
+//using Set = System.Collections.Generic.HashSet;
+//using ArrayList = System.Collections.Generic.List;
+//using Map = System.Collections.Generic.Dictionary;
+
+namespace CppNet
+{
+    /**
+     * Extension methods that give .NET types the Java method names used
+     * by the ported code.
+     */
+    static class JavaCompat
+    {
+        public static StringBuilder append(this StringBuilder bld, object value)
+        {
+            return bld.Append(value);
+        }
+
+        public static int length(this string str)
+        {
+            return str.Length;
+        }
+
+        public static char charAt(this string str, int i)
+        {
+            return str[i];
+        }
+
+        public static T get<T>(this List<T> list, int i)
+        {
+            return list[i];
+        }
+
+        /** Returns a Java-style iterator positioned at the start of the list. */
+        public static Iterator<T> iterator<T>(this List<T> list)
+        {
+            return new ListIterator<T>(list);
+        }
+
+        public static string toString(this object o)
+        {
+            return o.ToString();
+        }
+    }
+
+    /** A forward-only Java-style iterator over a List; remove() is not implemented. */
+    class ListIterator<T> : Iterator<T>
+    {
+        List<T> _list;
+        int _index;
+
+        public ListIterator(List<T> list)
+        {
+            _list = list;
+        }
+
+        public boolean hasNext()
+        {
+            return _index < _list.Count;
+        }
+
+        public T next()
+        {
+            return _list[_index++];
+        }
+
+        public void remove()
+        {
+            throw new NotImplementedException();
+        }
+    }
+
+    public interface Closeable
+    {
+        void close();
+    }
+
+    public interface Iterable<T>
+    {
+        Iterator<T> iterator();
+    }
+
+    public interface Iterator<T>
+    {
+        boolean hasNext();
+        T next();
+        void remove();
+    }
+
+    public class IllegalStateException : Exception
+    {
+        public IllegalStateException(Exception ex) : base("Illegal State", ex) { }
+    }
+
+
+}
diff --git a/JavaFile.cs b/JavaFile.cs
new file mode 100644
index 0000000..72c56e3
--- /dev/null
+++ b/JavaFile.cs
@@ -0,0 +1,49 @@
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using System.Threading.Tasks;
+using System.IO;
+
+namespace CppNet
+{
+    /**
+     * A VirtualFile backed by a path on the local filesystem.
+     *
+     * The path is made absolute when the object is constructed.
+     */
+    public class JavaFile : VirtualFile
+    {
+        string _path;
+
+        public JavaFile(string path)
+        {
+            _path = Path.GetFullPath(path);
+        }
+
+        public bool isFile()
+        {
+            return File.Exists(_path) && !File.GetAttributes(_path).HasFlag(FileAttributes.Directory);
+        }
+
+        public string getPath()
+        {
+            return _path;
+        }
+
+        public string getName()
+        {
+            return Path.GetFileName(_path);
+        }
+
+        /**
+         * Returns the containing directory, or null when this path has
+         * no parent (a filesystem root).
+         */
+        public VirtualFile getParentFile()
+        {
+            /* GetDirectoryName yields null for a root; passing that on
+             * to the constructor would make Path.GetFullPath throw. */
+            string parent = Path.GetDirectoryName(_path);
+            if (parent == null)
+                return null;
+            return new JavaFile(parent);
+        }
+
+        public VirtualFile getChildFile(string name)
+        {
+            return new JavaFile(Path.Combine(_path, name));
+        }
+
+        public Source getSource()
+        {
+            return new FileLexerSource(_path);
+        }
+    }
+}
diff --git a/JavaFileSystem.cs b/JavaFileSystem.cs
new file mode 100644
index 0000000..006683a
--- /dev/null
+++ b/JavaFileSystem.cs
@@ -0,0 +1,40 @@
+/*
+ * Anarres C Preprocessor
+ * Copyright (c) 2007-2008, Shevek
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+ * or implied.
See the License for the specific language governing
+ * permissions and limitations under the License.
+ */
+using System;
+using System.IO;
+
+namespace CppNet
+{
+
+    /**
+     * A virtual filesystem implementation backed by the local
+     * filesystem: every lookup returns a JavaFile. (The name is kept
+     * from the Java original, which used java.io; this port uses
+     * System.IO.)
+     */
+    public class JavaFileSystem : VirtualFileSystem
+    {
+
+
+        public VirtualFile getFile(string path) /* Wraps the given path in a JavaFile. */
+        {
+            return new JavaFile(path);
+        }
+
+        public VirtualFile getFile(string dir, string name) /* Resolves name against dir with Path.Combine. */
+        {
+            return new JavaFile(Path.Combine(dir, name));
+        }
+    }
+}
\ No newline at end of file
diff --git a/JoinReader.cs b/JoinReader.cs
new file mode 100644
index 0000000..5efb987
--- /dev/null
+++ b/JoinReader.cs
@@ -0,0 +1,221 @@
+/*
+ * Anarres C Preprocessor
+ * Copyright (c) 2007-2008, Shevek
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+ * or implied. See the License for the specific language governing
+ * permissions and limitations under the License.
+ */
+using System;
+
+using System.IO;
+namespace CppNet
+{
+
+    /**
+     * A character reader which joins backslash-newline continuations
+     * and optionally translates trigraphs.
+     *
+     * Each joined line break is counted and re-emitted as a '\n' after
+     * the next real line separator (or at end of input), so the number
+     * of line breaks seen by the consumer is preserved.
+     */
+    internal class JoinReader /* extends Reader */ {
+        private TextReader _in;
+
+        private PreprocessorListener listener;
+        private LexerSource source;
+        private bool trigraphs;
+        private bool warnings;
+
+        /* Number of joined line breaks still owed to the consumer. */
+        private int newlines;
+        /* True while owed newlines are being flushed after a line separator. */
+        private bool flushnl;
+        /* Pushback stack; at most two characters are ever pushed back. */
+        private int[] unget;
+        private int uptr;
+
+        public JoinReader(TextReader ain, bool trigraphs)
+        {
+            this._in = ain;
+            this.trigraphs = trigraphs;
+            this.newlines = 0;
+            this.flushnl = false;
+            this.unget = new int[2];
+            this.uptr = 0;
+        }
+
+        public JoinReader(TextReader ain) :
+            this(ain, false)
+        {
+        }
+
+        public void setTrigraphs(bool enable, bool warnings)
+        {
+            this.trigraphs = enable;
+            this.warnings = warnings;
+        }
+
+        internal void init(Preprocessor pp, LexerSource s)
+        {
+            this.listener = pp.getListener();
+            this.source = s;
+            setTrigraphs(pp.getFeature(Feature.TRIGRAPHS),
+                            pp.getWarning(Warning.TRIGRAPHS));
+        }
+
+        /* Reads one raw character, preferring pushed-back characters. */
+        private int __read()
+        {
+            if(uptr > 0)
+                return unget[--uptr];
+            return _in.Read();
+        }
+
+        /* Pushes a character back; end-of-input (-1) is never pushed. */
+        private void _unread(int c)
+        {
+            if(c == -1)
+                return;
+            /* Check the bound before the store: checking afterwards
+             * could never fire, because the store itself would already
+             * have thrown IndexOutOfRangeException. */
+            System.Diagnostics.Debug.Assert(uptr < unget.Length,
+                    "JoinReader ungets too many characters");
+            unget[uptr++] = c;
+        }
+
+        /* Reports through the source if attached, otherwise throws. */
+        protected void warning(String msg)
+        {
+            if(source != null)
+                source.warning(msg);
+            else
+                throw new LexerException(msg);
+        }
+
+        /*
+         * Handles a recognised trigraph "??raw". Returns the replacement
+         * when trigraphs are enabled; otherwise pushes "?raw" back and
+         * returns the first '?' unchanged.
+         */
+        private char trigraph(char raw, char repl)
+        {
+            if(trigraphs) {
+                if(warnings)
+                    warning("trigraph ??" + raw + " converted to " + repl);
+                return repl;
+            } else {
+                if(warnings)
+                    warning("trigraph ??" + raw + " ignored");
+                _unread(raw);
+                _unread('?');
+                return '?';
+            }
+        }
+
+        /* Reads one character with trigraph processing applied. */
+        private int _read()
+        {
+            int c = __read();
+            if(c == '?' && (trigraphs || warnings)) {
+                int d = __read();
+                if(d == '?') {
+                    int e = __read();
+                    switch(e) {
+                        case '(': return trigraph('(', '[');
+                        case ')': return trigraph(')', ']');
+                        case '<': return trigraph('<', '{');
+                        case '>': return trigraph('>', '}');
+                        case '=': return trigraph('=', '#');
+                        case '/': return trigraph('/', '\\');
+                        case '\'': return trigraph('\'', '^');
+                        case '!': return trigraph('!', '|');
+                        case '-': return trigraph('-', '~');
+                    }
+                    _unread(e);
+                }
+                _unread(d);
+            }
+            return c;
+        }
+
+        /**
+         * Returns the next character with line continuations removed,
+         * or -1 at end of input.
+         */
+        public int read()
+        {
+            if(flushnl) {
+                if(newlines > 0) {
+                    newlines--;
+                    return '\n';
+                }
+                flushnl = false;
+            }
+
+            for(; ; ) {
+                int c = _read();
+                switch(c) {
+                    case '\\':
+                        int d = _read();
+                        switch(d) {
+                            case '\n':
+                                newlines++;
+                                continue;
+                            case '\r':
+                                newlines++;
+                                int e = _read();
+                                if(e != '\n')
+                                    _unread(e);
+                                continue;
+                            default:
+                                _unread(d);
+                                return c;
+                        }
+                    case '\r':
+                    case '\n':
+                    case '\u2028':
+                    case '\u2029':
+                    case '\u000B':
+                    case '\u000C':
+                    case '\u0085':
+                        flushnl = true;
+                        return c;
+                    case -1:
+                        if(newlines > 0) {
+                            newlines--;
+                            return '\n';
+                        }
+                        goto default;
+                    default:
+                        return c;
+                }
+            }
+        }
+
+        /**
+         * Reads up to len characters into cbuf starting at off and
+         * returns the number stored (less than len only at end of input).
+         */
+        public int read(char[] cbuf, int off, int len)
+        {
+            for(int i = 0; i < len; i++) {
+                int ch = read();
+                if(ch == -1)
+                    return i;
+                cbuf[off + i] = (char)ch;
+            }
+            return len;
+        }
+
+        public void close()
+        {
+            if(_in == null) {
+                return;
+            }
+            _in.Close();
+        }
+
+
+
+        override public String ToString()
+        {
+            return "JoinReader(nl=" + newlines + ")";
+        }
+
+        /*
+            public static void main(String[] args) throws IOException {
+                FileReader f = new FileReader(new File(args[0]));
+                BufferedReader b = new BufferedReader(f);
+                JoinReader r = new JoinReader(b);
+                BufferedWriter w = new BufferedWriter(
+                        new java.io.OutputStreamWriter(System.out)
+                            );
+                int c;
+                while ((c = r.read()) != -1) {
+                    w.write((char)c);
+                }
+                w.close();
+            }
+        */
+
+    }
+}
\ No newline at end of file
diff --git a/LexerException.cs b/LexerException.cs
new file mode 100644
index 0000000..c997bcc
--- /dev/null
+++ b/LexerException.cs
@@ -0,0 +1,32 @@
+/*
+ * Anarres C Preprocessor
+ * Copyright (c) 2007-2008, Shevek
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+ * or implied. See the License for the specific language governing
+ * permissions and limitations under the License.
+ */
+
+using System;
+
+namespace CppNet {
+/**
+ * A preprocessor exception.
+ *
+ * Note to users: I don't really like the name of this class. S.
+ */
+public class LexerException : Exception {
+    /** Creates an exception carrying the given message. */
+    public LexerException(String msg) : base(msg) { }
+
+    /**
+     * Wraps another exception, reusing its message.
+     *
+     * A null cause is tolerated and yields the default exception
+     * message, rather than failing with a NullReferenceException while
+     * the exception is being constructed.
+     */
+    public LexerException(Exception cause) : base(cause != null ? cause.Message : null, cause) {}
+}
+
+}
\ No newline at end of file
diff --git a/LexerSource.cs b/LexerSource.cs
new file mode 100644
index 0000000..b55e26e
--- /dev/null
+++ b/LexerSource.cs
@@ -0,0 +1,809 @@
+/*
+ * Anarres C Preprocessor
+ * Copyright (c) 2007-2008, Shevek
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+ * or implied. See the License for the specific language governing
+ * permissions and limitations under the License.
+ */
+using System;
+using System.IO;
+using System.Text;
+
+namespace CppNet {
+
+/** Does not handle digraphs.
*/
+public class LexerSource : Source {
+    /* Stand-ins for the java.lang.Character predicates used by the Java original. */
+    static bool isJavaIdentifierStart(int c) {
+        return char.IsLetter((char)c) || c == '$' || c == '_';
+    }
+    static bool isJavaIdentifierPart(int c)
+    {
+        return char.IsLetter((char)c) || c == '$' || c == '_' || char.IsDigit((char)c);
+    }
+    static bool isIdentifierIgnorable(int c) {
+        return c >= 0 && c <= 8 ||
+            c >= 0xE && c <= 0x1B ||
+            c >= 0x7F && c <= 0x9F ||
+            char.GetUnicodeCategory((char)c) == System.Globalization.UnicodeCategory.Format;
+    }
+
+    /*
+     * Returns the numeric value of ch as a digit in the given radix,
+     * or -1 if ch is not a digit of that radix.
+     *
+     * Computed arithmetically: this is called for every character of
+     * every number and escape, and always once on a non-digit, so
+     * signalling "not a digit" with a caught exception was needlessly
+     * expensive.
+     */
+    static int digit(char ch, int radix)
+    {
+        int value;
+        if (ch >= '0' && ch <= '9')
+            value = ch - '0';
+        else if (ch >= 'a' && ch <= 'z')
+            value = ch - 'a' + 10;
+        else if (ch >= 'A' && ch <= 'Z')
+            value = ch - 'A' + 10;
+        else
+            return -1;
+        return value < radix ? value : -1;
+    }
+    private static readonly bool DEBUG = false;
+
+    private JoinReader reader;
+    private bool ppvalid;
+    private bool bol;
+    private bool include;
+
+    private bool digraphs;
+
+    /* Unread. */
+    private int u0, u1;
+    private int ucount;
+
+    private int line;
+    private int column;
+    private int lastcolumn;
+    private bool cr;
+
+    /* ppvalid is:
+     * false in StringLexerSource,
+     * true in FileLexerSource */
+    public LexerSource(TextReader r, bool ppvalid) {
+        this.reader = new JoinReader(r);
+        this.ppvalid = ppvalid;
+        this.bol = true;
+        this.include = false;
+
+        this.digraphs = true;
+
+        this.ucount = 0;
+
+        this.line = 1;
+        this.column = 0;
+        this.lastcolumn = -1;
+        this.cr = false;
+    }
+
+    /* Picks up the digraph setting from the preprocessor and initialises the reader. */
+    override internal void init(Preprocessor pp) {
+        base.init(pp);
+        this.digraphs = pp.getFeature(Feature.DIGRAPHS);
+        this.reader.init(pp, this);
+    }
+
+
+    override public int getLine() {
+        return line;
+    }
+
+
+    override public int getColumn() {
+        return column;
+    }
+
+
+    override internal bool isNumbered() {
+        return true;
+    }
+
+/* Error handling. */
+
+    /*
+     * Reports msg as an error or a warning, at the position one
+     * character before the current one. At column 0 that is the last
+     * column of the previous line.
+     */
+    private void _error(String msg, bool error) {
+        int _l = line;
+        int _c = column;
+        if (_c == 0) {
+            _c = lastcolumn;
+            _l--;
+        }
+        else {
+            _c--;
+        }
+        if (error)
+            base.error(_l, _c, msg);
+        else
+            base.warning(_l, _c, msg);
+    }
+
+    /* Allow JoinReader to call this.
*/
+    internal void error(String msg)
+    {
+        _error(msg, true);
+    }
+
+    /* Allow JoinReader to call this. */
+    internal void warning(String msg) {
+        _error(msg, false);
+    }
+
+/* A flag for string handling. */
+
+    internal void setInclude(bool b)
+    {
+        this.include = b;
+    }
+
+/*
+    private bool _isLineSeparator(int c) {
+        return Character.getType(c) == Character.LINE_SEPARATOR
+                || c == -1;
+    }
+*/
+
+    /* XXX Move to JoinReader and canonicalise newlines. */
+    /* End of input (-1) also counts as a line separator. */
+    private static bool isLineSeparator(int c) {
+        switch ((char)c) {
+            case '\r':
+            case '\n':
+            case '\u2028':
+            case '\u2029':
+            case '\u000B':
+            case '\u000C':
+            case '\u0085':
+                return true;
+            default:
+                return (c == -1);
+        }
+    }
+
+
+    /*
+     * Returns the next character, or -1 at end of input. Characters
+     * pushed back by unread() are returned first. Line and column are
+     * updated only for characters taken from the reader, and "\r\n"
+     * counts as one line break.
+     */
+    private int read() {
+        System.Diagnostics.Debug.Assert(ucount <= 2, "Illegal ucount: " + ucount);
+        switch (ucount) {
+            case 2:
+                ucount = 1;
+                return u1;
+            case 1:
+                ucount = 0;
+                return u0;
+        }
+
+        if (reader == null)
+            return -1;
+
+        int c = reader.read();
+        switch (c) {
+            case '\r':
+                cr = true;
+                line++;
+                lastcolumn = column;
+                column = 0;
+                break;
+            case '\n':
+                if (cr) {
+                    cr = false;
+                    break;
+                }
+                goto case '\u2028';
+                /* fallthrough */
+            case '\u2028':
+            case '\u2029':
+            case '\u000B':
+            case '\u000C':
+            case '\u0085':
+                cr = false;
+                line++;
+                lastcolumn = column;
+                column = 0;
+                break;
+            default:
+                cr = false;
+                column++;
+                break;
+        }
+
+/*
+        if (isLineSeparator(c)) {
+            line++;
+            lastcolumn = column;
+            column = 0;
+        }
+        else {
+            column++;
+        }
+*/
+
+        return c;
+    }
+
+    /* You can unget AT MOST one newline. */
+    /* At most two characters may be pending; -1 is silently ignored. */
+    private void unread(int c) {
+        /* XXX Must unread newlines. */
+        if (c != -1) {
+            if (isLineSeparator(c)) {
+                line--;
+                column = lastcolumn;
+                cr = false;
+            }
+            else {
+                column--;
+            }
+            switch (ucount) {
+                case 0:
+                    u0 = c;
+                    ucount = 1;
+                    break;
+                case 1:
+                    u1 = c;
+                    ucount = 2;
+                    break;
+                default:
+                    throw new InvalidOperationException(
+                            "Cannot unget another character!"
+                                );
+            }
+            // reader.unread(c);
+        }
+    }
+
+    /* Consumes the rest of the current line into an invalid. */
+    private Token invalid(StringBuilder text, String reason) {
+        int d = read();
+        while (!isLineSeparator(d)) {
+            text.Append((char)d);
+            d = read();
+        }
+        unread(d);
+        return new Token(Token.INVALID, text.ToString(), reason);
+    }
+
+    /*
+     * Lexes the rest of a C comment; the opening delimiter has already
+     * been consumed.
+     *
+     * If the input ends before the closing delimiter, an INVALID token
+     * holding the text read so far is returned. read() keeps returning
+     * -1 at end of input, so without this check the loops below would
+     * never terminate.
+     */
+    private Token ccomment() {
+        StringBuilder text = new StringBuilder("/*");
+        int d;
+        do {
+            do {
+                d = read();
+                if (d == -1)
+                    return new Token(Token.INVALID, text.ToString(),
+                            "Unterminated comment");
+                text.Append((char)d);
+            } while (d != '*');
+            do {
+                d = read();
+                if (d == -1)
+                    return new Token(Token.INVALID, text.ToString(),
+                            "Unterminated comment");
+                text.Append((char)d);
+            } while (d == '*');
+        } while (d != '/');
+        return new Token(Token.CCOMMENT, text.ToString());
+    }
+
+    /* Lexes the rest of a line comment; the terminating line separator is pushed back. */
+    private Token cppcomment() {
+        StringBuilder text = new StringBuilder("//");
+        int d = read();
+        while (!isLineSeparator(d)) {
+            text.Append((char)d);
+            d = read();
+        }
+        unread(d);
+        return new Token(Token.CPPCOMMENT, text.ToString());
+    }
+
+    /*
+     * Lexes the body of an escape sequence; the backslash has already
+     * been consumed and appended by the caller. The raw characters are
+     * appended to text and the value of the escape is returned.
+     */
+    private int escape(StringBuilder text) {
+        int d = read();
+        switch (d) {
+            case 'a': text.Append('a'); return 0x07;
+            case 'b': text.Append('b'); return '\b';
+            case 'f': text.Append('f'); return '\f';
+            case 'n': text.Append('n'); return '\n';
+            case 'r': text.Append('r'); return '\r';
+            case 't': text.Append('t'); return '\t';
+            case 'v': text.Append('v'); return 0x0b;
+            case '\\': text.Append('\\'); return '\\';
+
+            case '0': case '1': case '2': case '3':
+            case '4': case '5': case '6': case '7':
+                /* Up to three octal digits, the first being d itself. */
+                int len = 0;
+                int val = 0;
+                do {
+                    val = (val << 3) + digit((char)d, 8);
+                    text.Append((char)d);
+                    d = read();
+                } while(++len < 3 && digit((char)d, 8) != -1);
+                unread(d);
+                return val;
+
+            case 'x':
+                /* Up to two hex digits follow the 'x'. The 'x' itself
+                 * is not a digit and must not be fed into the value:
+                 * doing so added -1 and used up one of the two digit
+                 * slots. */
+                text.Append((char)d);
+                len = 0;
+                val = 0;
+                while (len++ < 2) {
+                    d = read();
+                    if (digit((char)d, 16) == -1) {
+                        unread(d);
+                        break;
+                    }
+                    val = (val << 4) + digit((char)d, 16);
+                    text.Append((char)d);
+                }
+                return val;
+
+            /* Exclude two cases from the warning. */
+            case '"': text.Append('"'); return '"';
+            case '\'': text.Append('\''); return '\'';
+
+            default:
+                warning("Unnecessary escape character " + (char)d);
+                text.Append((char)d);
+                return d;
+        }
+    }
+
+    /* Lexes a character literal; the opening quote has already been consumed. */
+    private Token character() {
+        StringBuilder text = new StringBuilder("'");
+        int d = read();
+        if (d == '\\') {
+            text.Append('\\');
+            d = escape(text);
+        }
+        else if (isLineSeparator(d)) {
+            unread(d);
+            return new Token(Token.INVALID, text.ToString(),
+                            "Unterminated character literal");
+        }
+        else if (d == '\'') {
+            text.Append('\'');
+            return new Token(Token.INVALID, text.ToString(),
+                            "Empty character literal");
+        }
+        else if (char.IsControl((char)d)) {
+            text.Append('?');
+            return invalid(text, "Illegal unicode character literal");
+        }
+        else {
+            text.Append((char)d);
+        }
+
+        int e = read();
+        if (e != '\'') {
+            // error("Illegal character constant");
+            /* We consume up to the next ' or the rest of the line. */
+            for (;;) {
+                if (isLineSeparator(e)) {
+                    unread(e);
+                    break;
+                }
+                text.Append((char)e);
+                if (e == '\'')
+                    break;
+                e = read();
+            }
+            return new Token(Token.INVALID, text.ToString(),
+                            "Illegal character constant " + text);
+        }
+        text.Append('\'');
+        /* XXX Is this a bad cast? */
+        return new Token(Token.CHARACTER,
+                        text.ToString(), (char)d);
+    }
+
+    /*
+     * Lexes a string or header name delimited by open and close; the
+     * opening delimiter has already been consumed. Escapes are only
+     * interpreted when not lexing an include directive.
+     */
+    private Token String(char open, char close) {
+        StringBuilder text = new StringBuilder();
+        text.Append(open);
+
+        StringBuilder buf = new StringBuilder();
+
+        for (;;) {
+            int c = read();
+            if (c == close) {
+                break;
+            }
+            else if (c == '\\') {
+                text.Append('\\');
+                if (!include) {
+                    char d = (char)escape(text);
+                    buf.Append(d);
+                }
+            }
+            else if (c == -1) {
+                unread(c);
+                // error("End of file in string literal after " + buf);
+                return new Token(Token.INVALID, text.ToString(),
+                        "End of file in string literal after " + buf);
+            }
+            else if (isLineSeparator(c)) {
+                unread(c);
+                // error("Unterminated string literal after " + buf);
+                return new Token(Token.INVALID, text.ToString(),
+                        "Unterminated string literal after " + buf);
+            }
+            else {
+                text.Append((char)c);
+                buf.Append((char)c);
+            }
+        }
+        text.Append(close);
+        return new Token(close == '>' ? Token.HEADER : Token.STRING,
+                        text.ToString(), buf.ToString());
+    }
+
+    /*
+     * Consumes the suffix letters (U, L, I in either case) of an
+     * integer constant whose digits are already in text, starting with
+     * the lookahead character d, and builds the resulting token. Any
+     * other letter makes the constant INVALID.
+     */
+    private Token _number(StringBuilder text, long val, int d) {
+        int bits = 0;
+        for (;;) {
+            /* XXX Error check duplicate bits. */
+            if (d == 'U' || d == 'u') {
+                bits |= 1;
+                text.Append((char)d);
+                d = read();
+            }
+            else if (d == 'L' || d == 'l') {
+                if ((bits & 4) != 0)
+                    /* XXX warn */ ;
+                bits |= 2;
+                text.Append((char)d);
+                d = read();
+            }
+            else if (d == 'I' || d == 'i') {
+                if ((bits & 2) != 0)
+                    /* XXX warn */ ;
+                bits |= 4;
+                text.Append((char)d);
+                d = read();
+            }
+            else if (char.IsLetter((char)d)) {
+                unread(d);
+                return new Token(Token.INVALID, text.ToString(),
+                        "Invalid suffix \"" + (char)d +
+                        "\" on numeric constant");
+            }
+            else {
+                unread(d);
+                return new Token(Token.INTEGER,
+                    text.ToString(), (long)val);
+            }
+        }
+    }
+
+    /* We already chewed a zero, so empty is fine.
*/
+    /*
+     * Lexes an octal literal. Digits are accumulated three bits at a time;
+     * the first character that is not an octal digit is handed to
+     * _number() together with the spelling and value so far.
+     * NOTE(review): digit() is defined outside this view; it is used here
+     * as returning -1 for a character that is not a digit in the radix.
+     */
+    private Token number_octal() {
+        StringBuilder text = new StringBuilder("0");
+        int d = read();
+        long val = 0;
+        while (digit((char)d, 8) != -1) {
+            val = (val << 3) + digit((char)d, 8);
+            text.Append((char)d);
+            d = read();
+        }
+        return _number(text, val, d);
+    }
+
+    /* We do not know whether the first digit is valid. */
+    /*
+     * Lexes a hexadecimal literal; 'x' is the 'x' or 'X' that followed the
+     * leading zero. If the next character is not a hex digit it is pushed
+     * back and an INVALID token is returned. Otherwise digits are
+     * accumulated four bits at a time and the first non-digit is handed
+     * to _number().
+     */
+    private Token number_hex(char x) {
+        StringBuilder text = new StringBuilder("0");
+        text.Append(x);
+        int d = read();
+        if (digit((char)d, 16) == -1) {
+            unread(d);
+            // error("Illegal hexadecimal constant " + (char)d);
+            return new Token(Token.INVALID, text.ToString(),
+                    "Illegal hexadecimal digit " + (char)d +
+                    " after "+ text);
+        }
+        long val = 0;
+        do {
+            val = (val << 4) + digit((char)d, 16);
+            text.Append((char)d);
+            d = read();
+        } while (digit((char)d, 16) != -1);
+        return _number(text, val, d);
+    }
+
+    /* We know we have at least one valid digit, but empty is not
+     * fine. */
+    /* XXX This needs a complete rewrite. */
+    /*
+     * Lexes a decimal literal whose first digit 'c' has already been read.
+     * The first digit is appended by the first pass of the loop, so the
+     * builder starts out empty.
+     */
+    private Token number_decimal(int c) {
+        /* Previously written as new StringBuilder((char)c), which binds to
+         * the StringBuilder(int capacity) overload and never seeded the
+         * text; the parameterless constructor states the intent. */
+        StringBuilder text = new StringBuilder();
+        int d = c;
+        long val = 0;
+        do {
+            val = val * 10 + digit((char)d, 10);
+            text.Append((char)d);
+            d = read();
+        } while (digit((char)d, 10) != -1);
+        return _number(text, val, d);
+    }
+
+    /*
+     * Lexes an identifier whose first character 'c' has already been read.
+     * Characters accepted by isIdentifierIgnorable() are consumed but left
+     * out of the token text; the first character that is neither ignorable
+     * nor an identifier part is pushed back.
+     */
+    private Token identifier(int c) {
+        StringBuilder text = new StringBuilder();
+        int d;
+        text.Append((char)c);
+        for (;;) {
+            d = read();
+            if (isIdentifierIgnorable(d))
+                ;
+            else if (isJavaIdentifierPart(d))
+                text.Append((char)d);
+            else
+                break;
+        }
+        unread(d);
+        return new Token(Token.IDENTIFIER, text.ToString());
+    }
+
+    /*
+     * Lexes a run of whitespace starting with 'c'. While ppvalid is set
+     * the run stops at a line separator, which is pushed back rather than
+     * absorbed into the WHITESPACE token.
+     */
+    private Token whitespace(int c) {
+        StringBuilder text = new StringBuilder();
+        int d;
+        text.Append((char)c);
+        for (;;) {
+            d = read();
+            if (ppvalid && isLineSeparator(d)) /* XXX Ugly. */
+                break;
+            if (char.IsWhiteSpace((char)d))
+                text.Append((char)d);
+            else
+                break;
+        }
+        unread(d);
+        return new Token(Token.WHITESPACE, text.ToString());
+    }
+
+    /* No token processed by cond() contains a newline. 
*/
+    /*
+     * Reads one character. If it equals 'c', returns a token of type
+     * 'yes'; otherwise pushes the character back and returns a token of
+     * type 'no'.
+     */
+    private Token cond(char c, int yes, int no) {
+        int d = read();
+        if (c == d)
+            return new Token(yes);
+        unread(d);
+        return new Token(no);
+    }
+
+    /*
+     * Returns the next token from this source.
+     *
+     * The line and column are captured before the first character is read.
+     * The switch recognises newlines, operators, digraphs, literals and
+     * end of file; when it leaves 'tok' null, the code after the switch
+     * classifies the character as whitespace, a number, an identifier or
+     * a single-character token.
+     */
+    public override Token token() {
+        Token tok = null;
+
+        int _l = line;
+        int _c = column;
+
+        int c = read();
+        int d;
+
+        switch (c) {
+            case '\n':
+                if (ppvalid) {
+                    bol = true;
+                    if (include) {
+                        tok = new Token(Token.NL, _l, _c, "\n");
+                    }
+                    else {
+                        /* Fold a run of newlines into one NL token whose
+                         * text holds one '\n' per line consumed. */
+                        int nls = 0;
+                        do {
+                            nls++;
+                            d = read();
+                        } while (d == '\n');
+                        unread(d);
+                        char[] text = new char[nls];
+                        for (int i = 0; i < text.Length; i++)
+                            text[i] = '\n';
+                        // Skip the bol = false below.
+                        tok = new Token(Token.NL, _l, _c, new String(text));
+                    }
+                    if (DEBUG)
+                        System.Console.Error.WriteLine("lx: Returning NL: " + tok);
+                    return tok;
+                }
+                /* Let it be handled as whitespace. */
+                break;
+
+            case '!':
+                tok = cond('=', Token.NE, '!');
+                break;
+
+            case '#':
+                if (bol)
+                    tok = new Token(Token.HASH);
+                else
+                    tok = cond('#', Token.PASTE, '#');
+                break;
+
+            case '+':
+                d = read();
+                if (d == '+')
+                    tok = new Token(Token.INC);
+                else if (d == '=')
+                    tok = new Token(Token.PLUS_EQ);
+                else
+                    unread(d);
+                break;
+            case '-':
+                d = read();
+                if (d == '-')
+                    tok = new Token(Token.DEC);
+                else if (d == '=')
+                    tok = new Token(Token.SUB_EQ);
+                else if (d == '>')
+                    tok = new Token(Token.ARROW);
+                else
+                    unread(d);
+                break;
+
+            case '*':
+                tok = cond('=', Token.MULT_EQ, '*');
+                break;
+            case '/':
+                d = read();
+                if (d == '*')
+                    tok = ccomment();
+                else if (d == '/')
+                    tok = cppcomment();
+                else if (d == '=')
+                    tok = new Token(Token.DIV_EQ);
+                else
+                    unread(d);
+                break;
+
+            case '%':
+                d = read();
+                if (d == '=')
+                    tok = new Token(Token.MOD_EQ);
+                else if (digraphs && d == '>')
+                    tok = new Token('}');    // digraph
+                else if (digraphs && d == ':') {
+                    /* "%:" is the digraph for '#', "%:%:" for "##". */
+                    bool paste = true;
+                    d = read();
+                    if (d != '%') {
+                        unread(d);
+                        tok = new Token('#');    // digraph
+                        paste = false;
+                    }
+                    /* Only look for the second ':' when "%:%" was really
+                     * seen. Without this guard the character pushed back
+                     * above was read and unread a second time and a
+                     * spurious '%' was pushed into the input. */
+                    if (paste)
+                        d = read();
+                    if (paste && d != ':') {
+                        unread(d);    // Unread 2 chars here.
+ unread('%'); + tok = new Token('#'); // digraph + paste = false; + } + if(paste) { + tok = new Token(Token.PASTE); // digraph + } + } + else + unread(d); + break; + + case ':': + /* :: */ + d = read(); + if (digraphs && d == '>') + tok = new Token(']'); // digraph + else + unread(d); + break; + + case '<': + if (include) { + tok = String('<', '>'); + } + else { + d = read(); + if (d == '=') + tok = new Token(Token.LE); + else if (d == '<') + tok = cond('=', Token.LSH_EQ, Token.LSH); + else if (digraphs && d == ':') + tok = new Token('['); // digraph + else if (digraphs && d == '%') + tok = new Token('{'); // digraph + else + unread(d); + } + break; + + case '=': + tok = cond('=', Token.EQ, '='); + break; + + case '>': + d = read(); + if (d == '=') + tok = new Token(Token.GE); + else if (d == '>') + tok = cond('=', Token.RSH_EQ, Token.RSH); + else + unread(d); + break; + + case '^': + tok = cond('=', Token.XOR_EQ, '^'); + break; + + case '|': + d = read(); + if (d == '=') + tok = new Token(Token.OR_EQ); + else if (d == '|') + tok = cond('=', Token.LOR_EQ, Token.LOR); + else + unread(d); + break; + case '&': + d = read(); + if (d == '&') + tok = cond('=', Token.LAND_EQ, Token.LAND); + else if (d == '=') + tok = new Token(Token.AND_EQ); + else + unread(d); + break; + + case '.': + d = read(); + if (d == '.') + tok = cond('.', Token.ELLIPSIS, Token.RANGE); + else + unread(d); + /* XXX decimal fraction */ + break; + + case '0': + /* octal or hex */ + d = read(); + if (d == 'x' || d == 'X') + tok = number_hex((char)d); + else { + unread(d); + tok = number_octal(); + } + break; + + case '\'': + tok = character(); + break; + + case '"': + tok = String('"', '"'); + break; + + case -1: + close(); + tok = new Token(Token.EOF, _l, _c, ""); + break; + } + + if (tok == null) { + if (char.IsWhiteSpace((char)c)) { + tok = whitespace(c); + } + else if (char.IsDigit((char)c)) { + tok = number_decimal(c); + } + else if (isJavaIdentifierStart(c)) { + tok = identifier(c); + } + else 
{ + tok = new Token(c); + } + } + + if (bol) { + switch (tok.getType()) { + case Token.WHITESPACE: + case Token.CCOMMENT: + break; + default: + bol = false; + break; + } + } + + tok.setLocation(_l, _c); + if (DEBUG) + System.Console.WriteLine("lx: Returning " + tok); + // (new Exception("here")).printStackTrace(System.out); + return tok; + } + + public override void close() + { + if(reader != null) { + reader.close(); + reader = null; + } + base.close(); + } +} + +} \ No newline at end of file diff --git a/Macro.cs b/Macro.cs new file mode 100644 index 0000000..f4b0765 --- /dev/null +++ b/Macro.cs @@ -0,0 +1,208 @@ +/* + * Anarres C Preprocessor + * Copyright (c) 2007-2008, Shevek + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express + * or implied. See the License for the specific language governing + * permissions and limitations under the License. + */ +using System; +using System.Collections.Generic; +using System.Text; + +namespace CppNet +{ + /** + * A macro object. + * + * This encapsulates a name, an argument count, and a token stream + * for replacement. The replacement token stream may contain the + * extra tokens {@link Token#M_ARG} and {@link Token#M_STRING}. + */ + public class Macro + { + private Source source; + private String name; + /* It's an explicit decision to keep these around here. We don't + * need to; the argument token type is M_ARG and the value + * is the index. The strings themselves are only used in + * stringification of the macro, for debugging. 
*/ + private List args; + private bool variadic; + private List tokens; + + public Macro(Source source, String name) + { + this.source = source; + this.name = name; + this.args = null; + this.variadic = false; + this.tokens = new List(); + } + + public Macro(String name) + : this(null, name) + { + } + + /** + * Sets the Source from which this macro was parsed. + */ + public void setSource(Source s) + { + this.source = s; + } + + /** + * Returns the Source from which this macro was parsed. + * + * This method may return null if the macro was not parsed + * from a regular file. + */ + public Source getSource() + { + return source; + } + + /** + * Returns the name of this macro. + */ + public String getName() + { + return name; + } + + /** + * Sets the arguments to this macro. + */ + public void setArgs(List args) + { + this.args = args; + } + + /** + * Returns true if this is a function-like macro. + */ + public bool isFunctionLike() + { + return args != null; + } + + /** + * Returns the number of arguments to this macro. + */ + public int getArgs() + { + return args.Count; + } + + /** + * Sets the variadic flag on this Macro. + */ + public void setVariadic(bool b) + { + this.variadic = b; + } + + /** + * Returns true if this is a variadic function-like macro. + */ + public bool isVariadic() + { + return variadic; + } + + /** + * Adds a token to the expansion of this macro. + */ + public void addToken(Token tok) + { + this.tokens.Add(tok); + } + + /** + * Adds a "paste" operator to the expansion of this macro. + * + * A paste operator causes the next token added to be pasted + * to the previous token when the macro is expanded. + * It is an error for a macro to end with a paste token. 
+ */ + public void addPaste(Token tok) + { + /* + * Given: tok0 ## tok1 + * We generate: M_PASTE, tok0, tok1 + * This extends as per a stack language: + * tok0 ## tok1 ## tok2 -> + * M_PASTE, tok0, M_PASTE, tok1, tok2 + */ + this.tokens.Insert(tokens.Count - 1, tok); + } + + internal List getTokens() + { + return tokens; + } + + /* Paste tokens are inserted before the first of the two pasted + * tokens, so it's a kind of bytecode notation. This method + * swaps them around again. We know that there will never be two + * sequential paste tokens, so a bool is sufficient. */ + public String getText() { + StringBuilder buf = new StringBuilder(); + bool paste = false; + for (int i = 0; i < tokens.Count; i++) { + Token tok = tokens[i]; + if (tok.getType() == Token.M_PASTE) { + System.Diagnostics.Debug.Assert(paste == false, "Two sequential pastes."); + paste = true; + continue; + } + else { + buf.Append(tok.getText()); + } + if (paste) { + buf.Append(" #" + "# "); + paste = false; + } + // buf.Append(tokens.get(i)); + } + return buf.ToString(); + } + + override public String ToString() + { + StringBuilder buf = new StringBuilder(name); + if(args != null) { + buf.Append('('); + bool first = true; + foreach(String str in args) { + if(!first) { + buf.Append(", "); + } else { + first = false; + } + buf.Append(str); + } + if(isVariadic()) { + buf.Append("..."); + } + + buf.Append(')'); + } + if(tokens.Count != 0) { + buf.Append(" => ").Append(getText()); + } + return buf.ToString(); + } + + } +} diff --git a/MacroTokenSource.cs b/MacroTokenSource.cs new file mode 100644 index 0000000..dc77eff --- /dev/null +++ b/MacroTokenSource.cs @@ -0,0 +1,197 @@ +/* + * Anarres C Preprocessor + * Copyright (c) 2007-2008, Shevek + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express + * or implied. See the License for the specific language governing + * permissions and limitations under the License. + */ +using System; +using System.Text; +using System.Collections.Generic; +using boolean = System.Boolean; +using Debug = System.Diagnostics.Debug; + +namespace CppNet { + +/* This source should always be active, since we don't expand macros + * in any inactive context. */ +internal class MacroTokenSource : Source { + private Macro macro; + private Iterator tokens; /* Pointer into the macro. */ + private List args; /* { unexpanded, expanded } */ + private Iterator arg; /* "current expansion" */ + + internal MacroTokenSource(Macro m, List args) { + this.macro = m; + this.tokens = m.getTokens().iterator(); + this.args = args; + this.arg = null; + } + + override internal boolean isExpanding(Macro m) { + /* When we are expanding an arg, 'this' macro is not + * being expanded, and thus we may re-expand it. */ + if (/* XXX this.arg == null && */ this.macro == m) + return true; + return base.isExpanding(m); + } + + /* XXX Called from Preprocessor [ugly]. 
*/ + internal static void escape(StringBuilder buf, string cs) { + for (int i = 0; i < cs.length(); i++) { + char c = cs.charAt(i); + switch (c) { + case '\\': + buf.append("\\\\"); + break; + case '"': + buf.append("\\\""); + break; + case '\n': + buf.append("\\n"); + break; + case '\r': + buf.append("\\r"); + break; + default: + buf.append(c); + break; + } + } + } + + private void concat(StringBuilder buf, Argument arg) { + Iterator it = arg.iterator(); + while (it.hasNext()) { + Token tok = it.next(); + buf.append(tok.getText()); + } + } + + private Token stringify(Token pos, Argument arg) { + StringBuilder buf = new StringBuilder(); + concat(buf, arg); + // System.out.println("Concat: " + arg + " -> " + buf); + StringBuilder str = new StringBuilder("\""); + escape(str, buf.ToString()); + str.append("\""); + // System.out.println("Escape: " + buf + " -> " + str); + return new Token(Token.STRING, + pos.getLine(), pos.getColumn(), + str.toString(), buf.toString()); + } + + + /* At this point, we have consumed the first M_PASTE. + * @see Macro#addPaste(Token) */ + private void paste(Token ptok) { + StringBuilder buf = new StringBuilder(); + Token err = null; + /* We know here that arg is null or expired, + * since we cannot paste an expanded arg. */ + + int count = 2; + for (int i = 0; i < count; i++) { + if (!tokens.hasNext()) { + /* XXX This one really should throw. */ + error(ptok.getLine(), ptok.getColumn(), + "Paste at end of expansion"); + buf.append(' ').append(ptok.getText()); + break; + } + Token tok = tokens.next(); + // System.out.println("Paste " + tok); + switch (tok.getType()) { + case Token.M_PASTE: + /* One extra to paste, plus one because the + * paste token didn't count. */ + count += 2; + ptok = tok; + break; + case Token.M_ARG: + int idx = (int)tok.getValue(); + concat(buf, args.get(idx)); + break; + /* XXX Test this. 
*/ + case Token.CCOMMENT: + case Token.CPPCOMMENT: + break; + default: + buf.append(tok.getText()); + break; + } + } + + /* Push and re-lex. */ + /* + StringBuilder src = new StringBuilder(); + escape(src, buf); + StringLexerSource sl = new StringLexerSource(src.toString()); + */ + StringLexerSource sl = new StringLexerSource(buf.toString()); + + /* XXX Check that concatenation produces a valid token. */ + + arg = new SourceIterator(sl); + } + + override public Token token() { + for (;;) { + /* Deal with lexed tokens first. */ + + if (arg != null) { + if (arg.hasNext()) { + Token tok2 = arg.next(); + /* XXX PASTE -> INVALID. */ + Debug.Assert(tok2.getType() != Token.M_PASTE, + "Unexpected paste token"); + return tok2; + } + arg = null; + } + + if (!tokens.hasNext()) + return new Token(Token.EOF, -1, -1, ""); /* End of macro. */ + Token tok = tokens.next(); + int idx; + switch (tok.getType()) { + case Token.M_STRING: + /* Use the nonexpanded arg. */ + idx = (int)tok.getValue(); + return stringify(tok, args.get(idx)); + case Token.M_ARG: + /* Expand the arg. */ + idx = (int)tok.getValue(); + // System.out.println("Pushing arg " + args.get(idx)); + arg = args.get(idx).expansion(); + break; + case Token.M_PASTE: + paste(tok); + break; + default: + return tok; + } + } /* for */ + } + + + override public String ToString() { + StringBuilder buf = new StringBuilder(); + buf.Append("expansion of ").Append(macro.getName()); + Source parent = getParent(); + if (parent != null) + buf.Append(" in ").Append(parent); + return buf.ToString(); + } +} + +} \ No newline at end of file diff --git a/Preprocessor.cs b/Preprocessor.cs new file mode 100644 index 0000000..7c9f794 --- /dev/null +++ b/Preprocessor.cs @@ -0,0 +1,2248 @@ +/* + * Anarres C Preprocessor + * Copyright (c) 2007-2008, Shevek + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express + * or implied. See the License for the specific language governing + * permissions and limitations under the License. + */ + +using System; +using System.Text; +using System.Collections.Generic; +using System.IO; + +namespace CppNet { + + +/** + * A C Preprocessor. + * The Preprocessor outputs a token stream which does not need + * re-lexing for C or C++. Alternatively, the output text may be + * reconstructed by concatenating the {@link Token#getText() text} + * values of the returned {@link Token Tokens}. (See + * {@link CppReader}, which does this.) + */ + + +/* +Source file name and line number information is conveyed by lines of the form + + # linenum filename flags + +These are called linemarkers. They are inserted as needed into +the output (but never within a string or character constant). They +mean that the following line originated in file filename at line +linenum. filename will never contain any non-printing characters; +they are replaced with octal escape sequences. + +After the file name comes zero or more flags, which are `1', `2', +`3', or `4'. If there are multiple flags, spaces separate them. Here +is what the flags mean: + +`1' + This indicates the start of a new file. +`2' + This indicates returning to a file (after having included another + file). +`3' + This indicates that the following text comes from a system header + file, so certain warnings should be suppressed. +`4' + This indicates that the following text should be treated as being + wrapped in an implicit extern "C" block. 
+*/ + +public class Preprocessor : IDisposable { + private class InternalSource : Source { + public override Token token() + { + throw new LexerException("Cannot read from " + getName()); + } + + internal override String getPath() + { + return ""; + } + + internal override String getName() { + return "internal data"; + } + } + + private static readonly Source INTERNAL = new InternalSource(); + private static readonly Macro __LINE__ = new Macro(INTERNAL, "__LINE__"); + private static readonly Macro __FILE__ = new Macro(INTERNAL, "__FILE__"); + private static readonly Macro __COUNTER__ = new Macro(INTERNAL, "__COUNTER__"); + + private List inputs; + + /* The fundamental engine. */ + private Dictionary macros; + private Stack states; + private Source source; + + /* Miscellaneous support. */ + private int counter; + + /* Support junk to make it work like cpp */ + private List quoteincludepath; /* -iquote */ + private List sysincludepath; /* -I */ + private List frameworkspath; + private Feature features; + private Warning warnings; + private VirtualFileSystem filesystem; + private PreprocessorListener listener; + + private List _importedPaths = new List(); + + public Preprocessor() { + this.inputs = new List(); + + this.macros = new Dictionary(); + macros.Add(__LINE__.getName(), __LINE__); + macros.Add(__FILE__.getName(), __FILE__); + macros.Add(__COUNTER__.getName(), __COUNTER__); + this.states = new Stack(); + states.Push(new State()); + this.source = null; + + this.counter = 0; + + this.quoteincludepath = new List(); + this.sysincludepath = new List(); + this.frameworkspath = new List(); + this.features = Feature.NONE; + this.warnings = Warning.NONE; + this.filesystem = new JavaFileSystem(); + this.listener = null; + } + + public Preprocessor(Source initial) : + this() { + addInput(initial); + } + + /** Equivalent to + * 'new Preprocessor(new {@link FileLexerSource}(file))' + */ + public Preprocessor(FileInfo file) : + this(new FileLexerSource(file)) { + } + + /** + 
* Sets the VirtualFileSystem used by this Preprocessor. + */ + public void setFileSystem(VirtualFileSystem filesystem) { + this.filesystem = filesystem; + } + + /** + * Returns the VirtualFileSystem used by this Preprocessor. + */ + public VirtualFileSystem getFileSystem() { + return filesystem; + } + + /** + * Sets the PreprocessorListener which handles events for + * this Preprocessor. + * + * The listener is notified of warnings, errors and source + * changes, amongst other things. + */ + public void setListener(PreprocessorListener listener) { + this.listener = listener; + Source s = source; + while (s != null) { + // s.setListener(listener); + s.init(this); + s = s.getParent(); + } + } + + /** + * Returns the PreprocessorListener which handles events for + * this Preprocessor. + */ + public PreprocessorListener getListener() { + return listener; + } + + /** + * Returns the feature-set for this Preprocessor. + * + * This set may be freely modified by user code. + */ + public Feature getFeatures() { + return features; + } + + /** + * Adds a feature to the feature-set of this Preprocessor. + */ + public void addFeature(Feature f) { + features |= f; + } + + /** + * Adds features to the feature-set of this Preprocessor. + */ + public void addFeatures(Feature f) { + features |= f; + } + + /** + * Returns true if the given feature is in + * the feature-set of this Preprocessor. + */ + public bool getFeature(Feature f) { + return (features & f) != Feature.NONE; + } + + /** + * Returns the warning-set for this Preprocessor. + * + * This set may be freely modified by user code. + */ + public Warning getWarnings() { + return warnings; + } + + /** + * Adds a warning to the warning-set of this Preprocessor. + */ + public void addWarning(Warning w) { + warnings |= w; + } + + /** + * Adds warnings to the warning-set of this Preprocessor. 
+ */ + public void addWarnings(Warning w) { + warnings |= w; + } + + /** + * Returns true if the given warning is in + * the warning-set of this Preprocessor. + */ + public bool getWarning(Warning w) { + return (warnings & w) != Warning.NONE; + } + + /** + * Adds input for the Preprocessor. + * + * Inputs are processed in the order in which they are added. + */ + public void addInput(Source source) { + source.init(this); + inputs.Add(source); + } + + /** + * Adds input for the Preprocessor. + * + * @see #addInput(Source) + */ + public void addInput(FileInfo file) { + addInput(new FileLexerSource(file)); + } + + + /** + * Handles an error. + * + * If a PreprocessorListener is installed, it receives the + * error. Otherwise, an exception is thrown. + */ + protected void error(int line, int column, String msg) { + if (listener != null) + listener.handleError(source, line, column, msg); + else + throw new LexerException("Error at " + line + ":" + column + ": " + msg); + } + + /** + * Handles an error. + * + * If a PreprocessorListener is installed, it receives the + * error. Otherwise, an exception is thrown. + * + * @see #error(int, int, String) + */ + protected void error(Token tok, String msg) { + error(tok.getLine(), tok.getColumn(), msg); + } + + /** + * Handles a warning. + * + * If a PreprocessorListener is installed, it receives the + * warning. Otherwise, an exception is thrown. + */ + protected void warning(int line, int column, String msg) { + if (warnings.HasFlag(Warning.ERROR)) + error(line, column, msg); + else if (listener != null) + listener.handleWarning(source, line, column, msg); + else + throw new LexerException("Warning at " + line + ":" + column + ": " + msg); + } + + /** + * Handles a warning. + * + * If a PreprocessorListener is installed, it receives the + * warning. Otherwise, an exception is thrown. 
+ * + * @see #warning(int, int, String) + */ + protected void warning(Token tok, String msg) { + warning(tok.getLine(), tok.getColumn(), msg); + } + + /** + * Adds a Macro to this Preprocessor. + * + * The given {@link Macro} object encapsulates both the name + * and the expansion. + */ + public void addMacro(Macro m) { + // System.out.println("Macro " + m); + String name = m.getName(); + /* Already handled as a source error in macro(). */ + if ("defined" == name) + throw new LexerException("Cannot redefine name 'defined'"); + macros[m.getName()] = m; + } + + /** + * Defines the given name as a macro. + * + * The String value is lexed into a token stream, which is + * used as the macro expansion. + */ + public void addMacro(String name, String value) { + try { + Macro m = new Macro(name); + StringLexerSource s = new StringLexerSource(value); + for (;;) { + Token tok = s.token(); + if(tok.getType() == Token.EOF) + break; + m.addToken(tok); + } + addMacro(m); + } + catch (IOException e) { + throw new LexerException(e); + } + } + + /** + * Defines the given name as a macro, with the value 1. + * + * This is a convnience method, and is equivalent to + * addMacro(name, "1"). + */ + public void addMacro(String name) { + addMacro(name, "1"); + } + + /** + * Sets the user include path used by this Preprocessor. + */ + /* Note for future: Create an IncludeHandler? */ + public void setQuoteIncludePath(List path) { + this.quoteincludepath = path; + } + + /** + * Returns the user include-path of this Preprocessor. + * + * This list may be freely modified by user code. + */ + public List getQuoteIncludePath() { + return quoteincludepath; + } + + /** + * Sets the system include path used by this Preprocessor. + */ + /* Note for future: Create an IncludeHandler? */ + public void setSystemIncludePath(List path) { + this.sysincludepath = path; + } + + /** + * Returns the system include-path of this Preprocessor. + * + * This list may be freely modified by user code. 
+ */ + public List getSystemIncludePath() { + return sysincludepath; + } + + /** + * Sets the Objective-C frameworks path used by this Preprocessor. + */ + /* Note for future: Create an IncludeHandler? */ + public void setFrameworksPath(List path) { + this.frameworkspath = path; + } + + /** + * Returns the Objective-C frameworks path used by this + * Preprocessor. + * + * This list may be freely modified by user code. + */ + public List getFrameworksPath() { + return frameworkspath; + } + + /** + * Returns the Map of Macros parsed during the run of this + * Preprocessor. + */ + public Dictionary getMacros() { + return macros; + } + + /** + * Returns the named macro. + * + * While you can modify the returned object, unexpected things + * might happen if you do. + */ + public Macro getMacro(String name) { + Macro retval; + macros.TryGetValue(name, out retval); + return retval; + } + +/* States */ + + private void push_state() { + State top = states.Peek(); + states.Push(new State(top)); + } + + private void pop_state() { + State s = states.Pop(); + if (states.Count == 0) { + error(0, 0, "#" + "endif without #" + "if"); + states.Push(s); + } + } + + private bool isActive() { + State state = states.Peek(); + return state.isParentActive() && state.isActive(); + } + + +/* Sources */ + + /** + * Returns the top Source on the input stack. + * + * @see Source + * @see #push_source(Source,bool) + * @see #pop_source() + */ + public Source getSource() { + return source; + } + + /** + * Pushes a Source onto the input stack. + * + * @see #getSource() + * @see #pop_source() + */ + protected void push_source(Source source, bool autopop) { + source.init(this); + source.setParent(this.source, autopop); + // source.setListener(listener); + if (listener != null) + listener.handleSourceChange(this.source, "suspend"); + this.source = source; + if (listener != null) + listener.handleSourceChange(this.source, "push"); + } + + /** + * Pops a Source from the input stack. 
+ * + * @see #getSource() + * @see #push_source(Source,bool) + */ + protected void pop_source() { + if (listener != null) + listener.handleSourceChange(this.source, "pop"); + Source s = this.source; + this.source = s.getParent(); + /* Always a noop unless called externally. */ + s.close(); + if (listener != null && this.source != null) + listener.handleSourceChange(this.source, "resume"); + } + + +/* Source tokens */ + + private Token _source_token; + + /* XXX Make this include the Token.NL, and make all cpp directives eat + * their own Token.NL. */ + private Token line_token(int line, String name, String extra) { + StringBuilder buf = new StringBuilder(); + buf.Append("#line ").Append(line) + .Append(" \""); + /* XXX This call to escape(name) is correct but ugly. */ + MacroTokenSource.escape(buf, name); + buf.Append("\"").Append(extra).Append("\n"); + return new Token(Token.P_LINE, line, 0, buf.ToString(), null); + } + + private Token source_token() { + if(_source_token != null) { + Token tok = _source_token; + _source_token = null; + if (getFeature(Feature.DEBUG)) + System.Console.Error.WriteLine("Returning unget token " + tok); + return tok; + } + + for (;;) { + Source s = getSource(); + if (s == null) { + if (inputs.Count == 0) + return new Token(Token.EOF); + Source t = inputs[0]; + inputs.RemoveAt(0); + push_source(t, true); + if (getFeature(Feature.LINEMARKERS)) + return line_token(t.getLine(), t.getName(), " 1"); + continue; + } + Token tok = s.token(); + /* XXX Refactor with skipline() */ + if(tok.getType() == Token.EOF && s.isAutopop()) { + // System.out.println("Autopop " + s); + pop_source(); + Source t = getSource(); + if (getFeature(Feature.LINEMARKERS) + && s.isNumbered() + && t != null) { + /* We actually want 'did the nested source + * contain a newline token', which isNumbered() + * approximates. This is not perfect, but works. 
*/ + return line_token(t.getLine() + 1, t.getName(), " 2"); + } + continue; + } + if (getFeature(Feature.DEBUG)) + System.Console.Error.WriteLine("Returning fresh token " + tok); + return tok; + } + } + + private void source_untoken(Token tok) { + if (this._source_token != null) + throw new InvalidOperationException("Cannot return two tokens"); + this._source_token = tok; + } + + private bool isWhite(Token tok) { + int type = tok.getType(); + return (type == Token.WHITESPACE) + || (type == Token.CCOMMENT) + || (type == Token.CPPCOMMENT); + } + + private Token source_token_nonwhite() { + Token tok; + do { + tok = source_token(); + } while (isWhite(tok)); + return tok; + } + + /** + * Returns an Token.NL or an Token.EOF token. + * + * The metadata on the token will be correct, which is better + * than generating a new one. + * + * This method can, as of recent patches, return a P_LINE token. + */ + private Token source_skipline(bool white) { + // (new Exception("skipping line")).printStackTrace(System.out); + Source s = getSource(); + Token tok = s.skipline(white); + /* XXX Refactor with source_token() */ + if (tok.getType() == Token.EOF && s.isAutopop()) { + // System.out.println("Autopop " + s); + pop_source(); + Source t = getSource(); + if (getFeature(Feature.LINEMARKERS) + && s.isNumbered() + && t != null) { + /* We actually want 'did the nested source + * contain a newline token', which isNumbered() + * approximates. This is not perfect, but works. */ + return line_token(t.getLine() + 1, t.getName(), " 2"); + } + } + return tok; + } + + /* processes and expands a macro. */ + private bool macro(Macro m, Token orig) { + Token tok; + List args; + + // System.out.println("pp: expanding " + m); + + if (m.isFunctionLike()) { + for (;;) { + tok = source_token(); + // System.out.println("pp: open: token is " + tok); + switch (tok.getType()) { + case Token.WHITESPACE: /* XXX Really? 
*/ + case Token.CCOMMENT: + case Token.CPPCOMMENT: + case Token.NL: + break; /* continue */ + case '(': + goto BREAK_OPEN; + default: + source_untoken(tok); + return false; + } + } + BREAK_OPEN: + + // tok = expanded_token_nonwhite(); + tok = source_token_nonwhite(); + + /* We either have, or we should have args. + * This deals elegantly with the case that we have + * one empty arg. */ + if (tok.getType() != ')' || m.getArgs() > 0) { + args = new List(); + + Argument arg = new Argument(); + int depth = 0; + bool space = false; + + ARGS: for (;;) { + // System.out.println("pp: arg: token is " + tok); + switch (tok.getType()) { + case Token.EOF: + error(tok, "EOF in macro args"); + return false; + + case ',': + if (depth == 0) { + if (m.isVariadic() && + /* We are building the last arg. */ + args.Count == m.getArgs() - 1) { + /* Just add the comma. */ + arg.addToken(tok); + } + else { + args.Add(arg); + arg = new Argument(); + } + } + else { + arg.addToken(tok); + } + space = false; + break; + case ')': + if (depth == 0) { + args.Add(arg); + goto BREAK_ARGS; + } + else { + depth--; + arg.addToken(tok); + } + space = false; + break; + case '(': + depth++; + arg.addToken(tok); + space = false; + break; + + case Token.WHITESPACE: + case Token.CCOMMENT: + case Token.CPPCOMMENT: + /* Avoid duplicating spaces. */ + space = true; + break; + + default: + /* Do not put space on the beginning of + * an argument token. */ + if (space && arg.Count != 0) + arg.addToken(Token.space); + arg.addToken(tok); + space = false; + break; + + } + // tok = expanded_token(); + tok = source_token(); + } + BREAK_ARGS: + + if(m.isVariadic() && args.Count < m.getArgs()) { + args.Add(new Argument()); + } + /* space may still be true here, thus trailing space + * is stripped from arguments. 
*/ + + if (args.Count != m.getArgs()) { + error(tok, + "macro " + m.getName() + + " has " + m.getArgs() + " parameters " + + "but given " + args.Count + " args"); + /* We could replay the arg tokens, but I + * note that GNU cpp does exactly what we do, + * i.e. output the macro name and chew the args. + */ + return false; + } + + /* + for (Argument a : args) + a.expand(this); + */ + + for (int i = 0; i < args.Count; i++) { + args[i].expand(this); + } + + // System.out.println("Macro " + m + " args " + args); + } + else { + /* nargs == 0 and we (correctly) got () */ + args = null; + } + + } + else { + /* Macro without args. */ + args = null; + } + + if (m == __LINE__) { + push_source(new FixedTokenSource( + new Token[] { new Token(Token.INTEGER, + orig.getLine(), orig.getColumn(), + orig.getLine().ToString(), + orig.getLine()) } + ), true); + } + else if (m == __FILE__) { + StringBuilder buf = new StringBuilder("\""); + String name = getSource().getName(); + if (name == null) + name = ""; + for (int i = 0; i < name.Length; i++) { + char c = name[i]; + switch (c) { + case '\\': + buf.Append("\\\\"); + break; + case '"': + buf.Append("\\\""); + break; + default: + buf.Append(c); + break; + } + } + buf.Append("\""); + String text = buf.ToString(); + push_source(new FixedTokenSource( + new Token[] { new Token(Token.STRING, + orig.getLine(), orig.getColumn(), + text, text) } + ), true); + } + else if (m == __COUNTER__) { + /* This could equivalently have been done by adding + * a special Macro subclass which overrides getTokens(). */ + int value = this.counter++; + push_source(new FixedTokenSource( + new Token[] { new Token(Token.INTEGER, + orig.getLine(), orig.getColumn(), + value.ToString(), + value) } + ), true); + } + else { + push_source(new MacroTokenSource(m, args), true); + } + + return true; + } + + /** + * Expands an argument. + */ + /* I'd rather this were done lazily, but doing so breaks spec. 
*/
+    /* The argument's tokens are pushed as a temporary source, macro-expanded
+     * via expanded_token(), and collected. Each run of whitespace or comments
+     * becomes a single Token.space, and none is emitted at either end. */
+    internal List expand(List arg) {
+        List expansion = new List();
+        bool space = false;
+
+        push_source(new FixedTokenSource(arg), false);
+
+        for (;;) {
+            Token tok = expanded_token();
+            switch (tok.getType()) {
+                case Token.EOF:
+                    goto BREAK_EXPANSION;
+
+                case Token.WHITESPACE:
+                case Token.CCOMMENT:
+                case Token.CPPCOMMENT:
+                    /* Remember the gap; it is only emitted before a later token. */
+                    space = true;
+                    break;
+
+                default:
+                    if (space && expansion.Count != 0)
+                        expansion.Add(Token.space);
+                    expansion.Add(tok);
+                    space = false;
+                    break;
+            }
+        }
+        BREAK_EXPANSION:
+
+        pop_source();
+
+        return expansion;
+    }
+
+    /**
+     * Processes a #define directive.
+     *
+     * Reads the macro name, then a parameter list if a '(' follows the
+     * name with no intervening token, then the replacement list up to the
+     * end of the line, and finally registers the macro with addMacro().
+     *
+     * Parameters may end in "..." (recorded as __VA_ARGS__) or in a named
+     * variadic parameter ("args..."). Malformed parameter lists are reported
+     * through error() and the rest of the line is skipped.
+     *
+     * Returns the Token.NL or Token.EOF that terminated the directive.
+     */
+    private Token define() {
+        Token tok = source_token_nonwhite();
+        if (tok.getType() != Token.IDENTIFIER) {
+            error(tok, "Expected Token.IDENTIFIER");
+            return source_skipline(false);
+        }
+        /* if predefined */
+
+        String name = tok.getText();
+        if ("defined" == name) {
+            error(tok, "Cannot redefine name 'defined'");
+            return source_skipline(false);
+        }
+
+        Macro m = new Macro(getSource(), name);
+        List args;
+
+        /* Deliberately not the nonwhite variant: only a '(' directly after
+         * the name starts a parameter list. */
+        tok = source_token();
+        if (tok.getType() == '(') {
+            tok = source_token_nonwhite();
+            if (tok.getType() != ')') {
+                args = new List();
+                for (;;) {
+                    /* First half of each iteration: the parameter itself. */
+                    switch (tok.getType()) {
+                        case Token.IDENTIFIER:
+                            if (m.isVariadic()) {
+                                /* A named parameter after "..." is malformed.
+                                 * Diagnose it like the other bad parameter
+                                 * lists instead of throwing a bare Exception. */
+                                error(tok,
+                                    "ellipsis must be on last argument");
+                                return source_skipline(false);
+                            }
+                            args.Add(tok.getText());
+                            break;
+                        case Token.ELLIPSIS:
+                            m.setVariadic(true);
+                            args.Add("__VA_ARGS__");
+                            break;
+                        case Token.NL:
+                        case Token.EOF:
+                            error(tok,
+                                "Unterminated macro parameter list");
+                            return tok;
+                        default:
+                            error(tok,
+                                "error in macro parameters: " +
+                                tok.getText());
+                            return source_skipline(false);
+                    }
+                    /* Second half: the separator or terminator after it. */
+                    tok = source_token_nonwhite();
+                    switch (tok.getType()) {
+                        case ',':
+                            break;
+                        case Token.ELLIPSIS:
+                            /* Named variadic parameter: "name...". */
+                            tok = source_token_nonwhite();
+                            if (tok.getType() != ')')
+                                error(tok,
+                                    "ellipsis must be on last argument");
+                            m.setVariadic(true);
+                            goto BREAK_ARGS;
+                        case ')':
+                            goto BREAK_ARGS;
+
+                        case Token.NL:
+                        case Token.EOF:
+                            /* Do not skip line. */
+                            error(tok,
+                                "Unterminated macro parameters");
+                            return tok;
+                        default:
+                            error(tok,
+                                "Bad token in macro parameters: " +
+                                tok.getText());
+                            return source_skipline(false);
+                    }
+                    tok = source_token_nonwhite();
+                }
+                BREAK_ARGS:;
+            }
+            else {
+                System.Diagnostics.Debug.Assert(tok.getType() == ')', "Expected ')'");
+                args = new List();
+            }
+
+            m.setArgs(args);
+        }
+        else {
+            /* For searching. */
+            args = new List();
+            source_untoken(tok);
+        }
+
+        /* Get an expansion for the macro, using IndexOf. */
+        bool space = false;
+        bool paste = false;
+        int idx;
+
+        /* Ensure no space at start. */
+        tok = source_token_nonwhite();
+        for (;;) {
+            switch (tok.getType()) {
+                case Token.EOF:
+                    goto BREAK_EXPANSION;
+                case Token.NL:
+                    goto BREAK_EXPANSION;
+
+                case Token.CCOMMENT:
+                case Token.CPPCOMMENT:
+                    /* XXX This is where we implement GNU's cpp -CC. */
+                    // break;
+                case Token.WHITESPACE:
+                    /* Whitespace directly after a paste operator is dropped. */
+                    if (!paste)
+                        space = true;
+                    break;
+
+                /* Paste. */
+                case Token.PASTE:
+                    space = false;
+                    paste = true;
+                    m.addPaste(new Token(Token.M_PASTE,
+                            tok.getLine(), tok.getColumn(),
+                            "#" + "#", null));
+                    break;
+
+                /* Stringify. */
+                case '#':
+                    if (space)
+                        m.addToken(Token.space);
+                    space = false;
+                    Token la = source_token_nonwhite();
+                    if(la.getType() == Token.IDENTIFIER &&
+                        ((idx = args.IndexOf(la.getText())) != -1)) {
+                        /* '#' followed by a parameter name: stringify it. */
+                        m.addToken(new Token(Token.M_STRING,
+                                la.getLine(), la.getColumn(),
+                                "#" + la.getText(),
+                                idx));
+                    }
+                    else {
+                        m.addToken(tok);
+                        /* Allow for special processing. */
+                        source_untoken(la);
+                    }
+                    break;
+
+                case Token.IDENTIFIER:
+                    if (space)
+                        m.addToken(Token.space);
+                    space = false;
+                    paste = false;
+                    idx = args.IndexOf(tok.getText());
+                    if (idx == -1)
+                        m.addToken(tok);
+                    else
+                        /* A parameter reference carries its index as value. */
+                        m.addToken(new Token(Token.M_ARG,
+                                tok.getLine(), tok.getColumn(),
+                                tok.getText(),
+                                idx));
+                    break;
+
+                default:
+                    if (space)
+                        m.addToken(Token.space);
+                    space = false;
+                    paste = false;
+                    m.addToken(tok);
+                    break;
+            }
+            tok = source_token();
+        }
+        BREAK_EXPANSION:
+
+        if (getFeature(Feature.DEBUG))
+            System.Console.Error.WriteLine("Defined macro " + m);
+        addMacro(m);
+
+        return tok; /* Token.NL or Token.EOF. */
+    }
+
+    /**
+     * Processes an #undef directive.
+     *
+     * Removes the named macro if it is currently defined. A non-identifier
+     * operand is reported through error(); if that operand already is the
+     * Token.NL or Token.EOF it is returned directly, otherwise the rest of
+     * the line is consumed with source_skipline().
+     */
+    private Token undef() {
+        Token tok = source_token_nonwhite();
+        if (tok.getType() != Token.IDENTIFIER) {
+            error(tok,
+                "Expected identifier, not " + tok.getText());
+            if(tok.getType() == Token.NL || tok.getType() == Token.EOF)
+                return tok;
+        }
+        else {
+            Macro m;
+            macros.TryGetValue(tok.getText(), out m);
+            if (m != null) {
+                /* XXX error if predefined */
+                macros.Remove(m.getName());
+            }
+        }
+        return source_skipline(true);
+    }
+
+    /**
+     * Attempts to include the given file.
+     *
+     * Returns false if file.isFile() is false. Otherwise returns true and,
+     * unless checkOnly is set, pushes the file's source. For an import whose
+     * path has already been recorded nothing is pushed, but true is still
+     * returned.
+     */
+    private bool include(VirtualFile file, bool isImport, bool checkOnly) {
+        if (!file.isFile())
+            return false;
+
+        if(!checkOnly) {
+            if(isImport) {
+                if(_importedPaths.Contains(file.getPath())) {
+                    return true;
+                }
+
+                _importedPaths.Add(file.getPath());
+            }
+
+            if(getFeature(Feature.DEBUG))
+                System.Console.WriteLine("pp: including " + file);
+
+            push_source(file.getSource(), true);
+        }
+        return true;
+    }
+
+    /**
+     * Includes a file from an include path, by name.
+ */ + private bool include(IEnumerable path, String name, bool isImport, bool checkOnly) { + foreach (String dir in path) { + VirtualFile file = filesystem.getFile(dir, name); + if (include(file, isImport, checkOnly)) + return true; + } + return false; + } + + private bool includeFramework(IEnumerable path, string name, bool isImport, bool checkOnly) + { + string[] framework = name.Split(new char[] { '/' }, 2); + if(framework.Length < 2) { + return false; + } + name = Path.Combine(Path.Combine(framework[0] + ".framework", "Headers"), framework[1]); + + foreach(String dir in path) { + VirtualFile file = filesystem.getFile(dir, name); + if(include(file, isImport, checkOnly)) + return true; + } + return false; + + } + + /** + * Handles an include directive. + */ + private bool include(String parent, int line, String name, bool quoted, bool isImport, bool checkOnly) { + VirtualFile pdir = null; + if (quoted) { + VirtualFile pfile = filesystem.getFile(parent); + pdir = pfile.getParentFile(); + VirtualFile ifile = pdir.getChildFile(name); + if(include(ifile, isImport, checkOnly)) + return true; + if(include(quoteincludepath, name, isImport, checkOnly)) + return true; + } + + if(include(sysincludepath, name, isImport, checkOnly)) + return true; + + if(includeFramework(frameworkspath, name, isImport, checkOnly)) { + return true; + } + if(checkOnly) { + return false; + } + + StringBuilder buf = new StringBuilder(); + buf.Append("File not found: ").Append(name); + buf.Append(" in"); + if (quoted) { + buf.Append(" .").Append('(').Append(pdir).Append(')'); + foreach (String dir in quoteincludepath) + buf.Append(" ").Append(dir); + } + foreach (String dir in sysincludepath) + buf.Append(" ").Append(dir); + error(line, 0, buf.ToString()); + return false; + } + + private bool has_feature() { + Token tok; + tok = token_nonwhite(); + if(tok.getType() != '(') { + throw new Exception(); + } + tok = token_nonwhite(); + string feature = tok.getText(); + + tok = token_nonwhite(); + 
if(tok.getType() != ')') { + throw new Exception(); + } + switch(feature) { + + case "address_sanitizer": return true; //, LangOpts.Sanitize.Address) + case "attribute_analyzer_noreturn": return true; + case "attribute_availability": return true; + case "attribute_availability_with_message": return true; + case "attribute_cf_returns_not_retained": return true; + case "attribute_cf_returns_retained": return true; + case "attribute_deprecated_with_message": return true; + case "attribute_ext_vector_type": return true; + case "attribute_ns_returns_not_retained": return true; + case "attribute_ns_returns_retained": return true; + case "attribute_ns_consumes_self": return true; + case "attribute_ns_consumed": return true; + case "attribute_cf_consumed": return true; + case "attribute_objc_ivar_unused": return true; + case "attribute_objc_method_family": return true; + case "attribute_overloadable": return true; + case "attribute_unavailable_with_message": return true; + case "attribute_unused_on_fields": return true; + case "blocks": return true; //, LangOpts.Blocks) + case "c_thread_safety_attributes": return true; + case "cxx_exceptions": return true; //, LangOpts.CXXExceptions) + case "cxx_rtti": return true; //, LangOpts.RTTI) + case "enumerator_attributes": return true; + case "memory_sanitizer": return true; //, LangOpts.Sanitize.Memory) + case "thread_sanitizer": return true; //, LangOpts.Sanitize.Thread) + case "dataflow_sanitizer": return true; //, LangOpts.Sanitize.DataFlow) + + case "objc_arr": return true; //, LangOpts.ObjCAutoRefCount) // FIXME: REMOVE? 
+ case "objc_arc": return true; //, LangOpts.ObjCAutoRefCount) + case "objc_arc_weak": return true; //, LangOpts.ObjCARCWeak) + case "objc_default_synthesize_properties": return true; //, LangOpts.ObjC2) + case "objc_fixed_enum": return true; //, LangOpts.ObjC2) + case "objc_instancetype": return true; //, LangOpts.ObjC2) + case "objc_modules": return true; //, LangOpts.ObjC2 && LangOpts.Modules) + case "objc_nonfragile_abi": return true; //, LangOpts.ObjCRuntime.isNonFragile()) + case "objc_property_explicit_atomic": return true; // Does clang support explicit "atomic" keyword? + case "objc_protocol_qualifier_mangling": return true; + case "objc_weak_class": return true; //, LangOpts.ObjCRuntime.hasWeakClassImport()) + case "ownership_holds": return true; + case "ownership_returns": return true; + case "ownership_takes": return true; + case "objc_bool": return true; + case "objc_subscripting": return true; //, LangOpts.ObjCRuntime.isNonFragile()) + case "objc_array_literals": return true; //, LangOpts.ObjC2) + case "objc_dictionary_literals": return true; //, LangOpts.ObjC2) + case "objc_boxed_expressions": return true; //, LangOpts.ObjC2) + case "arc_cf_code_audited": return true; + // C11 features + case "c_alignas": return true; //, LangOpts.C11) + case "c_atomic": return true; //, LangOpts.C11) + case "c_generic_selections": return true; //, LangOpts.C11) + case "c_static_assert": return true; //, LangOpts.C11) + case "c_thread_local": return true; // LangOpts.C11 && PP.getTargetInfo().isTLSSupported()) + // C++11 features + case "cxx_access_control_sfinae": return true; //, LangOpts.CPlusPlus11) + case "cxx_alias_templates": return true; //, LangOpts.CPlusPlus11) + case "cxx_alignas": return true; //, LangOpts.CPlusPlus11) + case "cxx_atomic": return true; //, LangOpts.CPlusPlus11) + case "cxx_attributes": return true; //, LangOpts.CPlusPlus11) + case "cxx_auto_type": return true; //, LangOpts.CPlusPlus11) + case "cxx_constexpr": return true; //, 
LangOpts.CPlusPlus11) + case "cxx_decltype": return true; //, LangOpts.CPlusPlus11) + case "cxx_decltype_incomplete_return_types": return true; //, LangOpts.CPlusPlus11) + case "cxx_default_function_template_args": return true; //, LangOpts.CPlusPlus11) + case "cxx_defaulted_functions": return true; //, LangOpts.CPlusPlus11) + case "cxx_delegating_constructors": return true; //, LangOpts.CPlusPlus11) + case "cxx_deleted_functions": return true; //, LangOpts.CPlusPlus11) + case "cxx_explicit_conversions": return true; //, LangOpts.CPlusPlus11) + case "cxx_generalized_initializers": return true; //, LangOpts.CPlusPlus11) + case "cxx_implicit_moves": return true; //, LangOpts.CPlusPlus11) + case "cxx_inheriting_constructors": return true; //, LangOpts.CPlusPlus11) + case "cxx_inline_namespaces": return true; //, LangOpts.CPlusPlus11) + case "cxx_lambdas": return true; //, LangOpts.CPlusPlus11) + case "cxx_local_type_template_args": return true; //, LangOpts.CPlusPlus11) + case "cxx_nonstatic_member_init": return true; //, LangOpts.CPlusPlus11) + case "cxx_noexcept": return true; //, LangOpts.CPlusPlus11) + case "cxx_nullptr": return true; //, LangOpts.CPlusPlus11) + case "cxx_override_control": return true; //, LangOpts.CPlusPlus11) + case "cxx_range_for": return true; //, LangOpts.CPlusPlus11) + case "cxx_raw_string_literals": return true; //, LangOpts.CPlusPlus11) + case "cxx_reference_qualified_functions": return true; //, LangOpts.CPlusPlus11) + case "cxx_rvalue_references": return true; //, LangOpts.CPlusPlus11) + case "cxx_strong_enums": return true; //, LangOpts.CPlusPlus11) + case "cxx_static_assert": return true; //, LangOpts.CPlusPlus11) + case "cxx_thread_local": return true; //LangOpts.CPlusPlus11 && PP.getTargetInfo().isTLSSupported()) + case "cxx_trailing_return": return true; //, LangOpts.CPlusPlus11) + case "cxx_unicode_literals": return true; //, LangOpts.CPlusPlus11) + case "cxx_unrestricted_unions": return true; //, LangOpts.CPlusPlus11) + case 
"cxx_user_literals": return true; //, LangOpts.CPlusPlus11) + case "cxx_variadic_templates": return true; //, LangOpts.CPlusPlus11) + // C++1y features + case "cxx_aggregate_nsdmi": return true; //, LangOpts.CPlusPlus1y) + case "cxx_binary_literals": return true; //, LangOpts.CPlusPlus1y) + case "cxx_contextual_conversions": return true; //, LangOpts.CPlusPlus1y) + case "cxx_decltype_auto": return true; //, LangOpts.CPlusPlus1y) + case "cxx_generic_lambdas": return true; //, LangOpts.CPlusPlus1y) + case "cxx_init_captures": return true; //, LangOpts.CPlusPlus1y) + case "cxx_relaxed_constexpr": return true; //, LangOpts.CPlusPlus1y) + case "cxx_return_type_deduction": return true; //, LangOpts.CPlusPlus1y) + case "cxx_variable_templates": return true; //, LangOpts.CPlusPlus1y) + case "has_nothrow_assign": return true; //, LangOpts.CPlusPlus) + case "has_nothrow_copy": return true; //, LangOpts.CPlusPlus) + case "has_nothrow_constructor": return true; //, LangOpts.CPlusPlus) + case "has_trivial_assign": return true; //, LangOpts.CPlusPlus) + case "has_trivial_copy": return true; //, LangOpts.CPlusPlus) + case "has_trivial_constructor": return true; //, LangOpts.CPlusPlus) + case "has_trivial_destructor": return true; //, LangOpts.CPlusPlus) + case "has_virtual_destructor": return true; //, LangOpts.CPlusPlus) + case "is_abstract": return true; //, LangOpts.CPlusPlus) + case "is_base_of": return true; //, LangOpts.CPlusPlus) + case "is_class": return true; //, LangOpts.CPlusPlus) + case "is_constructible": return true; //, LangOpts.CPlusPlus) + case "is_convertible_to": return true; //, LangOpts.CPlusPlus) + case "is_empty": return true; //, LangOpts.CPlusPlus) + case "is_enum": return true; //, LangOpts.CPlusPlus) + case "is_final": return true; //, LangOpts.CPlusPlus) + case "is_literal": return true; //, LangOpts.CPlusPlus) + case "is_standard_layout": return true; //, LangOpts.CPlusPlus) + case "is_pod": return true; //, LangOpts.CPlusPlus) + case 
"is_polymorphic": return true; //, LangOpts.CPlusPlus)
+            case "is_sealed": return true; //, LangOpts.MicrosoftExt)
+            case "is_trivial": return true; //, LangOpts.CPlusPlus)
+            case "is_trivially_assignable": return true; //, LangOpts.CPlusPlus)
+            case "is_trivially_constructible": return true; //, LangOpts.CPlusPlus)
+            case "is_trivially_copyable": return true; //, LangOpts.CPlusPlus)
+            case "is_union": return true; //, LangOpts.CPlusPlus)
+            case "modules": return true; //, LangOpts.Modules)
+            case "tls": return true; // PP.getTargetInfo().isTLSSupported())
+            case "underlying_type": return true; //, LangOpts.CPlusPlus)
+            default:
+                return false;
+        }
+
+
+
+    }
+
+    /**
+     * Evaluates the operand of a __has_include / __has_include_next
+     * operator inside a conditional expression.
+     *
+     * The operator name has already been consumed by the caller. This reads
+     * ( "name" ) or ( <name> ) and probes the include paths in check-only
+     * mode, so no source is pushed and no "file not found" error is raised.
+     *
+     * The 'next' argument is accepted for symmetry with the include_next
+     * directive but is not consulted here.
+     *
+     * Returns true if the named file would be found.
+     * Throws an Exception if the operand is malformed.
+     */
+    private bool has_include(bool next)
+    {
+        LexerSource lexer = (LexerSource)source;
+        string name;
+        bool quoted;
+
+        Token tok = token_nonwhite();
+        if(tok.getType() != '(') {
+            throw new Exception("Expected ( after __has_include");
+        }
+
+        /* Include mode stays on only while the file name and the token
+         * after it are read. The finally block restores normal lexing on
+         * every exit path, mirroring include(bool, bool). */
+        lexer.setInclude(true);
+        try {
+            tok = token_nonwhite();
+
+            if(tok.getType() == Token.STRING) {
+                /* XXX Use the original text, not the value.
+                 * Backslashes must not be treated as escapes here. */
+                StringBuilder buf = new StringBuilder((String)tok.getValue());
+                /* Adjacent string tokens are concatenated. The first token
+                 * that is not a string must be the closing parenthesis,
+                 * which is checked below. */
+                tok = token_nonwhite();
+                while(tok.getType() == Token.STRING) {
+                    buf.Append((String)tok.getValue());
+                    tok = token_nonwhite();
+                }
+                name = buf.ToString();
+                quoted = true;
+            } else if(tok.getType() == Token.HEADER) {
+                name = (String)tok.getValue();
+                quoted = false;
+                tok = token_nonwhite();
+            } else {
+                throw new Exception(
+                    "Expected string or header in __has_include, not " + tok.getText());
+            }
+        }
+        finally {
+            lexer.setInclude(false);
+        }
+
+        if(tok.getType() != ')') {
+            throw new Exception("Missing ) in __has_include");
+        }
+
+        return include(source.getPath(), tok.getLine(), name, quoted, false, true);
+    }
+
+    private Token include(bool next, bool isImport) {
+        LexerSource lexer = (LexerSource)source;
+        try {
+            lexer.setInclude(true);
+            Token tok = token_nonwhite();
+
+            String name;
+            bool quoted;
+
+            if(tok.getType() == Token.STRING) {
+                /* XXX Use the original text, not the value.
+                 * Backslashes must not be treated as escapes here. */
+                StringBuilder buf = new StringBuilder((String)tok.getValue());
+                for (;;) {
+                    tok = token_nonwhite();
+                    switch (tok.getType()) {
+                        case Token.STRING:
+                            buf.Append((String)tok.getValue());
+                            break;
+                        case Token.NL:
+                        case Token.EOF:
+                            goto BREAK_HEADER;
+                        default:
+                            warning(tok,
+                                "Unexpected token on #"+"include line");
+                            return source_skipline(false);
+                    }
+                }
+                BREAK_HEADER:
+                name = buf.ToString();
+                quoted = true;
+            } else if(tok.getType() == Token.HEADER) {
+                name = (String)tok.getValue();
+                quoted = false;
+                tok = source_skipline(true);
+            }
+            else {
+                error(tok,
+                    "Expected string or header, not " + tok.getText());
+                switch (tok.getType()) {
+                    case Token.NL:
+                    case Token.EOF:
+                        return tok;
+                    default:
+                        /* Only if not a Token.NL or Token.EOF already. */
+                        return source_skipline(false);
+                }
+            }
+
+            /* Do the inclusion.
*/ + include(source.getPath(), tok.getLine(), name, quoted, isImport, false); + + /* 'tok' is the 'nl' after the include. We use it after the + * #line directive. */ + if (getFeature(Feature.LINEMARKERS)) + return line_token(1, source.getName(), " 1"); + return tok; + } + finally { + lexer.setInclude(false); + } + } + + protected void pragma(Token name, List value) { + warning(name, "Unknown #" + "pragma: " + name.getText()); + } + + private Token pragma() { + Token name; + + for (;;) { + Token tok = token(); + switch (tok.getType()) { + case Token.EOF: + /* There ought to be a newline before Token.EOF. + * At least, in any skipline context. */ + /* XXX Are we sure about this? */ + warning(tok, + "End of file in #" + "pragma"); + return tok; + case Token.NL: + /* This may contain one or more newlines. */ + warning(tok, + "Empty #" + "pragma"); + return tok; + case Token.CCOMMENT: + case Token.CPPCOMMENT: + case Token.WHITESPACE: + continue; + case Token.IDENTIFIER: + name = tok; + goto BREAK_NAME; + default: + return source_skipline(false); + } + } + BREAK_NAME: + + Token tok2; + List value = new List(); + for (;;) { + tok2 = token(); + switch (tok2.getType()) { + case Token.EOF: + /* There ought to be a newline before Token.EOF. + * At least, in any skipline context. */ + /* XXX Are we sure about this? */ + warning(tok2, + "End of file in #" + "pragma"); + goto BREAK_VALUE; + case Token.NL: + /* This may contain one or more newlines. */ + goto BREAK_VALUE; + case Token.CCOMMENT: + case Token.CPPCOMMENT: + break; + case Token.WHITESPACE: + value.Add(tok2); + break; + default: + value.Add(tok2); + break; + } + } + BREAK_VALUE: + + pragma(name, value); + + return tok2; /* The Token.NL. */ + } + + /* For #error and #warning. */ + private void error(Token pptok, bool is_error) { + StringBuilder buf = new StringBuilder(); + buf.Append('#').Append(pptok.getText()).Append(' '); + /* Peculiar construction to ditch first whitespace. 
*/ + Token tok = source_token_nonwhite(); + for (;;) { + switch (tok.getType()) { + case Token.NL: + case Token.EOF: + goto BREAK_ERROR; + default: + buf.Append(tok.getText()); + break; + } + tok = source_token(); + } + BREAK_ERROR: + if (is_error) + error(pptok, buf.ToString()); + else + warning(pptok, buf.ToString()); + } + + + + + /* This bypasses token() for #elif expressions. + * If we don't do this, then isActive() == false + * causes token() to simply chew the entire input line. */ + private Token expanded_token() { + for (;;) { + Token tok = source_token(); + // System.out.println("Source token is " + tok); + if (tok.getType() == Token.IDENTIFIER) { + Macro m; + macros.TryGetValue(tok.getText(), out m); + if (m == null) + return tok; + if (source.isExpanding(m)) + return tok; + if (macro(m, tok)) + continue; + } + return tok; + } + } + + private Token expanded_token_nonwhite() { + Token tok; + do { + tok = expanded_token(); + // System.out.println("expanded token is " + tok); + } while (isWhite(tok)); + return tok; + } + + + private Token _expr_token = null; + + private Token expr_token() { + Token tok = _expr_token; + + if (tok != null) { + // System.out.println("ungetting"); + _expr_token = null; + } + else { + tok = expanded_token_nonwhite(); + // System.out.println("expt is " + tok); + + if (tok.getType() == Token.IDENTIFIER && + tok.getText() == "defined") { + Token la = source_token_nonwhite(); + bool paren = false; + if (la.getType() == '(') { + paren = true; + la = source_token_nonwhite(); + } + + // System.out.println("Core token is " + la); + + if (la.getType() != Token.IDENTIFIER) { + error(la, + "defined() needs identifier, not " + + la.getText()); + tok = new Token(Token.INTEGER, + la.getLine(), la.getColumn(), + "0", 0); + } + else if (macros.ContainsKey(la.getText())) { + // System.out.println("Found macro"); + tok = new Token(Token.INTEGER, + la.getLine(), la.getColumn(), + "1", 1); + } else if(la.getText() == "__has_include_next" || 
la.getText() == "__has_include" || la.getText() == "__has_feature") {
+                    tok = new Token(Token.INTEGER,
+                            la.getLine(), la.getColumn(),
+                            "1", 1);
+                } else {
+                    // System.out.println("Not found macro");
+                    tok = new Token(Token.INTEGER,
+                            la.getLine(), la.getColumn(),
+                            "0", 0);
+                }
+
+                if (paren) {
+                    la = source_token_nonwhite();
+                    if (la.getType() != ')') {
+                        expr_untoken(la);
+                        error(la, "Missing ) in defined()");
+                    }
+                }
+            }
+        }
+
+        // System.out.println("expr_token returns " + tok);
+
+        return tok;
+    }
+
+    /**
+     * Pushes one expression token back so that the next expr_token() call
+     * returns it. Only a single token of push-back is supported.
+     */
+    private void expr_untoken(Token tok) {
+        if(_expr_token != null)
+            throw new Exception (
+                "Cannot unget two expression tokens."
+            );
+        _expr_token = tok;
+    }
+
+    /**
+     * Returns the binding priority of a binary (or '?') operator token;
+     * a higher number binds tighter. Returns 0 for any other token.
+     */
+    private int expr_priority(Token op) {
+        switch (op.getType()) {
+            case '/': return 11;
+            case '%': return 11;
+            case '*': return 11;
+            case '+': return 10;
+            case '-': return 10;
+            case Token.LSH: return 9;
+            case Token.RSH: return 9;
+            case '<': return 8;
+            case '>': return 8;
+            case Token.LE: return 8;
+            case Token.GE: return 8;
+            case Token.EQ: return 7;
+            case Token.NE: return 7;
+            case '&': return 6;
+            case '^': return 5;
+            case '|': return 4;
+            case Token.LAND: return 3;
+            case Token.LOR: return 2;
+            case '?': return 1;
+            default:
+                // System.out.println("Unrecognised operator " + op);
+                return 0;
+        }
+    }
+
+    /**
+     * Evaluates a conditional-directive expression.
+     *
+     * Parses one operand (parenthesised expression, unary operator, integer
+     * or character literal, or identifier) and then folds in operators whose
+     * priority is strictly greater than 'priority', recursing for each
+     * right-hand side. The first token that does not qualify is pushed back
+     * with expr_untoken() for the caller.
+     *
+     * Identifiers that reach this point evaluate to 0, except the
+     * __has_include, __has_include_next and __has_feature operators.
+     *
+     * Malformed input is reported through error() and evaluates to 0.
+     */
+    private long expr(int priority) {
+        Token tok = expr_token();
+        long lhs, rhs;
+
+        switch (tok.getType()) {
+            case '(':
+                lhs = expr(0);
+                tok = expr_token();
+                if (tok.getType() != ')') {
+                    expr_untoken(tok);
+                    error(tok, "missing ) in expression");
+                    return 0;
+                }
+                break;
+
+            case '~': lhs = ~expr(11);              break;
+            case '!': lhs = expr(11) == 0 ? 1 : 0;  break;
+            case '-': lhs = -expr(11);              break;
+            case Token.INTEGER:
+                lhs = Convert.ToInt64(tok.getValue());
+                break;
+            case Token.CHARACTER:
+                lhs = (long)((char)tok.getValue());
+                break;
+            case Token.IDENTIFIER:
+                if(tok.getText() == "__has_include_next") {
+                    lhs = has_include(true) ? 1 : 0;
+                } else if(tok.getText() == "__has_include") {
+                    lhs = has_include(false) ? 1 : 0;
+                } else if(tok.getText() == "__has_feature") {
+                    lhs = has_feature() ? 1 : 0;
+                } else {
+                    if(warnings.HasFlag(Warning.UNDEF)) {
+                        warning(tok, "Undefined token '" + tok.getText() +
+                                "' encountered in conditional.");
+                    }
+                    lhs = 0;
+                }
+                break;
+
+            default:
+                expr_untoken(tok);
+                error(tok,
+                    "Bad token in expression: " + tok.getText());
+                return 0;
+        }
+
+        for (;;) {
+            Token op = expr_token();
+            int pri = expr_priority(op);    /* 0 if not a binop. */
+            if (pri == 0 || priority >= pri) {
+                /* Not ours to consume: hand it back and finish. */
+                expr_untoken(op);
+                return lhs;
+            }
+            rhs = expr(pri);
+            switch (op.getType()) {
+                case '/':
+                    if (rhs == 0) {
+                        error(op, "Division by zero");
+                        lhs = 0;
+                    }
+                    else {
+                        lhs = lhs / rhs;
+                    }
+                    break;
+                case '%':
+                    if (rhs == 0) {
+                        error(op, "Modulus by zero");
+                        lhs = 0;
+                    }
+                    else {
+                        lhs = lhs % rhs;
+                    }
+                    break;
+                case '*':  lhs = lhs * rhs; break;
+                case '+':  lhs = lhs + rhs; break;
+                case '-':  lhs = lhs - rhs; break;
+                case '<':  lhs = lhs < rhs ? 1 : 0; break;
+                case '>':  lhs = lhs > rhs ? 1 : 0; break;
+                case '&':  lhs = lhs & rhs; break;
+                case '^':  lhs = lhs ^ rhs; break;
+                case '|':  lhs = lhs | rhs; break;
+
+                case Token.LSH:  lhs = lhs << (int)rhs; break;
+                case Token.RSH:  lhs = lhs >> (int)rhs; break;
+                case Token.LE:   lhs = lhs <= rhs ? 1 : 0; break;
+                case Token.GE:   lhs = lhs >= rhs ? 1 : 0; break;
+                case Token.EQ:   lhs = lhs == rhs ? 1 : 0; break;
+                case Token.NE:   lhs = lhs != rhs ? 1 : 0; break;
+                case Token.LAND: lhs = (lhs != 0) && (rhs != 0) ? 1 : 0; break;
+                case Token.LOR:  lhs = (lhs != 0) || (rhs != 0) ? 1 : 0; break;
+
+                case '?':
+                    /* rhs already holds the value for a true condition. */
+                    Token colon = expr_token();
+                    if(colon.getText() != ":") {
+                        /* Same recovery as a missing ')' above. */
+                        expr_untoken(colon);
+                        error(colon, "Missing : in conditional expression");
+                        return 0;
+                    }
+                    long rrhs = expr(0);
+                    /* Any nonzero condition is true, not only the value 1. */
+                    if(lhs != 0) {
+                        lhs = rhs;
+                    } else {
+                        lhs = rrhs;
+                    }
+                    break;
+
+                default:
+                    error(op,
+                        "Unexpected operator " + op.getText());
+                    return 0;
+
+            }
+        }
+    }
+
+    /**
+     * Replaces a token by a Token.WHITESPACE token at the same position
+     * whose text is one '\n' per line terminator found in the original
+     * text, so that line numbering is preserved.
+     *
+     * A "\r\n" pair counts as one terminator. A lone '\r', '\n', vertical
+     * tab, form feed, U+0085, U+2028 or U+2029 each count as one.
+     */
+    private Token toWhitespace(Token tok) {
+        String text = tok.getText();
+        int len = text.Length;
+        bool cr = false;    /* true only directly after a '\r' */
+        int nls = 0;
+
+        for (int i = 0; i < len; i++) {
+            char c = text[i];
+
+            switch (c) {
+                case '\r':
+                    cr = true;
+                    nls++;
+                    break;
+                case '\n':
+                    if (cr) {
+                        /* Second half of "\r\n": already counted. */
+                        cr = false;
+                        break;
+                    }
+                    nls++;
+                    break;
+                case '\u2028':
+                case '\u2029':
+                case '\u000B':
+                case '\u000C':
+                case '\u0085':
+                    cr = false;
+                    nls++;
+                    break;
+                default:
+                    /* Any other character separates a '\r' from a later
+                     * '\n', which must then be counted on its own. */
+                    cr = false;
+                    break;
+            }
+        }
+
+        return new Token(Token.WHITESPACE,
+                tok.getLine(), tok.getColumn(),
+                new String('\n', nls));
+    }
+
+    private Token _token() {
+
+        SKIP_TOKEN:
+        for (;;) {
+            Token tok;
+            if (!isActive()) {
+                try {
+                    /* XXX Tell lexer to ignore warnings. */
+                    source.setActive(false);
+                    tok = source_token();
+                }
+                finally {
+                    /* XXX Tell lexer to stop ignoring warnings. */
+                    source.setActive(true);
+                }
+                switch (tok.getType()) {
+                    case Token.HASH:
+                    case Token.NL:
+                    case Token.EOF:
+                        /* The preprocessor has to take action here. */
+                        break;
+                    case Token.WHITESPACE:
+                        return tok;
+                    case Token.CCOMMENT:
+                    case Token.CPPCOMMENT:
+                        // Patch up to preserve whitespace.
+ if (getFeature(Feature.KEEPALLCOMMENTS)) + return tok; + if (!isActive()) + return toWhitespace(tok); + if (getFeature(Feature.KEEPCOMMENTS)) + return tok; + return toWhitespace(tok); + default: + // Return Token.NL to preserve whitespace. + /* XXX This might lose a comment. */ + return source_skipline(false); + } + } + else { + tok = source_token(); + } + + LEX: switch (tok.getType()) { + case Token.EOF: + /* Pop the stacks. */ + return tok; + + case Token.WHITESPACE: + case Token.NL: + //goto SKIP_TOKEN; + return tok; + + case Token.CCOMMENT: + case Token.CPPCOMMENT: + + //if(!getFeature(Feature.KEEPALLCOMMENTS)) { + // goto SKIP_TOKEN; + //} + return tok; + + case '!': case '%': case '&': + case '(': case ')': case '*': + case '+': case ',': case '-': + case '/': case ':': case ';': + case '<': case '=': case '>': + case '?': case '[': case ']': + case '^': case '{': case '|': + case '}': case '~': case '.': + + /* From Olivier Chafik for Objective C? */ + case '@': + /* The one remaining ASCII, might as well. 
*/ + case '`': + + // case '#': + + case Token.AND_EQ: + case Token.ARROW: + case Token.CHARACTER: + case Token.DEC: + case Token.DIV_EQ: + case Token.ELLIPSIS: + case Token.EQ: + case Token.GE: + case Token.HEADER: /* Should only arise from include() */ + case Token.INC: + case Token.LAND: + case Token.LE: + case Token.LOR: + case Token.LSH: + case Token.LSH_EQ: + case Token.SUB_EQ: + case Token.MOD_EQ: + case Token.MULT_EQ: + case Token.NE: + case Token.OR_EQ: + case Token.PLUS_EQ: + case Token.RANGE: + case Token.RSH: + case Token.RSH_EQ: + case Token.STRING: + case Token.XOR_EQ: + return tok; + + case Token.INTEGER: + return tok; + + case Token.IDENTIFIER: + Macro m; + macros.TryGetValue(tok.getText(), out m); + if(tok.getText() == "__has_include_next") { + Console.WriteLine(); + } + if (m == null) + return tok; + if (source.isExpanding(m)) + return tok; + if (macro(m, tok)) + break; + return tok; + + case Token.P_LINE: + if (getFeature(Feature.LINEMARKERS)) + return tok; + break; + + case Token.INVALID: + if (getFeature(Feature.CSYNTAX)) + error(tok, (String)tok.getValue()); + return tok; + + default: + throw new Exception("Bad token " + tok); + // break; + + case Token.HASH: + tok = source_token_nonwhite(); + // (new Exception("here")).printStackTrace(); + switch (tok.getType()) { + case Token.NL: + goto BREAK_LEX; /* Some code has #\n */ + case Token.IDENTIFIER: + break; + default: + error(tok, + "Preprocessor directive not a word " + + tok.getText()); + return source_skipline(false); + } + int _ppcmd = ppcmds[tok.getText()]; + if (_ppcmd == null) { + error(tok, + "Unknown preprocessor directive " + + tok.getText()); + return source_skipline(false); + } + int ppcmd = _ppcmd; + + PP: switch(ppcmd) { + + case PP_DEFINE: + if(!isActive()) + return source_skipline(false); + else + return define(); + // break; + + case PP_UNDEF: + if(!isActive()) + return source_skipline(false); + else + return undef(); + // break; + + case PP_INCLUDE: + if(!isActive()) + return 
source_skipline(false); + else + return include(false, false); + // break; + case PP_INCLUDE_NEXT: + if(!isActive()) + return source_skipline(false); + if(!getFeature(Feature.INCLUDENEXT)) { + error(tok, + "Directive include_next not enabled" + ); + return source_skipline(false); + } + return include(true, false); + // break; + + case PP_WARNING: + case PP_ERROR: + if(!isActive()) + return source_skipline(false); + else + error(tok, ppcmd == PP_ERROR); + break; + + case PP_IF: + push_state(); + if(!isActive()) { + return source_skipline(false); + } + _expr_token = null; + states.Peek().setActive(expr(0) != 0); + tok = expr_token(); /* unget */ + if(tok.getType() == Token.NL) + return tok; + return source_skipline(true); + // break; + + case PP_ELIF: + State state = states.Peek(); + if(false) { + /* Check for 'if' */ + ; + } else if(state.sawElse()) { + error(tok, + "#elif after #" + "else"); + return source_skipline(false); + } else if(!state.isParentActive()) { + /* Nested in skipped 'if' */ + return source_skipline(false); + } else if(state.isActive()) { + /* The 'if' part got executed. */ + state.setParentActive(false); + /* This is like # else # if but with + * only one # end. 
*/ + state.setActive(false); + return source_skipline(false); + } else { + _expr_token = null; + state.setActive(expr(0) != 0); + tok = expr_token(); /* unget */ + if(tok.getType() == Token.NL) + return tok; + return source_skipline(true); + } + // break; + + case PP_ELSE: + state = states.Peek(); + if(false) + /* Check for 'if' */ + ; + else if(state.sawElse()) { + error(tok, + "#" + "else after #" + "else"); + return source_skipline(false); + } else { + state.setSawElse(); + state.setActive(!state.isActive()); + return source_skipline(warnings.HasFlag(Warning.ENDIF_LABELS)); + } + // break; + + case PP_IFDEF: + push_state(); + if(!isActive()) { + return source_skipline(false); + } else { + tok = source_token_nonwhite(); + // System.out.println("ifdef " + tok); + if(tok.getType() != Token.IDENTIFIER) { + error(tok, + "Expected identifier, not " + + tok.getText()); + return source_skipline(false); + } else { + String text = tok.getText(); + bool exists = + macros.ContainsKey(text); + states.Peek().setActive(exists); + return source_skipline(true); + } + } + // break; + + case PP_IFNDEF: + push_state(); + if(!isActive()) { + return source_skipline(false); + } else { + tok = source_token_nonwhite(); + if(tok.getType() != Token.IDENTIFIER) { + error(tok, + "Expected identifier, not " + + tok.getText()); + return source_skipline(false); + } else { + String text = tok.getText(); + bool exists = + macros.ContainsKey(text); + states.Peek().setActive(!exists); + return source_skipline(true); + } + } + // break; + + case PP_ENDIF: + pop_state(); + return source_skipline(warnings.HasFlag(Warning.ENDIF_LABELS)); + // break; + + case PP_LINE: + return source_skipline(false); + // break; + + case PP_PRAGMA: + if(!isActive()) + return source_skipline(false); + return pragma(); + // break; + + case PP_IMPORT: + if(!isActive()) + return source_skipline(false); + else + return import(); + + default: + /* Actual unknown directives are + * processed above. 
If we get here,
+                     * we succeeded the map lookup but
+                     * failed to handle it. Therefore,
+                     * this is (unconditionally?) fatal. */
+                    // if (isActive()) /* XXX Could be warning. */
+                    throw new Exception(
+                        "Internal error: Unknown directive " +
+                        tok);
+                    // return source_skipline(false);
+                }
+                BREAK_PP: ;
+                break;
+
+
+            }
+            BREAK_LEX: ;
+        }
+    }
+
+    /**
+     * Handles the "import" directive by delegating to
+     * include(false, true).
+     *
+     * NOTE(review): include() is not visible here; the second argument
+     * presumably selects import semantics, since the include and
+     * include_next directives pass false for it -- confirm.
+     */
+    private Token import()
+    {
+        return include(false, true);
+    }
+
+    /**
+     * Returns the next token produced by _token() for which
+     * isWhite() is false; tokens for which it is true are discarded.
+     */
+    public Token token_nonwhite() {
+        Token tok;
+        do {
+            tok = _token();
+        } while (isWhite(tok));
+        return tok;
+    }
+
+    /**
+     * Returns the next preprocessor token.
+     *
+     * When Feature.DEBUG is enabled, the token is also written to
+     * standard error before being returned.
+     *
+     * @see Token
+     * @throws LexerException if a preprocessing error occurs.
+     * @throws InternalException if an unexpected error condition arises.
+     */
+    public Token token() {
+        Token tok = _token();
+        if (getFeature(Feature.DEBUG))
+            System.Console.Error.WriteLine("pp: Returning " + tok);
+        return tok;
+    }
+
+    /* Directive codes stored in ppcmds and switched on by the '#'
+     * handling in _token(). */
+    /* First ppcmd is 1, not 0. */
+    public const int PP_DEFINE = 1;
+    public const int PP_ELIF = 2;
+    public const int PP_ELSE = 3;
+    public const int PP_ENDIF = 4;
+    public const int PP_ERROR = 5;
+    public const int PP_IF = 6;
+    public const int PP_IFDEF = 7;
+    public const int PP_IFNDEF = 8;
+    public const int PP_INCLUDE = 9;
+    public const int PP_LINE = 10;
+    public const int PP_PRAGMA = 11;
+    public const int PP_UNDEF = 12;
+    public const int PP_WARNING = 13;
+    public const int PP_INCLUDE_NEXT = 14;
+    public const int PP_IMPORT = 15;
+
+    /* Maps a directive name (the word after '#') to its PP_* code.
+     * NOTE(review): the generic type arguments appear to be missing
+     * from this declaration in this copy -- confirm against the file. */
+    private static readonly Dictionary ppcmds =
+            new Dictionary();
+
+    /* Registers every directive name this class knows how to dispatch. */
+    static Preprocessor() {
+        ppcmds.Add("define", PP_DEFINE);
+        ppcmds.Add("elif", PP_ELIF);
+        ppcmds.Add("else", PP_ELSE);
+        ppcmds.Add("endif", PP_ENDIF);
+        ppcmds.Add("error", PP_ERROR);
+        ppcmds.Add("if", PP_IF);
+        ppcmds.Add("ifdef", PP_IFDEF);
+        ppcmds.Add("ifndef", PP_IFNDEF);
+        ppcmds.Add("include", PP_INCLUDE);
+        ppcmds.Add("line", PP_LINE);
+        ppcmds.Add("pragma", PP_PRAGMA);
+        ppcmds.Add("undef", PP_UNDEF);
+        ppcmds.Add("warning", PP_WARNING);
+        ppcmds.Add("include_next", PP_INCLUDE_NEXT);
+        ppcmds.Add("import", PP_IMPORT);
+    }
+
+
+    /**
+     * Returns a debugging dump: one " -> source" line per Source from
+     * getSource() up through its parents, followed by one "#macro"
+     * line per entry of getMacros(), in sorted key order.
+     */
+    override public String ToString() {
+        StringBuilder buf = new StringBuilder();
+
+        /* Walk the source stack from the current source outwards. */
+        Source s = getSource();
+        while (s != null) {
+            buf.Append(" -> ").Append(s).Append("\n");
+            s = s.getParent();
+        }
+
+        /* Copy the keys so they can be sorted without touching the map. */
+        Dictionary macros = getMacros();
+        List keys = new List(
+                macros.Keys
+                );
+        keys.Sort();
+        foreach(string key in keys) {
+            Macro macro = macros[key];
+            buf.Append("#").Append("macro ").Append(macro).Append("\n");
+        }
+
+        return buf.ToString();
+    }
+
+    /**
+     * Calls close() on the current source and each of its parents,
+     * then on every Source in inputs.
+     */
+    public void Dispose() {
+        {
+            Source s = source;
+            while (s != null) {
+                s.close();
+                s = s.getParent();
+            }
+        }
+        foreach (Source s in inputs) {
+            s.close();
+        }
+    }
+
+}
+
+}
\ No newline at end of file
diff --git a/PreprocessorListener.cs b/PreprocessorListener.cs
new file mode 100644
index 0000000..1a9b397
--- /dev/null
+++ b/PreprocessorListener.cs
@@ -0,0 +1,86 @@
+/*
+ * Anarres C Preprocessor
+ * Copyright (c) 2007-2008, Shevek
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+ * or implied. See the License for the specific language governing
+ * permissions and limitations under the License.
+ */
+using System;
+
+namespace CppNet {
+
+/**
+ * A handler for preprocessor events, primarily errors and warnings.
+ *
+ * If no PreprocessorListener is installed in a Preprocessor, all
+ * error and warning events will throw an exception. Installing a
+ * listener allows more intelligent handling of these events.
+ */
+public class PreprocessorListener {
+
+    /* Number of handleError() calls since the last clear(). */
+    private int errors;
+    /* Number of handleWarning() calls since the last clear(). */
+    private int warnings;
+
+    /**
+     * Creates a listener with both counters at zero.
+     */
+    public PreprocessorListener() {
+        clear();
+    }
+
+    /**
+     * Resets the error and warning counters to zero.
+     */
+    public void clear() {
+        errors = 0;
+        warnings = 0;
+    }
+
+    /**
+     * Returns the number of errors counted since the last clear().
+     */
+    public int getErrors() {
+        return errors;
+    }
+
+    /**
+     * Returns the number of warnings counted since the last clear().
+     */
+    public int getWarnings() {
+        return warnings;
+    }
+
+    /**
+     * Emits one formatted message; this implementation writes it to
+     * standard error. Virtual so that a subclass can redirect output.
+     */
+    protected virtual void print(String msg) {
+        System.Console.Error.WriteLine(msg);
+    }
+
+    /**
+     * Handles a warning.
+     *
+     * The behaviour of this method is defined by the
+     * implementation. It may simply record the error message, or
+     * it may throw an exception. This implementation counts the
+     * warning and prints "name:line:column: warning: msg".
+     *
+     * Virtual so that a subclass can actually supply that
+     * implementation-defined behaviour.
+     */
+    public virtual void handleWarning(Source source, int line, int column,
+                    String msg) {
+        warnings++;
+        print(source.getName() + ":" + line + ":" + column +
+                ": warning: " + msg);
+    }
+
+    /**
+     * Handles an error.
+     *
+     * The behaviour of this method is defined by the
+     * implementation. It may simply record the error message, or
+     * it may throw an exception. This implementation counts the
+     * error and prints "name:line:column: error: msg".
+     *
+     * Virtual so that a subclass can actually supply that
+     * implementation-defined behaviour.
+     */
+    public virtual void handleError(Source source, int line, int column,
+                    String msg) {
+        errors++;
+        print(source.getName() + ":" + line + ":" + column +
+                ": error: " + msg);
+    }
+
+    /**
+     * Hook for source-change events; this implementation does nothing.
+     */
+    public virtual void handleSourceChange(Source source, String ev) {
+    }
+
+}
+}
\ No newline at end of file
diff --git a/Properties/AssemblyInfo.cs b/Properties/AssemblyInfo.cs
new file mode 100644
index 0000000..e5be9e8
--- /dev/null
+++ b/Properties/AssemblyInfo.cs
@@ -0,0 +1,30 @@
+using System.Reflection;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+
+// General Information about an assembly is controlled through the following
+// set of attributes. Change these attribute values to modify the information
+// associated with an assembly.
+[assembly: AssemblyTitle("CppNet")]
+[assembly: AssemblyProduct("CppNet")]
+
+// Setting ComVisible to false makes the types in this assembly not visible
+// to COM components. If you need to access a type in this assembly from
+// COM, set the ComVisible attribute to true on that type.
+[assembly: ComVisible(false)] + +// The following GUID is for the ID of the typelib if this project is exposed to COM +[assembly: Guid("a972fb74-7a43-4c22-a381-2b8f0f5d7d2c")] + +// Version information for an assembly consists of the following four values: +// +// Major Version +// Minor Version +// Build Number +// Revision +// +// You can specify all the values or you can default the Build and Revision Numbers +// by using the '*' as shown below: +// [assembly: AssemblyVersion("1.0.*")] +[assembly: AssemblyVersion("1.0.0.0")] +[assembly: AssemblyFileVersion("1.0.0.0")] diff --git a/Source.cs b/Source.cs new file mode 100644 index 0000000..484f491 --- /dev/null +++ b/Source.cs @@ -0,0 +1,298 @@ +/* + * Anarres C Preprocessor + * Copyright (c) 2007-2008, Shevek + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express + * or implied. See the License for the specific language governing + * permissions and limitations under the License. + */ +using System; +using System.Collections.Generic; +using boolean = System.Boolean; + + +namespace CppNet +{ + + + /** + * An input to the Preprocessor. + * + * Inputs may come from Files, Strings or other sources. The + * preprocessor maintains a stack of Sources. Operations such as + * file inclusion or token pasting will push a new source onto + * the Preprocessor stack. Sources pop from the stack when they + * are exhausted; this may be transparent or explicit. + * + * BUG: Error messages are not handled properly. 
+     */
+    public abstract class Source : Iterable, Closeable
+    {
+        private Source parent;
+        private boolean autopop;
+        private PreprocessorListener listener;
+        private boolean active;
+        private boolean werror;
+
+        /* LineNumberReader */
+
+        /*
+         * A Java-style inner Iterator wrapping token() is deliberately
+         * absent here: it could not let the LexerException through.
+         */
+
+        /**
+         * Creates a Source with no parent and no listener, marked
+         * active, with autopop and warnings-as-errors switched off.
+         */
+        public Source()
+        {
+            this.active = true;
+            this.werror = false;
+            this.autopop = false;
+            this.listener = null;
+            this.parent = null;
+        }
+
+        /**
+         * Records the parent of this source and whether this source
+         * is to be popped automatically.
+         *
+         * Sources form a singly linked list.
+         */
+        internal void setParent(Source parent, boolean autopop)
+        {
+            this.autopop = autopop;
+            this.parent = parent;
+        }
+
+        /**
+         * Returns the parent of this source, or null if there is none.
+         *
+         * Sources form a singly linked list.
+         */
+        internal Source getParent()
+        {
+            return this.parent;
+        }
+
+        /**
+         * Takes the listener from the given Preprocessor and remembers
+         * whether its warning set contains Warning.ERROR.
+         *
+         * Overrides must invoke this implementation.
+         */
+        internal virtual void init(Preprocessor pp)
+        {
+            this.werror = pp.getWarnings().HasFlag(Warning.ERROR);
+            setListener(pp.getListener());
+        }
+
+        /**
+         * Sets the listener for this Source.
+         *
+         * Normally the Preprocessor does this when a Source is used;
+         * call it yourself when using a Source as a standalone object.
+         */
+        public void setListener(PreprocessorListener pl)
+        {
+            listener = pl;
+        }
+
+        /**
+         * Returns the File currently being lexed.
+         *
+         * This base implementation asks the parent Source, and so on
+         * recursively; with no parent it returns null. A
+         * {@link FileLexerSource} is expected to answer for itself.
+         */
+        internal virtual String getPath()
+        {
+            Source up = getParent();
+            return up != null ? up.getPath() : null;
+        }
+
+        /**
+         * Returns the human-readable name of the current Source;
+         * this base implementation defers to the parent, or returns
+         * null when there is none.
+         */
+        internal virtual String getName()
+        {
+            Source up = getParent();
+            return up != null ? up.getName() : null;
+        }
+
+        /**
+         * Returns the current line number within this Source;
+         * this base implementation defers to the parent, or returns
+         * 0 when there is none.
+         */
+        public virtual int getLine()
+        {
+            Source up = getParent();
+            return up == null ? 0 : up.getLine();
+        }
+
+        /**
+         * Returns the current column number within this Source;
+         * this base implementation defers to the parent, or returns
+         * 0 when there is none.
+         */
+        public virtual int getColumn()
+        {
+            Source up = getParent();
+            return up == null ? 0 : up.getColumn();
+        }
+
+        /**
+         * Returns true if this Source is expanding the given macro.
+         *
+         * This is used to prevent macro recursion. The base
+         * implementation defers to the parent, or returns false
+         * when there is none.
+         */
+        internal virtual boolean isExpanding(Macro m)
+        {
+            Source up = getParent();
+            return up != null ? up.isExpanding(m) : false;
+        }
+
+        /**
+         * Returns true if this Source should be transparently popped
+         * from the input stack.
+         *
+         * Examples of such sources are macro expansions.
+         */
+        internal boolean isAutopop()
+        {
+            return this.autopop;
+        }
+
+        /**
+         * Returns true if this source has line numbers; the base
+         * implementation says it does not.
+         */
+        internal virtual boolean isNumbered()
+        {
+            return false;
+        }
+
+        /* This is an incredibly lazy way of disabling warnings when
+         * the source is not active. */
+        internal void setActive(boolean b)
+        {
+            active = b;
+        }
+
+        /**
+         * Returns the flag last stored by setActive (initially true).
+         */
+        internal boolean isActive()
+        {
+            return this.active;
+        }
+
+        /**
+         * Returns the next Token parsed from this input stream.
+         *
+         * @see Token
+         */
+        public abstract Token token();
+
+        /**
+         * Returns a new SourceIterator over this Source.
+         */
+        public Iterator iterator()
+        {
+            return new SourceIterator(this);
+        }
+
+        /**
+         * Skips tokens until the end of line.
+         *
+         * Reaching EOF first raises a "No newline before end of file"
+         * warning and yields a synthetic NL token at the EOF position.
+         *
+         * @param white true if only whitespace is permitted on the
+         * remainder of the line; any other token then raises an
+         * "Unexpected nonwhite token" warning.
+         * @return the NL token.
+         */
+        public Token skipline(boolean white)
+        {
+            while(true) {
+                Token tok = token();
+                int type = tok.getType();
+
+                /* This may contain one or more newlines. */
+                if(type == Token.NL)
+                    return tok;
+
+                if(type == Token.EOF) {
+                    /* There ought to be a newline before EOF.
+                     * At least, in any skipline context. */
+                    /* XXX Are we sure about this? */
+                    warning(tok.getLine(), tok.getColumn(),
+                        "No newline before end of file");
+                    return new Token(Token.NL,
+                        tok.getLine(), tok.getColumn(),
+                        "\n");
+                }
+
+                /* Comments and whitespace are always tolerated. */
+                bool blank = type == Token.CCOMMENT
+                    || type == Token.CPPCOMMENT
+                    || type == Token.WHITESPACE;
+                if(white && !blank)
+                    warning(tok.getLine(), tok.getColumn(),
+                        "Unexpected nonwhite token");
+            }
+        }
+
+        /**
+         * Reports an error to the listener, or throws a
+         * LexerException when no listener is installed.
+         */
+        protected void error(int line, int column, String msg)
+        {
+            if(listener == null)
+                throw new LexerException("Error at " + line + ":" + column + ": " + msg);
+            listener.handleError(this, line, column, msg);
+        }
+
+        /**
+         * Reports a warning. With warnings-as-errors set it is routed
+         * through error(); otherwise it goes to the listener, or is
+         * thrown as a LexerException when no listener is installed.
+         */
+        protected void warning(int line, int column, String msg)
+        {
+            if(werror) {
+                error(line, column, msg);
+                return;
+            }
+            if(listener == null)
+                throw new LexerException("Warning at " + line + ":" + column + ": " + msg);
+            listener.handleWarning(this, line, column, msg);
+        }
+
+        /**
+         * Releases this Source; the base implementation does nothing.
+         */
+        public virtual void close()
+        {
+        }
+    }
+
+}
\ No newline at end of file
diff --git a/SourceIterator.cs b/SourceIterator.cs
new file mode 100644
index 0000000..f0f6887
--- /dev/null
+++ b/SourceIterator.cs
@@ -0,0 +1,98 @@
+/*
+ * Anarres C Preprocessor
+ * Copyright (c) 2007-2008, Shevek
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+ * or implied. See the License for the specific language governing
+ * permissions and limitations under the License.
+ */
+
+using System;
+using System.Collections.Generic;
+using System.IO;
+
+using boolean = System.Boolean;
+
+namespace CppNet
+{
+
+    /**
+     * An Iterator for {@link Source Sources},
+     * returning {@link Token Tokens}.
+     *
+     * One token of lookahead is buffered in tok.
+     */
+    public class SourceIterator : Iterator
+    {
+        private Source source;
+        private Token tok;
+
+        /**
+         * Creates an iterator over the given Source with nothing
+         * buffered yet.
+         */
+        public SourceIterator(Source s)
+        {
+            this.tok = null;
+            this.source = s;
+        }
+
+        /**
+         * Fills the lookahead slot if it is empty.
+         *
+         * A LexerException is rethrown inside IllegalStateException;
+         * an IOException inside ApplicationException.
+         */
+        private void advance()
+        {
+            if(tok != null)
+                return;
+            try {
+                tok = source.token();
+            } catch(LexerException e) {
+                throw new IllegalStateException(e);
+            } catch(IOException e) {
+                throw new ApplicationException("",e);
+            }
+        }
+
+        /**
+         * Returns true if the enclosed Source has more tokens.
+         *
+         * The EOF token is never returned by the iterator.
+         * Exceptions from the Source surface as described on advance().
+         */
+        public boolean hasNext()
+        {
+            advance();
+            boolean more = tok.getType() != Token.EOF;
+            return more;
+        }
+
+        /**
+         * Returns the next token from the enclosed Source and clears
+         * the lookahead slot.
+         *
+         * The EOF token is never returned by the iterator.
+         * @throws ArgumentOutOfRangeException if no token remains.
+         */
+        public Token next()
+        {
+            if(!hasNext())
+                throw new ArgumentOutOfRangeException();
+            Token current = tok;
+            tok = null;
+            return current;
+        }
+
+        /**
+         * Not supported.
+         *
+         * @throws NotSupportedException always.
+         */
+        public void remove()
+        {
+            throw new NotSupportedException();
+        }
+    }
+
+
+}
\ No newline at end of file
diff --git a/State.cs b/State.cs
new file mode 100644
index 0000000..324cc78
--- /dev/null
+++ b/State.cs
@@ -0,0 +1,89 @@
+/*
+ * Anarres C Preprocessor
+ * Copyright (c) 2007-2008, Shevek
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+ * or implied. See the License for the specific language governing
+ * permissions and limitations under the License.
+ */
+using System;
+
+namespace CppNet
+{
+
+    /* pp */
+    /* Three flags describing one level of conditional nesting:
+     * whether the enclosing level is active, whether this level is
+     * active, and whether an else has been recorded for it. */
+    class State
+    {
+        bool _parent;
+        bool _active = true;
+        bool _sawElse = false;
+
+        /* pp */
+        /* Outermost level: parent and self both active. */
+        internal State()
+        {
+            _parent = true;
+        }
+
+        /* pp */
+        /* Nested level: its parent flag is true only when the given
+         * state and that state's own parent are both active. */
+        internal State(State parent)
+        {
+            _parent = parent.isParentActive() && parent.isActive();
+        }
+
+        /* Required for #elif */
+        /* pp */
+        internal void setParentActive(bool b)
+        {
+            _parent = b;
+        }
+
+        /* pp */
+        internal bool isParentActive()
+        {
+            return this._parent;
+        }
+
+        /* pp */
+        internal void setActive(bool b)
+        {
+            _active = b;
+        }
+
+        /* pp */
+        internal bool isActive()
+        {
+            return this._active;
+        }
+
+        /* pp */
+        internal void setSawElse()
+        {
+            this._sawElse = true;
+        }
+
+        /* pp */
+        internal bool sawElse()
+        {
+            return this._sawElse;
+        }
+
+        /* Renders the three flags for debugging. */
+        public override String ToString()
+        {
+            return String.Concat(
+                "parent=", _parent.ToString(),
+                ", active=" + _active,
+                ", sawelse=" + _sawElse);
+        }
+    }
+}
\ No newline at end of file
diff --git a/StringLexerSource.cs b/StringLexerSource.cs
new file mode 100644
index 0000000..0399598
---
/dev/null
+++ b/StringLexerSource.cs
@@ -0,0 +1,55 @@
+/*
+ * Anarres C Preprocessor
+ * Copyright (c) 2007-2008, Shevek
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+ * or implied. See the License for the specific language governing
+ * permissions and limitations under the License.
+ */
+using System;
+using System.IO;
+
+namespace CppNet {
+
+/**
+ * A Source for lexing a String.
+ *
+ * This class is used by token pasting, but can be used by user
+ * code.
+ */
+public class StringLexerSource : LexerSource {
+
+    /**
+     * Creates a new Source that lexes the given String through a
+     * StringReader.
+     *
+     * @param ppvalid true if preprocessor directives are to be
+     * honoured within the string.
+     */
+    public StringLexerSource(String str, bool ppvalid)
+        : base(new StringReader(str), ppvalid)
+    {
+    }
+
+    /**
+     * Creates a new Source that lexes the given String through a
+     * StringReader.
+     *
+     * Preprocessor directives are not honoured within the string.
+     */
+    public StringLexerSource(String str)
+        : base(new StringReader(str), false)
+    {
+    }
+
+    /**
+     * Returns the fixed description "string literal".
+     */
+    public override String ToString()
+    {
+        const String description = "string literal";
+        return description;
+    }
+}
+
+}
\ No newline at end of file
diff --git a/Token.cs b/Token.cs
new file mode 100644
index 0000000..6ecf707
--- /dev/null
+++ b/Token.cs
@@ -0,0 +1,353 @@
+/*
+ * Anarres C Preprocessor
+ * Copyright (c) 2007-2008, Shevek
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+ * or implied. See the License for the specific language governing
+ * permissions and limitations under the License.
+ */
+using System;
+using System.Text;
+
+namespace CppNet {
+
+    /**
+     * A Preprocessor token.
+     *
+     * A token carries a type code, a line/column position (-1 when
+     * it has none), its text, and an optional semantic value.
+     *
+     * @see Preprocessor
+     */
+    public sealed class Token
+    {
+        // public const int EOF = -1;
+
+        private int type;
+        private int line;
+        private int column;
+        private Object value;
+        private String text;
+
+        /**
+         * Creates a token with a position, text and semantic value.
+         */
+        public Token(int type, int line, int column,
+                String text, Object value)
+        {
+            this.type = type;
+            this.line = line;
+            this.column = column;
+            this.text = text;
+            this.value = value;
+        }
+
+        /**
+         * Creates a token with a position and text but no value.
+         */
+        public Token(int type, int line, int column, String text) :
+            this(type, line, column, text, null)
+        {
+        }
+
+        /* pp */
+        /* Position-less token: line and column are both -1. */
+        internal Token(int type, String text, Object value) :
+            this(type, -1, -1, text, value)
+        {
+        }
+
+        /* pp */
+        internal Token(int type, String text) :
+            this(type, text, null)
+        {
+        }
+
+        /* pp */
+        /* Position-less token whose text is the canonical text for
+         * the type, or "TOK" + type for codes at or above _TOKENS. */
+        internal Token(int type) :
+            this(type, type < _TOKENS ? texts[type] : "TOK" + type)
+        {
+        }
+
+        /**
+         * Returns the semantic type of this token.
+         */
+        public int getType()
+        {
+            return type;
+        }
+
+        /* Overwrites the stored position of this token. */
+        internal void setLocation(int line, int column)
+        {
+            this.line = line;
+            this.column = column;
+        }
+
+        /**
+         * Returns the line at which this token started.
+         *
+         * Lines are numbered from zero.
+         */
+        public int getLine()
+        {
+            return line;
+        }
+
+        /**
+         * Returns the column at which this token started.
+         *
+         * Columns are numbered from zero.
+         */
+        public int getColumn()
+        {
+            return column;
+        }
+
+        /**
+         * Returns the original or generated text of this token.
+         *
+         * This is distinct from the semantic value of the token.
+         *
+         * @see #getValue()
+         */
+        public String getText()
+        {
+            return text;
+        }
+
+        /**
+         * Returns the semantic value of this token.
+         *
+         * For strings, this is the parsed String.
+         * For integers, this is an Integer object.
+         * For other token types, as appropriate.
+         *
+         * @see #getText()
+         */
+        public Object getValue()
+        {
+            return value;
+        }
+
+        /**
+         * Returns a description of this token, for debugging purposes.
+         *
+         * The form is [NAME@line,column]:"text"=value, where the
+         * position parts are left out when they are -1 and the value
+         * part when it is null.
+         *
+         * This must be an override of Object.ToString(): string
+         * concatenation and StringBuilder.Append(object) dispatch
+         * through the virtual method, so a merely hiding method would
+         * print the type name instead of this description.
+         */
+        public override String ToString()
+        {
+            StringBuilder buf = new StringBuilder();
+
+            buf.Append('[').Append(getTokenName(type));
+            if(line != -1) {
+                buf.Append('@').Append(line);
+                if(column != -1)
+                    buf.Append(',').Append(column);
+            }
+            buf.Append("]:");
+            if(text != null)
+                buf.Append('"').Append(text).Append('"');
+            else if(type > 3 && type < 256)
+                buf.Append((char)type);
+            else
+                buf.Append('<').Append(type).Append('>');
+            if(value != null)
+                buf.Append('=').Append(value);
+            return buf.ToString();
+        }
+
+        /**
+         * Returns the descriptive name of the given token type.
+         *
+         * This is mostly used for stringification and debugging.
+         * Codes outside the name table yield "Invalid" + type; codes
+         * inside it without a registered name yield "Unknown" + type.
+         */
+        public static String getTokenName(int type)
+        {
+            if(type < 0)
+                return "Invalid" + type;
+            if(type >= names.Length)
+                return "Invalid" + type;
+            if(names[type] == null)
+                return "Unknown" + type;
+            return names[type];
+        }
+
+        /** The token type AND_EQ. */
+        public const int AND_EQ = 257;
+        /** The token type ARROW. */
+        public const int ARROW = 258;
+        /** The token type CHARACTER. */
+        public const int CHARACTER = 259;
+        /** The token type CCOMMENT. */
+        public const int CCOMMENT = 260;
+        /** The token type CPPCOMMENT. */
+        public const int CPPCOMMENT = 261;
+        /** The token type DEC. */
+        public const int DEC = 262;
+        /** The token type DIV_EQ. */
+        public const int DIV_EQ = 263;
+        /** The token type ELLIPSIS. */
+        public const int ELLIPSIS = 264;
+        /** The token type EOF. */
+        public const int EOF = 265;
+        /** The token type EQ. */
+        public const int EQ = 266;
+        /** The token type GE. */
+        public const int GE = 267;
+        /** The token type HASH. */
+        public const int HASH = 268;
+        /** The token type HEADER. */
+        public const int HEADER = 269;
+        /** The token type IDENTIFIER. */
+        public const int IDENTIFIER = 270;
+        /** The token type INC. */
+        public const int INC = 271;
+        /** The token type INTEGER. */
+        public const int INTEGER = 272;
+        /** The token type LAND. */
+        public const int LAND = 273;
+        /** The token type LAND_EQ. */
+        public const int LAND_EQ = 274;
+        /** The token type LE. */
+        public const int LE = 275;
+        /** The token type LITERAL. */
+        public const int LITERAL = 276;
+        /** The token type LOR. */
+        public const int LOR = 277;
+        /** The token type LOR_EQ. */
+        public const int LOR_EQ = 278;
+        /** The token type LSH. */
+        public const int LSH = 279;
+        /** The token type LSH_EQ. */
+        public const int LSH_EQ = 280;
+        /** The token type MOD_EQ. */
+        public const int MOD_EQ = 281;
+        /** The token type MULT_EQ. */
+        public const int MULT_EQ = 282;
+        /** The token type NE. */
+        public const int NE = 283;
+        /** The token type NL. */
+        public const int NL = 284;
+        /** The token type OR_EQ. */
+        public const int OR_EQ = 285;
+        /** The token type PASTE. */
+        public const int PASTE = 286;
+        /** The token type PLUS_EQ. */
+        public const int PLUS_EQ = 287;
+        /** The token type RANGE. */
+        public const int RANGE = 288;
+        /** The token type RSH. */
+        public const int RSH = 289;
+        /** The token type RSH_EQ. */
+        public const int RSH_EQ = 290;
+        /** The token type STRING. */
+        public const int STRING = 291;
+        /** The token type SUB_EQ. */
+        public const int SUB_EQ = 292;
+        /** The token type WHITESPACE. */
+        public const int WHITESPACE = 293;
+        /** The token type XOR_EQ. */
+        public const int XOR_EQ = 294;
+        /** The token type M_ARG. */
+        public const int M_ARG = 295;
+        /** The token type M_PASTE. */
+        public const int M_PASTE = 296;
+        /** The token type M_STRING. */
+        public const int M_STRING = 297;
+        /** The token type P_LINE. */
+        public const int P_LINE = 298;
+        /** The token type INVALID. */
+        public const int INVALID = 299;
+        /**
+         * The number of possible semantic token types.
+         *
+         * Please note that not all token types below 255 are used.
+         */
+        public const int _TOKENS = 300;
+
+        /** The position-less space token. */
+        /* pp */
+        public static readonly Token space = new Token(WHITESPACE, -1, -1, " ");
+
+        /* Per-type debug names and canonical texts, indexed by type code. */
+        private static readonly String[] names = new String[_TOKENS];
+        private static readonly String[] texts = new String[_TOKENS];
+
+        /* Fills both tables: codes 0..254 map to their own character,
+         * then the named multi-character token types are registered. */
+        static Token()
+        {
+            for(int i = 0; i < 255; i++) {
+                texts[i] = ((char)i).ToString();
+                names[i] = texts[i];
+            }
+
+            texts[AND_EQ] = "&=";
+            texts[ARROW] = "->";
+            texts[DEC] = "--";
+            texts[DIV_EQ] = "/=";
+            texts[ELLIPSIS] = "...";
+            texts[EQ] = "==";
+            texts[GE] = ">=";
+            texts[HASH] = "#";
+            texts[INC] = "++";
+            texts[LAND] = "&&";
+            texts[LAND_EQ] = "&&=";
+            texts[LE] = "<=";
+            texts[LOR] = "||";
+            texts[LOR_EQ] = "||=";
+            texts[LSH] = "<<";
+            texts[LSH_EQ] = "<<=";
+            texts[MOD_EQ] = "%=";
+            texts[MULT_EQ] = "*=";
+            texts[NE] = "!=";
+            texts[NL] = "\n";
+            texts[OR_EQ] = "|=";
+            /* We have to split the two hashes or Velocity eats them. */
+            texts[PASTE] = "#" + "#";
+            texts[PLUS_EQ] = "+=";
+            texts[RANGE] = "..";
+            texts[RSH] = ">>";
+            texts[RSH_EQ] = ">>=";
+            texts[SUB_EQ] = "-=";
+            texts[XOR_EQ] = "^=";
+
+            names[AND_EQ] = "AND_EQ";
+            names[ARROW] = "ARROW";
+            names[CHARACTER] = "CHARACTER";
+            names[CCOMMENT] = "CCOMMENT";
+            names[CPPCOMMENT] = "CPPCOMMENT";
+            names[DEC] = "DEC";
+            names[DIV_EQ] = "DIV_EQ";
+            names[ELLIPSIS] = "ELLIPSIS";
+            names[EOF] = "EOF";
+            names[EQ] = "EQ";
+            names[GE] = "GE";
+            names[HASH] = "HASH";
+            names[HEADER] = "HEADER";
+            names[IDENTIFIER] = "IDENTIFIER";
+            names[INC] = "INC";
+            names[INTEGER] = "INTEGER";
+            names[LAND] = "LAND";
+            names[LAND_EQ] = "LAND_EQ";
+            names[LE] = "LE";
+            names[LITERAL] = "LITERAL";
+            names[LOR] = "LOR";
+            names[LOR_EQ] = "LOR_EQ";
+            names[LSH] = "LSH";
+            names[LSH_EQ] = "LSH_EQ";
+            names[MOD_EQ] = "MOD_EQ";
+            names[MULT_EQ] = "MULT_EQ";
+            names[NE] = "NE";
+            names[NL] = "NL";
+            names[OR_EQ] = "OR_EQ";
+            names[PASTE] = "PASTE";
+            names[PLUS_EQ] = "PLUS_EQ";
+            names[RANGE] = "RANGE";
+            names[RSH] = "RSH";
+            names[RSH_EQ] = "RSH_EQ";
+            names[STRING] = "STRING";
+            names[SUB_EQ] = "SUB_EQ";
+            names[WHITESPACE] = "WHITESPACE";
+            names[XOR_EQ] = "XOR_EQ";
+            names[M_ARG] = "M_ARG";
+            names[M_PASTE] = "M_PASTE";
+            names[M_STRING] = "M_STRING";
+            names[P_LINE] = "P_LINE";
+            names[INVALID] = "INVALID";
+        }
+
+    }
+}
\ No newline at end of file
diff --git a/TokenSnifferSource.cs b/TokenSnifferSource.cs
new file mode 100644
index 0000000..1512b2e
--- /dev/null
+++ b/TokenSnifferSource.cs
@@ -0,0 +1,54 @@
+/*
+ * Anarres C Preprocessor
+ * Copyright (c) 2007-2008, Shevek
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+ * or implied. See the License for the specific language governing
+ * permissions and limitations under the License.
+ */
+
+/* NOTE(review): this file has a .cs name but its content is still
+ * Java (package/import/extends/throws). It presumably is not part of
+ * the C# build -- confirm against the project file, then port or
+ * delete it. */
+package org.anarres.cpp;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileReader;
+import java.io.IOException;
+import java.io.PushbackReader;
+import java.io.Reader;
+import java.io.StringReader;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Iterator;
+
+import static org.anarres.cpp.Token.*;
+
+/**
+ * A Source that forwards every token of its parent Source while
+ * also appending each non-EOF token to a caller-supplied list.
+ */
+@Deprecated
+/* pp */ class TokenSnifferSource extends Source {
+    /* Receives a copy of every non-EOF token that passes through. */
+    private List target;
+
+    /* pp */ TokenSnifferSource(List target) {
+        this.target = target;
+    }
+
+    /**
+     * Returns the parent's next token, recording it in the target
+     * list first unless it is EOF.
+     */
+    public Token token()
+                        throws IOException,
+                                LexerException {
+        Token tok = getParent().token();
+        if (tok.getType() != EOF)
+            target.add(tok);
+        return tok;
+    }
+
+    /** Returns the parent's string form. */
+    public String toString() {
+        return getParent().toString();
+    }
+}
diff --git a/VirtualFile.cs b/VirtualFile.cs
new file mode 100644
index 0000000..2995a02
--- /dev/null
+++ b/VirtualFile.cs
@@ -0,0 +1,33 @@
+/*
+ * Anarres C Preprocessor
+ * Copyright (c) 2007-2008, Shevek
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+ * or implied. See the License for the specific language governing
+ * permissions and limitations under the License.
+ */ + +namespace CppNet { + +/** + * An extremely lightweight virtual file interface. It declares six members - isFile(), getPath(), getName(), getParentFile(), getChildFile(name) and getSource() - and no implementation is visible in this file, so the per-member notes below are assumptions to confirm. + */ +public interface VirtualFile { + // public String getParent(); + bool isFile(); /* NOTE(review): presumably true for a regular file rather than a directory - confirm against implementations */ + string getPath(); /* NOTE(review): presumably the full path of this entry - confirm */ + string getName(); /* NOTE(review): presumably the last component of the path - confirm */ + VirtualFile getParentFile(); /* returns another VirtualFile; NOTE(review): presumably the containing directory, behaviour at the root is not visible here - confirm */ + VirtualFile getChildFile(string name); /* returns a VirtualFile for the given name; NOTE(review): presumably resolved relative to this entry - confirm */ + Source getSource(); /* returns a Source (type declared elsewhere in the project); NOTE(review): presumably a token source over this file's contents - confirm */ +} + +} \ No newline at end of file diff --git a/VirtualFileSystem.cs b/VirtualFileSystem.cs new file mode 100644 index 0000000..e2eadd3 --- /dev/null +++ b/VirtualFileSystem.cs @@ -0,0 +1,30 @@ +/* + * Anarres C Preprocessor + * Copyright (c) 2007-2008, Shevek + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express + * or implied. See the License for the specific language governing + * permissions and limitations under the License. + */ +using System; + +namespace CppNet +{ + /** + * An extremely lightweight virtual file system interface. It declares two getFile overloads, each returning a VirtualFile; no implementation is visible in this file. + */ + public interface VirtualFileSystem + { + VirtualFile getFile(String path); /* looks up a VirtualFile from a single path string */ + VirtualFile getFile(String dir, String name); /* overload taking a directory string and a name; NOTE(review): presumably the entry called name inside dir - confirm */ + } + +} \ No newline at end of file diff --git a/Warning.cs b/Warning.cs new file mode 100644 index 0000000..8d70132 --- /dev/null +++ b/Warning.cs @@ -0,0 +1,38 @@ +/* + * Anarres C Preprocessor + * Copyright (c) 2007-2008, Shevek + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express + * or implied. See the License for the specific language governing + * permissions and limitations under the License. + */ +using System; + +namespace CppNet +{ + /** + * Warning classes which may optionally be emitted by the Preprocessor. Declared with [Flags]: NONE is 0 and every other member is a distinct single bit (1 << 0 through 1 << 5), so members can be combined with bitwise OR. NOTE(review): the member names look like GCC-style -W options (e.g. -Wtrigraphs, -Wundef) - confirm the intended meaning of each against the Preprocessor code. + */ + [Flags] + public enum Warning + { + NONE = 0, /* no bits set */ + TRIGRAPHS = 1 << 0, /* value 1 */ + // TRADITIONAL, (commented out: not a member of this enum, no bit reserved for it) + IMPORT = 1 << 1, /* value 2 */ + UNDEF = 1 << 2, /* value 4 */ + UNUSED_MACROS = 1 << 3, /* value 8 */ + ENDIF_LABELS = 1 << 4, /* value 16 */ + ERROR = 1 << 5, /* value 32 */ + // SYSTEM_HEADERS (commented out: not a member of this enum) + } + +} \ No newline at end of file