This commit is contained in:
Scott Yih 2016-12-07 12:03:02 -08:00
Родитель f1b4ebf821
Коммит 76dd3b6dfe
12 изменённых файлов: 1082 добавлений и 0 удалений

9
FastRDFStore/App.config Normal file
Просмотреть файл

@ -0,0 +1,9 @@
<?xml version="1.0" encoding="utf-8" ?>
<!-- Runtime configuration for the FastRDFStore executable. -->
<configuration>
<startup>
<!-- Run on the v4.0 CLR, targeting .NET Framework 4.5. -->
<supportedRuntime version="v4.0" sku=".NETFramework,Version=v4.5" />
</startup>
<runtime>
<!-- Allows arrays whose total size exceeds 2 GB on 64-bit platforms.
     NOTE(review): presumably required by the very large in-memory index tables; confirm before removing. -->
<gcAllowVeryLargeObjects enabled="true" />
</runtime>
</configuration>

Просмотреть файл

@ -0,0 +1,671 @@
using System;
using System.Collections.Generic;
using System.IO;
using System.IO.Compression;
using System.Linq;
using System.Text;
namespace FastRDFStore
{
public class FastRDFStore : IFastRDFStore
{
// Size of each in-memory page that datapages.bin is split into when loaded.
private const int pageSize = 1024*1024*1024; // 1GB
// Guards the one-time index load in Initialize. readonly so the lock identity can never change.
private static readonly Object datalock = new Object();
// Set to true once LoadIndex has completed. volatile because Initialize reads it outside the lock
// (check / lock / re-check), so the write must be visible to other threads.
private static volatile bool initialized;
// partition key (see GetSubjectKey) -> subject -> (offset into the data pages, compressed byte count)
private static Dictionary<string, Dictionary<string, Tuple<long, int>>> midToCompressedBlobLocation;
// Contents of datapages.bin, read in pageSize chunks; the last page may be shorter.
private static List<byte[]> datapages;
// subject -> (offset of the first chunk, byte counts of the consecutive compressed chunks)
private static Dictionary<string, Tuple<long, List<int>>> largeMidsToCompressedBlobsLocations;
// partition key (see GetSubjectKey) -> mid -> flag read from cvtnodes.bin. IsCVT only tests key presence.
private static Dictionary<string, Dictionary<string, bool>> cvtNodes;
// mid -> entity name
private static Dictionary<string, string> namesTable;
// predicate -> dominant object type; only holds predicates whose objects were seen with mixed types.
private static Dictionary<string, FBNodeType> predObjTypeTable;
// MIDs listed in ghost_mid.txt; objects with these MIDs are dropped from results.
private static HashSet<string> setGhostMid;
// Logger created by Initialize; null until Initialize has been called.
private static Logger logger;
// Directory the index files are loaded from.
private static string datadir;
/// <summary>
/// Creates the logger and loads the index files from <paramref name="datadirParam"/> the first time
/// it is called. Later calls replace the logger and directory but do not reload the index.
/// </summary>
/// <param name="datadirParam">Directory containing the index and data files.</param>
/// <param name="logFilename">Log file path handed to <see cref="Logger"/>.</param>
/// <exception cref="Exception">Any failure raised while loading the index is logged and rethrown.</exception>
public static void Initialize(string datadirParam, string logFilename)
{
    datadir = datadirParam;
    logger = new Logger(logFilename);
    logger.Log("Initializing FastRDFStore");

    try
    {
        // Unlocked check first so already-initialized callers never contend for the lock.
        if (initialized == false)
        {
            lock (datalock)
            {
                // Re-check under the lock: another thread may have finished loading while we waited.
                bool loadedMeanwhile = initialized;
                if (!loadedMeanwhile)
                {
                    LoadIndex();
                    initialized = true;
                }
            }
        }
    }
    catch (Exception loadFailure)
    {
        // Record the failure, then let it propagate so the caller knows initialization failed.
        logger.LogException("Exception when initializing FastRDFStore", loadFailure);
        throw;
    }

    logger.Log("Done initializing");
}
/// <summary>
/// Lists the predicates leaving <paramref name="subjectMid"/>. Predicates whose object is not a CVT node
/// are returned as-is; predicates whose object is a CVT node are expanded one hop and returned as
/// "predicate cvtPredicate" (space separated). A fixed set of bookkeeping predicates is filtered out.
/// </summary>
/// <param name="subjectMid">MID of the subject; null or shorter than two characters yields an empty result.</param>
/// <returns>Sorted, de-duplicated predicate strings; an empty array when uninitialized or on failure.</returns>
public string[] GetOutboundPredicates(string subjectMid)
{
    // Before Initialize completes there is no data, and the logger may not exist yet; touching it
    // here (or in the catch below) would throw out of this method.
    if (!initialized) return new string[] {};
    try
    {
        logger.Log("GetOutboundPredicates called for " + subjectMid);
        if (subjectMid == null || subjectMid.Length < 2) return new string[] {};
        var results = GetPredicateObjectPairsForSubject(subjectMid);

        // Predicates pointing directly at a value or entity.
        var adjoiningPredicates = results.AsParallel().Where(e => !e.Item1.StartsWith("wikipedia.") &&
                                                                  e.Item1 != "type.object.type" &&
                                                                  e.Item1 != "type.object.key" &&
                                                                  e.Item1 != "type.object.name" &&
                                                                  e.Item1 != "type.object.permission" &&
                                                                  e.Item1 != "common.topic.alias" &&
                                                                  e.Item1 != "common.topic.description" &&
                                                                  e.Item1 != "common.topic.image" &&
                                                                  !IsCVT(e.Item2)).Select(e => e.Item1);
        var adjPredCnt = adjoiningPredicates.GroupBy(x => x).ToDictionary(g => g.Key, g => g.Count());

        // Predicates pointing at a CVT node.
        var cvtHopResults = results.AsParallel().Where(e => IsCVT(e.Item2) &&
                                                            !e.Item1.StartsWith("wikipedia.") &&
                                                            e.Item1 != "common.topic.article" &&
                                                            e.Item1 != "common.topic.webpage" &&
                                                            e.Item1 != "common.topic.description" &&
                                                            e.Item1 != "common.document.text" &&
                                                            e.Item1 != "common.topic.image");
        var cvtPredCnt = cvtHopResults.GroupBy(x => x.Item1).ToDictionary(g => g.Key, g => g.Count());

        // A predicate seen with both CVT and non-CVT objects is kept only on the side where it
        // occurs more often. On a tie both sides are kept and the conflict is logged.
        var conflictPreds = new HashSet<string>(adjPredCnt.Keys.Intersect(cvtPredCnt.Keys));
        foreach (var cPred in conflictPreds)
        {
            int adjCount = adjPredCnt[cPred];
            int cvtCount = cvtPredCnt[cPred];
            if (adjCount > cvtCount)
                cvtPredCnt.Remove(cPred);
            else if (adjCount < cvtCount)
                adjPredCnt.Remove(cPred);
            else
                logger.Log("Cannot resolve adj-cvt predicate conflict: " + subjectMid + " " + cPred);
        }

        // Expand each surviving CVT predicate into "predicate cvtPredicate" pairs.
        var cvtHopPredicates = cvtHopResults.Where(e => cvtPredCnt.ContainsKey(e.Item1))
            .Select(e => new
            {
                predicate = e.Item1,
                cvt = e.Item2,
                cvtPredicates = GetPredicateObjectPairsForSubject(e.Item2).Select(pair => pair.Item1)
                    .Where(predicate2 => predicate2 != "type.object.type" &&
                                         predicate2 != "type.object.key" &&
                                         predicate2 != "common.topic.description" &&
                                         predicate2 != "common.document.text").Distinct()
            })
            .SelectMany(e => e.cvtPredicates.Select(predicate2 => e.predicate + " " + predicate2));

        var allPredicates = adjoiningPredicates.Where(x => adjPredCnt.ContainsKey(x)).Union(cvtHopPredicates).OrderBy(e => e);
        return allPredicates.ToArray();
    }
    catch (Exception e)
    {
        logger.LogException("GetOutboundPredicates failed", e);
        return new string[] { };
    }
}
/// <summary>
/// Looks up the name of each MID in <paramref name="entMids"/>.
/// </summary>
/// <param name="entMids">MIDs to resolve.</param>
/// <returns>
/// An array parallel to <paramref name="entMids"/>, holding "" for any MID that is null or has no
/// name; an empty array when the input is null, the store is uninitialized, or the lookup fails.
/// </returns>
public string[] GetEntityNames(string[] entMids)
{
    // No names table (and possibly no logger) exists before Initialize completes.
    if (!initialized || entMids == null) return new string[] { };
    try
    {
        var names = new string[entMids.Length];
        for (int i = 0; i < entMids.Length; i++)
        {
            string mid = entMids[i];
            string name;
            // A null key would make the dictionary throw and fail the whole batch, so treat it
            // as "no name". TryGetValue also avoids a second lookup.
            names[i] = (mid != null && namesTable.TryGetValue(mid, out name)) ? name : "";
        }
        return names;
    }
    catch (Exception e)
    {
        logger.LogException("GetEntityNames failed", e);
        return new string[] { };
    }
}
/// <summary>
/// Public entry point that returns the subject with its predicates and objects, without exposing
/// the dictionary of graph nodes built along the way.
/// </summary>
/// <param name="subjectMid">MID of the subject to describe.</param>
/// <param name="maxPerPredicate">Maximum number of objects kept per predicate.</param>
/// <param name="followCVT">Whether CVT objects are expanded.</param>
/// <returns>The populated subject node, or null when the lookup fails.</returns>
public SimpleFBObject GetSimpleObjectPredicatesAndCVTs(string subjectMid, int maxPerPredicate = int.MaxValue, bool followCVT = true)
{
    SimpleFBObject subjectNode;
    try
    {
        logger.Log("GetSimpleObjectPredicatesAndCVTs called for "+subjectMid);
        Dictionary<string, FBObject> unusedNodes;
        subjectNode = GetSimpleObjectPredicatesAndCVTs(subjectMid, out unusedNodes, maxPerPredicate, followCVT);
    }
    catch (Exception failure)
    {
        logger.LogException("GetSimpleObjectPredicatesAndCVTs failed", failure);
        subjectNode = null;
    }
    return subjectNode;
}
/// <summary>
/// Builds the node for <paramref name="subjectMid"/>, fills in its predicates and objects, and hands
/// back every node created during the traversal, keyed by MID.
/// </summary>
/// <param name="subjectMid">MID of the subject to describe.</param>
/// <param name="nodesInGraph">Receives the MID-to-node dictionary, including the subject itself.</param>
/// <param name="maxPerPredicate">Maximum number of objects kept per predicate.</param>
/// <param name="followCVT">Whether CVT objects are expanded.</param>
/// <returns>The populated subject node.</returns>
private SimpleFBObject GetSimpleObjectPredicatesAndCVTs(string subjectMid,
    out Dictionary<string, FBObject> nodesInGraph, int maxPerPredicate = int.MaxValue, bool followCVT = true)
{
    var subjectNode = new SimpleFBObject { Mid = subjectMid, Name = GetName(subjectMid) };

    // Register the subject first so references back to it reuse this instance.
    var seenNodes = new Dictionary<string, FBObject>();
    seenNodes[subjectMid] = subjectNode;

    subjectNode.Objects = GetPredicatesAndNamedObjectsIncludingCVTs(seenNodes, subjectMid, maxPerPredicate,
        followCVT);

    nodesInGraph = seenNodes;
    return subjectNode;
}
/// <summary>
/// Collects the objects hanging off <paramref name="subjectMid"/>, grouped by predicate. Objects whose
/// type disagrees with the predicate's recorded type, ghost MIDs, and unnamed entities are skipped.
/// </summary>
/// <param name="existingNodes">MID-to-node dictionary shared across the traversal; new entity and CVT nodes are added to it.</param>
/// <param name="subjectMid">MID whose predicate/object pairs are read.</param>
/// <param name="maxPerPredicate">Maximum number of accepted objects per predicate.</param>
/// <param name="followCVT">When true, CVT objects are expanded one level; when false they are not added.</param>
/// <returns>Predicates and objects hanging off of subjectMid. We guarantee that each predicate appears only once in the array</returns>
private PredicateAndObjects[] GetPredicatesAndNamedObjectsIncludingCVTs(
    Dictionary<string, FBObject> existingNodes, string subjectMid, int maxPerPredicate = int.MaxValue,
    bool followCVT = true)
{
    List<Tuple<string, string>> pairs = GetPredicateObjectPairsForSubject(subjectMid);
    var objectsByPredicate = new Dictionary<string, List<FBObject>>();
    var acceptedPerPredicate = new Dictionary<string, int>();

    foreach (Tuple<string, string> pair in pairs)
    {
        string predicate = pair.Item1;
        string obj = pair.Item2;

        // Classify the object: CVT wins over entity, anything else is a plain value.
        FBNodeType objType;
        if (IsCVT(obj))
            objType = FBNodeType.CVT;
        else if (IsEntity(obj))
            objType = FBNodeType.Entity;
        else
            objType = FBNodeType.Value;

        // Drop objects whose type contradicts the type recorded for this predicate.
        FBNodeType expectedType;
        bool hasExpectedType = predObjTypeTable.TryGetValue(predicate, out expectedType);
        if (hasExpectedType && objType != expectedType)
            continue;

        // Drop MIDs listed in the pre-compiled ghost MID table.
        if (objType != FBNodeType.Value && setGhostMid.Contains(obj))
            continue;

        // An entity that is not a CVT node and has no name is also treated as a ghost MID.
        // This may happen because some tuples are not indexed when their predicates are excluded.
        if (objType == FBNodeType.Entity && string.IsNullOrEmpty(GetName(obj)))
            continue;

        // Enforce the per-predicate cap (a missing entry counts as zero).
        int acceptedSoFar;
        acceptedPerPredicate.TryGetValue(predicate, out acceptedSoFar);
        if (acceptedSoFar >= maxPerPredicate)
            continue;
        acceptedPerPredicate[predicate] = acceptedSoFar + 1;

        // Find or create the answer list for this predicate.
        List<FBObject> bucket;
        if (!objectsByPredicate.TryGetValue(predicate, out bucket))
        {
            bucket = new List<FBObject>();
            objectsByPredicate[predicate] = bucket;
        }

        switch (objType)
        {
            case FBNodeType.Entity:
            {
                FBObject entityNode;
                if (!existingNodes.TryGetValue(obj, out entityNode))
                {
                    entityNode = new SimpleFBObject { Mid = obj, Name = GetName(obj) };
                    existingNodes[obj] = entityNode;
                }
                bucket.Add(entityNode);
                break;
            }
            case FBNodeType.Value:
            {
                bucket.Add(new ValueFBObject { Value = obj });
                break;
            }
            default: // FBNodeType.CVT
            {
                if (!followCVT)
                    break;
                FBObject cvtNode;
                if (!existingNodes.TryGetValue(obj, out cvtNode))
                {
                    var freshCvt = new CVTFBObject();
                    freshCvt.Mid = obj;
                    freshCvt.Objects = GetPredicatesAndNamedObjectsIncludingCVTs(existingNodes, obj,
                        maxPerPredicate, false /* don't follow CVT nodes from this CVT node */);
                    existingNodes[obj] = freshCvt;
                    cvtNode = freshCvt;
                }
                bucket.Add(cvtNode);
                break;
            }
        }
    }

    // Convert to the return type (arrays instead of lists and dictionaries)
    var listing = new PredicateAndObjects[objectsByPredicate.Count];
    int slot = 0;
    foreach (KeyValuePair<string, List<FBObject>> entry in objectsByPredicate)
    {
        listing[slot] = new PredicateAndObjects() {Predicate = entry.Key, Objects = entry.Value.ToArray()};
        slot++;
    }
    return listing;
}
/// <summary>
/// Reduces <paramref name="node"/>'s objects to the single entry for <paramref name="predicate"/>.
/// </summary>
/// <param name="node">Node whose Objects array is replaced in place.</param>
/// <param name="predicate">Predicate to keep.</param>
/// <remarks>
/// If the predicate is absent (or the node has no objects) the array becomes empty. Previously this
/// case fell through and stored a one-element array containing null.
/// </remarks>
private void FilterToSinglePredicate(SimpleFBObject node, string predicate)
{
    if (node.Objects != null)
    {
        foreach (PredicateAndObjects p in node.Objects)
        {
            if (p.Predicate == predicate)
            {
                node.Objects = new PredicateAndObjects[1] {p};
                return;
            }
        }
    }
    // Didn't find the predicate
    node.Objects = new PredicateAndObjects[0];
}
/// <summary>
/// Returns the subject restricted to one predicate, with the answer entities reached through that
/// predicate expanded one further level.
/// </summary>
/// <param name="subjectMid">MID of the subject.</param>
/// <param name="predicate">
/// Either a single predicate, or two predicates separated by a space where the first leads to a CVT
/// node and the second leads from the CVT node to the answers.
/// </param>
/// <returns>The filtered subject node; null if the predicate has more than two parts or the lookup fails.</returns>
public SimpleFBObject GetSimpleObjectFilteredPredicateAndObjects(string subjectMid, string predicate)
{
    try
    {
        logger.Log("GetSimpleObjectFilteredPredicateAndObjects called for subj=" + subjectMid + ", pred="+predicate);
        Dictionary<string, FBObject> nodesInGraph;
        SimpleFBObject subjectNode = GetSimpleObjectPredicatesAndCVTs(subjectMid, out nodesInGraph, int.MaxValue, true);

        string[] hops = predicate.Split(' ');
        if (hops.Length < 1 || hops.Length > 2)
            return null;

        FilterToSinglePredicate(subjectNode, hops[0]);
        if (subjectNode.Objects.Length == 0)
            return subjectNode; // Doesn't contain the desired predicate

        PredicateAndObjects selected = subjectNode.Objects[0];
        if (hops.Length == 2)
        {
            // Two-hop form: go through each CVT object, then along the second predicate.
            foreach (FBObject candidate in selected.Objects)
            {
                var cvt = candidate as CVTFBObject;
                if (cvt == null)
                    continue;
                foreach (PredicateAndObjects cvtEdge in cvt.Objects)
                {
                    if (cvtEdge.Predicate != hops[1])
                        continue;
                    foreach (FBObject target in cvtEdge.Objects)
                    {
                        var answer = target as SimpleFBObject;
                        if (answer == null || answer.Objects != null)
                            continue;
                        // We need to expand the objects for this node
                        answer.Objects = GetPredicatesAndNamedObjectsIncludingCVTs(nodesInGraph, answer.Mid,
                            int.MaxValue, false);
                    }
                }
            }
        }
        else
        {
            // One-hop form: the predicate's entity objects are the answers.
            foreach (FBObject candidate in selected.Objects)
            {
                var answer = candidate as SimpleFBObject;
                if (answer == null || answer.Objects != null)
                    continue;
                // We need to expand the objects for this node
                answer.Objects = GetPredicatesAndNamedObjectsIncludingCVTs(nodesInGraph, answer.Mid,
                    int.MaxValue, false);
            }
        }
        return subjectNode;
    }
    catch (Exception failure)
    {
        logger.LogException("GetSimpleObjectFilteredPredicateAndObjects failed", failure);
        return null;
    }
}
/// <summary>
/// Follows <paramref name="chainPredicates"/> in order starting at <paramref name="startMid"/> and
/// returns every sequence of nodes that satisfies the whole chain.
/// </summary>
/// <param name="startMid">Node the chain starts from (not included in the returned sequences).</param>
/// <param name="chainPredicates">Predicates to follow, one per hop.</param>
/// <returns>One array per complete path, each holding the node reached at every hop; null if the chain is null/empty, no path exists, or the lookup fails.</returns>
public string[][] FindNodeSquencesOnPredicateChain(string startMid, string[] chainPredicates)
{
    try
    {
        //logger.Log("FindNodeSquencesOnPredicateChain called for subj=" + startMid + ", chainPreds=" + string.Join(" ", chainPredicates));
        if (chainPredicates == null || chainPredicates.Length == 0)
            return null;

        // Objects reachable from the start node over the first predicate of the chain.
        string firstPredicate = chainPredicates[0];
        var reachable = new List<string>();
        foreach (Tuple<string, string> edge in GetPredicateObjectPairsForSubject(startMid))
        {
            if (edge.Item1 == firstPredicate)
                reachable.Add(edge.Item2);
        }
        if (reachable.Count == 0)
            return null;

        if (chainPredicates.Length == 1) // done
        {
            var singles = new string[reachable.Count][];
            for (int i = 0; i < singles.Length; i++)
                singles[i] = new[] {reachable[i]};
            return singles;
        }

        // more than one predicate in the chain: recurse on the remaining predicates
        string[] remainingPredicates = chainPredicates.Skip(1).ToArray();
        var sequences = new List<string[]>();
        foreach (string node in reachable)
        {
            string[][] tails = FindNodeSquencesOnPredicateChain(node, remainingPredicates);
            if (tails == null) // cannot continue
                continue;
            foreach (string[] tail in tails)
            {
                var sequence = new string[tail.Length + 1];
                sequence[0] = node;
                tail.CopyTo(sequence, 1);
                sequences.Add(sequence);
            }
        }
        return sequences.Count > 0 ? sequences.ToArray() : null;
    }
    catch (Exception failure)
    {
        logger.LogException("FindNodeSquencesOnPredicateChain failed", failure);
        return null;
    }
}
/// <summary>Returns the name recorded for <paramref name="mid"/> in the names table.</summary>
/// <param name="mid">MID to look up; may be null.</param>
/// <returns>The name, or null when <paramref name="mid"/> is null or has no entry.</returns>
private string GetName(string mid)
{
    string name = null;
    if (mid != null)
    {
        // TryGetValue leaves name as null when the MID is not present.
        namesTable.TryGetValue(mid, out name);
    }
    return name;
}
/// <summary>Tells whether <paramref name="subject"/> is listed in the CVT node table.</summary>
/// <param name="subject">MID or value to test.</param>
/// <returns>True when the subject has an entry in its partition of the table.</returns>
/// <remarks>
/// NOTE(review): only key presence is tested; the boolean stored with each entry is never read.
/// Confirm the table never contains entries flagged false.
/// </remarks>
private bool IsCVT(string subject)
{
    Dictionary<string, bool> partition;
    return cvtNodes.TryGetValue(GetSubjectKey(subject), out partition)
           && partition.ContainsKey(subject);
}
/// <summary>
/// Heuristically decides whether <paramref name="obj"/> is an entity: it has a known MID prefix
/// or a non-empty entry in the names table.
/// </summary>
/// <param name="obj">Object string taken from a predicate/object pair.</param>
/// <returns>True when the object looks like an entity.</returns>
private bool IsEntity(string obj)
{
    // We're missing this information in the compressed dataset.
    // For now, we'll do the following, but long-term consider fixing this to make it explicit in the dataset
    if (obj.StartsWith("m.") || obj.StartsWith("g."))
        return true;
    if (obj.StartsWith("en.")) // "en." is to support SEMPRE Freebase
        return true;
    return !string.IsNullOrEmpty(GetName(obj));
}
/// <summary>
/// Reads and decompresses every (predicate, object) pair stored for <paramref name="subject"/>.
/// </summary>
/// <param name="subject">Subject MID to look up.</param>
/// <returns>
/// The pairs in stored order; an empty list when the subject is unknown or when reading fails
/// (failures are logged, and any partially read pairs are discarded).
/// </returns>
/// <remarks>
/// Subjects in the large-MID table are stored as several consecutive gzip chunks; all others are a
/// single chunk located through the partitioned index. Each chunk is UTF-16 text with one
/// tab-separated "predicate\tobject" pair per line.
/// </remarks>
private List<Tuple<string, string>> GetPredicateObjectPairsForSubject(string subject)
{
    try
    {
        long offset;
        var compressedChunksLengths = new List<int>();
        Tuple<long, List<int>> largeLocation;
        if (largeMidsToCompressedBlobsLocations.TryGetValue(subject, out largeLocation))
        {
            offset = largeLocation.Item1;
            compressedChunksLengths.AddRange(largeLocation.Item2);
        }
        else
        {
            // An unknown subject is an expected outcome, not an error: return an empty result
            // instead of throwing (and logging) a KeyNotFoundException.
            Dictionary<string, Tuple<long, int>> partition;
            Tuple<long, int> location;
            if (!midToCompressedBlobLocation.TryGetValue(GetSubjectKey(subject), out partition) ||
                !partition.TryGetValue(subject, out location))
                return new List<Tuple<string, string>>();
            offset = location.Item1;
            compressedChunksLengths.Add(location.Item2);
        }

        var toReturn = new List<Tuple<string, string>>();
        foreach (var length in compressedChunksLengths)
        {
            int startPage = (int)(offset/pageSize);
            int startIndex = (int)(offset%pageSize);
            byte[] compressedResult;
            int compressedResultIndex;
            if (length <= pageSize - startIndex)
            {
                // The chunk lies inside one page: decompress straight out of the page buffer.
                compressedResult = datapages[startPage];
                compressedResultIndex = startIndex;
            }
            else
            {
                // The chunk spans pages: stitch it into a contiguous buffer. Copy sizes come from
                // the bytes still needed, so a chunk ending exactly on a page boundary is copied
                // in full (the old "(offset + length) % pageSize" bound copied nothing then).
                compressedResult = new byte[length];
                compressedResultIndex = 0;
                int copied = 0;
                int page = startPage;
                int pageIndex = startIndex;
                while (copied < length)
                {
                    int take = Math.Min(pageSize - pageIndex, length - copied);
                    Buffer.BlockCopy(datapages[page], pageIndex, compressedResult, copied, take);
                    copied += take;
                    page++;
                    pageIndex = 0;
                }
            }

            // Read-only view so the shared page buffer cannot be modified through the stream.
            // Disposing the reader also releases the gzip stream.
            using (var memorystream = new MemoryStream(compressedResult, compressedResultIndex, length, false))
            using (var gzipstream = new GZipStream(memorystream, CompressionMode.Decompress, false))
            using (var reader = new StreamReader(gzipstream, Encoding.Unicode))
            {
                string line;
                while ((line = reader.ReadLine()) != null)
                {
                    var split = line.Split('\t');
                    if (split.Length == 2 && !string.IsNullOrEmpty(split[0]))
                        toReturn.Add(new Tuple<string, string>(split[0], split[1]));
                }
            }
            // Chunks of a large subject are stored back to back.
            offset += length;
        }
        return toReturn;
    }
    catch (Exception e)
    {
        logger.LogException("GetPredicateObjectPairsForSubject failed", e);
        return new List<Tuple<string, string>>();
    }
}
/// <summary>
/// Computes the partition key for <paramref name="subject"/>: its first four characters when it
/// starts with "m." or "g.", otherwise its first two, truncated to the subject's own length.
/// </summary>
/// <param name="subject">Subject string; must not be null.</param>
/// <returns>The partition key; "" for an empty subject (previously this threw).</returns>
private static string GetSubjectKey(string subject)
{
    // NOTE(review): the prefix test keeps the original comparison so keys stay identical to those
    // in the prebuilt index files; confirm how the index builder computes keys before changing it.
    int keyLength = (subject.StartsWith("m.") || subject.StartsWith("g.")) ? 4 : 2;
    return subject.Substring(0, Math.Min(keyLength, subject.Length));
}
/// <summary>
/// Loads every index and data file from <c>datadir</c> into the static tables. Each file stream is
/// closed as soon as its table has been read.
/// </summary>
/// <remarks>Called once from Initialize while holding the data lock.</remarks>
private static void LoadIndex()
{
    string midToOffsetPath = Path.Combine(datadir, "midToOffset.bin");
    string largeMidToOffsetPath = Path.Combine(datadir, "largeMidToOffset.bin");
    string datapagesPath = Path.Combine(datadir, "datapages.bin");
    string cvtNodesPath = Path.Combine(datadir, "cvtnodes.bin");
    string namesTablePath = Path.Combine(datadir, "namesTable.bin");
    string predicateObjTypePath = Path.Combine(datadir, "predicate.objtype.txt");
    string ghostMidPath = Path.Combine(datadir, "ghost_mid.txt");

    logger.Log("Reading the ghost MID table");
    setGhostMid = new HashSet<string>(File.ReadAllLines(ghostMidPath));

    // Each line is: predicate \t value count \t entity count \t CVT count.
    logger.Log("Reading the Predicate Objective Type table");
    predObjTypeTable = new Dictionary<string, FBNodeType>();
    foreach (var x in File.ReadLines(predicateObjTypePath)
        .Select(ln => ln.Split('\t'))
        .Select(
            f =>
                new
                {
                    pred = f[0],
                    valcnt = long.Parse(f[1]),
                    entcnt = long.Parse(f[2]),
                    cvtcnt = long.Parse(f[3])
                }))
    {
        if ((x.valcnt == 0 && x.entcnt == 0) ||
            (x.entcnt == 0 && x.cvtcnt == 0) ||
            (x.valcnt == 0 && x.cvtcnt == 0)) // no inconsistency in the data, skip
            continue;

        // Mixed object types: record the most frequent one (ties favour Value, then Entity).
        if (x.valcnt >= Math.Max(x.entcnt, x.cvtcnt))
            predObjTypeTable.Add(x.pred, FBNodeType.Value);
        else if (x.entcnt >= Math.Max(x.valcnt, x.cvtcnt))
            predObjTypeTable.Add(x.pred, FBNodeType.Entity);
        else
            predObjTypeTable.Add(x.pred, FBNodeType.CVT);
    }

    // The deserializers do not close the stream they are given, so each file is opened in a
    // using block here; otherwise the handles stay open until finalization.
    logger.Log("Reading names table");
    using (var namesStream = File.OpenRead(namesTablePath))
        namesTable = DeserializeRelationTable(namesStream);

    logger.Log("Reading index");
    using (var midToOffsetStream = File.OpenRead(midToOffsetPath))
        midToCompressedBlobLocation = Deserialize(midToOffsetStream);
    using (var largeMidToOffsetStream = File.OpenRead(largeMidToOffsetPath))
        largeMidsToCompressedBlobsLocations = DeserializeSimple(largeMidToOffsetStream);
    using (var cvtNodesStream = File.OpenRead(cvtNodesPath))
        cvtNodes = DeserializeCVTNodes(cvtNodesStream);

    // Read the data file in pageSize chunks; a short (possibly empty) page marks the end.
    datapages = new List<byte[]>();
    using (var binreader = new BinaryReader(File.OpenRead(datapagesPath)))
    {
        while (true)
        {
            var page = binreader.ReadBytes(pageSize);
            datapages.Add(page);
            if (page.Length < pageSize)
                break;
        }
    }
}
/// <summary>
/// Reads the partitioned subject index: a partition count, then for each partition its key, an
/// entry count, and that many (subject, offset, byte count) records.
/// </summary>
/// <param name="stream">Stream positioned at the start of the index; left open.</param>
/// <returns>partition key -> subject -> (offset, byte count).</returns>
private static Dictionary<string, Dictionary<string, Tuple<long, int>>> Deserialize(Stream stream)
{
    var input = new BinaryReader(stream);
    var index = new Dictionary<string, Dictionary<string, Tuple<long, int>>>();

    int partitionCount = input.ReadInt32();
    for (int partitionsLeft = partitionCount; partitionsLeft > 0; partitionsLeft--)
    {
        string partitionKey = input.ReadString();
        int entryCount = input.ReadInt32();
        var partition = new Dictionary<string, Tuple<long, int>>(entryCount);
        for (int entriesLeft = entryCount; entriesLeft > 0; entriesLeft--)
        {
            // Fields are read in on-disk order: subject, offset, byte count.
            string subject = input.ReadString();
            long blobOffset = input.ReadInt64();
            int blobLength = input.ReadInt32();
            partition.Add(subject, Tuple.Create(blobOffset, blobLength));
        }
        index.Add(partitionKey, partition);
    }
    return index;
}
/// <summary>
/// Reads the partitioned CVT node table: a partition count, then for each partition its key, an
/// entry count, and that many (mid, flag) records.
/// </summary>
/// <param name="stream">Stream positioned at the start of the table; left open.</param>
/// <returns>partition key -> mid -> stored flag.</returns>
private static Dictionary<string, Dictionary<string, bool>> DeserializeCVTNodes(Stream stream)
{
    var input = new BinaryReader(stream);
    var table = new Dictionary<string, Dictionary<string, bool>>();

    int partitionCount = input.ReadInt32();
    for (int partitionsLeft = partitionCount; partitionsLeft > 0; partitionsLeft--)
    {
        string partitionKey = input.ReadString();
        int entryCount = input.ReadInt32();
        var partition = new Dictionary<string, bool>(entryCount);
        for (int entriesLeft = entryCount; entriesLeft > 0; entriesLeft--)
        {
            string mid = input.ReadString();
            bool flag = input.ReadBoolean();
            partition.Add(mid, flag);
        }
        table.Add(partitionKey, partition);
    }
    return table;
}
/// <summary>
/// Reads the large-subject table: an entry count, then for each entry the subject, the starting
/// offset, a chunk count, and that many chunk byte counts.
/// </summary>
/// <param name="stream">Stream positioned at the start of the table; left open.</param>
/// <returns>subject -> (starting offset, chunk byte counts in stored order).</returns>
private static Dictionary<string, Tuple<long, List<int>>> DeserializeSimple(Stream stream)
{
    var input = new BinaryReader(stream);
    var table = new Dictionary<string, Tuple<long, List<int>>>();

    int entryCount = input.ReadInt32();
    for (int entriesLeft = entryCount; entriesLeft > 0; entriesLeft--)
    {
        string subject = input.ReadString();
        long firstChunkOffset = input.ReadInt64();
        int chunkCount = input.ReadInt32();

        var chunkLengths = new List<int>();
        for (int chunksLeft = chunkCount; chunksLeft > 0; chunksLeft--)
        {
            chunkLengths.Add(input.ReadInt32());
        }
        table.Add(subject, Tuple.Create(firstChunkOffset, chunkLengths));
    }
    return table;
}
/// <summary>
/// Reads a string-to-string table: an entry count followed by that many (key, value) pairs.
/// A key that occurs more than once keeps its last value.
/// </summary>
/// <param name="stream">Stream positioned at the start of the table; left open.</param>
/// <returns>The key-to-value dictionary.</returns>
private static Dictionary<string, string> DeserializeRelationTable(Stream stream)
{
    var input = new BinaryReader(stream);
    int entryCount = input.ReadInt32();
    var table = new Dictionary<string, string>(entryCount);

    for (int entriesLeft = entryCount; entriesLeft > 0; entriesLeft--)
    {
        string entryKey = input.ReadString();
        string entryValue = input.ReadString();
        // Indexer assignment (not Add) so duplicate keys overwrite instead of throwing.
        table[entryKey] = entryValue;
    }
    return table;
}
}
}

Просмотреть файл

@ -0,0 +1,100 @@
<?xml version="1.0" encoding="utf-8"?>
<Project ToolsVersion="12.0" DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<Import Project="$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props" Condition="Exists('$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props')" />
<PropertyGroup>
<Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration>
<Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform>
<ProjectGuid>{9E64D23C-9C19-49DA-9903-12F151140AE7}</ProjectGuid>
<OutputType>Exe</OutputType>
<AppDesignerFolder>Properties</AppDesignerFolder>
<RootNamespace>FastRDFStore</RootNamespace>
<AssemblyName>FastRDFStore</AssemblyName>
<TargetFrameworkVersion>v4.5</TargetFrameworkVersion>
<FileAlignment>512</FileAlignment>
<SccProjectName>SAK</SccProjectName>
<SccLocalPath>SAK</SccLocalPath>
<SccAuxPath>SAK</SccAuxPath>
<SccProvider>SAK</SccProvider>
</PropertyGroup>
<PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' ">
<PlatformTarget>AnyCPU</PlatformTarget>
<DebugSymbols>true</DebugSymbols>
<DebugType>full</DebugType>
<Optimize>false</Optimize>
<OutputPath>bin\Debug\</OutputPath>
<DefineConstants>DEBUG;TRACE</DefineConstants>
<ErrorReport>prompt</ErrorReport>
<WarningLevel>4</WarningLevel>
<Prefer32Bit>false</Prefer32Bit>
</PropertyGroup>
<PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|AnyCPU' ">
<PlatformTarget>AnyCPU</PlatformTarget>
<DebugType>pdbonly</DebugType>
<Optimize>true</Optimize>
<OutputPath>bin\Release\</OutputPath>
<DefineConstants>TRACE</DefineConstants>
<ErrorReport>prompt</ErrorReport>
<WarningLevel>4</WarningLevel>
<Prefer32Bit>false</Prefer32Bit>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)' == 'Debug|x64'">
<DebugSymbols>true</DebugSymbols>
<OutputPath>bin\Debug\</OutputPath>
<DefineConstants>DEBUG;TRACE</DefineConstants>
<DebugType>full</DebugType>
<PlatformTarget>x64</PlatformTarget>
<ErrorReport>prompt</ErrorReport>
<CodeAnalysisRuleSet>MinimumRecommendedRules.ruleset</CodeAnalysisRuleSet>
<Prefer32Bit>true</Prefer32Bit>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)' == 'Release|x64'">
<OutputPath>bin\Release\</OutputPath>
<DefineConstants>TRACE</DefineConstants>
<Optimize>true</Optimize>
<DebugType>pdbonly</DebugType>
<PlatformTarget>x64</PlatformTarget>
<ErrorReport>prompt</ErrorReport>
<CodeAnalysisRuleSet>MinimumRecommendedRules.ruleset</CodeAnalysisRuleSet>
<Prefer32Bit>true</Prefer32Bit>
</PropertyGroup>
<ItemGroup>
<Reference Include="CommandLine, Version=1.9.71.2, Culture=neutral, PublicKeyToken=de6f01bd326f8c32, processorArchitecture=MSIL">
<HintPath>..\packages\CommandLineParser.1.9.71\lib\net45\CommandLine.dll</HintPath>
<Private>True</Private>
</Reference>
<Reference Include="System" />
<Reference Include="System.Core" />
<Reference Include="System.Runtime.Serialization" />
<Reference Include="System.ServiceModel" />
<Reference Include="System.Xml.Linq" />
<Reference Include="System.Data.DataSetExtensions" />
<Reference Include="Microsoft.CSharp" />
<Reference Include="System.Data" />
<Reference Include="System.Xml" />
</ItemGroup>
<ItemGroup>
<Compile Include="..\SharedDataTypes\FreebaseCommonTypes.cs">
<Link>FreebaseCommonTypes.cs</Link>
</Compile>
<Compile Include="FastRDFStore.cs" />
<Compile Include="IFastRDFStore.cs" />
<Compile Include="Logger.cs" />
<Compile Include="Program.cs" />
<Compile Include="Properties\AssemblyInfo.cs" />
</ItemGroup>
<ItemGroup>
<None Include="App.config" />
<None Include="packages.config" />
</ItemGroup>
<ItemGroup>
<WCFMetadata Include="Service References\" />
</ItemGroup>
<Import Project="$(MSBuildToolsPath)\Microsoft.CSharp.targets" />
<!-- To modify your build process, add your task inside one of the targets below and uncomment it.
Other similar extension points exist, see Microsoft.Common.targets.
<Target Name="BeforeBuild">
</Target>
<Target Name="AfterBuild">
</Target>
-->
</Project>

Просмотреть файл

@ -0,0 +1,21 @@
<?xml version="1.0" encoding="utf-8"?>
<Project ToolsVersion="14.0" DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<PropertyGroup>
<VisualStudioVersion Condition="'$(VisualStudioVersion)' == ''">14.0</VisualStudioVersion>
<VSToolsPath Condition="'$(VSToolsPath)' == ''">$(MSBuildExtensionsPath32)\Microsoft\VisualStudio\v$(VisualStudioVersion)</VSToolsPath>
</PropertyGroup>
<Import Project="$(VSToolsPath)\DotNet\Microsoft.DotNet.Props" Condition="'$(VSToolsPath)' != ''" />
<PropertyGroup Label="Globals">
<ProjectGuid>8db47aba-3494-448e-86e5-4926fb8d876a</ProjectGuid>
<RootNamespace>FastRDFStoreCore</RootNamespace>
<BaseIntermediateOutputPath Condition="'$(BaseIntermediateOutputPath)'=='' ">.\obj</BaseIntermediateOutputPath>
<OutputPath Condition="'$(OutputPath)'=='' ">.\bin\</OutputPath>
<TargetFrameworkVersion>v4.5.2</TargetFrameworkVersion>
</PropertyGroup>
<PropertyGroup>
<SchemaVersion>2.0</SchemaVersion>
</PropertyGroup>
<Import Project="$(VSToolsPath)\DotNet\Microsoft.DotNet.targets" Condition="'$(VSToolsPath)' != ''" />
</Project>

Просмотреть файл

@ -0,0 +1,86 @@
using System;
using System.Runtime.Serialization;
namespace FastRDFStore
{
/// <summary>Kind of object a predicate can point at.</summary>
public enum FBNodeType
{
// A simple literal value, e.g. "42".
Value,
// An entity identified by a MID.
Entity,
// A CVT node.
CVT
};
/// <summary>A predicate together with the objects it points at.</summary>
[DataContract(IsReference = true)]
public class PredicateAndObjects
{
/// <summary>The predicate.</summary>
[DataMember]
public string Predicate { get; set; }
/// <summary>The objects attached to the predicate.</summary>
[DataMember]
public FBObject[] Objects { get; set; }
}
// A FBObject can either be:
// - a simple value ("42"): ValueFBObject(value="42")
// - an entity (Ireland): SimpleFBObject(mid="m.012wgb", name="Ireland")
// - a CVT node (dated integer value) CVTFBObject
/// <summary>
/// Base type for objects in the graph. The KnownType attributes list the three concrete
/// subclasses declared in this file.
/// </summary>
[DataContract(IsReference = true)]
[KnownType(typeof (ValueFBObject))]
[KnownType(typeof (SimpleFBObject))]
[KnownType(typeof (CVTFBObject))]
public abstract class FBObject
{
/// <summary>Returns a display string for the object.</summary>
public abstract string PrettyString();
/// <summary>Returns the object's name or literal value, depending on the subclass.</summary>
public abstract string GetNameOrValue();
/// <summary>Returns the object's MID; the base implementation returns the empty string.</summary>
public virtual string GetMid() { return String.Empty; }
}
/// <summary>A simple literal value such as "42". It does not override GetMid.</summary>
[DataContract(IsReference = true)]
public class ValueFBObject : FBObject
{
/// <summary>The literal value.</summary>
[DataMember]
public string Value { get; set; }
/// <summary>Returns <see cref="Value"/>.</summary>
public override string PrettyString() { return Value; }
/// <summary>Returns <see cref="Value"/>.</summary>
public override string GetNameOrValue() { return Value; }
}
/// <summary>An entity, identified by a MID and carrying a name (e.g. mid="m.012wgb", name="Ireland").</summary>
[DataContract(IsReference = true)]
public class SimpleFBObject : FBObject
{
/// <summary>The entity's MID.</summary>
[DataMember]
public string Mid { get; set; }
/// <summary>The entity's name.</summary>
[DataMember]
public string Name { get; set; }
/// <summary>Predicates and objects hanging off this entity.</summary>
[DataMember]
public PredicateAndObjects[] Objects { get; set; }
// Guaranteed that each predicate appears only once. May be null
/// <summary>Returns <see cref="Name"/>.</summary>
public override string PrettyString() { return Name; }
/// <summary>Returns <see cref="Name"/>.</summary>
public override string GetNameOrValue() { return Name; }
/// <summary>Returns <see cref="Mid"/>.</summary>
public override string GetMid() { return Mid; }
}
/// <summary>A CVT node: it has a MID and its own predicates and objects, but no name.</summary>
[DataContract(IsReference = true)]
public class CVTFBObject : FBObject
{
/// <summary>The CVT node's MID.</summary>
[DataMember]
public string Mid { get; set; } // mattri: Is this needed? If not used, could remove to save network traffic
/// <summary>Predicates and objects hanging off this CVT node.</summary>
[DataMember]
public PredicateAndObjects[] Objects { get; set; }
// Guaranteed that each predicate appears only once. mattri: Can a CVT node have the same predicate coming off of it twice, with different objects? If not, replace with just an array of <predicate,object> pairs
/// <summary>Returns "[CVT " followed by the MID and "]".</summary>
public override string PrettyString() { return "[CVT " + Mid + "]"; }
/// <summary>Returns the empty string.</summary>
public override string GetNameOrValue() { return ""; }
/// <summary>Returns <see cref="Mid"/>.</summary>
public override string GetMid() { return Mid; }
}
}

Просмотреть файл

@ -0,0 +1,24 @@
using System.Runtime.Serialization;
using System.ServiceModel;
namespace FastRDFStore
{
/// <summary>WCF service contract for querying the store.</summary>
[ServiceContract(Namespace = "urn:ps")]
public interface IFastRDFStore
{
/// <summary>
/// Lists the predicates leaving a subject. Predicates that lead to a CVT node are returned as
/// two space-separated predicates (subject-to-CVT, then CVT-to-object).
/// </summary>
[OperationContract]
string[] GetOutboundPredicates(string subjectMid);
/// <summary>Returns the name for each MID, in the same order as the input.</summary>
[OperationContract]
string[] GetEntityNames(string[] entMids);
/// <summary>
/// Returns the subject with its predicates and objects. <paramref name="maxPerPredicate"/> caps
/// the objects kept per predicate; <paramref name="followCVT"/> controls whether CVT objects are expanded.
/// </summary>
[OperationContract]
SimpleFBObject GetSimpleObjectPredicatesAndCVTs(string subjectMid, int maxPerPredicate, bool followCVT);
/// <summary>
/// Returns the subject restricted to a single predicate, given either as one predicate or as two
/// space-separated predicates that pass through a CVT node.
/// </summary>
[OperationContract]
SimpleFBObject GetSimpleObjectFilteredPredicateAndObjects(string subjectMid, string predicate);
/// <summary>
/// Follows a chain of predicates from a start node and returns the node sequences that satisfy
/// the whole chain.
/// </summary>
[OperationContract]
string[][] FindNodeSquencesOnPredicateChain(string startMid, string[] chainPredicates);
}
}

44
FastRDFStore/Logger.cs Normal file
Просмотреть файл

@ -0,0 +1,44 @@
using System;
using System.IO;
using System.Text;
namespace FastRDFStore
{
/// <summary>
/// Minimal logger that writes tab-separated lines (timestamp, severity, message) to the console and
/// appends them to a log file. Logging is disabled entirely when the file name is null or blank.
/// </summary>
public class Logger
{
    public enum Severity { INFO, WARNING, ERROR, EXCEPTION };

    // Serializes console and file writes. readonly so the lock identity cannot be swapped.
    public readonly object outputLock = new object();
    private readonly string logFilename;

    /// <summary>Creates a logger that appends to <paramref name="logFilename"/>.</summary>
    /// <param name="logFilename">Log file path; null, empty or whitespace disables logging.</param>
    public Logger(string logFilename) { this.logFilename = logFilename; }

    /// <summary>
    /// Replaces line breaks and tabs with the literal text "\n" and "\t" so every log entry stays
    /// on one line with intact columns. A null or empty input yields the empty string.
    /// </summary>
    private static string EscapeNewlineAndTab(string s)
    {
        if (string.IsNullOrEmpty(s))
            return string.Empty;
        return s.Replace("\r\n", "\\n").Replace("\n\r", "\\n").Replace("\r", "\\n").Replace("\n", "\\n").Replace("\t", "\\t");
    }

    /// <summary>Logs <paramref name="message"/> together with the exception details at EXCEPTION severity.</summary>
    /// <param name="message">Context for the failure.</param>
    /// <param name="e">The exception; when null only the message is logged.</param>
    public void LogException(string message, Exception e)
    {
        if (e == null)
        {
            // The logger must not itself throw while a failure is being reported.
            Log(message, Severity.EXCEPTION);
            return;
        }
        string fullMessage = message + ". Exception info: " + e.ToString();
        if (e.InnerException != null)
            fullMessage += " *** With InnerException: " + e.InnerException.ToString();
        Log(fullMessage, Severity.EXCEPTION);
    }

    /// <summary>Writes one log line to the console and appends it to the log file.</summary>
    /// <param name="message">Text to log; null is logged as an empty message.</param>
    /// <param name="severity">Severity column value.</param>
    public void Log(string message, Severity severity = Severity.INFO)
    {
        if (string.IsNullOrWhiteSpace(logFilename)) // empty log file name -> skip logging
            return;
        lock (outputLock)
        {
            // Use "sortable" datetime for later log file processing convenience
            string line = DateTime.Now.ToString("s") + "\t" + severity + "\t" + EscapeNewlineAndTab(message);
            Console.WriteLine(line);
            File.AppendAllText(logFilename, line + Environment.NewLine);
        }
    }
}
}

83
FastRDFStore/Program.cs Normal file
Просмотреть файл

@ -0,0 +1,83 @@
using System;
using System.ServiceModel;
using System.Threading;
using CommandLine;
// This is the project that starts the FastRDFStore WCF service.
namespace FastRDFStore
{
/// <summary>Command-line options of the FastRDFStore service host, bound by the CommandLine parser.</summary>
internal class CommandLineArguments
{
    // Fallback help width used when the console width cannot be read.
    private const int DefaultDisplayWidth = 80;

    /// <summary>Directory the index files are loaded from.</summary>
    [Option('i', "idir", HelpText = "Directory containing *.bin files", DefaultValue = "")]
    public string idir { get; set; }

    /// <summary>Host name used in the service endpoint address.</summary>
    [Option('s', "server", HelpText = "Server", DefaultValue = "localhost")]
    public string server { get; set; }

    /// <summary>Port used in the service endpoint address.</summary>
    [Option('p', "port", HelpText = "Connect to the FastRDFStore server on this port", DefaultValue = 9358)]
    public int port { get; set; }

    /// <summary>Log file path; empty disables logging.</summary>
    [Option('l', "log", HelpText = "Log file. Set to empty to disable logging", DefaultValue = "FastRDFStore.log")]
    public string logfile { get; set; }

    [ParserState]
    public IParserState LastParserState { get; set; }

    /// <summary>Builds the usage/help text, including any parsing errors.</summary>
    [HelpOption]
    public string GetUsage()
    {
        return CommandLine.Text.HelpText.AutoBuild(this,
            (CommandLine.Text.HelpText current) =>
            {
                current.Copyright = " ";
                current.AdditionalNewLineAfterOption = false;
                current.MaximumDisplayWidth = GetDisplayWidth();
                current.Heading = System.AppDomain.CurrentDomain.FriendlyName + " Usage:";
                CommandLine.Text.HelpText.DefaultParsingErrorsHandler(this, current);
            });
    }

    /// <summary>
    /// Returns the console width, or a default when no console is attached (e.g. redirected output),
    /// in which case reading Console.WindowWidth throws instead of returning a value.
    /// </summary>
    private static int GetDisplayWidth()
    {
        try
        {
            int width = Console.WindowWidth;
            return width > 0 ? width : DefaultDisplayWidth;
        }
        catch (System.IO.IOException)
        {
            return DefaultDisplayWidth;
        }
    }
}
/// <summary>Entry point: loads the store, opens the WCF endpoint, then blocks forever.</summary>
internal class Program
{
    /// <summary>Parses the command line, initializes the store and starts the service.</summary>
    private static void Main(string[] args)
    {
        var options = new CommandLineArguments();
        Parser.Default.ParseArgumentsStrict(args, options);

        FastRDFStore.Initialize(options.idir, options.logfile);
        StartRDFStoreService(options.server, options.port);

        // Wait for user to hit CTRL-C
        Thread.Sleep(Timeout.Infinite);
    }

    /// <summary>
    /// Opens a net.tcp endpoint for <see cref="IFastRDFStore"/> at net.tcp://server:port/solver,
    /// with security disabled and message size/timeout limits set to their maximum values.
    /// </summary>
    /// <param name="server">Host name for the endpoint address.</param>
    /// <param name="port">Port for the endpoint address.</param>
    public static void StartRDFStoreService(string server, int port)
    {
        var host = new ServiceHost(typeof (FastRDFStore));

        var binding = new NetTcpBinding(SecurityMode.None);
        binding.MaxBufferSize = int.MaxValue;
        binding.MaxBufferPoolSize = int.MaxValue;
        binding.MaxReceivedMessageSize = int.MaxValue;
        binding.ReceiveTimeout = TimeSpan.MaxValue;
        binding.CloseTimeout = TimeSpan.MaxValue;
        binding.TransferMode = TransferMode.Buffered;
        binding.ReaderQuotas.MaxDepth = int.MaxValue;
        //binding.MaxConnections = 5;
        //binding.ListenBacklog = 5;

        string endpointAddress = String.Format("net.tcp://{0}:{1}/solver", server, port);
        host.AddServiceEndpoint(typeof (IFastRDFStore), binding, endpointAddress);
        host.Open();
    }
}
}

Просмотреть файл

@ -0,0 +1,36 @@
using System.Reflection;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
// General Information about an assembly is controlled through the following
// set of attributes. Change these attribute values to modify the information
// associated with an assembly.
[assembly: AssemblyTitle("FastRDFStore")]
[assembly: AssemblyDescription("")]
[assembly: AssemblyConfiguration("")]
[assembly: AssemblyCompany("")]
[assembly: AssemblyProduct("FastRDFStore")]
[assembly: AssemblyCopyright("Copyright © 2015")]
[assembly: AssemblyTrademark("")]
[assembly: AssemblyCulture("")]
// Setting ComVisible to false makes the types in this assembly not visible
// to COM components. If you need to access a type in this assembly from
// COM, set the ComVisible attribute to true on that type.
[assembly: ComVisible(false)]
// The following GUID is for the ID of the typelib if this project is exposed to COM
[assembly: Guid("19cfb8cc-7d2a-4259-be9e-76a1b4380955")]
// Version information for an assembly consists of the following four values:
//
// Major Version
// Minor Version
// Build Number
// Revision
//
// You can specify all the values or you can default the Build and Revision Numbers
// by using the '*' as shown below:
// [assembly: AssemblyVersion("1.0.*")]
[assembly: AssemblyVersion("1.0.0.0")]
[assembly: AssemblyFileVersion("1.0.0.0")]

Просмотреть файл

@ -0,0 +1,2 @@
rem Copies the Debug build output to the shared deployment directory.
rem xcopy /d copies only files newer than the destination copy; /y suppresses overwrite prompts.
set VERSION=Debug
xcopy /d /y bin\%VERSION%\* \\tspace10\e$\users\tmsnwork\runfb\bin

2
FastRDFStore/deploy.bat Normal file
Просмотреть файл

@ -0,0 +1,2 @@
rem Copies the Release build output to the shared deployment directory.
rem xcopy /d copies only files newer than the destination copy; /y suppresses overwrite prompts.
set VERSION=Release
xcopy /d /y bin\%VERSION%\* \\tspace10\e$\users\tmsnwork\runfb\bin

Просмотреть файл

@ -0,0 +1,4 @@
<?xml version="1.0" encoding="utf-8"?>
<packages>
<package id="CommandLineParser" version="1.9.71" targetFramework="net45" />
</packages>