First import

This commit is contained in:
King_DuckZ 2017-01-06 12:41:43 +00:00
commit 4f08d44df3
3 changed files with 156089 additions and 0 deletions

294
cercasogno.rb Normal file
View file

@ -0,0 +1,294 @@
#!/usr/bin/env ruby
#coding: utf-8
require 'pathname'
$APP_PATH = File.join(File.dirname(Pathname.new(__FILE__).realpath), "/")
require 'yaml'
#require 'set'
# Decide whether ANSI colour output is enabled. The Win32 ANSI console
# library is only required when RUBY_PLATFORM matches /win32/; on every other
# platform the require is skipped and $COLORIZING is simply set to true.
# If the library is requested but cannot be loaded, colouring is disabled.
begin
require 'Win32/Console/ANSI' if RUBY_PLATFORM =~ /win32/
$COLORIZING = true
rescue LoadError
$COLORIZING = false
end
=begin
start of code copied from cercasogno_dizmaker.rb
note that the following code should never change!!
=end
# Record describing one dictionary entry. Passing the "DREAM_INFO" string as
# the first argument additionally registers the class as Struct::DREAM_INFO.
# NOTE(review): the definition in cercasogno_dizmaker.rb does not pass that
# leading name argument, so the two definitions are not literally identical.
DREAM_INFO = Struct.new("DREAM_INFO", :strDesc, :strMeaning, :nNumber, :aSubDescs, :strReferenceURL)
=begin
end of code copied from cercasogno_dizmaker.rb
=end
# Console geometry record: row count, column count and a flag telling whether
# the console wraps to a new line by itself when a full-width line is printed.
CConsoleInfo = Struct.new(:nRows, :nColumns, :bAutoReturn)
# Lookup index over a dictionary whose keys may be written in a compact
# "base/suffix" notation (separators: "/", "\" or "-"), e.g. "bello/a".
# Every compact key is expanded into its full variants ("bello", "bella") and
# each variant is mapped to the value of the original key.
class CIndexer
  # Letters considered interchangeable during a fuzzy search. Each row holds a
  # plain letter followed by its accented variants.
  ACCENT_GROUPS = [
    ["a", "á", "à", "â", "ä", "ã"],
    ["A", "Ã", "Ä", "Â", "À", "Á"],
    ["e", "é", "è", "ê", "ë"],
    ["E", "Ë", "É", "È", "Ê"],
    ["i", "í", "ì", "î", "ï"],
    ["I", "Í", "Î", "Ì", "Ï"],
    ["o", "ó", "ò", "ô", "ö", "õ"],
    ["O", "Õ", "Ö", "Ô", "Ò", "Ó"],
    ["u", "ú", "ù", "û", "ü"],
    ["U", "Ú", "Û", "Ù", "Ü"],
    ["c", "ç"], ["C", "Ç"],
    ["n", "ñ"], ["N", "Ñ"]
  ].freeze

  # Builds the expanded index.
  #
  # hData:: Hash mapping compact String keys to arbitrary values.
  # Raises RuntimeError when a key is not a String, is empty, or expands to a
  # variant that was already produced by a previous key.
  def initialize(hData)
    @hExpanded = {}
    hData.each do |strCompactIndex, value|
      raise "Tutti gli indici per questo dizionario devono essere di tipo String ma è stato trovato un oggetto di classe #{strCompactIndex.class}: \"#{strCompactIndex.inspect}\"" unless strCompactIndex.is_a? String
      raise "Tutti gli indici per questo dizionario devono essere stringhe con lunghezza almeno 1" if strCompactIndex.length < 1
      getExpandedIndex(strCompactIndex).each do |strExp|
        raise "Un elemento precedentemente incontrato è già stato espanso in \"#{strExp}\"" if @hExpanded.include? strExp
        @hExpanded[strExp] = value
      end
    end
  end

  # Returns the value stored under the expanded key +index+, or nil.
  def [](index)
    @hExpanded[index]
  end

  # Returns an Array of expanded keys matching +strKey+.
  #
  # An exact hit yields a one-element array holding that key (the key itself,
  # so the result can always be fed back into #[]). Otherwise every key that
  # contains +strKey+, ignoring case and accents, is returned; the array is
  # empty when nothing matches.
  def getExactKeysByGuessing(strKey)
    return [strKey] if @hExpanded.include? strKey
    # if the easy part didn't work, we need to put some extra effort in the search
    getListOfCandidates(strKey)
  end

  # Turns +str+ into regex source text in which every letter listed in
  # ACCENT_GROUPS is replaced by a character class of all its variants and
  # every other character is escaped so it only matches itself.
  def self.duplicateAccentedForRegex(str)
    hGroupOf = {}
    ACCENT_GROUPS.each do |aGroup|
      aGroup.each { |strLetter| hGroupOf[strLetter] = aGroup }
    end
    str.each_char.map do |strLetter|
      aGroup = hGroupOf[strLetter]
      # No "|" inside the brackets: within a character class it would be a
      # literal pipe, not an alternation.
      aGroup ? "[#{aGroup.join}]" : Regexp.escape(strLetter)
    end.join
  end

  private

  # Expands a compact key into the list of full keys it stands for.
  # The shortest suffix length decides how many trailing characters of the
  # first element are swapped out for each alternative ending.
  def getExpandedIndex(strContracted)
    aSplit = strContracted.split(/[\\\/-]/).map(&:strip).reject(&:empty?)
    return [strContracted] if aSplit.length <= 1
    strBase = aSplit.first
    aSuffixes = aSplit[1..-1]
    nSuffixLen = [aSuffixes.map(&:length).min, strBase.length].min
    strStem = strBase[0, strBase.length - nSuffixLen]
    aEndings = [strBase[strBase.length - nSuffixLen..-1]] + aSuffixes
    aEndings.map { |strEnding| strStem + strEnding }
  end

  # Case-insensitive, accent-insensitive substring search over all keys.
  def getListOfCandidates(strSearch)
    regSearch = Regexp.new(CIndexer.duplicateAccentedForRegex(strSearch), Regexp::IGNORECASE)
    getListOfCandidatesByRegex(regSearch)
  end

  # Returns every expanded key matched by +regSearch+, in insertion order.
  def getListOfCandidatesByRegex(regSearch)
    @hExpanded.keys.select { |strKey| regSearch =~ strKey }
  end
end
# Wraps text in ANSI escape sequences. Colours can be given as raw codes
# (a number or a string starting with a digit) or as named templates, and
# regular expressions can be registered so matching text is coloured
# automatically.
class CAnsiColorizer
  # A colour given as a String starting with a digit is treated as a raw code.
  STARTS_WITH_DIGIT = /^\d/

  # bEnabled:: true/false to force colouring on or off; when nil the global
  #            $COLORIZING flag decides.
  def initialize(bEnabled=nil)
    @bEnabled = (bEnabled.nil? ? $COLORIZING : bEnabled)
    @hTemplates = {}
    @hRegexToColor = {}
  end

  # Registers a named colour template built as "nMain;nForeground;nBackground"
  # (nil parts are skipped). Returns true.
  # Raises RuntimeError when a String name starts with a digit, since such a
  # name could not be told apart from a raw colour code.
  def addTemplate(strName, nForeground, nBackground=nil, nMain=0)
    raise "Template names cannot start by a number" if strName.is_a?(String) && (STARTS_WITH_DIGIT =~ strName)
    @hTemplates[strName] = [nMain, nForeground, nBackground].compact.join(";")
    true
  end

  # Returns +strText+ wrapped in the escape sequence for +color+.
  # +color+ is either a raw code or a template name; for an unknown template
  # (or when colouring is disabled) +strText+ is returned untouched.
  def getColorized(strText, color)
    if color.is_a?(Numeric) || (color.is_a?(String) && STARTS_WITH_DIGIT =~ color)
      colorize(strText, color.to_s)
    elsif @hTemplates.include?(color)
      colorize(strText, @hTemplates[color])
    else
      strText
    end
  end

  # Registers +regRegex+ so that #getAutoColorized paints its matches with
  # +color+. Returns true. Raises RuntimeError unless given a Regexp.
  def addAutoColorRegex(regRegex, color)
    raise "Invalid regex" unless regRegex.is_a? Regexp
    @hRegexToColor[regRegex] = color
    true
  end

  # Returns a new String in which every match of every registered regex is
  # colourized, applying the regexes in registration order.
  # The argument is never modified (a copy is worked on), so frozen strings
  # are accepted, and gsub guarantees progress even on zero-width matches.
  def getAutoColorized(strText)
    @hRegexToColor.inject(strText.dup) do |strAcc, (regMatch, color)|
      strAcc.gsub(regMatch) { |strHit| getColorized(strHit, color) }
    end
  end

  private

  # Wraps +text+ between the "set colour" and "reset" escape sequences.
  def colorize(text, color_code)
    #source: http://kpumuk.info/ruby-on-rails/colorizing-console-ruby-script-output/
    @bEnabled ? "\033[#{color_code}m#{text}\033[0m" : text
  end
end
# Returns the console geometry as a CConsoleInfo record.
# The values are fixed (40 rows, 80 columns, no automatic line wrap); the
# actual terminal is not queried.
def GetConsoleInfo()
  nRows, nColumns, bAutoReturn = 40, 80, false
  CConsoleInfo.new(nRows, nColumns, bAutoReturn)
end
# Prints +aList+ as a table of equally wide columns.
#
# aList::            items to print; all are expected to have the same length
#                    as the first one (only the first is measured).
# colorizer::        object responding to getAutoColorized(String).
# nWidth::           available width; when nil it is taken from
#                    GetConsoleInfo, and the line terminator is dropped if the
#                    console reports that it wraps by itself.
# bVerticalOrder::   true fills column by column, false fills row by row.
# nForceMinSpacing:: minimum number of blanks reserved after each item when
#                    computing how many columns fit.
#
# Returns 0 for an empty list, otherwise the number of rows printed.
# Raises RuntimeError when the items are empty or wider than +nWidth+.
def PutsMulticolumnFixedLength(aList, colorizer, nWidth=nil, bVerticalOrder=true, nForceMinSpacing=1)
  return 0 if aList.empty?
  nLen = aList.first.length
  strEOL = "\n"
  unless nWidth
    ciInfo = GetConsoleInfo()
    strEOL = "" if ciInfo.bAutoReturn
    nWidth = ciInfo.nColumns
  end
  raise "Gli elementi devono avere lunghezza maggiore di 0" if nLen == 0
  raise "Gli elementi non devono essere più lunghi dello spazio disponibile (#{nLen} > #{nWidth})" if nLen > nWidth

  nColumns = (nWidth < nLen + nForceMinSpacing ? 1 : nWidth / (nLen + nForceMinSpacing))
  # Spread the leftover width evenly after every item.
  strSpacing = " " * ((nWidth - nLen * nColumns) / nColumns)

  nTotal = aList.length
  # Round up so a trailing, partially filled row is printed in both orders.
  nRowCount = (nTotal + nColumns - 1) / nColumns
  nFullRows = nTotal / nColumns
  nLongerColumns = nTotal % nColumns

  nRowCount.times do |z|
    nCount = [nColumns, nTotal - z * nColumns].min
    aRow =
      if bVerticalOrder
        # The first nLongerColumns columns hold one extra item, so the start
        # of column i is shifted by the number of longer columns before it.
        aList.values_at(*Array.new(nCount) { |i| i * nFullRows + [nLongerColumns, i].min + z })
      else
        aList[z * nColumns, nCount]
      end
    print colorizer.getAutoColorized(aRow.map { |s| s.to_s + strSpacing }.join("")) + strEOL
  end
end
# Asks the user to pick one entry out of +aList+.
#
# Prints a numbered menu (numbering starts at 1) and reads lines from $stdin
# until a number within the menu range is entered; anything else is ignored.
#
# aList::     non-empty Array of Strings to choose from.
# colorizer:: forwarded to PutsMulticolumnFixedLength.
#
# Returns the zero-based index of the chosen entry; a single-element list is
# answered with 0 without prompting.
# Raises RuntimeError for an empty list or a non-String element, and EOFError
# when the input ends before a valid choice was made.
def Disambiguate(aList, colorizer)
  raise "Ricevuta una lista vuota" if aList.empty?
  return 0 if aList.length == 1
  puts "Specifica meglio il termine della ricerca:"
  aList.each do |strItem|
    raise "Tutti gli elementi ricevuti devono essere di tipo String, ma è stato trovato un #{strItem.class}" unless strItem.is_a? String
  end
  nLongestEntry = aList.map(&:length).max
  strSeparator = " - "
  nLongestID = aList.length.to_s.length
  aMenu = aList.each_with_index.map do |strItem, z|
    (z + 1).to_s.ljust(nLongestID, " ") + strSeparator + strItem.rjust(nLongestEntry, " ")
  end
  PutsMulticolumnFixedLength(aMenu, colorizer, nil, true, 2)

  regInt = /\A\d+\z/
  loop do
    strLine = $stdin.gets
    raise EOFError, "Input terminato prima di una selezione valida" if strLine.nil?
    strInp = strLine.strip
    next unless regInt =~ strInp
    nChoice = strInp.to_i
    # The menu is 1-based: 0 is not a valid entry and must not map to -1.
    return nChoice - 1 if nChoice.between?(1, aList.length)
  end
end
# Prints one dictionary entry to standard output.
#
# The heading is the capitalised description followed by the number in
# parentheses, then the meaning (when present and not empty), then one line
# per sub-description ("desc: meaning; number") in sorted order.
#
# objDream::  record exposing strDesc, strMeaning, nNumber and aSubDescs.
# colorizer:: object responding to getColorized(text, color).
#
# Always returns nil. Raises RuntimeError when +objDream+ is nil.
def DrawMeaning(objDream, colorizer)
  raise "objDream can't be null" if objDream.nil?

  strTitle = colorizer.getColorized(objDream.strDesc.capitalize, :main)
  strNumber = colorizer.getColorized(objDream.nNumber, :numeric)
  puts "#{strTitle} (#{strNumber}):"

  strMeaning = objDream.strMeaning
  puts strMeaning unless !strMeaning || strMeaning.length.zero?

  aSubs = objDream.aSubDescs
  if aSubs
    aLines = aSubs.map do |objSub|
      strDesc = colorizer.getColorized(objSub.strDesc, :desc)
      strText = colorizer.getColorized(objSub.strMeaning, :normal)
      strNum = colorizer.getColorized(objSub.nNumber, :numeric)
      "#{strDesc}: #{strText}; #{strNum}"
    end
    puts aLines.sort.join("\n")
  end

  nil
end
# Command-line entry point: looks up the first argument in the dictionary
# and prints the matching entry, asking the user to choose when the search
# term matches more than one key.
if ARGV.empty?
  puts "Specificare il tema della ricerca"
  exit
end

# The dictionary is expected to hold DREAM_INFO structs (DrawMeaning reads
# their members). Since Psych 4, YAML.load is safe-by-default and refuses to
# instantiate Ruby structs, so the permissive loader is used when it exists.
# NOTE(review): unsafe_load can build arbitrary Ruby objects; it is applied
# here only to the dictionary file stored next to the script. Never point it
# at data coming from an untrusted source.
hDreams = File.open(File.join($APP_PATH, "cercasogno_diz.yml"), 'r') do |fh|
  YAML.respond_to?(:unsafe_load) ? YAML.unsafe_load(fh) : YAML.load(fh)
end

indexer = CIndexer.new(hDreams)
colorizer = CAnsiColorizer.new
colorizer.addAutoColorRegex(/\d+/, :numeric)
colorizer.addTemplate(:numeric, 36)
colorizer.addTemplate(:main, 31, nil, 4)
colorizer.addTemplate(:desc, 33)

aGuess = indexer.getExactKeysByGuessing(ARGV.first)
nSelection = 0
if aGuess.empty?
  puts "Nessun risultato per \"#{ARGV.first}\""
  exit
elsif aGuess.length > 1
  nSelection = Disambiguate(aGuess, colorizer)
end
DrawMeaning(indexer[aGuess[nSelection]], colorizer)

155666
cercasogno_diz.yml Normal file

File diff suppressed because it is too large Load diff

129
cercasogno_dizmaker.rb Normal file
View file

@ -0,0 +1,129 @@
#!/usr/bin/env ruby
#coding: utf-8
require 'pathname'
$APP_PATH = File.join(File.dirname(Pathname.new(__FILE__).realpath), "/")
require 'rubygems'
require 'net/http'
require 'nokogiri'
require 'yaml'
# Record describing one dictionary entry: description, meaning, associated
# number, optional list of sub-entries (themselves DREAM_INFO records) and the
# URL of the page the entry was extracted from.
# NOTE(review): cercasogno.rb carries its own definition of this struct and
# states that it must stay in sync with this one — confirm before changing
# the member list.
DREAM_INFO = Struct.new(:strDesc, :strMeaning, :nNumber, :aSubDescs, :strReferenceURL)
# Returns a copy of +str+ (converted with to_s) with accented letters
# replaced by their plain counterparts.
#
# Vowels are only replaced where the position right after the letter is not a
# word boundary (the \B assertion), which typically leaves an accent on the
# last letter of a word untouched. "ç" and "ñ" (both cases) are always
# replaced.
#
# The argument is never modified: the substitutions work on new strings, so
# frozen input is accepted as well.
def DropIntermediateAccents(str)
  hAccents = {
    /[áàâäã]\B/ => 'a',
    /[ÃÄÂÀÁ]\B/ => 'A',
    /[éèêë]\B/ => 'e',
    /[ËÉÈÊ]\B/ => 'E',
    /[íìîï]\B/ => 'i',
    /[ÍÎÌÏ]\B/ => 'I',
    /[óòôöõ]\B/ => 'o',
    /[ÕÖÔÒÓ]\B/ => 'O',
    /[úùûü]\B/ => 'u',
    /[ÚÛÙÜ]\B/ => 'U',
    /ç/ => 'c', /Ç/ => 'C',
    /ñ/ => 'n', /Ñ/ => 'N'
  }
  hAccents.inject(str.to_s) do |strAcc, (regAccent, strPlain)|
    strAcc.gsub(regAccent, strPlain)
  end
end
# Returns a normalised copy of +strText+: surrounding blanks stripped,
# intermediate accents dropped (see DropIntermediateAccents), line breaks
# removed, runs of two or more whitespace characters collapsed to one blank
# and typographic apostrophes turned into a plain "'".
def GetPurifiedString(strText)
  strRet = DropIntermediateAccents(strText.strip)
  strRet = strRet.gsub(/\r|\n/, "")
  strRet = strRet.gsub(/\s{2,}/, " ")
  # The apostrophe rule used to have an empty pattern, which made gsub insert
  # a "'" between every character of the text.
  # NOTE(review): the character originally meant here is unknown; curly
  # quotes and the mis-decoded cp1252 apostrophe (U+0092) are assumed —
  # confirm against the source pages.
  strRet = strRet.gsub(/[\u2018\u2019\u0092]/, "'")
  strRet.strip
end
# Splits a raw detail line into its "description / meaning / number" entries.
#
# The text is scanned repeatedly with the same pattern, each time restarting
# right after the previous match. Every hit becomes a DREAM_INFO whose
# description and meaning went through GetPurifiedString.
# Entries containing punctuation or digits are reported on $stderr; an entry
# with an empty or purely numeric description/meaning aborts the extraction.
#
# Returns the Array of DREAM_INFO records, or nil when nothing matched.
# Raises RuntimeError ("Critical error") on an unusable entry.
def ExtractSubsections(strSectionRaw)
  regEntry = /^\W*([^0-9;:,.-]+)[\s;:,.-]+([^0-9;:=-]+?)\b\W+(\d+)/
  regDigitsOnly = /^\d+$/
  regSuspicious = /[-.:,;@#§+\\!"£$%&()=?^°\[\]*0-9]/
  aFound = []
  nOffset = 0
  mHit = regEntry.match(strSectionRaw)
  until mHit.nil?
    diEntry = DREAM_INFO.new(GetPurifiedString(mHit[1]), GetPurifiedString(mHit[2]), mHit[3].to_i)
    aFound << diEntry
    bCritical = regDigitsOnly =~ diEntry.strMeaning || regDigitsOnly =~ diEntry.strDesc
    bCritical |= diEntry.strMeaning.empty? || diEntry.strDesc.empty?
    $stderr.puts "M: " + diEntry.strMeaning if bCritical || regSuspicious =~ diEntry.strMeaning
    $stderr.puts "D: " + diEntry.strDesc if bCritical || regSuspicious =~ diEntry.strDesc
    raise "Critical error" if bCritical
    # Match offsets are relative to the slice that was searched.
    nOffset += mHit.end(0)
    mHit = regEntry.match(strSectionRaw[nOffset..-1])
  end
  aFound.empty? ? nil : aFound
end
# Extracts the dictionary entries contained in one HTML page.
#
# Every text node selected by the XPath query is purified and classified:
# a line matching the "details" pattern while an entry is already open fills
# that entry's aSubDescs (via ExtractSubsections); any other matching line
# must be a heading ("<word> ... = <number>") and opens a new entry keyed by
# its first captured group. Lines matching neither pattern are skipped.
# The description of each new entry is echoed to standard output.
#
# strReferenceUrl:: stored in every created entry.
# strHtml::         page markup handed to Nokogiri::HTML.
#
# Returns a Hash mapping descriptions to DREAM_INFO records.
# Raises RuntimeError when a details line shows up before any heading.
def ExtractElementsFromHtml(strReferenceUrl, strHtml)
  #The following query fetches <br> nodes from parent node "table" (ref: http://stackoverflow.com/questions/1485356/how-to-get-xpath-of-text-between-br-or-br)
  strQuery = "//table[@class=\"centraleUnico\"]/br/following-sibling::text() | //table[@class=\"centraleUnico\"]//br/preceding-sibling::text()"
  regHead = /^(.+?)\b.*=\s*(\d+)/
  regDetails = /^.+?:.+?,\s*\d+/
  diCurrent = nil
  hEntries = {}
  Nokogiri::HTML(strHtml).xpath(strQuery).each do |node|
    strLine = GetPurifiedString(node.content)
    mHead = regHead.match(strLine)
    bDetails = regDetails =~ strLine
    next if mHead.nil? && !bDetails
    if diCurrent && bDetails
      diCurrent.aSubDescs = ExtractSubsections(strLine)
    else
      raise "A new item was expected" unless mHead
      diCurrent = DREAM_INFO.new(mHead[1], nil, mHead[2].to_i, nil, strReferenceUrl)
      hEntries[diCurrent.strDesc] = diCurrent
      print diCurrent.strDesc + " "
    end
  end
  hEntries
end
# Script body: downloads one page per letter from the source site, extracts
# the entries of each page and dumps the merged dictionary as YAML next to
# this script.
aLetters = %w{a b c d e f g h i l m n o p q r s t u v z}
HOST_URL = "www.metropolino.com"
hElements = Hash.new
conn = Net::HTTP.new(HOST_URL, 80)
aLetters.each do |strLetter|
  strPageAddress = "/smorfia/interpretazione-dei-sogni-" + strLetter + ".asp"
  # Net::HTTP#get returns just the response object on current Rubies; the
  # page markup has to be read from its body. The destructuring keeps working
  # should an old library version still hand back a [response, body] pair.
  response, _ = conn.get(strPageAddress)
  if response.is_a? Net::HTTPSuccess then
    hElements.merge! ExtractElementsFromHtml(HOST_URL + strPageAddress, response.body)
  else
    puts "Error while retrieving #{strPageAddress} (Error #{response.code}: #{response.message})"
    response.error!
  end
end
puts
# NOTE(review): the reader script opens "cercasogno_diz.yml" while this
# writes "sogno_diz.yml" — confirm whether the manual rename is intentional.
File.open(File.join($APP_PATH, "sogno_diz.yml"), "w") do |fDst|
  fDst.write(YAML::dump(hElements))
end