First import
This commit is contained in:
commit
4f08d44df3
3 changed files with 156089 additions and 0 deletions
294
cercasogno.rb
Normal file
294
cercasogno.rb
Normal file
|
@ -0,0 +1,294 @@
|
|||
#!/usr/bin/env ruby
|
||||
#coding: utf-8
|
||||
|
||||
require 'pathname'
|
||||
$APP_PATH = File.join(File.dirname(Pathname.new(__FILE__).realpath), "/")
|
||||
|
||||
require 'yaml'
|
||||
#require 'set'
|
||||
|
||||
# Colour output is switched on unless, on a win32 platform, the
# Win32/Console/ANSI library cannot be loaded.
$COLORIZING =
  begin
    require 'Win32/Console/ANSI' if RUBY_PLATFORM =~ /win32/
    true
  rescue LoadError
    false
  end

# --- start of code copied from cercasogno_dizmaker.rb ---
# Note that the following code should never change!!
# Record describing one dictionary entry (or one of its sub-descriptions).
DREAM_INFO = Struct.new("DREAM_INFO", :strDesc, :strMeaning, :nNumber, :aSubDescs, :strReferenceURL)
# --- end of code copied from cercasogno_dizmaker.rb ---

# Console geometry as consumed by PutsMulticolumnFixedLength.
CConsoleInfo = Struct.new(:nRows, :nColumns, :bAutoReturn)
|
||||
|
||||
# Index over the dream dictionary that understands "compact" keys.
#
# A compact key lists alternative word endings after a separator (backslash,
# slash or dash): "gatto/a" stands for both "gatto" and "gatta". Every spelling
# is stored as its own key pointing at the same value.
class CIndexer
  # Interchangeable letters: each row is a base letter followed by its
  # accented variants.
  ACCENT_GROUPS = [
    ["a", "á", "à", "â", "ä", "ã"],
    ["A", "Ã", "Ä", "Â", "À", "Á"],
    ["e", "é", "è", "ê", "ë"],
    ["E", "Ë", "É", "È", "Ê"],
    ["i", "í", "ì", "î", "ï"],
    ["I", "Í", "Î", "Ì", "Ï"],
    ["o", "ó", "ò", "ô", "ö", "õ"],
    ["O", "Õ", "Ö", "Ô", "Ò", "Ó"],
    ["u", "ú", "ù", "û", "ü"],
    ["U", "Ú", "Û", "Ù", "Ü"],
    ["c", "ç"], ["C", "Ç"],
    ["n", "ñ"], ["N", "Ñ"]
  ]

  # Maps every letter listed in ACCENT_GROUPS to the group it belongs to.
  ACCENT_GROUP_OF = {}
  ACCENT_GROUPS.each do |aGroup|
    aGroup.each { |strLetter| ACCENT_GROUP_OF[strLetter] = aGroup }
  end

  # Builds the expanded index.
  #
  # hData:: Hash whose keys are (possibly compact) non-empty Strings.
  # Raises RuntimeError when a key is not a String, is empty, or expands to a
  # spelling that an earlier key already produced.
  def initialize(hData)
    @hExpanded = {}
    hData.each do |strCompactIndex, value|
      unless strCompactIndex.is_a? String
        raise "Tutti gli indici per questo dizionario devono essere di tipo String ma è stato trovato un oggetto di classe #{strCompactIndex.class}: \"#{strCompactIndex.inspect}\""
      end
      raise "Tutti gli indici per questo dizionario devono essere stringhe con lunghezza almeno 1" if strCompactIndex.length < 1

      getExpandedIndex(strCompactIndex).each do |strExp|
        raise "Un elemento precedentemente incontrato è già stato espanso in \"#{strExp}\"" if @hExpanded.include? strExp
        @hExpanded[strExp] = value
      end
    end
  end

  # Returns the value stored under the expanded key +index+, or nil.
  def [](index)
    @hExpanded[index]
  end

  # Returns the expanded keys matching +strKey+, as an Array of Strings.
  #
  # An exact hit yields a one-element array holding that key. It used to hold
  # the stored value instead, which callers could not feed back into #[].
  # Otherwise every key matching the accent- and case-insensitive pattern is
  # returned, possibly none.
  def getExactKeysByGuessing(strKey)
    return [strKey] if @hExpanded.include? strKey

    # If the easy part didn't work, put some extra effort in the search.
    getListOfCandidates(strKey)
  end

  # Turns +str+ into regex source in which every letter that has accented
  # variants is replaced by a character class of all of them.
  #
  # str::      text to convert.
  # bLiteral:: when true, all other characters are regex-escaped so the result
  #            matches +str+ literally; when false (default) they are copied
  #            verbatim, so regex syntax in +str+ stays active.
  def self.duplicateAccentedForRegex(str, bLiteral=false)
    str.each_char.collect do |strLetter|
      aGroup = ACCENT_GROUP_OF[strLetter]
      if aGroup
        # No "|" between the letters: inside [...] it would be a literal pipe.
        "[" + aGroup.join + "]"
      elsif bLiteral
        Regexp.escape(strLetter)
      else
        strLetter
      end
    end.join
  end

  private

  # Expands a compact key into the list of spellings it stands for.
  # The shortest alternative ending decides how many trailing characters of the
  # first word get swapped: "gatto/a" => ["gatto", "gatta"].
  def getExpandedIndex(strContracted)
    aSplit = strContracted.split(/[\\\/-]/).collect { |strItem| strItem.strip }.reject { |strItem| strItem.empty? }
    return [strContracted] if aSplit.length <= 1

    strBase = aSplit.first
    aSuffixes = aSplit[1..-1]
    nSuffixLen = [aSuffixes.collect { |strSuffix| strSuffix.length }.min, strBase.length].min
    strStem = strBase[0, strBase.length - nSuffixLen]
    [strBase] + aSuffixes.collect { |strSuffix| strStem + strSuffix }
  end

  # Keys matching +strSearch+ case-insensitively and regardless of accents.
  # The text is first tried as a regex; if it is not valid regex syntax
  # (e.g. "gat(") it is matched literally instead of raising.
  def getListOfCandidates(strSearch)
    begin
      regSearch = Regexp.new(CIndexer.duplicateAccentedForRegex(strSearch), Regexp::IGNORECASE)
    rescue RegexpError
      regSearch = Regexp.new(CIndexer.duplicateAccentedForRegex(strSearch, true), Regexp::IGNORECASE)
    end
    getListOfCandidatesByRegex(regSearch)
  end

  # All expanded keys that +regSearch+ matches, in index order.
  def getListOfCandidatesByRegex(regSearch)
    @hExpanded.keys.select { |strKey| regSearch =~ strKey }
  end
end
|
||||
|
||||
# Wraps text in ANSI colour escape sequences, either on request or
# automatically for every match of registered regular expressions.
class CAnsiColorizer
  # A colour given as a String starting with a digit is used as a raw ANSI
  # code rather than as a template name.
  STARTS_BY_DIGIT = /^\d/

  # bEnabled:: true/false to force colouring on/off; nil (default) follows the
  #            global $COLORIZING flag.
  def initialize(bEnabled=nil)
    @bEnabled = (bEnabled.nil? ? $COLORIZING : bEnabled)
    @hTemplates = {}
    @hRegexToColor = {}
  end

  # Registers a named colour.
  #
  # strName::     template name (any object usable as a Hash key); a String
  #               name must not start with a digit.
  # nForeground:: ANSI foreground code.
  # nBackground:: optional ANSI background code.
  # nMain::       leading ANSI attribute code (default 0).
  # Returns true. Raises RuntimeError for a String name starting with a digit.
  def addTemplate(strName, nForeground, nBackground=nil, nMain=0)
    raise "Template names cannot start by a number" if strName.is_a?(String) && (STARTS_BY_DIGIT =~ strName)
    @hTemplates[strName] = [nMain, nForeground, nBackground].compact.join(";")
    true
  end

  # Returns +strText+ wrapped in the escape sequence for +color+.
  #
  # color:: a Numeric or digit-leading String (used as a raw ANSI code), or
  #         the name of a template registered with #addTemplate.
  # An unknown template name returns +strText+ untouched.
  def getColorized(strText, color)
    if color.is_a?(Numeric) || (color.is_a?(String) && (STARTS_BY_DIGIT =~ color))
      colorize(strText, color.to_s)
    elsif @hTemplates.include?(color)
      colorize(strText, @hTemplates[color])
    else
      strText
    end
  end

  # Colours every match of +regRegex+ with +color+ in #getAutoColorized.
  # Returns true. Raises RuntimeError unless +regRegex+ is a Regexp.
  def addAutoColorRegex(regRegex, color)
    raise "Invalid regex" unless regRegex.is_a? Regexp
    @hRegexToColor[regRegex] = color
    true
  end

  # Returns a coloured copy of +strText+ in which every match of every
  # registered regex is wrapped in its colour. Regexes are applied in
  # registration order, each one to the output of the previous one.
  #
  # The argument is never modified, so frozen strings are accepted, and
  # patterns that can match the empty string no longer hang the loop.
  def getAutoColorized(strText)
    @hRegexToColor.inject(strText.dup) do |strRet, (regMatch, color)|
      strRet.gsub(regMatch) { |strMatch| getColorized(strMatch, color) }
    end
  end

  private

  # Wraps +text+ in "ESC[<code>m ... ESC[0m" when colouring is enabled.
  def colorize(text, color_code)
    #source: http://kpumuk.info/ruby-on-rails/colorizing-console-ruby-script-output/
    @bEnabled ? "\033[#{color_code}m#{text}\033[0m" : text
  end
end
|
||||
|
||||
# Describes the console the output is written to.
# The values are fixed (40 rows, 80 columns, no automatic line return);
# nothing is queried from the actual terminal.
def GetConsoleInfo()
  nRows, nColumns, bAutoReturn = 40, 80, false
  CConsoleInfo.new(nRows, nColumns, bAutoReturn)
end
|
||||
|
||||
# Prints a list of equally long strings laid out in as many columns as fit.
#
# aList::            items to print; the length of the first one is taken as
#                    the length of all of them.
# colorizer::        object responding to getAutoColorized(String); every
#                    output row is passed through it.
# nWidth::           available width in characters; when nil it is taken from
#                    GetConsoleInfo, and no line break is emitted if that
#                    console reports bAutoReturn.
# bVerticalOrder::   true fills column by column (read top to bottom), false
#                    fills row by row.
# nForceMinSpacing:: minimum number of blanks reserved after each item when
#                    computing how many columns fit.
#
# Returns 0 for an empty list, otherwise the number of rows printed.
# Raises RuntimeError when the items are empty or wider than nWidth.
def PutsMulticolumnFixedLength(aList, colorizer, nWidth=nil, bVerticalOrder=true, nForceMinSpacing=1)
  return 0 if aList.empty?

  nLen = aList.first.length
  strEOL = "\n"
  unless nWidth
    ciInfo = GetConsoleInfo()
    strEOL = "" if ciInfo.bAutoReturn
    nWidth = ciInfo.nColumns
  end

  raise "Gli elementi devono avere lunghezza maggiore di 0" if nLen == 0
  raise "Gli elementi non devono essere più lunghi dello spazio disponibile (#{nLen} > #{nWidth})" if nLen > nWidth

  nColumns = (nWidth < nLen + nForceMinSpacing ? 1 : nWidth / (nLen + nForceMinSpacing))
  strSpacing = " " * ((nWidth - nLen * nColumns) / nColumns)

  # Round up so that a final, partially filled row is printed in both
  # orderings (the row-by-row ordering used to drop it).
  nRowCount = (aList.length + nColumns - 1) / nColumns
  nFullRows = aList.length / nColumns        # height of the shorter columns
  nLongerColumns = aList.length % nColumns   # leading columns one item taller

  nRowCount.times do |z|
    nFrom = z * nColumns
    nCount = [nColumns, aList.length - nFrom].min
    if bVerticalOrder
      # Column i starts after i shorter columns plus one extra item for each
      # taller column to its left.
      aIndices = Array.new(nCount) { |i| i * nFullRows + [nLongerColumns, i].min + z }
      aRow = aList.values_at(*aIndices)
    else
      aRow = aList[nFrom, nCount]
    end
    print colorizer.getAutoColorized(aRow.collect { |s| s.to_s + strSpacing }.join("")) + strEOL
  end
end
|
||||
|
||||
# Asks the user to pick one entry out of several candidates.
#
# Prints a numbered (1-based) multi-column menu and reads lines from $stdin
# until one of them is a number belonging to the menu.
#
# aList::     candidate Strings.
# colorizer:: passed on to PutsMulticolumnFixedLength for the menu.
#
# Returns the zero-based index of the chosen entry (0 straight away when the
# list has a single element).
# Raises RuntimeError when the list is empty, when an element is not a String,
# or when the input ends before a valid choice was made.
def Disambiguate(aList, colorizer)
  raise "Ricevuta una lista vuota" if aList.empty?
  return 0 if aList.length == 1

  puts "Specifica meglio il termine della ricerca:"

  aList.each do |strItem|
    raise "Tutti gli elementi ricevuti devono essere di tipo String, ma è stato trovato un #{strItem.class}" unless strItem.is_a? String
  end
  nLongestEntry = aList.collect { |strItem| strItem.length }.max

  strSeparator = " - "
  nLongestID = aList.length.to_s.length

  aMenu = []
  aList.each_with_index do |strItem, z|
    aMenu << (z + 1).to_s.ljust(nLongestID, " ") + strSeparator + strItem.rjust(nLongestEntry, " ")
  end
  PutsMulticolumnFixedLength(aMenu, colorizer, nil, true, 2)

  loop do
    strInp = $stdin.gets
    # gets returns nil at end of input; stop instead of failing on nil.chomp.
    raise "Input terminato prima di una selezione valida" if strInp.nil?
    strInp = strInp.strip
    next unless strInp =~ /\A\d+\z/

    nChoice = strInp.to_i
    # The menu is 1-based: 0 is not a valid answer (it used to be accepted and
    # silently selected the last entry).
    return nChoice - 1 if nChoice.between?(1, aList.length)
  end
end
|
||||
|
||||
# Prints one dictionary entry to standard output.
#
# Output: a header line with the capitalized description and its number, the
# meaning (when present and non-empty) and, when the entry has
# sub-descriptions, one "description: meaning; number" line for each of them,
# sorted alphabetically.
#
# objDream::  record responding to strDesc, strMeaning, nNumber and aSubDescs.
# colorizer:: object responding to getColorized(text, color).
#
# Returns nil. Raises RuntimeError when objDream is nil.
def DrawMeaning(objDream, colorizer)
  raise "objDream can't be null" if objDream.nil?

  strTitle = colorizer.getColorized(objDream.strDesc.capitalize, :main)
  strNumber = colorizer.getColorized(objDream.nNumber, :numeric)
  puts "#{strTitle} (#{strNumber}):"

  strMeaning = objDream.strMeaning
  puts strMeaning if strMeaning && strMeaning.length > 0

  aSubDescs = objDream.aSubDescs
  if aSubDescs
    aLines = aSubDescs.map do |objSub|
      strSubDesc = colorizer.getColorized(objSub.strDesc, :desc)
      strSubMeaning = colorizer.getColorized(objSub.strMeaning, :normal)
      strSubNumber = colorizer.getColorized(objSub.nNumber, :numeric)
      "#{strSubDesc}: #{strSubMeaning}; #{strSubNumber}"
    end
    puts aLines.sort.join("\n")
  end

  nil
end
|
||||
|
||||
# Entry point: looks up the first command line argument in the dream
# dictionary and prints the entry, asking the user to choose when several
# entries match.
if ARGV.length == 0
  puts "Specificare il tema della ricerca"
  exit
end

# The dictionary stores DREAM_INFO structs. Psych 4+ makes YAML.load behave as
# a safe load, which refuses custom classes, so use unsafe_load when it exists.
# NOTE(review): this deserializes arbitrary Ruby objects; it is acceptable only
# because the file is the dictionary bundled next to the script - never point
# it at untrusted data.
hDreams = File.open(File.join($APP_PATH, "cercasogno_diz.yml"), 'r') do |fh|
  YAML.respond_to?(:unsafe_load) ? YAML.unsafe_load(fh) : YAML.load(fh)
end
indexer = CIndexer.new(hDreams)

colorizer = CAnsiColorizer.new
colorizer.addAutoColorRegex(/\d+/, :numeric)

colorizer.addTemplate(:numeric, 36)
colorizer.addTemplate(:main, 31, nil, 4)
colorizer.addTemplate(:desc, 33)

aGuess = indexer.getExactKeysByGuessing(ARGV.first)
nSelection = 0
if aGuess.length == 0
  puts "Nessun risultato per \"#{ARGV.first}\""
  exit
elsif aGuess.length > 1
  nSelection = Disambiguate(aGuess, colorizer)
end

DrawMeaning(indexer[aGuess[nSelection]], colorizer)
|
155666
cercasogno_diz.yml
Normal file
155666
cercasogno_diz.yml
Normal file
File diff suppressed because it is too large
Load diff
129
cercasogno_dizmaker.rb
Normal file
129
cercasogno_dizmaker.rb
Normal file
|
@ -0,0 +1,129 @@
|
|||
#!/usr/bin/env ruby
|
||||
#coding: utf-8
|
||||
|
||||
require 'pathname'
|
||||
$APP_PATH = File.join(File.dirname(Pathname.new(__FILE__).realpath), "/")
|
||||
|
||||
require 'rubygems'
|
||||
require 'net/http'
|
||||
require 'nokogiri'
|
||||
require 'yaml'
|
||||
|
||||
DREAM_INFO = Struct.new(:strDesc, :strMeaning, :nNumber, :aSubDescs, :strReferenceURL)
|
||||
|
||||
# Returns a copy of +str+ (converted with to_s) in which accented vowels are
# replaced by the plain vowel wherever the regex \B assertion holds right
# after them, and in which every c-cedilla / n-tilde is replaced by plain c / n.
#
# The argument itself is left untouched: String#to_s returns the receiver, so
# the result is built on a duplicate instead of editing the caller's string in
# place (which also made frozen strings fail).
def DropIntermediateAccents(str)
  hAccents = {
    /[áàâäã]\B/ => 'a',
    /[ÃÄÂÀÁ]\B/ => 'A',
    /[éèêë]\B/ => 'e',
    /[ËÉÈÊ]\B/ => 'E',
    /[íìîï]\B/ => 'i',
    /[ÍÎÌÏ]\B/ => 'I',
    /[óòôöõ]\B/ => 'o',
    /[ÕÖÔÒÓ]\B/ => 'O',
    /[úùûü]\B/ => 'u',
    /[ÚÛÙÜ]\B/ => 'U',
    /ç/ => 'c', /Ç/ => 'C',
    /ñ/ => 'n', /Ñ/ => 'N'
  }
  strRet = str.to_s.dup
  hAccents.each { |regAccented, strPlain| strRet.gsub!(regAccented, strPlain) }
  strRet
end
|
||||
|
||||
# Normalizes a piece of scraped text: surrounding whitespace is stripped,
# accents are reduced via DropIntermediateAccents, carriage returns and line
# feeds are removed, runs of two or more whitespace characters collapse into a
# single blank and typographic apostrophes become plain ones.
def GetPurifiedString(strText)
  strClean = DropIntermediateAccents(strText.strip)
  strClean = strClean.gsub(/\r|\n/, "")
  strClean = strClean.gsub(/\s{2,}/, " ")
  strClean = strClean.gsub("’", "'")
  strClean.strip
end
|
||||
|
||||
# Splits a details line into its "description, meaning, number" entries.
#
# The line is consumed from left to right: each regex match yields one
# DREAM_INFO (description, meaning and number filled in) and the search goes
# on in the text that follows the match.
#
# Entries containing punctuation or digits are reported on $stderr; an entry
# with an empty or purely numeric description/meaning aborts with
# RuntimeError "Critical error".
#
# Returns an Array of DREAM_INFO, or nil when nothing matched.
def ExtractSubsections(strSectionRaw)
  regEntry = /^\W*([^0-9;:,.-]+)[\s;:,.-]+([^0-9;:=-]+?)\b\W+(\d+)/
  regOnlyDigits = /^\d+$/
  regSuspicious = /[-.:,;@#§+\\!"£$%&()=?^°\[\]*0-9]/

  aFound = []
  strRest = strSectionRaw
  while (mEntry = regEntry.match(strRest))
    strDesc = GetPurifiedString(mEntry[1])
    strMeaning = GetPurifiedString(mEntry[2])
    diEntry = DREAM_INFO.new(strDesc, strMeaning, mEntry[3].to_i)
    aFound << diEntry

    bCritical = regOnlyDigits =~ diEntry.strMeaning || regOnlyDigits =~ diEntry.strDesc
    bCritical |= diEntry.strMeaning.empty? || diEntry.strDesc.empty?
    if bCritical || regSuspicious =~ diEntry.strMeaning
      $stderr.puts "M: " + diEntry.strMeaning
    end
    if bCritical || regSuspicious =~ diEntry.strDesc
      $stderr.puts "D: " + diEntry.strDesc
    end
    raise "Critical error" if bCritical

    # Continue right after the entry just consumed.
    strRest = strRest[mEntry.end(0)..-1]
  end

  aFound.empty? ? nil : aFound
end
|
||||
|
||||
# Parses one dictionary page and returns its entries.
#
# The XPath query selects the text nodes that follow or precede <br> elements
# inside the table with class "centraleUnico"
# (ref: http://stackoverflow.com/questions/1485356/how-to-get-xpath-of-text-between-br-or-br).
# Each text is then classified:
# * a details line ("...: ..., <number>") seen after an entry becomes that
#   entry's aSubDescs, parsed by ExtractSubsections;
# * otherwise it must be a head line ("<word> ... = <number>"), which starts a
#   new entry keyed by its first word; the key is echoed to standard output.
# Texts matching neither shape are skipped.
#
# strReferenceUrl:: stored in every created entry.
# strHtml::         page source handed to Nokogiri::HTML.
#
# Returns a Hash of description => DREAM_INFO.
# Raises RuntimeError "A new item was expected" when a details line shows up
# before any entry and does not look like a head line either.
def ExtractElementsFromHtml(strReferenceUrl, strHtml)
  strXPathBR = "//table[@class=\"centraleUnico\"]/br/following-sibling::text() | //table[@class=\"centraleUnico\"]//br/preceding-sibling::text()"
  regHead = /^(.+?)\b.*=\s*(\d+)/
  regDetails = /^.+?:.+?,\s*\d+/

  diCurrent = nil
  hRet = {}
  Nokogiri::HTML(strHtml).xpath(strXPathBR).each do |node|
    strText = GetPurifiedString(node.content)
    bIsHead = regHead =~ strText
    bIsDetails = regDetails =~ strText
    next unless bIsHead || bIsDetails

    if diCurrent && bIsDetails
      diCurrent.aSubDescs = ExtractSubsections(strText)
      next
    end

    mHead = regHead.match(strText)
    raise "A new item was expected" unless mHead
    diCurrent = DREAM_INFO.new(mHead[1], nil, mHead[2].to_i, nil, strReferenceUrl)
    hRet[diCurrent.strDesc] = diCurrent
    print diCurrent.strDesc + " "
  end

  hRet
end
|
||||
|
||||
# Entry point: downloads one dictionary page per initial letter, merges the
# entries extracted from each page and dumps the result as YAML next to the
# script.
aLetters = %w{a b c d e f g h i l m n o p q r s t u v z}

HOST_URL = "www.metropolino.com"
hElements = Hash.new
conn = Net::HTTP.new(HOST_URL, 80)
aLetters.each do |strLetter|
  strPageAddress = "/smorfia/interpretazione-dei-sogni-" + strLetter + ".asp"
  # Net::HTTP#get returns only the response object; the page source is its
  # body. (Destructuring the result into "response, body" leaves the body nil
  # on current Ruby versions, so no entry would ever be extracted.)
  response = conn.get(strPageAddress)
  if response.is_a? Net::HTTPSuccess
    hElements.merge! ExtractElementsFromHtml(HOST_URL + strPageAddress, response.body)
  else
    puts "Error while retrieving #{strPageAddress} (Error #{response.code}: #{response.message})"
    response.error!
  end
end
puts

# NOTE(review): the lookup script reads "cercasogno_diz.yml" while this writes
# "sogno_diz.yml" - confirm whether the rename is a deliberate manual step.
File.open(File.join($APP_PATH, "sogno_diz.yml"), "w") do |fDst|
  fDst.write(YAML::dump(hElements))
end
|
Loading…
Reference in a new issue